mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-25 14:24:26 +01:00
fix the addition + deletion bug
This commit is contained in:
parent
d7ddf4925e
commit
4391cba6ca
22
Cargo.lock
generated
22
Cargo.lock
generated
@ -359,6 +359,15 @@ dependencies = [
|
|||||||
"backtrace",
|
"backtrace",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "arbitrary"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e"
|
||||||
|
dependencies = [
|
||||||
|
"derive_arbitrary",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "assert-json-diff"
|
name = "assert-json-diff"
|
||||||
version = "2.0.2"
|
version = "2.0.2"
|
||||||
@ -1096,6 +1105,17 @@ dependencies = [
|
|||||||
"syn 1.0.109",
|
"syn 1.0.109",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "derive_arbitrary"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f3cdeb9ec472d588e539a818b2dee436825730da08ad0017c4b1a17676bdc8b7"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 1.0.109",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "derive_builder"
|
name = "derive_builder"
|
||||||
version = "0.12.0"
|
version = "0.12.0"
|
||||||
@ -2711,6 +2731,7 @@ dependencies = [
|
|||||||
name = "milli"
|
name = "milli"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"arbitrary",
|
||||||
"big_s",
|
"big_s",
|
||||||
"bimap",
|
"bimap",
|
||||||
"bincode",
|
"bincode",
|
||||||
@ -2722,6 +2743,7 @@ dependencies = [
|
|||||||
"csv",
|
"csv",
|
||||||
"deserr",
|
"deserr",
|
||||||
"either",
|
"either",
|
||||||
|
"fastrand",
|
||||||
"filter-parser",
|
"filter-parser",
|
||||||
"flatten-serde-json",
|
"flatten-serde-json",
|
||||||
"fst",
|
"fst",
|
||||||
|
@ -0,0 +1,43 @@
|
|||||||
|
---
|
||||||
|
source: index-scheduler/src/lib.rs
|
||||||
|
---
|
||||||
|
### Autobatching Enabled = true
|
||||||
|
### Processing Tasks:
|
||||||
|
[]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### All Tasks:
|
||||||
|
0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||||
|
1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Status:
|
||||||
|
enqueued []
|
||||||
|
succeeded [0,1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Kind:
|
||||||
|
"documentAdditionOrUpdate" [0,]
|
||||||
|
"documentDeletion" [1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Tasks:
|
||||||
|
doggos [0,1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Mapper:
|
||||||
|
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Canceled By:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Enqueued At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
[timestamp] [1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Started At:
|
||||||
|
[timestamp] [0,1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Finished At:
|
||||||
|
[timestamp] [0,1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### File Store:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
|
@ -0,0 +1,9 @@
|
|||||||
|
---
|
||||||
|
source: index-scheduler/src/lib.rs
|
||||||
|
---
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"doggo": "bork"
|
||||||
|
}
|
||||||
|
]
|
@ -0,0 +1,37 @@
|
|||||||
|
---
|
||||||
|
source: index-scheduler/src/lib.rs
|
||||||
|
---
|
||||||
|
### Autobatching Enabled = true
|
||||||
|
### Processing Tasks:
|
||||||
|
[]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### All Tasks:
|
||||||
|
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Status:
|
||||||
|
enqueued [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Kind:
|
||||||
|
"documentAdditionOrUpdate" [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Tasks:
|
||||||
|
doggos [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Mapper:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Canceled By:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Enqueued At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Started At:
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Finished At:
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### File Store:
|
||||||
|
00000000-0000-0000-0000-000000000000
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
|
@ -0,0 +1,40 @@
|
|||||||
|
---
|
||||||
|
source: index-scheduler/src/lib.rs
|
||||||
|
---
|
||||||
|
### Autobatching Enabled = true
|
||||||
|
### Processing Tasks:
|
||||||
|
[]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### All Tasks:
|
||||||
|
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||||
|
1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Status:
|
||||||
|
enqueued [0,1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Kind:
|
||||||
|
"documentAdditionOrUpdate" [0,]
|
||||||
|
"documentDeletion" [1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Tasks:
|
||||||
|
doggos [0,1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Mapper:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Canceled By:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Enqueued At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
[timestamp] [1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Started At:
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Finished At:
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### File Store:
|
||||||
|
00000000-0000-0000-0000-000000000000
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
|
@ -0,0 +1,43 @@
|
|||||||
|
---
|
||||||
|
source: index-scheduler/src/lib.rs
|
||||||
|
---
|
||||||
|
### Autobatching Enabled = true
|
||||||
|
### Processing Tasks:
|
||||||
|
[]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### All Tasks:
|
||||||
|
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||||
|
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Status:
|
||||||
|
enqueued [1,]
|
||||||
|
failed [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Kind:
|
||||||
|
"documentAdditionOrUpdate" [1,]
|
||||||
|
"documentDeletion" [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Tasks:
|
||||||
|
doggos [0,1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Mapper:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Canceled By:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Enqueued At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
[timestamp] [1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Started At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Finished At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### File Store:
|
||||||
|
00000000-0000-0000-0000-000000000000
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
|
@ -0,0 +1,46 @@
|
|||||||
|
---
|
||||||
|
source: index-scheduler/src/lib.rs
|
||||||
|
---
|
||||||
|
### Autobatching Enabled = true
|
||||||
|
### Processing Tasks:
|
||||||
|
[]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### All Tasks:
|
||||||
|
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||||
|
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Status:
|
||||||
|
enqueued []
|
||||||
|
succeeded [1,]
|
||||||
|
failed [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Kind:
|
||||||
|
"documentAdditionOrUpdate" [1,]
|
||||||
|
"documentDeletion" [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Tasks:
|
||||||
|
doggos [0,1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Mapper:
|
||||||
|
doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} }
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Canceled By:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Enqueued At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
[timestamp] [1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Started At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
[timestamp] [1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Finished At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
[timestamp] [1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### File Store:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
|
@ -0,0 +1,17 @@
|
|||||||
|
---
|
||||||
|
source: index-scheduler/src/lib.rs
|
||||||
|
---
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"doggo": "jean bob"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"catto": "jorts"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"doggo": "bork"
|
||||||
|
}
|
||||||
|
]
|
@ -0,0 +1,36 @@
|
|||||||
|
---
|
||||||
|
source: index-scheduler/src/lib.rs
|
||||||
|
---
|
||||||
|
### Autobatching Enabled = true
|
||||||
|
### Processing Tasks:
|
||||||
|
[]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### All Tasks:
|
||||||
|
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Status:
|
||||||
|
enqueued [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Kind:
|
||||||
|
"documentDeletion" [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Tasks:
|
||||||
|
doggos [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Mapper:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Canceled By:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Enqueued At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Started At:
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Finished At:
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### File Store:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
|
@ -0,0 +1,40 @@
|
|||||||
|
---
|
||||||
|
source: index-scheduler/src/lib.rs
|
||||||
|
---
|
||||||
|
### Autobatching Enabled = true
|
||||||
|
### Processing Tasks:
|
||||||
|
[]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### All Tasks:
|
||||||
|
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||||
|
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Status:
|
||||||
|
enqueued [0,1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Kind:
|
||||||
|
"documentAdditionOrUpdate" [1,]
|
||||||
|
"documentDeletion" [0,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Tasks:
|
||||||
|
doggos [0,1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Index Mapper:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Canceled By:
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Enqueued At:
|
||||||
|
[timestamp] [0,]
|
||||||
|
[timestamp] [1,]
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Started At:
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### Finished At:
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
### File Store:
|
||||||
|
00000000-0000-0000-0000-000000000000
|
||||||
|
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
|
@ -56,6 +56,7 @@ itertools = "0.10.5"
|
|||||||
log = "0.4.17"
|
log = "0.4.17"
|
||||||
logging_timer = "1.1.0"
|
logging_timer = "1.1.0"
|
||||||
csv = "1.2.1"
|
csv = "1.2.1"
|
||||||
|
fastrand = "1.9.0"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
mimalloc = { version = "0.1.29", default-features = false }
|
mimalloc = { version = "0.1.29", default-features = false }
|
||||||
@ -64,12 +65,13 @@ insta = "1.29.0"
|
|||||||
maplit = "1.0.2"
|
maplit = "1.0.2"
|
||||||
md5 = "0.7.0"
|
md5 = "0.7.0"
|
||||||
rand = {version = "0.8.5", features = ["small_rng"] }
|
rand = {version = "0.8.5", features = ["small_rng"] }
|
||||||
|
arbitrary = { version = "1.3.0", features = ["derive"] }
|
||||||
|
|
||||||
[target.'cfg(fuzzing)'.dev-dependencies]
|
[target.'cfg(fuzzing)'.dev-dependencies]
|
||||||
fuzzcheck = "0.12.1"
|
fuzzcheck = "0.12.1"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
all-tokenizations = [ "charabia/default" ]
|
all-tokenizations = ["charabia/default"]
|
||||||
|
|
||||||
# Use POSIX semaphores instead of SysV semaphores in LMDB
|
# Use POSIX semaphores instead of SysV semaphores in LMDB
|
||||||
# For more information on this feature, see heed's Cargo.toml
|
# For more information on this feature, see heed's Cargo.toml
|
||||||
|
@ -111,7 +111,6 @@ pub enum Error {
|
|||||||
Io(#[from] io::Error),
|
Io(#[from] io::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
|
pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
|
||||||
let documents = match json {
|
let documents = match json {
|
||||||
object @ serde_json::Value::Object(_) => vec![object],
|
object @ serde_json::Value::Object(_) => vec![object],
|
||||||
@ -141,7 +140,6 @@ macro_rules! documents {
|
|||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub fn documents_batch_reader_from_objects(
|
pub fn documents_batch_reader_from_objects(
|
||||||
objects: impl IntoIterator<Item = Object>,
|
objects: impl IntoIterator<Item = Object>,
|
||||||
) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
|
) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
|
||||||
|
@ -198,6 +198,7 @@ where
|
|||||||
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||||
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
|
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
|
||||||
}
|
}
|
||||||
|
|
||||||
let output = self
|
let output = self
|
||||||
.transform
|
.transform
|
||||||
.take()
|
.take()
|
||||||
@ -220,6 +221,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
let indexed_documents = output.documents_count as u64;
|
let indexed_documents = output.documents_count as u64;
|
||||||
|
|
||||||
let number_of_documents = self.execute_raw(output)?;
|
let number_of_documents = self.execute_raw(output)?;
|
||||||
|
|
||||||
Ok(DocumentAdditionResult { indexed_documents, number_of_documents })
|
Ok(DocumentAdditionResult { indexed_documents, number_of_documents })
|
||||||
@ -236,7 +238,7 @@ where
|
|||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
field_distribution,
|
field_distribution,
|
||||||
mut external_documents_ids,
|
new_external_documents_ids,
|
||||||
new_documents_ids,
|
new_documents_ids,
|
||||||
replaced_documents_ids,
|
replaced_documents_ids,
|
||||||
documents_count,
|
documents_count,
|
||||||
@ -363,9 +365,6 @@ where
|
|||||||
deletion_builder.delete_documents(&replaced_documents_ids);
|
deletion_builder.delete_documents(&replaced_documents_ids);
|
||||||
let deleted_documents_result = deletion_builder.execute_inner()?;
|
let deleted_documents_result = deletion_builder.execute_inner()?;
|
||||||
debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
|
debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
|
||||||
if !deleted_documents_result.soft_deletion_used {
|
|
||||||
external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let index_documents_ids = self.index.documents_ids(self.wtxn)?;
|
let index_documents_ids = self.index.documents_ids(self.wtxn)?;
|
||||||
@ -445,6 +444,9 @@ where
|
|||||||
self.index.put_primary_key(self.wtxn, &primary_key)?;
|
self.index.put_primary_key(self.wtxn, &primary_key)?;
|
||||||
|
|
||||||
// We write the external documents ids into the main database.
|
// We write the external documents ids into the main database.
|
||||||
|
let mut external_documents_ids = self.index.external_documents_ids(self.wtxn)?;
|
||||||
|
external_documents_ids.insert_ids(&new_external_documents_ids)?;
|
||||||
|
let external_documents_ids = external_documents_ids.into_static();
|
||||||
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
|
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
|
||||||
|
|
||||||
let all_documents_ids = index_documents_ids | new_documents_ids;
|
let all_documents_ids = index_documents_ids | new_documents_ids;
|
||||||
@ -2515,4 +2517,170 @@ mod tests {
|
|||||||
db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
|
db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
|
||||||
db_snap!(index, docid_word_positions, 3, @"5287245332627675740b28bd46e1cde1");
|
db_snap!(index, docid_word_positions, 3, @"5287245332627675740b28bd46e1cde1");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn reproduce_the_bug() {
|
||||||
|
/*
|
||||||
|
[milli/examples/fuzz.rs:69] &batches = [
|
||||||
|
Batch(
|
||||||
|
[
|
||||||
|
AddDoc(
|
||||||
|
{ "id": 1, "doggo": "bernese" }, => internal 0
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
Batch(
|
||||||
|
[
|
||||||
|
DeleteDoc(
|
||||||
|
1, => delete internal 0
|
||||||
|
),
|
||||||
|
AddDoc(
|
||||||
|
{ "id": 0, "catto": "jorts" }, => internal 1
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
Batch(
|
||||||
|
[
|
||||||
|
AddDoc(
|
||||||
|
{ "id": 1, "catto": "jorts" }, => internal 2
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
]
|
||||||
|
*/
|
||||||
|
let mut index = TempIndex::new();
|
||||||
|
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
|
||||||
|
|
||||||
|
// START OF BATCH
|
||||||
|
|
||||||
|
println!("--- ENTERING BATCH 1");
|
||||||
|
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
|
||||||
|
let builder = IndexDocuments::new(
|
||||||
|
&mut wtxn,
|
||||||
|
&index,
|
||||||
|
&index.indexer_config,
|
||||||
|
index.index_documents_config.clone(),
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// OP
|
||||||
|
|
||||||
|
let documents = documents!([
|
||||||
|
{ "id": 1, "doggo": "bernese" },
|
||||||
|
]);
|
||||||
|
let (builder, added) = builder.add_documents(documents).unwrap();
|
||||||
|
insta::assert_display_snapshot!(added.unwrap(), @"1");
|
||||||
|
|
||||||
|
// FINISHING
|
||||||
|
let addition = builder.execute().unwrap();
|
||||||
|
insta::assert_debug_snapshot!(addition, @r###"
|
||||||
|
DocumentAdditionResult {
|
||||||
|
indexed_documents: 1,
|
||||||
|
number_of_documents: 1,
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, documents, @r###"
|
||||||
|
{"id":1,"doggo":"bernese"}
|
||||||
|
"###);
|
||||||
|
db_snap!(index, external_documents_ids, @r###"
|
||||||
|
soft:
|
||||||
|
hard:
|
||||||
|
1 0
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// A first batch of documents has been inserted
|
||||||
|
|
||||||
|
// BATCH 2
|
||||||
|
|
||||||
|
println!("--- ENTERING BATCH 2");
|
||||||
|
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
|
||||||
|
let builder = IndexDocuments::new(
|
||||||
|
&mut wtxn,
|
||||||
|
&index,
|
||||||
|
&index.indexer_config,
|
||||||
|
index.index_documents_config.clone(),
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap();
|
||||||
|
insta::assert_display_snapshot!(removed.unwrap(), @"1");
|
||||||
|
|
||||||
|
let documents = documents!([
|
||||||
|
{ "id": 0, "catto": "jorts" },
|
||||||
|
]);
|
||||||
|
let (builder, added) = builder.add_documents(documents).unwrap();
|
||||||
|
insta::assert_display_snapshot!(added.unwrap(), @"1");
|
||||||
|
|
||||||
|
let addition = builder.execute().unwrap();
|
||||||
|
insta::assert_debug_snapshot!(addition, @r###"
|
||||||
|
DocumentAdditionResult {
|
||||||
|
indexed_documents: 1,
|
||||||
|
number_of_documents: 1,
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, documents, @r###"
|
||||||
|
{"id":0,"catto":"jorts"}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
db_snap!(index, external_documents_ids, @r###"
|
||||||
|
soft:
|
||||||
|
hard:
|
||||||
|
0 1
|
||||||
|
"###);
|
||||||
|
|
||||||
|
db_snap!(index, soft_deleted_documents_ids, @"[]");
|
||||||
|
|
||||||
|
// BATCH 3
|
||||||
|
|
||||||
|
println!("--- ENTERING BATCH 3");
|
||||||
|
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
|
||||||
|
let builder = IndexDocuments::new(
|
||||||
|
&mut wtxn,
|
||||||
|
&index,
|
||||||
|
&index.indexer_config,
|
||||||
|
index.index_documents_config.clone(),
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let documents = documents!([
|
||||||
|
{ "id": 1, "catto": "jorts" },
|
||||||
|
]);
|
||||||
|
let (builder, added) = builder.add_documents(documents).unwrap();
|
||||||
|
insta::assert_display_snapshot!(added.unwrap(), @"1");
|
||||||
|
|
||||||
|
let addition = builder.execute().unwrap();
|
||||||
|
insta::assert_debug_snapshot!(addition, @r###"
|
||||||
|
DocumentAdditionResult {
|
||||||
|
indexed_documents: 1,
|
||||||
|
number_of_documents: 2,
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, documents, @r###"
|
||||||
|
{"id":1,"catto":"jorts"}
|
||||||
|
{"id":0,"catto":"jorts"}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Ensuring all the returned IDs actually exist
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let res = index.search(&rtxn).execute().unwrap();
|
||||||
|
index.documents(&rtxn, res.documents_ids).unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -21,15 +21,14 @@ use crate::error::{Error, InternalError, UserError};
|
|||||||
use crate::index::{db_name, main_key};
|
use crate::index::{db_name, main_key};
|
||||||
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
|
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
|
||||||
use crate::{
|
use crate::{
|
||||||
ExternalDocumentsIds, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index,
|
FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
|
||||||
Result, BEU32,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct TransformOutput {
|
pub struct TransformOutput {
|
||||||
pub primary_key: String,
|
pub primary_key: String,
|
||||||
pub fields_ids_map: FieldsIdsMap,
|
pub fields_ids_map: FieldsIdsMap,
|
||||||
pub field_distribution: FieldDistribution,
|
pub field_distribution: FieldDistribution,
|
||||||
pub external_documents_ids: ExternalDocumentsIds<'static>,
|
pub new_external_documents_ids: fst::Map<Cow<'static, [u8]>>,
|
||||||
pub new_documents_ids: RoaringBitmap,
|
pub new_documents_ids: RoaringBitmap,
|
||||||
pub replaced_documents_ids: RoaringBitmap,
|
pub replaced_documents_ids: RoaringBitmap,
|
||||||
pub documents_count: usize,
|
pub documents_count: usize,
|
||||||
@ -58,8 +57,8 @@ pub struct Transform<'a, 'i> {
|
|||||||
original_sorter: grenad::Sorter<MergeFn>,
|
original_sorter: grenad::Sorter<MergeFn>,
|
||||||
flattened_sorter: grenad::Sorter<MergeFn>,
|
flattened_sorter: grenad::Sorter<MergeFn>,
|
||||||
|
|
||||||
replaced_documents_ids: RoaringBitmap,
|
pub replaced_documents_ids: RoaringBitmap,
|
||||||
new_documents_ids: RoaringBitmap,
|
pub new_documents_ids: RoaringBitmap,
|
||||||
// To increase the cache locality and decrease the heap usage we use compact smartstring.
|
// To increase the cache locality and decrease the heap usage we use compact smartstring.
|
||||||
new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>,
|
new_external_documents_ids_builder: FxHashMap<SmartString<smartstring::Compact>, u64>,
|
||||||
documents_count: usize,
|
documents_count: usize,
|
||||||
@ -568,8 +567,6 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
}))?
|
}))?
|
||||||
.to_string();
|
.to_string();
|
||||||
|
|
||||||
let mut external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
|
||||||
|
|
||||||
// We create a final writer to write the new documents in order from the sorter.
|
// We create a final writer to write the new documents in order from the sorter.
|
||||||
let mut writer = create_writer(
|
let mut writer = create_writer(
|
||||||
self.indexer_settings.chunk_compression_type,
|
self.indexer_settings.chunk_compression_type,
|
||||||
@ -651,13 +648,14 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
fst_new_external_documents_ids_builder.insert(key, value)
|
fst_new_external_documents_ids_builder.insert(key, value)
|
||||||
})?;
|
})?;
|
||||||
let new_external_documents_ids = fst_new_external_documents_ids_builder.into_map();
|
let new_external_documents_ids = fst_new_external_documents_ids_builder.into_map();
|
||||||
external_documents_ids.insert_ids(&new_external_documents_ids)?;
|
|
||||||
|
|
||||||
Ok(TransformOutput {
|
Ok(TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map: self.fields_ids_map,
|
fields_ids_map: self.fields_ids_map,
|
||||||
field_distribution,
|
field_distribution,
|
||||||
external_documents_ids: external_documents_ids.into_static(),
|
new_external_documents_ids: new_external_documents_ids
|
||||||
|
.map_data(|c| Cow::Owned(c))
|
||||||
|
.unwrap(),
|
||||||
new_documents_ids: self.new_documents_ids,
|
new_documents_ids: self.new_documents_ids,
|
||||||
replaced_documents_ids: self.replaced_documents_ids,
|
replaced_documents_ids: self.replaced_documents_ids,
|
||||||
documents_count: self.documents_count,
|
documents_count: self.documents_count,
|
||||||
@ -691,7 +689,8 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
let new_external_documents_ids = {
|
let new_external_documents_ids = {
|
||||||
let mut external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
let mut external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
||||||
external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
|
external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
|
||||||
external_documents_ids
|
// It is safe to take only the hard document IDs: `delete_soft_deleted_documents_ids_from_fsts` was just called above.
|
||||||
|
external_documents_ids.into_static().hard
|
||||||
};
|
};
|
||||||
|
|
||||||
let documents_ids = self.index.documents_ids(wtxn)?;
|
let documents_ids = self.index.documents_ids(wtxn)?;
|
||||||
@ -776,7 +775,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map: new_fields_ids_map,
|
fields_ids_map: new_fields_ids_map,
|
||||||
field_distribution,
|
field_distribution,
|
||||||
external_documents_ids: new_external_documents_ids.into_static(),
|
new_external_documents_ids,
|
||||||
new_documents_ids: documents_ids,
|
new_documents_ids: documents_ids,
|
||||||
replaced_documents_ids: RoaringBitmap::default(),
|
replaced_documents_ids: RoaringBitmap::default(),
|
||||||
documents_count,
|
documents_count,
|
||||||
|
Loading…
Reference in New Issue
Block a user