From ac12a4b9c93434d79dfea8405d77be1d1b6fe78e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Mon, 28 Oct 2019 20:40:33 +0100
Subject: [PATCH] Make documents additions accept only the last duplicate
 document

---
 meilidb-core/src/error.rs                     |  2 --
 meilidb-core/src/update/documents_addition.rs | 22 +++++++------------
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/meilidb-core/src/error.rs b/meilidb-core/src/error.rs
index 7dca7c994..3fb4d199c 100644
--- a/meilidb-core/src/error.rs
+++ b/meilidb-core/src/error.rs
@@ -12,7 +12,6 @@ pub enum Error {
     SchemaMissing,
     WordIndexMissing,
     MissingDocumentId,
-    DuplicateDocument,
     Zlmdb(heed::Error),
     Fst(fst::Error),
     SerdeJson(SerdeJsonError),
@@ -80,7 +79,6 @@ impl fmt::Display for Error {
             SchemaMissing => write!(f, "this index does not have a schema"),
             WordIndexMissing => write!(f, "this index does not have a word index"),
             MissingDocumentId => write!(f, "document id is missing"),
-            DuplicateDocument => write!(f, "update contains documents with the same id"),
             Zlmdb(e) => write!(f, "heed error; {}", e),
             Fst(e) => write!(f, "fst error; {}", e),
             SerdeJson(e) => write!(f, "serde json error; {}", e),
diff --git a/meilidb-core/src/update/documents_addition.rs b/meilidb-core/src/update/documents_addition.rs
index 1973ad903..79387600c 100644
--- a/meilidb-core/src/update/documents_addition.rs
+++ b/meilidb-core/src/update/documents_addition.rs
@@ -1,4 +1,4 @@
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
 
 use fst::{set::OpBuilder, SetBuilder};
 use sdset::{duo::Union, SetOperation};
@@ -86,7 +86,7 @@ pub fn apply_documents_addition(
     docs_words_store: store::DocsWords,
     addition: Vec<serde_json::Value>,
 ) -> MResult<()> {
-    let mut documents_ids = HashSet::new();
+    let mut documents_additions = HashMap::new();
     let mut indexer = RawIndexer::new();
 
     let schema = match main_store.schema(writer)? {
@@ -97,19 +97,18 @@
     let identifier = schema.identifier_name();
 
     // 1. store documents ids for future deletion
-    for document in addition.iter() {
+    for document in addition {
         let document_id = match extract_document_id(identifier, &document)? {
             Some(id) => id,
             None => return Err(Error::MissingDocumentId),
         };
 
-        if !documents_ids.insert(document_id) {
-            return Err(Error::DuplicateDocument);
-        }
+        documents_additions.insert(document_id, document);
     }
 
     // 2. remove the documents posting lists
-    let number_of_inserted_documents = documents_ids.len();
+    let number_of_inserted_documents = documents_additions.len();
+    let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect();
     apply_documents_deletion(
         writer,
         main_store,
@@ -117,7 +116,7 @@
         documents_fields_counts_store,
         postings_lists_store,
         docs_words_store,
-        documents_ids.into_iter().collect(),
+        documents_ids,
     )?;
 
     let mut ranked_map = match main_store.ranked_map(writer)? {
@@ -126,12 +125,7 @@
     };
 
    // 3. index the documents fields in the stores
-    for document in addition {
-        let document_id = match extract_document_id(identifier, &document)? {
-            Some(id) => id,
-            None => return Err(Error::MissingDocumentId),
-        };
-
+    for (document_id, document) in documents_additions {
         let serializer = Serializer {
             txn: writer,
             schema: &schema,
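
Note: the change above relies on HashMap's last-write-wins insert semantics:
when two documents in the same addition share an id, the later insert simply
replaces the earlier entry instead of raising the now-removed
DuplicateDocument error. A minimal standalone sketch of that behavior (the
document id and payload strings here are invented for illustration):

    use std::collections::HashMap;

    fn main() {
        let mut documents_additions = HashMap::new();

        // Inserting the same document id twice: `insert` returns the
        // previous value and keeps only the most recent one in the map.
        documents_additions.insert(42, "first version");
        let replaced = documents_additions.insert(42, "last version");

        assert_eq!(replaced, Some("first version"));
        assert_eq!(documents_additions.len(), 1);
        assert_eq!(documents_additions[&42], "last version");
    }

A side effect worth noting: number_of_inserted_documents now counts unique
document ids rather than raw payloads, which keeps the deletion pass in
step 2 consistent with the entries that step 3 actually indexes.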