From b0be06540aacd6611a762d30fba273edf28b0f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 22 Aug 2019 14:18:50 +0200 Subject: [PATCH] chore: Simplify the update application --- .../src/database/documents_addition.rs | 173 +++++++-------- .../src/database/documents_deletion.rs | 209 ++++++++---------- meilidb-data/src/database/index/mod.rs | 38 ++-- meilidb-data/src/database/mod.rs | 13 +- .../src/database/synonyms_addition.rs | 135 +++++------ .../src/database/synonyms_deletion.rs | 179 +++++++-------- 6 files changed, 336 insertions(+), 411 deletions(-) diff --git a/meilidb-data/src/database/documents_addition.rs b/meilidb-data/src/database/documents_addition.rs index 3942ee92b..d78e309b7 100644 --- a/meilidb-data/src/database/documents_addition.rs +++ b/meilidb-data/src/database/documents_addition.rs @@ -1,15 +1,15 @@ use std::collections::HashSet; use std::sync::Arc; -use meilidb_core::DocumentId; use fst::{SetBuilder, set::OpBuilder}; use sdset::{SetOperation, duo::Union}; +use serde::Serialize; use crate::indexer::Indexer; use crate::serde::{extract_document_id, Serializer, RamDocumentStore}; use crate::RankedMap; -use super::{Error, Index, FinalDocumentsDeletion}; +use super::{Error, Index, apply_documents_deletion}; use super::index::Cache; pub struct DocumentsAddition<'a, D> { @@ -33,123 +33,106 @@ impl<'a, D> DocumentsAddition<'a, D> { } } -pub struct FinalDocumentsAddition<'a> { - inner: &'a Index, - document_ids: HashSet, - document_store: RamDocumentStore, - indexer: Indexer, - ranked_map: RankedMap, -} +pub fn apply_documents_addition( + index: &Index, + mut ranked_map: RankedMap, + addition: Vec, +) -> Result<(), Error> +{ + let mut document_ids = HashSet::new(); + let mut document_store = RamDocumentStore::new(); + let mut indexer = Indexer::new(); -impl<'a> FinalDocumentsAddition<'a> { - pub fn new(inner: &'a Index, ranked_map: RankedMap) -> FinalDocumentsAddition<'a> { - FinalDocumentsAddition { - inner, - document_ids: HashSet::new(), - document_store: RamDocumentStore::new(), - indexer: Indexer::new(), - ranked_map, - } - } - - pub fn update_document(&mut self, document: D) -> Result<(), Error> - where D: serde::Serialize, - { - let schema = &self.inner.schema(); - let identifier = schema.identifier_name(); + let schema = &index.schema(); + let identifier = schema.identifier_name(); + for document in addition { let document_id = match extract_document_id(identifier, &document)? { Some(id) => id, None => return Err(Error::MissingDocumentId), }; // 1. store the document id for future deletion - self.document_ids.insert(document_id); + document_ids.insert(document_id); // 2. index the document fields in ram stores let serializer = Serializer { schema, - document_store: &mut self.document_store, - indexer: &mut self.indexer, - ranked_map: &mut self.ranked_map, + document_store: &mut document_store, + indexer: &mut indexer, + ranked_map: &mut ranked_map, document_id, }; document.serialize(serializer)?; - - Ok(()) } - pub fn finalize(self) -> Result<(), Error> { - let ref_index = self.inner.as_ref(); - let docs_words = ref_index.docs_words_index; - let documents = ref_index.documents_index; - let main = ref_index.main_index; - let words = ref_index.words_index; + let ref_index = index.as_ref(); + let docs_words = ref_index.docs_words_index; + let documents = ref_index.documents_index; + let main = ref_index.main_index; + let words = ref_index.words_index; - // 1. remove the previous documents match indexes - let mut documents_deletion = FinalDocumentsDeletion::new(self.inner, self.ranked_map.clone()); - documents_deletion.extend(self.document_ids); - documents_deletion.finalize()?; + // 1. remove the previous documents match indexes + let document_ids = document_ids.into_iter().collect(); + apply_documents_deletion(index, ranked_map.clone(), document_ids)?; - // 2. insert new document attributes in the database - for ((id, attr), value) in self.document_store.into_inner() { - documents.set_document_field(id, attr, value)?; - } + // 2. insert new document attributes in the database + for ((id, attr), value) in document_store.into_inner() { + documents.set_document_field(id, attr, value)?; + } - let indexed = self.indexer.build(); - let mut delta_words_builder = SetBuilder::memory(); + let indexed = indexer.build(); + let mut delta_words_builder = SetBuilder::memory(); - for (word, delta_set) in indexed.words_doc_indexes { - delta_words_builder.insert(&word).unwrap(); + for (word, delta_set) in indexed.words_doc_indexes { + delta_words_builder.insert(&word).unwrap(); - let set = match words.doc_indexes(&word)? { - Some(set) => Union::new(&set, &delta_set).into_set_buf(), - None => delta_set, - }; - - words.set_doc_indexes(&word, &set)?; - } - - for (id, words) in indexed.docs_words { - docs_words.set_doc_words(id, &words)?; - } - - let delta_words = delta_words_builder - .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); - - let words = match main.words_set()? { - Some(words) => { - let op = OpBuilder::new() - .add(words.stream()) - .add(delta_words.stream()) - .r#union(); - - let mut words_builder = SetBuilder::memory(); - words_builder.extend_stream(op).unwrap(); - words_builder - .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap() - }, - None => delta_words, + let set = match words.doc_indexes(&word)? { + Some(set) => Union::new(&set, &delta_set).into_set_buf(), + None => delta_set, }; - main.set_words_set(&words)?; - main.set_ranked_map(&self.ranked_map)?; - - // update the "consistent" view of the Index - let cache = ref_index.cache; - let words = Arc::new(words); - let ranked_map = self.ranked_map; - let synonyms = cache.synonyms.clone(); - let schema = cache.schema.clone(); - - let cache = Cache { words, synonyms, schema, ranked_map }; - self.inner.cache.store(Arc::new(cache)); - - Ok(()) + words.set_doc_indexes(&word, &set)?; } + + for (id, words) in indexed.docs_words { + docs_words.set_doc_words(id, &words)?; + } + + let delta_words = delta_words_builder + .into_inner() + .and_then(fst::Set::from_bytes) + .unwrap(); + + let words = match main.words_set()? { + Some(words) => { + let op = OpBuilder::new() + .add(words.stream()) + .add(delta_words.stream()) + .r#union(); + + let mut words_builder = SetBuilder::memory(); + words_builder.extend_stream(op).unwrap(); + words_builder + .into_inner() + .and_then(fst::Set::from_bytes) + .unwrap() + }, + None => delta_words, + }; + + main.set_words_set(&words)?; + main.set_ranked_map(&ranked_map)?; + + // update the "consistent" view of the Index + let cache = ref_index.cache; + let words = Arc::new(words); + let synonyms = cache.synonyms.clone(); + let schema = cache.schema.clone(); + + let cache = Cache { words, synonyms, schema, ranked_map }; + index.cache.store(Arc::new(cache)); + + Ok(()) } diff --git a/meilidb-data/src/database/documents_deletion.rs b/meilidb-data/src/database/documents_deletion.rs index 51a7a9a70..107b8914c 100644 --- a/meilidb-data/src/database/documents_deletion.rs +++ b/meilidb-data/src/database/documents_deletion.rs @@ -21,10 +21,25 @@ impl<'a> DocumentsDeletion<'a> { DocumentsDeletion { index, documents: Vec::new() } } - pub fn delete_document(&mut self, document_id: DocumentId) { + pub fn delete_document_by_id(&mut self, document_id: DocumentId) { self.documents.push(document_id); } + pub fn delete_document(&mut self, document: D) -> Result<(), Error> + where D: serde::Serialize, + { + let schema = self.index.schema(); + let identifier = schema.identifier_name(); + let document_id = match extract_document_id(identifier, &document)? { + Some(id) => id, + None => return Err(Error::MissingDocumentId), + }; + + self.delete_document_by_id(document_id); + + Ok(()) + } + pub fn finalize(self) -> Result { self.index.push_documents_deletion(self.documents) } @@ -36,129 +51,95 @@ impl Extend for DocumentsDeletion<'_> { } } -pub struct FinalDocumentsDeletion<'a> { - inner: &'a Index, - documents: Vec, - ranked_map: RankedMap, -} +pub fn apply_documents_deletion( + index: &Index, + mut ranked_map: RankedMap, + deletion: Vec, +) -> Result<(), Error> +{ + let ref_index = index.as_ref(); + let schema = index.schema(); + let docs_words = ref_index.docs_words_index; + let documents = ref_index.documents_index; + let main = ref_index.main_index; + let words = ref_index.words_index; -impl<'a> FinalDocumentsDeletion<'a> { - pub fn new(inner: &'a Index, ranked_map: RankedMap) -> FinalDocumentsDeletion { - FinalDocumentsDeletion { inner, documents: Vec::new(), ranked_map } - } + let idset = SetBuf::from_dirty(deletion); - fn delete_document_by_id(&mut self, id: DocumentId) { - self.documents.push(id); - } + // collect the ranked attributes according to the schema + let ranked_attrs: Vec<_> = schema.iter() + .filter_map(|(_, attr, prop)| { + if prop.is_ranked() { Some(attr) } else { None } + }) + .collect(); - pub fn delete_document(&mut self, document: D) -> Result<(), Error> - where D: serde::Serialize, - { - let schema = &self.inner.schema(); - let identifier = schema.identifier_name(); + let mut words_document_ids = HashMap::new(); + for id in idset { + // remove all the ranked attributes from the ranked_map + for ranked_attr in &ranked_attrs { + ranked_map.remove(id, *ranked_attr); + } - let document_id = match extract_document_id(identifier, &document)? { - Some(id) => id, - None => return Err(Error::MissingDocumentId), - }; - - self.delete_document_by_id(document_id); - - Ok(()) - } - - pub fn finalize(mut self) -> Result<(), Error> { - let ref_index = self.inner.as_ref(); - let schema = self.inner.schema(); - let docs_words = ref_index.docs_words_index; - let documents = ref_index.documents_index; - let main = ref_index.main_index; - let words = ref_index.words_index; - - let idset = SetBuf::from_dirty(self.documents); - - // collect the ranked attributes according to the schema - let ranked_attrs: Vec<_> = schema.iter() - .filter_map(|(_, attr, prop)| { - if prop.is_ranked() { Some(attr) } else { None } - }) - .collect(); - - let mut words_document_ids = HashMap::new(); - for id in idset { - // remove all the ranked attributes from the ranked_map - for ranked_attr in &ranked_attrs { - self.ranked_map.remove(id, *ranked_attr); + if let Some(words) = docs_words.doc_words(id)? { + let mut stream = words.stream(); + while let Some(word) = stream.next() { + let word = word.to_vec(); + words_document_ids.entry(word).or_insert_with(Vec::new).push(id); } + } + } - if let Some(words) = docs_words.doc_words(id)? { - let mut stream = words.stream(); - while let Some(word) = stream.next() { - let word = word.to_vec(); - words_document_ids.entry(word).or_insert_with(Vec::new).push(id); - } + let mut removed_words = BTreeSet::new(); + for (word, document_ids) in words_document_ids { + let document_ids = SetBuf::from_dirty(document_ids); + + if let Some(doc_indexes) = words.doc_indexes(&word)? { + let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id); + let doc_indexes = op.into_set_buf(); + + if !doc_indexes.is_empty() { + words.set_doc_indexes(&word, &doc_indexes)?; + } else { + words.del_doc_indexes(&word)?; + removed_words.insert(word); } } - let mut removed_words = BTreeSet::new(); - for (word, document_ids) in words_document_ids { - let document_ids = SetBuf::from_dirty(document_ids); - - if let Some(doc_indexes) = words.doc_indexes(&word)? { - let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id); - let doc_indexes = op.into_set_buf(); - - if !doc_indexes.is_empty() { - words.set_doc_indexes(&word, &doc_indexes)?; - } else { - words.del_doc_indexes(&word)?; - removed_words.insert(word); - } - } - - for id in document_ids { - documents.del_all_document_fields(id)?; - docs_words.del_doc_words(id)?; - } + for id in document_ids { + documents.del_all_document_fields(id)?; + docs_words.del_doc_words(id)?; } - - let removed_words = fst::Set::from_iter(removed_words).unwrap(); - let words = match main.words_set()? { - Some(words_set) => { - let op = fst::set::OpBuilder::new() - .add(words_set.stream()) - .add(removed_words.stream()) - .difference(); - - let mut words_builder = SetBuilder::memory(); - words_builder.extend_stream(op).unwrap(); - words_builder - .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap() - }, - None => fst::Set::default(), - }; - - main.set_words_set(&words)?; - main.set_ranked_map(&self.ranked_map)?; - - // update the "consistent" view of the Index - let cache = ref_index.cache; - let words = Arc::new(words); - let ranked_map = self.ranked_map; - let synonyms = cache.synonyms.clone(); - let schema = cache.schema.clone(); - - let cache = Cache { words, synonyms, schema, ranked_map }; - self.inner.cache.store(Arc::new(cache)); - - Ok(()) } -} -impl Extend for FinalDocumentsDeletion<'_> { - fn extend>(&mut self, iter: T) { - self.documents.extend(iter) - } + let removed_words = fst::Set::from_iter(removed_words).unwrap(); + let words = match main.words_set()? { + Some(words_set) => { + let op = fst::set::OpBuilder::new() + .add(words_set.stream()) + .add(removed_words.stream()) + .difference(); + + let mut words_builder = SetBuilder::memory(); + words_builder.extend_stream(op).unwrap(); + words_builder + .into_inner() + .and_then(fst::Set::from_bytes) + .unwrap() + }, + None => fst::Set::default(), + }; + + main.set_words_set(&words)?; + main.set_ranked_map(&ranked_map)?; + + // update the "consistent" view of the Index + let cache = ref_index.cache; + let words = Arc::new(words); + let synonyms = cache.synonyms.clone(); + let schema = cache.schema.clone(); + + let cache = Cache { words, synonyms, schema, ranked_map }; + index.cache.store(Arc::new(cache)); + + Ok(()) } diff --git a/meilidb-data/src/database/index/mod.rs b/meilidb-data/src/database/index/mod.rs index 8a979eb67..3b3d553b6 100644 --- a/meilidb-data/src/database/index/mod.rs +++ b/meilidb-data/src/database/index/mod.rs @@ -1,5 +1,4 @@ use std::collections::{HashSet, BTreeMap}; -use std::convert::TryInto; use std::sync::Arc; use std::thread; @@ -24,10 +23,10 @@ use self::synonyms_index::SynonymsIndex; use self::words_index::WordsIndex; use super::{ - DocumentsAddition, FinalDocumentsAddition, - DocumentsDeletion, FinalDocumentsDeletion, - SynonymsAddition, FinalSynonymsAddition, - SynonymsDeletion, FinalSynonymsDeletion, + DocumentsAddition, DocumentsDeletion, + SynonymsAddition, SynonymsDeletion, + apply_documents_addition, apply_documents_deletion, + apply_synonyms_addition, apply_synonyms_deletion, }; mod custom_settings_index; @@ -71,33 +70,23 @@ fn spawn_update_system(index: Index) -> thread::JoinHandle<()> { let results = &index.updates_results_index; (updates, results).transaction(|(updates, results)| { let update = updates.remove(&key)?.unwrap(); - let array_id = key.as_ref().try_into().unwrap(); - let id = u64::from_be_bytes(array_id); // this is an emulation of the try block (#31436) let result: Result<(), Error> = (|| { match bincode::deserialize(&update)? { UpdateOwned::DocumentsAddition(documents) => { let ranked_map = index.cache.load().ranked_map.clone(); - let mut addition = FinalDocumentsAddition::new(&index, ranked_map); - for document in documents { - addition.update_document(document)?; - } - addition.finalize()?; + apply_documents_addition(&index, ranked_map, documents)?; }, UpdateOwned::DocumentsDeletion(documents) => { let ranked_map = index.cache.load().ranked_map.clone(); - let mut deletion = FinalDocumentsDeletion::new(&index, ranked_map); - deletion.extend(documents); - deletion.finalize()?; + apply_documents_deletion(&index, ranked_map, documents)?; }, UpdateOwned::SynonymsAddition(synonyms) => { - let addition = FinalSynonymsAddition::from_map(&index, synonyms); - addition.finalize()?; + apply_synonyms_addition(&index, synonyms)?; }, UpdateOwned::SynonymsDeletion(synonyms) => { - let deletion = FinalSynonymsDeletion::from_map(&index, synonyms); - deletion.finalize()?; + apply_synonyms_deletion(&index, synonyms)?; }, } Ok(()) @@ -105,7 +94,7 @@ fn spawn_update_system(index: Index) -> thread::JoinHandle<()> { let result = result.map_err(|e| e.to_string()); let value = bincode::serialize(&result).unwrap(); - results.insert(&array_id, value) + results.insert(&key, value) }) .unwrap(); } @@ -310,7 +299,12 @@ impl Index { self.raw_push_update(update) } - pub(crate) fn push_documents_deletion(&self, deletion: Vec) -> Result { + pub(crate) fn push_documents_deletion( + &self, + deletion: Vec, + ) -> Result + { + let deletion = Update::<()>::DocumentsDeletion(deletion); let update = bincode::serialize(&deletion)?; self.raw_push_update(update) } @@ -320,6 +314,7 @@ impl Index { addition: BTreeMap>, ) -> Result { + let addition = Update::<()>::SynonymsAddition(addition); let update = bincode::serialize(&addition)?; self.raw_push_update(update) } @@ -329,6 +324,7 @@ impl Index { deletion: BTreeMap>>, ) -> Result { + let deletion = Update::<()>::SynonymsDeletion(deletion); let update = bincode::serialize(&deletion)?; self.raw_push_update(update) } diff --git a/meilidb-data/src/database/mod.rs b/meilidb-data/src/database/mod.rs index 2125763c7..3a35adbc9 100644 --- a/meilidb-data/src/database/mod.rs +++ b/meilidb-data/src/database/mod.rs @@ -15,10 +15,15 @@ mod synonyms_deletion; pub use self::error::Error; pub use self::index::{Index, CustomSettingsIndex}; -use self::documents_addition::{DocumentsAddition, FinalDocumentsAddition}; -use self::documents_deletion::{DocumentsDeletion, FinalDocumentsDeletion}; -use self::synonyms_addition::{SynonymsAddition, FinalSynonymsAddition}; -use self::synonyms_deletion::{SynonymsDeletion, FinalSynonymsDeletion}; +pub use self::documents_addition::DocumentsAddition; +pub use self::documents_deletion::DocumentsDeletion; +pub use self::synonyms_addition::SynonymsAddition; +pub use self::synonyms_deletion::SynonymsDeletion; + +use self::documents_addition::apply_documents_addition; +use self::documents_deletion::apply_documents_deletion; +use self::synonyms_addition::apply_synonyms_addition; +use self::synonyms_deletion::apply_synonyms_deletion; fn load_indexes(tree: &sled::Tree) -> Result, Error> { match tree.get("indexes")? { diff --git a/meilidb-data/src/database/synonyms_addition.rs b/meilidb-data/src/database/synonyms_addition.rs index 69d1c9a9c..03c904bc2 100644 --- a/meilidb-data/src/database/synonyms_addition.rs +++ b/meilidb-data/src/database/synonyms_addition.rs @@ -21,10 +21,10 @@ impl<'a> SynonymsAddition<'a> { pub fn add_synonym(&mut self, synonym: S, alternatives: I) where S: AsRef, T: AsRef, - I: Iterator, + I: IntoIterator, { let synonym = normalize_str(synonym.as_ref()); - let alternatives = alternatives.map(|s| s.as_ref().to_lowercase()); + let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase()); self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives); } @@ -33,87 +33,62 @@ impl<'a> SynonymsAddition<'a> { } } -pub struct FinalSynonymsAddition<'a> { - inner: &'a Index, - synonyms: BTreeMap>, -} +pub fn apply_synonyms_addition( + index: &Index, + addition: BTreeMap>, +) -> Result<(), Error> +{ + let ref_index = index.as_ref(); + let synonyms = ref_index.synonyms_index; + let main = ref_index.main_index; -impl<'a> FinalSynonymsAddition<'a> { - pub fn new(inner: &'a Index) -> FinalSynonymsAddition<'a> { - FinalSynonymsAddition { inner, synonyms: BTreeMap::new() } - } + let mut synonyms_builder = SetBuilder::memory(); - pub fn from_map( - inner: &'a Index, - synonyms: BTreeMap>, - ) -> FinalSynonymsAddition<'a> - { - FinalSynonymsAddition { inner, synonyms } - } + for (synonym, alternatives) in addition { + synonyms_builder.insert(&synonym).unwrap(); - pub fn add_synonym(&mut self, synonym: S, alternatives: I) - where S: AsRef, - T: AsRef, - I: IntoIterator, - { - let synonym = normalize_str(synonym.as_ref()); - let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase()); - self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives); - } - - pub fn finalize(self) -> Result<(), Error> { - let ref_index = self.inner.as_ref(); - let synonyms = ref_index.synonyms_index; - let main = ref_index.main_index; - - let mut synonyms_builder = SetBuilder::memory(); - - for (synonym, alternatives) in self.synonyms { - synonyms_builder.insert(&synonym).unwrap(); - - let alternatives = { - let alternatives = SetBuf::from_dirty(alternatives); - let mut alternatives_builder = SetBuilder::memory(); - alternatives_builder.extend_iter(alternatives).unwrap(); - alternatives_builder.into_inner().unwrap() - }; - synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?; - } - - let delta_synonyms = synonyms_builder - .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); - - let synonyms = match main.synonyms_set()? { - Some(synonyms) => { - let op = OpBuilder::new() - .add(synonyms.stream()) - .add(delta_synonyms.stream()) - .r#union(); - - let mut synonyms_builder = SetBuilder::memory(); - synonyms_builder.extend_stream(op).unwrap(); - synonyms_builder - .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap() - }, - None => delta_synonyms, + let alternatives = { + let alternatives = SetBuf::from_dirty(alternatives); + let mut alternatives_builder = SetBuilder::memory(); + alternatives_builder.extend_iter(alternatives).unwrap(); + alternatives_builder.into_inner().unwrap() }; - - main.set_synonyms_set(&synonyms)?; - - // update the "consistent" view of the Index - let cache = ref_index.cache; - let words = Arc::new(main.words_set()?.unwrap_or_default()); - let ranked_map = cache.ranked_map.clone(); - let synonyms = Arc::new(synonyms); - let schema = cache.schema.clone(); - - let cache = Cache { words, synonyms, schema, ranked_map }; - self.inner.cache.store(Arc::new(cache)); - - Ok(()) + synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?; } + + let delta_synonyms = synonyms_builder + .into_inner() + .and_then(fst::Set::from_bytes) + .unwrap(); + + let synonyms = match main.synonyms_set()? { + Some(synonyms) => { + let op = OpBuilder::new() + .add(synonyms.stream()) + .add(delta_synonyms.stream()) + .r#union(); + + let mut synonyms_builder = SetBuilder::memory(); + synonyms_builder.extend_stream(op).unwrap(); + synonyms_builder + .into_inner() + .and_then(fst::Set::from_bytes) + .unwrap() + }, + None => delta_synonyms, + }; + + main.set_synonyms_set(&synonyms)?; + + // update the "consistent" view of the Index + let cache = ref_index.cache; + let words = Arc::new(main.words_set()?.unwrap_or_default()); + let ranked_map = cache.ranked_map.clone(); + let synonyms = Arc::new(synonyms); + let schema = cache.schema.clone(); + + let cache = Cache { words, synonyms, schema, ranked_map }; + index.cache.store(Arc::new(cache)); + + Ok(()) } diff --git a/meilidb-data/src/database/synonyms_deletion.rs b/meilidb-data/src/database/synonyms_deletion.rs index ee4db7970..824c0596c 100644 --- a/meilidb-data/src/database/synonyms_deletion.rs +++ b/meilidb-data/src/database/synonyms_deletion.rs @@ -43,110 +43,95 @@ impl<'a> SynonymsDeletion<'a> { } } -pub struct FinalSynonymsDeletion<'a> { - inner: &'a Index, - synonyms: BTreeMap>>, -} +pub fn apply_synonyms_deletion( + index: &Index, + deletion: BTreeMap>>, +) -> Result<(), Error> +{ + let ref_index = index.as_ref(); + let synonyms = ref_index.synonyms_index; + let main = ref_index.main_index; -impl<'a> FinalSynonymsDeletion<'a> { - pub fn new(inner: &'a Index) -> FinalSynonymsDeletion<'a> { - FinalSynonymsDeletion { inner, synonyms: BTreeMap::new() } - } + let mut delete_whole_synonym_builder = SetBuilder::memory(); - pub fn from_map( - inner: &'a Index, - synonyms: BTreeMap>>, - ) -> FinalSynonymsDeletion<'a> - { - FinalSynonymsDeletion { inner, synonyms } - } + for (synonym, alternatives) in deletion { + match alternatives { + Some(alternatives) => { + let prev_alternatives = synonyms.alternatives_to(synonym.as_bytes())?; + let prev_alternatives = match prev_alternatives { + Some(alternatives) => alternatives, + None => continue, + }; - pub fn finalize(self) -> Result<(), Error> { - let ref_index = self.inner.as_ref(); - let synonyms = ref_index.synonyms_index; - let main = ref_index.main_index; + let delta_alternatives = { + let alternatives = SetBuf::from_dirty(alternatives); + let mut builder = SetBuilder::memory(); + builder.extend_iter(alternatives).unwrap(); + builder.into_inner() + .and_then(fst::Set::from_bytes) + .unwrap() + }; - let mut delete_whole_synonym_builder = SetBuilder::memory(); - - for (synonym, alternatives) in self.synonyms { - match alternatives { - Some(alternatives) => { - let prev_alternatives = synonyms.alternatives_to(synonym.as_bytes())?; - let prev_alternatives = match prev_alternatives { - Some(alternatives) => alternatives, - None => continue, - }; - - let delta_alternatives = { - let alternatives = SetBuf::from_dirty(alternatives); - let mut builder = SetBuilder::memory(); - builder.extend_iter(alternatives).unwrap(); - builder.into_inner() - .and_then(fst::Set::from_bytes) - .unwrap() - }; - - let op = OpBuilder::new() - .add(prev_alternatives.stream()) - .add(delta_alternatives.stream()) - .difference(); - - let (alternatives, empty_alternatives) = { - let mut builder = SetBuilder::memory(); - let len = builder.get_ref().len(); - builder.extend_stream(op).unwrap(); - let is_empty = len == builder.get_ref().len(); - let alternatives = builder.into_inner().unwrap(); - (alternatives, is_empty) - }; - - if empty_alternatives { - delete_whole_synonym_builder.insert(synonym.as_bytes())?; - } else { - synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?; - } - }, - None => { - delete_whole_synonym_builder.insert(&synonym).unwrap(); - synonyms.del_alternatives_of(synonym.as_bytes())?; - } - } - } - - let delta_synonyms = delete_whole_synonym_builder - .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); - - let synonyms = match main.synonyms_set()? { - Some(synonyms) => { let op = OpBuilder::new() - .add(synonyms.stream()) - .add(delta_synonyms.stream()) + .add(prev_alternatives.stream()) + .add(delta_alternatives.stream()) .difference(); - let mut synonyms_builder = SetBuilder::memory(); - synonyms_builder.extend_stream(op).unwrap(); - synonyms_builder - .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap() + let (alternatives, empty_alternatives) = { + let mut builder = SetBuilder::memory(); + let len = builder.get_ref().len(); + builder.extend_stream(op).unwrap(); + let is_empty = len == builder.get_ref().len(); + let alternatives = builder.into_inner().unwrap(); + (alternatives, is_empty) + }; + + if empty_alternatives { + delete_whole_synonym_builder.insert(synonym.as_bytes())?; + } else { + synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?; + } }, - None => fst::Set::default(), - }; - - main.set_synonyms_set(&synonyms)?; - - // update the "consistent" view of the Index - let cache = ref_index.cache; - let words = Arc::new(main.words_set()?.unwrap_or_default()); - let ranked_map = cache.ranked_map.clone(); - let synonyms = Arc::new(synonyms); - let schema = cache.schema.clone(); - - let cache = Cache { words, synonyms, schema, ranked_map }; - self.inner.cache.store(Arc::new(cache)); - - Ok(()) + None => { + delete_whole_synonym_builder.insert(&synonym).unwrap(); + synonyms.del_alternatives_of(synonym.as_bytes())?; + } + } } + + let delta_synonyms = delete_whole_synonym_builder + .into_inner() + .and_then(fst::Set::from_bytes) + .unwrap(); + + let synonyms = match main.synonyms_set()? { + Some(synonyms) => { + let op = OpBuilder::new() + .add(synonyms.stream()) + .add(delta_synonyms.stream()) + .difference(); + + let mut synonyms_builder = SetBuilder::memory(); + synonyms_builder.extend_stream(op).unwrap(); + synonyms_builder + .into_inner() + .and_then(fst::Set::from_bytes) + .unwrap() + }, + None => fst::Set::default(), + }; + + main.set_synonyms_set(&synonyms)?; + + // update the "consistent" view of the Index + let cache = ref_index.cache; + let words = Arc::new(main.words_set()?.unwrap_or_default()); + let ranked_map = cache.ranked_map.clone(); + let synonyms = Arc::new(synonyms); + let schema = cache.schema.clone(); + + let cache = Cache { words, synonyms, schema, ranked_map }; + index.cache.store(Arc::new(cache)); + + Ok(()) }