mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Introduce the stop words deletion update type
This commit is contained in:
parent
776673ebae
commit
a226fd23c3
@ -195,6 +195,14 @@ impl Index {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn stop_words_deletion(&self) -> update::StopWordsDeletion {
|
||||
update::StopWordsDeletion::new(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn current_update_id(&self, reader: &heed::RoTxn) -> MResult<Option<u64>> {
|
||||
match self.updates.last_update_id(reader)? {
|
||||
Some((id, _)) => Ok(Some(id)),
|
||||
|
@ -4,6 +4,7 @@ mod documents_addition;
|
||||
mod documents_deletion;
|
||||
mod schema_update;
|
||||
mod stop_words_addition;
|
||||
mod stop_words_deletion;
|
||||
mod synonyms_addition;
|
||||
mod synonyms_deletion;
|
||||
|
||||
@ -13,6 +14,7 @@ pub use self::documents_addition::{apply_documents_addition, DocumentsAddition};
|
||||
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
||||
pub use self::schema_update::{apply_schema_update, push_schema_update};
|
||||
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
|
||||
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
|
||||
pub use self::synonyms_addition::{apply_synonyms_addition, SynonymsAddition};
|
||||
pub use self::synonyms_deletion::{apply_synonyms_deletion, SynonymsDeletion};
|
||||
|
||||
@ -37,6 +39,7 @@ pub enum Update {
|
||||
SynonymsAddition(BTreeMap<String, Vec<String>>),
|
||||
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
|
||||
StopWordsAddition(BTreeSet<String>),
|
||||
StopWordsDeletion(BTreeSet<String>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@ -49,6 +52,7 @@ pub enum UpdateType {
|
||||
SynonymsAddition { number: usize },
|
||||
SynonymsDeletion { number: usize },
|
||||
StopWordsAddition { number: usize },
|
||||
StopWordsDeletion { number: usize },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@ -226,6 +230,25 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
||||
let result =
|
||||
apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
Update::StopWordsDeletion(stop_words) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::StopWordsDeletion {
|
||||
number: stop_words.len(),
|
||||
};
|
||||
|
||||
let result = apply_stop_words_deletion(
|
||||
writer,
|
||||
index.main,
|
||||
index.documents_fields,
|
||||
index.documents_fields_counts,
|
||||
index.postings_lists,
|
||||
index.docs_words,
|
||||
stop_words,
|
||||
);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
};
|
||||
|
@ -95,7 +95,7 @@ pub fn apply_stop_words_addition(
|
||||
main_store.put_words_fst(writer, &word_fst)?;
|
||||
}
|
||||
|
||||
// now we add all of these stop words to the main store
|
||||
// now we add all of these stop words from the main store
|
||||
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
|
||||
|
||||
let op = OpBuilder::new()
|
||||
|
112
meilidb-core/src/update/stop_words_deletion.rs
Normal file
112
meilidb-core/src/update/stop_words_deletion.rs
Normal file
@ -0,0 +1,112 @@
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
|
||||
use crate::automaton::normalize_str;
|
||||
use crate::update::documents_addition::reindex_all_documents;
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult};
|
||||
|
||||
pub struct StopWordsDeletion {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
stop_words: BTreeSet<String>,
|
||||
}
|
||||
|
||||
impl StopWordsDeletion {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
) -> StopWordsDeletion {
|
||||
StopWordsDeletion {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
stop_words: BTreeSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
|
||||
let stop_word = normalize_str(stop_word.as_ref());
|
||||
self.stop_words.insert(stop_word);
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(());
|
||||
let update_id = push_stop_words_deletion(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
self.stop_words,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_stop_words_deletion(
|
||||
writer: &mut heed::RwTxn,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::StopWordsDeletion(deletion);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_stop_words_deletion(
|
||||
writer: &mut heed::RwTxn,
|
||||
main_store: store::Main,
|
||||
documents_fields_store: store::DocumentsFields,
|
||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<()> {
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
|
||||
for word in deletion {
|
||||
stop_words_builder.insert(&word).unwrap();
|
||||
}
|
||||
|
||||
// create the new delta stop words fst
|
||||
let delta_stop_words = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
// now we delete all of these stop words from the main store
|
||||
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
|
||||
|
||||
let op = OpBuilder::new()
|
||||
.add(&stop_words_fst)
|
||||
.add(&delta_stop_words)
|
||||
.difference();
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
stop_words_builder.extend_stream(op).unwrap();
|
||||
let stop_words_fst = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
main_store.put_stop_words_fst(writer, &stop_words_fst)?;
|
||||
|
||||
// now that we have setup the stop words
|
||||
// lets reindex everything...
|
||||
reindex_all_documents(
|
||||
writer,
|
||||
main_store,
|
||||
documents_fields_store,
|
||||
documents_fields_counts_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
Loading…
Reference in New Issue
Block a user