mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00
Rewrite the stop-words endpoint; fix #417
This commit is contained in:
parent
f0590d3301
commit
91c6539baf
6 changed files with 135 additions and 184 deletions
|
@ -287,16 +287,8 @@ impl Index {
|
|||
)
|
||||
}
|
||||
|
||||
pub fn stop_words_addition(&self) -> update::StopWordsAddition {
|
||||
update::StopWordsAddition::new(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn stop_words_deletion(&self) -> update::StopWordsDeletion {
|
||||
update::StopWordsDeletion::new(
|
||||
pub fn stop_words_update(&self) -> update::StopWordsUpdate {
|
||||
update::StopWordsUpdate::new(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
|
|
|
@ -3,8 +3,7 @@ mod customs_update;
|
|||
mod documents_addition;
|
||||
mod documents_deletion;
|
||||
mod schema_update;
|
||||
mod stop_words_addition;
|
||||
mod stop_words_deletion;
|
||||
mod stop_words_update;
|
||||
mod synonyms_update;
|
||||
|
||||
pub use self::clear_all::{apply_clear_all, push_clear_all};
|
||||
|
@ -14,8 +13,7 @@ pub use self::documents_addition::{
|
|||
};
|
||||
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
||||
pub use self::schema_update::{apply_schema_update, push_schema_update};
|
||||
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
|
||||
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
|
||||
pub use self::stop_words_update::{apply_stop_words_update, StopWordsUpdate};
|
||||
pub use self::synonyms_update::{apply_synonyms_update, SynonymsUpdate};
|
||||
|
||||
use std::cmp;
|
||||
|
@ -89,16 +87,9 @@ impl Update {
|
|||
}
|
||||
}
|
||||
|
||||
fn stop_words_addition(data: BTreeSet<String>) -> Update {
|
||||
fn stop_words_update(data: BTreeSet<String>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::StopWordsAddition(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn stop_words_deletion(data: BTreeSet<String>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::StopWordsDeletion(data),
|
||||
data: UpdateData::StopWordsUpdate(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
@ -113,8 +104,7 @@ pub enum UpdateData {
|
|||
DocumentsPartial(Vec<HashMap<String, serde_json::Value>>),
|
||||
DocumentsDeletion(Vec<DocumentId>),
|
||||
SynonymsUpdate(BTreeMap<String, Vec<String>>),
|
||||
StopWordsAddition(BTreeSet<String>),
|
||||
StopWordsDeletion(BTreeSet<String>),
|
||||
StopWordsUpdate(BTreeSet<String>),
|
||||
}
|
||||
|
||||
impl UpdateData {
|
||||
|
@ -135,11 +125,8 @@ impl UpdateData {
|
|||
UpdateData::SynonymsUpdate(addition) => UpdateType::SynonymsUpdate {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::StopWordsAddition(addition) => UpdateType::StopWordsAddition {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::StopWordsDeletion(deletion) => UpdateType::StopWordsDeletion {
|
||||
number: deletion.len(),
|
||||
UpdateData::StopWordsUpdate(update) => UpdateType::StopWordsUpdate {
|
||||
number: update.len(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -155,8 +142,7 @@ pub enum UpdateType {
|
|||
DocumentsPartial { number: usize },
|
||||
DocumentsDeletion { number: usize },
|
||||
SynonymsUpdate { number: usize },
|
||||
StopWordsAddition { number: usize },
|
||||
StopWordsDeletion { number: usize },
|
||||
StopWordsUpdate { number: usize },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
@ -321,22 +307,10 @@ pub fn update_task<'a, 'b>(
|
|||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::StopWordsAddition(stop_words) => {
|
||||
UpdateData::StopWordsUpdate(stop_words) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::StopWordsAddition {
|
||||
number: stop_words.len(),
|
||||
};
|
||||
|
||||
let result =
|
||||
apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::StopWordsDeletion(stop_words) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::StopWordsDeletion {
|
||||
let update_type = UpdateType::StopWordsUpdate {
|
||||
number: stop_words.len(),
|
||||
};
|
||||
|
||||
|
|
|
@ -1,107 +1 @@
|
|||
use std::collections::BTreeSet;
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::automaton::normalize_str;
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::update::documents_addition::reindex_all_documents;
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult};
|
||||
|
||||
pub struct StopWordsDeletion {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
stop_words: BTreeSet<String>,
|
||||
}
|
||||
|
||||
impl StopWordsDeletion {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> StopWordsDeletion {
|
||||
StopWordsDeletion {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
stop_words: BTreeSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
|
||||
let stop_word = normalize_str(stop_word.as_ref());
|
||||
self.stop_words.insert(stop_word);
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_stop_words_deletion(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
self.stop_words,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_stop_words_deletion(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::stop_words_deletion(deletion);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_stop_words_deletion(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<()> {
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
|
||||
for word in deletion {
|
||||
stop_words_builder.insert(&word).unwrap();
|
||||
}
|
||||
|
||||
// create the new delta stop words fst
|
||||
let delta_stop_words = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
// now we delete all of these stop words from the main store
|
||||
let stop_words_fst = index.main.stop_words_fst(writer)?.unwrap_or_default();
|
||||
|
||||
let op = OpBuilder::new()
|
||||
.add(&stop_words_fst)
|
||||
.add(&delta_stop_words)
|
||||
.difference();
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
stop_words_builder.extend_stream(op).unwrap();
|
||||
let stop_words_fst = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
index.main.put_stop_words_fst(writer, &stop_words_fst)?;
|
||||
|
||||
// now that we have setup the stop words
|
||||
// lets reindex everything...
|
||||
if let Ok(number) = index.main.number_of_documents(writer) {
|
||||
if number > 0 {
|
||||
reindex_all_documents(writer, index)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -2,26 +2,27 @@ use std::collections::BTreeSet;
|
|||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::automaton::normalize_str;
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::update::documents_addition::reindex_all_documents;
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult};
|
||||
|
||||
pub struct StopWordsAddition {
|
||||
pub struct StopWordsUpdate {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
stop_words: BTreeSet<String>,
|
||||
}
|
||||
|
||||
impl StopWordsAddition {
|
||||
impl StopWordsUpdate {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> StopWordsAddition {
|
||||
StopWordsAddition {
|
||||
) -> StopWordsUpdate {
|
||||
StopWordsUpdate {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
|
@ -36,7 +37,7 @@ impl StopWordsAddition {
|
|||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_stop_words_addition(
|
||||
let update_id = push_stop_words_update(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
|
@ -46,21 +47,64 @@ impl StopWordsAddition {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn push_stop_words_addition(
|
||||
pub fn push_stop_words_update(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
addition: BTreeSet<String>,
|
||||
update: BTreeSet<String>,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::stop_words_addition(addition);
|
||||
let update = Update::stop_words_update(update);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_stop_words_addition(
|
||||
pub fn apply_stop_words_update(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
main_store: store::Main,
|
||||
documents_fields_store: store::DocumentsFields,
|
||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
stop_words: BTreeSet<String>,
|
||||
) -> MResult<()> {
|
||||
|
||||
let old_stop_words: BTreeSet<String> = main_store
|
||||
.stop_words_fst(writer)?
|
||||
.unwrap_or_default()
|
||||
.stream()
|
||||
.into_strs().unwrap().into_iter().collect();
|
||||
|
||||
let deletion: BTreeSet<String> = old_stop_words.clone().difference(&stop_words).cloned().collect();
|
||||
let addition: BTreeSet<String> = stop_words.clone().difference(&old_stop_words).cloned().collect();
|
||||
|
||||
if !addition.is_empty() {
|
||||
apply_stop_words_addition(
|
||||
writer,
|
||||
main_store,
|
||||
postings_lists_store,
|
||||
addition
|
||||
)?;
|
||||
}
|
||||
|
||||
if !deletion.is_empty() {
|
||||
apply_stop_words_deletion(
|
||||
writer,
|
||||
main_store,
|
||||
documents_fields_store,
|
||||
documents_fields_counts_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
deletion
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn apply_stop_words_addition(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
main_store: store::Main,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
|
@ -116,3 +160,59 @@ pub fn apply_stop_words_addition(
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn apply_stop_words_deletion(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
main_store: store::Main,
|
||||
documents_fields_store: store::DocumentsFields,
|
||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<()> {
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
|
||||
for word in deletion {
|
||||
stop_words_builder.insert(&word).unwrap();
|
||||
}
|
||||
|
||||
// create the new delta stop words fst
|
||||
let delta_stop_words = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
// now we delete all of these stop words from the main store
|
||||
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
|
||||
|
||||
let op = OpBuilder::new()
|
||||
.add(&stop_words_fst)
|
||||
.add(&delta_stop_words)
|
||||
.difference();
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
stop_words_builder.extend_stream(op).unwrap();
|
||||
let stop_words_fst = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
main_store.put_stop_words_fst(writer, &stop_words_fst)?;
|
||||
|
||||
// now that we have setup the stop words
|
||||
// lets reindex everything...
|
||||
if let Ok(number) = main_store.number_of_documents(writer) {
|
||||
if number > 0 {
|
||||
reindex_all_documents(
|
||||
writer,
|
||||
main_store,
|
||||
documents_fields_store,
|
||||
documents_fields_counts_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue