From 91c6539bafb1f86c4450d68dbe07194cd20afacc Mon Sep 17 00:00:00 2001 From: qdequele Date: Thu, 2 Jan 2020 16:30:34 +0100 Subject: [PATCH] Rewrite the stop-words endpoint; fix #417 --- meilisearch-core/src/store/mod.rs | 12 +- meilisearch-core/src/update/mod.rs | 46 ++----- .../src/update/stop_words_deletion.rs | 106 ---------------- ...words_addition.rs => stop_words_update.rs} | 120 ++++++++++++++++-- meilisearch-http/src/routes/mod.rs | 16 +-- meilisearch-http/src/routes/stop_words.rs | 19 +-- 6 files changed, 135 insertions(+), 184 deletions(-) rename meilisearch-core/src/update/{stop_words_addition.rs => stop_words_update.rs} (50%) diff --git a/meilisearch-core/src/store/mod.rs b/meilisearch-core/src/store/mod.rs index 488e6d6a4..37dfb8ada 100644 --- a/meilisearch-core/src/store/mod.rs +++ b/meilisearch-core/src/store/mod.rs @@ -287,16 +287,8 @@ impl Index { ) } - pub fn stop_words_addition(&self) -> update::StopWordsAddition { - update::StopWordsAddition::new( - self.updates, - self.updates_results, - self.updates_notifier.clone(), - ) - } - - pub fn stop_words_deletion(&self) -> update::StopWordsDeletion { - update::StopWordsDeletion::new( + pub fn stop_words_update(&self) -> update::StopWordsUpdate { + update::StopWordsUpdate::new( self.updates, self.updates_results, self.updates_notifier.clone(), diff --git a/meilisearch-core/src/update/mod.rs b/meilisearch-core/src/update/mod.rs index 47df4bf0a..04639a459 100644 --- a/meilisearch-core/src/update/mod.rs +++ b/meilisearch-core/src/update/mod.rs @@ -3,8 +3,7 @@ mod customs_update; mod documents_addition; mod documents_deletion; mod schema_update; -mod stop_words_addition; -mod stop_words_deletion; +mod stop_words_update; mod synonyms_update; pub use self::clear_all::{apply_clear_all, push_clear_all}; @@ -14,8 +13,7 @@ pub use self::documents_addition::{ }; pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion}; pub use self::schema_update::{apply_schema_update, push_schema_update}; -pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition}; -pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion}; +pub use self::stop_words_update::{apply_stop_words_update, StopWordsUpdate}; pub use self::synonyms_update::{apply_synonyms_update, SynonymsUpdate}; use std::cmp; @@ -89,16 +87,9 @@ impl Update { } } - fn stop_words_addition(data: BTreeSet) -> Update { + fn stop_words_update(data: BTreeSet) -> Update { Update { - data: UpdateData::StopWordsAddition(data), - enqueued_at: Utc::now(), - } - } - - fn stop_words_deletion(data: BTreeSet) -> Update { - Update { - data: UpdateData::StopWordsDeletion(data), + data: UpdateData::StopWordsUpdate(data), enqueued_at: Utc::now(), } } @@ -113,8 +104,7 @@ pub enum UpdateData { DocumentsPartial(Vec>), DocumentsDeletion(Vec), SynonymsUpdate(BTreeMap>), - StopWordsAddition(BTreeSet), - StopWordsDeletion(BTreeSet), + StopWordsUpdate(BTreeSet), } impl UpdateData { @@ -135,11 +125,8 @@ impl UpdateData { UpdateData::SynonymsUpdate(addition) => UpdateType::SynonymsUpdate { number: addition.len(), }, - UpdateData::StopWordsAddition(addition) => UpdateType::StopWordsAddition { - number: addition.len(), - }, - UpdateData::StopWordsDeletion(deletion) => UpdateType::StopWordsDeletion { - number: deletion.len(), + UpdateData::StopWordsUpdate(update) => UpdateType::StopWordsUpdate { + number: update.len(), }, } } @@ -155,8 +142,7 @@ pub enum UpdateType { DocumentsPartial { number: usize }, DocumentsDeletion { number: usize }, SynonymsUpdate { number: usize }, - StopWordsAddition { number: usize }, - StopWordsDeletion { number: usize }, + StopWordsUpdate { number: usize }, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -321,22 +307,10 @@ pub fn update_task<'a, 'b>( (update_type, result, start.elapsed()) } - UpdateData::StopWordsAddition(stop_words) => { + UpdateData::StopWordsUpdate(stop_words) => { let start = Instant::now(); - let update_type = UpdateType::StopWordsAddition { - number: stop_words.len(), - }; - - let result = - apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words); - - (update_type, result, start.elapsed()) - } - UpdateData::StopWordsDeletion(stop_words) => { - let start = Instant::now(); - - let update_type = UpdateType::StopWordsDeletion { + let update_type = UpdateType::StopWordsUpdate { number: stop_words.len(), }; diff --git a/meilisearch-core/src/update/stop_words_deletion.rs b/meilisearch-core/src/update/stop_words_deletion.rs index 39af132ce..8b1378917 100644 --- a/meilisearch-core/src/update/stop_words_deletion.rs +++ b/meilisearch-core/src/update/stop_words_deletion.rs @@ -1,107 +1 @@ -use std::collections::BTreeSet; -use fst::{set::OpBuilder, SetBuilder}; - -use crate::database::{MainT, UpdateT}; -use crate::automaton::normalize_str; -use crate::database::{UpdateEvent, UpdateEventsEmitter}; -use crate::update::documents_addition::reindex_all_documents; -use crate::update::{next_update_id, Update}; -use crate::{store, MResult}; - -pub struct StopWordsDeletion { - updates_store: store::Updates, - updates_results_store: store::UpdatesResults, - updates_notifier: UpdateEventsEmitter, - stop_words: BTreeSet, -} - -impl StopWordsDeletion { - pub fn new( - updates_store: store::Updates, - updates_results_store: store::UpdatesResults, - updates_notifier: UpdateEventsEmitter, - ) -> StopWordsDeletion { - StopWordsDeletion { - updates_store, - updates_results_store, - updates_notifier, - stop_words: BTreeSet::new(), - } - } - - pub fn delete_stop_word>(&mut self, stop_word: S) { - let stop_word = normalize_str(stop_word.as_ref()); - self.stop_words.insert(stop_word); - } - - pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult { - let _ = self.updates_notifier.send(UpdateEvent::NewUpdate); - let update_id = push_stop_words_deletion( - writer, - self.updates_store, - self.updates_results_store, - self.stop_words, - )?; - Ok(update_id) - } -} - -pub fn push_stop_words_deletion( - writer: &mut heed::RwTxn, - updates_store: store::Updates, - updates_results_store: store::UpdatesResults, - deletion: BTreeSet, -) -> MResult { - let last_update_id = next_update_id(writer, updates_store, updates_results_store)?; - - let update = Update::stop_words_deletion(deletion); - updates_store.put_update(writer, last_update_id, &update)?; - - Ok(last_update_id) -} - -pub fn apply_stop_words_deletion( - writer: &mut heed::RwTxn, - index: &store::Index, - deletion: BTreeSet, -) -> MResult<()> { - let mut stop_words_builder = SetBuilder::memory(); - - for word in deletion { - stop_words_builder.insert(&word).unwrap(); - } - - // create the new delta stop words fst - let delta_stop_words = stop_words_builder - .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); - - // now we delete all of these stop words from the main store - let stop_words_fst = index.main.stop_words_fst(writer)?.unwrap_or_default(); - - let op = OpBuilder::new() - .add(&stop_words_fst) - .add(&delta_stop_words) - .difference(); - - let mut stop_words_builder = SetBuilder::memory(); - stop_words_builder.extend_stream(op).unwrap(); - let stop_words_fst = stop_words_builder - .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); - - index.main.put_stop_words_fst(writer, &stop_words_fst)?; - - // now that we have setup the stop words - // lets reindex everything... - if let Ok(number) = index.main.number_of_documents(writer) { - if number > 0 { - reindex_all_documents(writer, index)?; - } - } - - Ok(()) -} diff --git a/meilisearch-core/src/update/stop_words_addition.rs b/meilisearch-core/src/update/stop_words_update.rs similarity index 50% rename from meilisearch-core/src/update/stop_words_addition.rs rename to meilisearch-core/src/update/stop_words_update.rs index 536854586..0ff0aa631 100644 --- a/meilisearch-core/src/update/stop_words_addition.rs +++ b/meilisearch-core/src/update/stop_words_update.rs @@ -2,26 +2,27 @@ use std::collections::BTreeSet; use fst::{set::OpBuilder, SetBuilder}; -use crate::database::{MainT, UpdateT}; use crate::automaton::normalize_str; +use crate::database::{MainT, UpdateT}; use crate::database::{UpdateEvent, UpdateEventsEmitter}; +use crate::update::documents_addition::reindex_all_documents; use crate::update::{next_update_id, Update}; use crate::{store, MResult}; -pub struct StopWordsAddition { +pub struct StopWordsUpdate { updates_store: store::Updates, updates_results_store: store::UpdatesResults, updates_notifier: UpdateEventsEmitter, stop_words: BTreeSet, } -impl StopWordsAddition { +impl StopWordsUpdate { pub fn new( updates_store: store::Updates, updates_results_store: store::UpdatesResults, updates_notifier: UpdateEventsEmitter, - ) -> StopWordsAddition { - StopWordsAddition { + ) -> StopWordsUpdate { + StopWordsUpdate { updates_store, updates_results_store, updates_notifier, @@ -36,7 +37,7 @@ impl StopWordsAddition { pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult { let _ = self.updates_notifier.send(UpdateEvent::NewUpdate); - let update_id = push_stop_words_addition( + let update_id = push_stop_words_update( writer, self.updates_store, self.updates_results_store, @@ -46,21 +47,64 @@ impl StopWordsAddition { } } -pub fn push_stop_words_addition( +pub fn push_stop_words_update( writer: &mut heed::RwTxn, updates_store: store::Updates, updates_results_store: store::UpdatesResults, - addition: BTreeSet, + update: BTreeSet, ) -> MResult { let last_update_id = next_update_id(writer, updates_store, updates_results_store)?; - let update = Update::stop_words_addition(addition); + let update = Update::stop_words_update(update); updates_store.put_update(writer, last_update_id, &update)?; Ok(last_update_id) } -pub fn apply_stop_words_addition( +pub fn apply_stop_words_update( + writer: &mut heed::RwTxn, + main_store: store::Main, + documents_fields_store: store::DocumentsFields, + documents_fields_counts_store: store::DocumentsFieldsCounts, + postings_lists_store: store::PostingsLists, + docs_words_store: store::DocsWords, + stop_words: BTreeSet, +) -> MResult<()> { + + let old_stop_words: BTreeSet = main_store + .stop_words_fst(writer)? + .unwrap_or_default() + .stream() + .into_strs().unwrap().into_iter().collect(); + + let deletion: BTreeSet = old_stop_words.clone().difference(&stop_words).cloned().collect(); + let addition: BTreeSet = stop_words.clone().difference(&old_stop_words).cloned().collect(); + + if !addition.is_empty() { + apply_stop_words_addition( + writer, + main_store, + postings_lists_store, + addition + )?; + } + + if !deletion.is_empty() { + apply_stop_words_deletion( + writer, + main_store, + documents_fields_store, + documents_fields_counts_store, + postings_lists_store, + docs_words_store, + deletion + )?; + } + + Ok(()) +} + +fn apply_stop_words_addition( writer: &mut heed::RwTxn, main_store: store::Main, postings_lists_store: store::PostingsLists, @@ -116,3 +160,59 @@ pub fn apply_stop_words_addition( Ok(()) } + +fn apply_stop_words_deletion( + writer: &mut heed::RwTxn, + main_store: store::Main, + documents_fields_store: store::DocumentsFields, + documents_fields_counts_store: store::DocumentsFieldsCounts, + postings_lists_store: store::PostingsLists, + docs_words_store: store::DocsWords, + deletion: BTreeSet, +) -> MResult<()> { + let mut stop_words_builder = SetBuilder::memory(); + + for word in deletion { + stop_words_builder.insert(&word).unwrap(); + } + + // create the new delta stop words fst + let delta_stop_words = stop_words_builder + .into_inner() + .and_then(fst::Set::from_bytes) + .unwrap(); + + // now we delete all of these stop words from the main store + let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default(); + + let op = OpBuilder::new() + .add(&stop_words_fst) + .add(&delta_stop_words) + .difference(); + + let mut stop_words_builder = SetBuilder::memory(); + stop_words_builder.extend_stream(op).unwrap(); + let stop_words_fst = stop_words_builder + .into_inner() + .and_then(fst::Set::from_bytes) + .unwrap(); + + main_store.put_stop_words_fst(writer, &stop_words_fst)?; + + // now that we have setup the stop words + // lets reindex everything... + if let Ok(number) = main_store.number_of_documents(writer) { + if number > 0 { + reindex_all_documents( + writer, + main_store, + documents_fields_store, + documents_fields_counts_store, + postings_lists_store, + docs_words_store, + )?; + } + } + + Ok(()) +} diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index e27d2887b..d15982350 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -80,16 +80,12 @@ pub fn load_routes(app: &mut tide::App) { .get(synonym::get) .post(synonym::update); - router.at("/stop-words").nest(|router| { - router - .at("/") - .get(stop_words::list) - .patch(stop_words::add) - .post(stop_words::delete); - }); - - router - .at("/settings") + router.at("/settings").nest(|router| { + router.at("/stop-words") + .get(stop_words::get) + .post(stop_words::update) + .delete(stop_words::delete); + }) .get(setting::get) .post(setting::update); }); diff --git a/meilisearch-http/src/routes/stop_words.rs b/meilisearch-http/src/routes/stop_words.rs index 45169f1e3..04f401523 100644 --- a/meilisearch-http/src/routes/stop_words.rs +++ b/meilisearch-http/src/routes/stop_words.rs @@ -8,7 +8,7 @@ use crate::models::token::ACL::*; use crate::routes::document::IndexUpdateResponse; use crate::Data; -pub async fn list(ctx: Context) -> SResult { +pub async fn get(ctx: Context) -> SResult { ctx.is_allowed(SettingsRead)?; let index = ctx.index()?; @@ -29,7 +29,7 @@ pub async fn list(ctx: Context) -> SResult { Ok(tide::response::json(stop_words)) } -pub async fn add(mut ctx: Context) -> SResult { +pub async fn update(mut ctx: Context) -> SResult { ctx.is_allowed(SettingsRead)?; let index = ctx.index()?; @@ -38,12 +38,12 @@ pub async fn add(mut ctx: Context) -> SResult { let db = &ctx.state().db; let mut writer = db.update_write_txn().map_err(ResponseError::internal)?; - let mut stop_words_addition = index.stop_words_addition(); + let mut stop_words_update = index.stop_words_update(); for stop_word in data { - stop_words_addition.add_stop_word(stop_word); + stop_words_update.add_stop_word(stop_word); } - let update_id = stop_words_addition + let update_id = stop_words_update .finalize(&mut writer) .map_err(ResponseError::internal)?; @@ -55,19 +55,14 @@ pub async fn add(mut ctx: Context) -> SResult { .into_response()) } -pub async fn delete(mut ctx: Context) -> SResult { +pub async fn delete(ctx: Context) -> SResult { ctx.is_allowed(SettingsRead)?; let index = ctx.index()?; - let data: Vec = ctx.body_json().await.map_err(ResponseError::bad_request)?; - let db = &ctx.state().db; let mut writer = db.update_write_txn().map_err(ResponseError::internal)?; - let mut stop_words_deletion = index.stop_words_deletion(); - for stop_word in data { - stop_words_deletion.delete_stop_word(stop_word); - } + let stop_words_deletion = index.stop_words_update(); let update_id = stop_words_deletion .finalize(&mut writer)