mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Rewrite the stop-words endpoint; fix #417
This commit is contained in:
parent
f0590d3301
commit
91c6539baf
@ -287,16 +287,8 @@ impl Index {
|
||||
)
|
||||
}
|
||||
|
||||
/// Builds an `update::StopWordsAddition` bound to this index's `updates` and
/// `updates_results` stores and to a clone of its update notifier.
pub fn stop_words_addition(&self) -> update::StopWordsAddition {
    let notifier = self.updates_notifier.clone();
    update::StopWordsAddition::new(self.updates, self.updates_results, notifier)
}
|
||||
|
||||
pub fn stop_words_deletion(&self) -> update::StopWordsDeletion {
|
||||
update::StopWordsDeletion::new(
|
||||
pub fn stop_words_update(&self) -> update::StopWordsUpdate {
|
||||
update::StopWordsUpdate::new(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
|
@ -3,8 +3,7 @@ mod customs_update;
|
||||
mod documents_addition;
|
||||
mod documents_deletion;
|
||||
mod schema_update;
|
||||
mod stop_words_addition;
|
||||
mod stop_words_deletion;
|
||||
mod stop_words_update;
|
||||
mod synonyms_update;
|
||||
|
||||
pub use self::clear_all::{apply_clear_all, push_clear_all};
|
||||
@ -14,8 +13,7 @@ pub use self::documents_addition::{
|
||||
};
|
||||
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
||||
pub use self::schema_update::{apply_schema_update, push_schema_update};
|
||||
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
|
||||
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
|
||||
pub use self::stop_words_update::{apply_stop_words_update, StopWordsUpdate};
|
||||
pub use self::synonyms_update::{apply_synonyms_update, SynonymsUpdate};
|
||||
|
||||
use std::cmp;
|
||||
@ -89,16 +87,9 @@ impl Update {
|
||||
}
|
||||
}
|
||||
|
||||
fn stop_words_addition(data: BTreeSet<String>) -> Update {
|
||||
fn stop_words_update(data: BTreeSet<String>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::StopWordsAddition(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn stop_words_deletion(data: BTreeSet<String>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::StopWordsDeletion(data),
|
||||
data: UpdateData::StopWordsUpdate(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
@ -113,8 +104,7 @@ pub enum UpdateData {
|
||||
DocumentsPartial(Vec<HashMap<String, serde_json::Value>>),
|
||||
DocumentsDeletion(Vec<DocumentId>),
|
||||
SynonymsUpdate(BTreeMap<String, Vec<String>>),
|
||||
StopWordsAddition(BTreeSet<String>),
|
||||
StopWordsDeletion(BTreeSet<String>),
|
||||
StopWordsUpdate(BTreeSet<String>),
|
||||
}
|
||||
|
||||
impl UpdateData {
|
||||
@ -135,11 +125,8 @@ impl UpdateData {
|
||||
UpdateData::SynonymsUpdate(addition) => UpdateType::SynonymsUpdate {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::StopWordsAddition(addition) => UpdateType::StopWordsAddition {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::StopWordsDeletion(deletion) => UpdateType::StopWordsDeletion {
|
||||
number: deletion.len(),
|
||||
UpdateData::StopWordsUpdate(update) => UpdateType::StopWordsUpdate {
|
||||
number: update.len(),
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -155,8 +142,7 @@ pub enum UpdateType {
|
||||
DocumentsPartial { number: usize },
|
||||
DocumentsDeletion { number: usize },
|
||||
SynonymsUpdate { number: usize },
|
||||
StopWordsAddition { number: usize },
|
||||
StopWordsDeletion { number: usize },
|
||||
StopWordsUpdate { number: usize },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@ -321,22 +307,10 @@ pub fn update_task<'a, 'b>(
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::StopWordsAddition(stop_words) => {
|
||||
UpdateData::StopWordsUpdate(stop_words) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::StopWordsAddition {
|
||||
number: stop_words.len(),
|
||||
};
|
||||
|
||||
let result =
|
||||
apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::StopWordsDeletion(stop_words) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::StopWordsDeletion {
|
||||
let update_type = UpdateType::StopWordsUpdate {
|
||||
number: stop_words.len(),
|
||||
};
|
||||
|
||||
|
@ -1,107 +1 @@
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::automaton::normalize_str;
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::update::documents_addition::reindex_all_documents;
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult};
|
||||
|
||||
pub struct StopWordsDeletion {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
stop_words: BTreeSet<String>,
|
||||
}
|
||||
|
||||
impl StopWordsDeletion {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> StopWordsDeletion {
|
||||
StopWordsDeletion {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
stop_words: BTreeSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
|
||||
let stop_word = normalize_str(stop_word.as_ref());
|
||||
self.stop_words.insert(stop_word);
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_stop_words_deletion(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
self.stop_words,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_stop_words_deletion(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::stop_words_deletion(deletion);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_stop_words_deletion(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<()> {
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
|
||||
for word in deletion {
|
||||
stop_words_builder.insert(&word).unwrap();
|
||||
}
|
||||
|
||||
// create the new delta stop words fst
|
||||
let delta_stop_words = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
// now we delete all of these stop words from the main store
|
||||
let stop_words_fst = index.main.stop_words_fst(writer)?.unwrap_or_default();
|
||||
|
||||
let op = OpBuilder::new()
|
||||
.add(&stop_words_fst)
|
||||
.add(&delta_stop_words)
|
||||
.difference();
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
stop_words_builder.extend_stream(op).unwrap();
|
||||
let stop_words_fst = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
index.main.put_stop_words_fst(writer, &stop_words_fst)?;
|
||||
|
||||
// now that we have setup the stop words
|
||||
// lets reindex everything...
|
||||
if let Ok(number) = index.main.number_of_documents(writer) {
|
||||
if number > 0 {
|
||||
reindex_all_documents(writer, index)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -2,26 +2,27 @@ use std::collections::BTreeSet;
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::automaton::normalize_str;
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::update::documents_addition::reindex_all_documents;
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult};
|
||||
|
||||
pub struct StopWordsAddition {
|
||||
pub struct StopWordsUpdate {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
stop_words: BTreeSet<String>,
|
||||
}
|
||||
|
||||
impl StopWordsAddition {
|
||||
impl StopWordsUpdate {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> StopWordsAddition {
|
||||
StopWordsAddition {
|
||||
) -> StopWordsUpdate {
|
||||
StopWordsUpdate {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
@ -36,7 +37,7 @@ impl StopWordsAddition {
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_stop_words_addition(
|
||||
let update_id = push_stop_words_update(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
@ -46,21 +47,64 @@ impl StopWordsAddition {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_stop_words_addition(
|
||||
pub fn push_stop_words_update(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
addition: BTreeSet<String>,
|
||||
update: BTreeSet<String>,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::stop_words_addition(addition);
|
||||
let update = Update::stop_words_update(update);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_stop_words_addition(
|
||||
pub fn apply_stop_words_update(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
main_store: store::Main,
|
||||
documents_fields_store: store::DocumentsFields,
|
||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
stop_words: BTreeSet<String>,
|
||||
) -> MResult<()> {
|
||||
|
||||
let old_stop_words: BTreeSet<String> = main_store
|
||||
.stop_words_fst(writer)?
|
||||
.unwrap_or_default()
|
||||
.stream()
|
||||
.into_strs().unwrap().into_iter().collect();
|
||||
|
||||
let deletion: BTreeSet<String> = old_stop_words.clone().difference(&stop_words).cloned().collect();
|
||||
let addition: BTreeSet<String> = stop_words.clone().difference(&old_stop_words).cloned().collect();
|
||||
|
||||
if !addition.is_empty() {
|
||||
apply_stop_words_addition(
|
||||
writer,
|
||||
main_store,
|
||||
postings_lists_store,
|
||||
addition
|
||||
)?;
|
||||
}
|
||||
|
||||
if !deletion.is_empty() {
|
||||
apply_stop_words_deletion(
|
||||
writer,
|
||||
main_store,
|
||||
documents_fields_store,
|
||||
documents_fields_counts_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
deletion
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn apply_stop_words_addition(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
main_store: store::Main,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
@ -116,3 +160,59 @@ pub fn apply_stop_words_addition(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn apply_stop_words_deletion(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
main_store: store::Main,
|
||||
documents_fields_store: store::DocumentsFields,
|
||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<()> {
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
|
||||
for word in deletion {
|
||||
stop_words_builder.insert(&word).unwrap();
|
||||
}
|
||||
|
||||
// create the new delta stop words fst
|
||||
let delta_stop_words = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
// now we delete all of these stop words from the main store
|
||||
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
|
||||
|
||||
let op = OpBuilder::new()
|
||||
.add(&stop_words_fst)
|
||||
.add(&delta_stop_words)
|
||||
.difference();
|
||||
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
stop_words_builder.extend_stream(op).unwrap();
|
||||
let stop_words_fst = stop_words_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
main_store.put_stop_words_fst(writer, &stop_words_fst)?;
|
||||
|
||||
// now that we have setup the stop words
|
||||
// lets reindex everything...
|
||||
if let Ok(number) = main_store.number_of_documents(writer) {
|
||||
if number > 0 {
|
||||
reindex_all_documents(
|
||||
writer,
|
||||
main_store,
|
||||
documents_fields_store,
|
||||
documents_fields_counts_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
@ -80,16 +80,12 @@ pub fn load_routes(app: &mut tide::App<Data>) {
|
||||
.get(synonym::get)
|
||||
.post(synonym::update);
|
||||
|
||||
router.at("/stop-words").nest(|router| {
|
||||
router
|
||||
.at("/")
|
||||
.get(stop_words::list)
|
||||
.patch(stop_words::add)
|
||||
.post(stop_words::delete);
|
||||
});
|
||||
|
||||
router
|
||||
.at("/settings")
|
||||
router.at("/settings").nest(|router| {
|
||||
router.at("/stop-words")
|
||||
.get(stop_words::get)
|
||||
.post(stop_words::update)
|
||||
.delete(stop_words::delete);
|
||||
})
|
||||
.get(setting::get)
|
||||
.post(setting::update);
|
||||
});
|
||||
|
@ -8,7 +8,7 @@ use crate::models::token::ACL::*;
|
||||
use crate::routes::document::IndexUpdateResponse;
|
||||
use crate::Data;
|
||||
|
||||
pub async fn list(ctx: Context<Data>) -> SResult<Response> {
|
||||
pub async fn get(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(SettingsRead)?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
@ -29,7 +29,7 @@ pub async fn list(ctx: Context<Data>) -> SResult<Response> {
|
||||
Ok(tide::response::json(stop_words))
|
||||
}
|
||||
|
||||
pub async fn add(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
pub async fn update(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(SettingsRead)?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
@ -38,12 +38,12 @@ pub async fn add(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
let db = &ctx.state().db;
|
||||
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let mut stop_words_addition = index.stop_words_addition();
|
||||
let mut stop_words_update = index.stop_words_update();
|
||||
for stop_word in data {
|
||||
stop_words_addition.add_stop_word(stop_word);
|
||||
stop_words_update.add_stop_word(stop_word);
|
||||
}
|
||||
|
||||
let update_id = stop_words_addition
|
||||
let update_id = stop_words_update
|
||||
.finalize(&mut writer)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
@ -55,19 +55,14 @@ pub async fn add(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
.into_response())
|
||||
}
|
||||
|
||||
pub async fn delete(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
pub async fn delete(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(SettingsRead)?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
let data: Vec<String> = ctx.body_json().await.map_err(ResponseError::bad_request)?;
|
||||
|
||||
let db = &ctx.state().db;
|
||||
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let mut stop_words_deletion = index.stop_words_deletion();
|
||||
for stop_word in data {
|
||||
stop_words_deletion.delete_stop_word(stop_word);
|
||||
}
|
||||
let stop_words_deletion = index.stop_words_update();
|
||||
|
||||
let update_id = stop_words_deletion
|
||||
.finalize(&mut writer)
|
||||
|
Loading…
Reference in New Issue
Block a user