Rewrite the stop-words endpoint; fix #417

This commit is contained in:
qdequele 2020-01-02 16:30:34 +01:00
parent f0590d3301
commit 91c6539baf
No known key found for this signature in database
GPG Key ID: B3F0A000EBF11745
6 changed files with 135 additions and 184 deletions

View File

@ -287,16 +287,8 @@ impl Index {
)
}
/// Creates a `StopWordsAddition` builder wired to this index's update
/// store, update-results store and update notifier.
pub fn stop_words_addition(&self) -> update::StopWordsAddition {
    // The builder receives its own clone of the notifier handle.
    let notifier = self.updates_notifier.clone();
    update::StopWordsAddition::new(self.updates, self.updates_results, notifier)
}
pub fn stop_words_deletion(&self) -> update::StopWordsDeletion {
update::StopWordsDeletion::new(
pub fn stop_words_update(&self) -> update::StopWordsUpdate {
update::StopWordsUpdate::new(
self.updates,
self.updates_results,
self.updates_notifier.clone(),

View File

@ -3,8 +3,7 @@ mod customs_update;
mod documents_addition;
mod documents_deletion;
mod schema_update;
mod stop_words_addition;
mod stop_words_deletion;
mod stop_words_update;
mod synonyms_update;
pub use self::clear_all::{apply_clear_all, push_clear_all};
@ -14,8 +13,7 @@ pub use self::documents_addition::{
};
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
pub use self::schema_update::{apply_schema_update, push_schema_update};
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
pub use self::stop_words_update::{apply_stop_words_update, StopWordsUpdate};
pub use self::synonyms_update::{apply_synonyms_update, SynonymsUpdate};
use std::cmp;
@ -89,16 +87,9 @@ impl Update {
}
}
fn stop_words_addition(data: BTreeSet<String>) -> Update {
fn stop_words_update(data: BTreeSet<String>) -> Update {
Update {
data: UpdateData::StopWordsAddition(data),
enqueued_at: Utc::now(),
}
}
fn stop_words_deletion(data: BTreeSet<String>) -> Update {
Update {
data: UpdateData::StopWordsDeletion(data),
data: UpdateData::StopWordsUpdate(data),
enqueued_at: Utc::now(),
}
}
@ -113,8 +104,7 @@ pub enum UpdateData {
DocumentsPartial(Vec<HashMap<String, serde_json::Value>>),
DocumentsDeletion(Vec<DocumentId>),
SynonymsUpdate(BTreeMap<String, Vec<String>>),
StopWordsAddition(BTreeSet<String>),
StopWordsDeletion(BTreeSet<String>),
StopWordsUpdate(BTreeSet<String>),
}
impl UpdateData {
@ -135,11 +125,8 @@ impl UpdateData {
UpdateData::SynonymsUpdate(addition) => UpdateType::SynonymsUpdate {
number: addition.len(),
},
UpdateData::StopWordsAddition(addition) => UpdateType::StopWordsAddition {
number: addition.len(),
},
UpdateData::StopWordsDeletion(deletion) => UpdateType::StopWordsDeletion {
number: deletion.len(),
UpdateData::StopWordsUpdate(update) => UpdateType::StopWordsUpdate {
number: update.len(),
},
}
}
@ -155,8 +142,7 @@ pub enum UpdateType {
DocumentsPartial { number: usize },
DocumentsDeletion { number: usize },
SynonymsUpdate { number: usize },
StopWordsAddition { number: usize },
StopWordsDeletion { number: usize },
StopWordsUpdate { number: usize },
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -321,22 +307,10 @@ pub fn update_task<'a, 'b>(
(update_type, result, start.elapsed())
}
UpdateData::StopWordsAddition(stop_words) => {
UpdateData::StopWordsUpdate(stop_words) => {
let start = Instant::now();
let update_type = UpdateType::StopWordsAddition {
number: stop_words.len(),
};
let result =
apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);
(update_type, result, start.elapsed())
}
UpdateData::StopWordsDeletion(stop_words) => {
let start = Instant::now();
let update_type = UpdateType::StopWordsDeletion {
let update_type = UpdateType::StopWordsUpdate {
number: stop_words.len(),
};

View File

@ -1,107 +1 @@
use std::collections::BTreeSet;
use fst::{set::OpBuilder, SetBuilder};
use crate::database::{MainT, UpdateT};
use crate::automaton::normalize_str;
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
/// Builder for an update that removes words from an index's stop-word set.
///
/// Words are collected with `delete_stop_word` and the update is enqueued
/// with `finalize`.
pub struct StopWordsDeletion {
// Store the enqueued update is written to.
updates_store: store::Updates,
// Consulted together with `updates_store` to compute the next update id.
updates_results_store: store::UpdatesResults,
// Used to emit `UpdateEvent::NewUpdate` when the update is finalized.
updates_notifier: UpdateEventsEmitter,
// Normalized stop words scheduled for deletion.
stop_words: BTreeSet<String>,
}
impl StopWordsDeletion {
    /// Builds an empty deletion request backed by the given update stores
    /// and notifier.
    pub fn new(
        updates_store: store::Updates,
        updates_results_store: store::UpdatesResults,
        updates_notifier: UpdateEventsEmitter,
    ) -> Self {
        let stop_words = BTreeSet::new();
        Self {
            updates_store,
            updates_results_store,
            updates_notifier,
            stop_words,
        }
    }

    /// Normalizes `stop_word` and records it as a word to delete.
    pub fn delete_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
        self.stop_words.insert(normalize_str(stop_word.as_ref()));
    }

    /// Emits a `NewUpdate` event, then enqueues the collected deletion and
    /// returns the id assigned to the update.
    pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
        let Self {
            updates_store,
            updates_results_store,
            updates_notifier,
            stop_words,
        } = self;

        // The outcome of the notification is intentionally discarded.
        let _ = updates_notifier.send(UpdateEvent::NewUpdate);

        push_stop_words_deletion(writer, updates_store, updates_results_store, stop_words)
    }
}
/// Enqueues a stop-words deletion update.
///
/// The update is stored in `updates_store` under the next available update
/// id, which is returned to the caller.
pub fn push_stop_words_deletion(
    writer: &mut heed::RwTxn<UpdateT>,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    deletion: BTreeSet<String>,
) -> MResult<u64> {
    let update_id = next_update_id(writer, updates_store, updates_results_store)?;
    let pending = Update::stop_words_deletion(deletion);

    updates_store.put_update(writer, update_id, &pending)?;
    Ok(update_id)
}
/// Removes the words in `deletion` from the index's stop-word set, then
/// reindexes every document so the removed words become searchable again.
///
/// # Errors
///
/// Returns an error if reading or writing the stop-words fst fails, if the
/// document count cannot be read, or if reindexing fails.
///
/// # Panics
///
/// Panics if building one of the in-memory fst sets fails.
pub fn apply_stop_words_deletion(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
    deletion: BTreeSet<String>,
) -> MResult<()> {
    // A `BTreeSet` iterates in ascending order, which is the insertion order
    // the fst builder requires.
    let mut stop_words_builder = SetBuilder::memory();
    for word in deletion {
        stop_words_builder.insert(&word).unwrap();
    }

    // create the new delta stop words fst
    let delta_stop_words = stop_words_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    // now we delete all of these stop words from the main store
    let stop_words_fst = index.main.stop_words_fst(writer)?.unwrap_or_default();

    let op = OpBuilder::new()
        .add(&stop_words_fst)
        .add(&delta_stop_words)
        .difference();

    let mut stop_words_builder = SetBuilder::memory();
    stop_words_builder.extend_stream(op).unwrap();
    let stop_words_fst = stop_words_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    index.main.put_stop_words_fst(writer, &stop_words_fst)?;

    // Now that the stop words are set up, reindex everything. A failure to
    // read the document count is propagated instead of being silently
    // ignored: skipping the reindex would leave the index out of sync with
    // the stop-word set that was just written.
    if index.main.number_of_documents(writer)? > 0 {
        reindex_all_documents(writer, index)?;
    }

    Ok(())
}

View File

@ -2,26 +2,27 @@ use std::collections::BTreeSet;
use fst::{set::OpBuilder, SetBuilder};
use crate::database::{MainT, UpdateT};
use crate::automaton::normalize_str;
use crate::database::{MainT, UpdateT};
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
pub struct StopWordsAddition {
pub struct StopWordsUpdate {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
stop_words: BTreeSet<String>,
}
impl StopWordsAddition {
impl StopWordsUpdate {
pub fn new(
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
) -> StopWordsAddition {
StopWordsAddition {
) -> StopWordsUpdate {
StopWordsUpdate {
updates_store,
updates_results_store,
updates_notifier,
@ -36,7 +37,7 @@ impl StopWordsAddition {
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
let update_id = push_stop_words_addition(
let update_id = push_stop_words_update(
writer,
self.updates_store,
self.updates_results_store,
@ -46,21 +47,64 @@ impl StopWordsAddition {
}
}
pub fn push_stop_words_addition(
pub fn push_stop_words_update(
writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
addition: BTreeSet<String>,
update: BTreeSet<String>,
) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = Update::stop_words_addition(addition);
let update = Update::stop_words_update(update);
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}
pub fn apply_stop_words_addition(
/// Replaces the index's stop-word set with `stop_words`.
///
/// The stored set is compared with the requested one. Words only in the
/// requested set are passed to `apply_stop_words_addition`, and words only in
/// the stored set are passed to `apply_stop_words_deletion`. Each step is
/// skipped when it has nothing to do.
///
/// # Errors
///
/// Returns an error if the stored stop words cannot be read or if either the
/// addition or the deletion step fails.
///
/// # Panics
///
/// Panics if the stored stop-words fst contains a key that is not valid UTF-8.
pub fn apply_stop_words_update(
    writer: &mut heed::RwTxn<MainT>,
    main_store: store::Main,
    documents_fields_store: store::DocumentsFields,
    documents_fields_counts_store: store::DocumentsFieldsCounts,
    postings_lists_store: store::PostingsLists,
    docs_words_store: store::DocsWords,
    stop_words: BTreeSet<String>,
) -> MResult<()> {
    let old_stop_words: BTreeSet<String> = main_store
        .stop_words_fst(writer)?
        .unwrap_or_default()
        .stream()
        .into_strs()
        .expect("stored stop words must be valid UTF-8")
        .into_iter()
        .collect();

    // `difference` only borrows both sets, so neither needs to be cloned
    // wholesale; only the words that actually differ are copied.
    let deletion: BTreeSet<String> = old_stop_words.difference(&stop_words).cloned().collect();
    let addition: BTreeSet<String> = stop_words.difference(&old_stop_words).cloned().collect();

    if !addition.is_empty() {
        apply_stop_words_addition(writer, main_store, postings_lists_store, addition)?;
    }

    if !deletion.is_empty() {
        apply_stop_words_deletion(
            writer,
            main_store,
            documents_fields_store,
            documents_fields_counts_store,
            postings_lists_store,
            docs_words_store,
            deletion,
        )?;
    }

    Ok(())
}
fn apply_stop_words_addition(
writer: &mut heed::RwTxn<MainT>,
main_store: store::Main,
postings_lists_store: store::PostingsLists,
@ -116,3 +160,59 @@ pub fn apply_stop_words_addition(
Ok(())
}
/// Removes the words in `deletion` from the stored stop-word set, then
/// reindexes every document so the removed words become searchable again.
///
/// # Errors
///
/// Returns an error if reading or writing the stop-words fst fails, if the
/// document count cannot be read, or if reindexing fails.
///
/// # Panics
///
/// Panics if building one of the in-memory fst sets fails.
fn apply_stop_words_deletion(
    writer: &mut heed::RwTxn<MainT>,
    main_store: store::Main,
    documents_fields_store: store::DocumentsFields,
    documents_fields_counts_store: store::DocumentsFieldsCounts,
    postings_lists_store: store::PostingsLists,
    docs_words_store: store::DocsWords,
    deletion: BTreeSet<String>,
) -> MResult<()> {
    // A `BTreeSet` iterates in ascending order, which is the insertion order
    // the fst builder requires.
    let mut stop_words_builder = SetBuilder::memory();
    for word in deletion {
        stop_words_builder.insert(&word).unwrap();
    }

    // create the new delta stop words fst
    let delta_stop_words = stop_words_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    // now we delete all of these stop words from the main store
    let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();

    let op = OpBuilder::new()
        .add(&stop_words_fst)
        .add(&delta_stop_words)
        .difference();

    let mut stop_words_builder = SetBuilder::memory();
    stop_words_builder.extend_stream(op).unwrap();
    let stop_words_fst = stop_words_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    main_store.put_stop_words_fst(writer, &stop_words_fst)?;

    // Now that the stop words are set up, reindex everything. A failure to
    // read the document count is propagated instead of being silently
    // ignored: skipping the reindex would leave the index out of sync with
    // the stop-word set that was just written.
    if main_store.number_of_documents(writer)? > 0 {
        reindex_all_documents(
            writer,
            main_store,
            documents_fields_store,
            documents_fields_counts_store,
            postings_lists_store,
            docs_words_store,
        )?;
    }

    Ok(())
}

View File

@ -80,16 +80,12 @@ pub fn load_routes(app: &mut tide::App<Data>) {
.get(synonym::get)
.post(synonym::update);
router.at("/stop-words").nest(|router| {
router
.at("/")
.get(stop_words::list)
.patch(stop_words::add)
.post(stop_words::delete);
});
router
.at("/settings")
router.at("/settings").nest(|router| {
router.at("/stop-words")
.get(stop_words::get)
.post(stop_words::update)
.delete(stop_words::delete);
})
.get(setting::get)
.post(setting::update);
});

View File

@ -8,7 +8,7 @@ use crate::models::token::ACL::*;
use crate::routes::document::IndexUpdateResponse;
use crate::Data;
pub async fn list(ctx: Context<Data>) -> SResult<Response> {
pub async fn get(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(SettingsRead)?;
let index = ctx.index()?;
@ -29,7 +29,7 @@ pub async fn list(ctx: Context<Data>) -> SResult<Response> {
Ok(tide::response::json(stop_words))
}
pub async fn add(mut ctx: Context<Data>) -> SResult<Response> {
pub async fn update(mut ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(SettingsRead)?;
let index = ctx.index()?;
@ -38,12 +38,12 @@ pub async fn add(mut ctx: Context<Data>) -> SResult<Response> {
let db = &ctx.state().db;
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
let mut stop_words_addition = index.stop_words_addition();
let mut stop_words_update = index.stop_words_update();
for stop_word in data {
stop_words_addition.add_stop_word(stop_word);
stop_words_update.add_stop_word(stop_word);
}
let update_id = stop_words_addition
let update_id = stop_words_update
.finalize(&mut writer)
.map_err(ResponseError::internal)?;
@ -55,19 +55,14 @@ pub async fn add(mut ctx: Context<Data>) -> SResult<Response> {
.into_response())
}
pub async fn delete(mut ctx: Context<Data>) -> SResult<Response> {
pub async fn delete(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(SettingsRead)?;
let index = ctx.index()?;
let data: Vec<String> = ctx.body_json().await.map_err(ResponseError::bad_request)?;
let db = &ctx.state().db;
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
let mut stop_words_deletion = index.stop_words_deletion();
for stop_word in data {
stop_words_deletion.delete_stop_word(stop_word);
}
let stop_words_deletion = index.stop_words_update();
let update_id = stop_words_deletion
.finalize(&mut writer)