mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 03:47:02 +02:00
Rewrite the synonym endpoint
This commit is contained in:
parent
cc10804607
commit
a4f26e8e48
7 changed files with 38 additions and 422 deletions
|
@ -5,8 +5,7 @@ mod documents_deletion;
|
|||
mod schema_update;
|
||||
mod stop_words_addition;
|
||||
mod stop_words_deletion;
|
||||
mod synonyms_addition;
|
||||
mod synonyms_deletion;
|
||||
mod synonyms_update;
|
||||
|
||||
pub use self::clear_all::{apply_clear_all, push_clear_all};
|
||||
pub use self::customs_update::{apply_customs_update, push_customs_update};
|
||||
|
@ -17,8 +16,7 @@ pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
|||
pub use self::schema_update::{apply_schema_update, push_schema_update};
|
||||
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
|
||||
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
|
||||
pub use self::synonyms_addition::{apply_synonyms_addition, SynonymsAddition};
|
||||
pub use self::synonyms_deletion::{apply_synonyms_deletion, SynonymsDeletion};
|
||||
pub use self::synonyms_update::{apply_synonyms_update, SynonymsUpdate};
|
||||
|
||||
use std::cmp;
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||
|
@ -82,16 +80,9 @@ impl Update {
|
|||
}
|
||||
}
|
||||
|
||||
fn synonyms_addition(data: BTreeMap<String, Vec<String>>) -> Update {
|
||||
fn synonyms_update(data: BTreeMap<String, Vec<String>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::SynonymsAddition(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn synonyms_deletion(data: BTreeMap<String, Option<Vec<String>>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::SynonymsDeletion(data),
|
||||
data: UpdateData::SynonymsUpdate(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
@ -119,8 +110,7 @@ pub enum UpdateData {
|
|||
DocumentsAddition(Vec<HashMap<String, serde_json::Value>>),
|
||||
DocumentsPartial(Vec<HashMap<String, serde_json::Value>>),
|
||||
DocumentsDeletion(Vec<DocumentId>),
|
||||
SynonymsAddition(BTreeMap<String, Vec<String>>),
|
||||
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
|
||||
SynonymsUpdate(BTreeMap<String, Vec<String>>),
|
||||
StopWordsAddition(BTreeSet<String>),
|
||||
StopWordsDeletion(BTreeSet<String>),
|
||||
}
|
||||
|
@ -140,12 +130,9 @@ impl UpdateData {
|
|||
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
|
||||
number: deletion.len(),
|
||||
},
|
||||
UpdateData::SynonymsAddition(addition) => UpdateType::SynonymsAddition {
|
||||
UpdateData::SynonymsUpdate(addition) => UpdateType::SynonymsUpdate {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::SynonymsDeletion(deletion) => UpdateType::SynonymsDeletion {
|
||||
number: deletion.len(),
|
||||
},
|
||||
UpdateData::StopWordsAddition(addition) => UpdateType::StopWordsAddition {
|
||||
number: addition.len(),
|
||||
},
|
||||
|
@ -165,8 +152,7 @@ pub enum UpdateType {
|
|||
DocumentsAddition { number: usize },
|
||||
DocumentsPartial { number: usize },
|
||||
DocumentsDeletion { number: usize },
|
||||
SynonymsAddition { number: usize },
|
||||
SynonymsDeletion { number: usize },
|
||||
SynonymsUpdate { number: usize },
|
||||
StopWordsAddition { number: usize },
|
||||
StopWordsDeletion { number: usize },
|
||||
}
|
||||
|
@ -361,25 +347,14 @@ pub fn update_task<'a, 'b>(
|
|||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::SynonymsAddition(synonyms) => {
|
||||
UpdateData::SynonymsUpdate(synonyms) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::SynonymsAddition {
|
||||
let update_type = UpdateType::SynonymsUpdate {
|
||||
number: synonyms.len(),
|
||||
};
|
||||
|
||||
let result = apply_synonyms_addition(writer, index.main, index.synonyms, synonyms);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::SynonymsDeletion(synonyms) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::SynonymsDeletion {
|
||||
number: synonyms.len(),
|
||||
};
|
||||
|
||||
let result = apply_synonyms_deletion(writer, index.main, index.synonyms, synonyms);
|
||||
let result = apply_synonyms_update(writer, index.main, index.synonyms, synonyms);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
|
|
|
@ -1,158 +0,0 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
use sdset::SetBuf;
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::automaton::normalize_str;
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult};
|
||||
|
||||
pub struct SynonymsDeletion {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
synonyms: BTreeMap<String, Option<Vec<String>>>,
|
||||
}
|
||||
|
||||
impl SynonymsDeletion {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> SynonymsDeletion {
|
||||
SynonymsDeletion {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
synonyms: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_all_alternatives_of<S: AsRef<str>>(&mut self, synonym: S) {
|
||||
let synonym = normalize_str(synonym.as_ref());
|
||||
self.synonyms.insert(synonym, None);
|
||||
}
|
||||
|
||||
pub fn delete_specific_alternatives_of<S, T, I>(&mut self, synonym: S, alternatives: I)
|
||||
where
|
||||
S: AsRef<str>,
|
||||
T: AsRef<str>,
|
||||
I: Iterator<Item = T>,
|
||||
{
|
||||
let synonym = normalize_str(synonym.as_ref());
|
||||
let value = self.synonyms.entry(synonym).or_insert(None);
|
||||
let alternatives = alternatives.map(|s| s.as_ref().to_lowercase());
|
||||
match value {
|
||||
Some(v) => v.extend(alternatives),
|
||||
None => *value = Some(Vec::from_iter(alternatives)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_synonyms_deletion(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
self.synonyms,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_synonyms_deletion(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
deletion: BTreeMap<String, Option<Vec<String>>>,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::synonyms_deletion(deletion);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_synonyms_deletion(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
main_store: store::Main,
|
||||
synonyms_store: store::Synonyms,
|
||||
deletion: BTreeMap<String, Option<Vec<String>>>,
|
||||
) -> MResult<()> {
|
||||
let mut delete_whole_synonym_builder = SetBuilder::memory();
|
||||
|
||||
for (synonym, alternatives) in deletion {
|
||||
match alternatives {
|
||||
Some(alternatives) => {
|
||||
let prev_alternatives = synonyms_store.synonyms(writer, synonym.as_bytes())?;
|
||||
let prev_alternatives = match prev_alternatives {
|
||||
Some(alternatives) => alternatives,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let delta_alternatives = {
|
||||
let alternatives = SetBuf::from_dirty(alternatives);
|
||||
let mut builder = SetBuilder::memory();
|
||||
builder.extend_iter(alternatives).unwrap();
|
||||
builder.into_inner().and_then(fst::Set::from_bytes).unwrap()
|
||||
};
|
||||
|
||||
let op = OpBuilder::new()
|
||||
.add(prev_alternatives.stream())
|
||||
.add(delta_alternatives.stream())
|
||||
.difference();
|
||||
|
||||
let (alternatives, empty_alternatives) = {
|
||||
let mut builder = SetBuilder::memory();
|
||||
let len = builder.get_ref().len();
|
||||
builder.extend_stream(op).unwrap();
|
||||
let is_empty = len == builder.get_ref().len();
|
||||
let bytes = builder.into_inner().unwrap();
|
||||
let alternatives = fst::Set::from_bytes(bytes).unwrap();
|
||||
|
||||
(alternatives, is_empty)
|
||||
};
|
||||
|
||||
if empty_alternatives {
|
||||
delete_whole_synonym_builder.insert(synonym.as_bytes())?;
|
||||
} else {
|
||||
synonyms_store.put_synonyms(writer, synonym.as_bytes(), &alternatives)?;
|
||||
}
|
||||
}
|
||||
None => {
|
||||
delete_whole_synonym_builder.insert(&synonym).unwrap();
|
||||
synonyms_store.del_synonyms(writer, synonym.as_bytes())?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let delta_synonyms = delete_whole_synonym_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
let synonyms = match main_store.synonyms_fst(writer)? {
|
||||
Some(synonyms) => {
|
||||
let op = OpBuilder::new()
|
||||
.add(synonyms.stream())
|
||||
.add(delta_synonyms.stream())
|
||||
.difference();
|
||||
|
||||
let mut synonyms_builder = SetBuilder::memory();
|
||||
synonyms_builder.extend_stream(op).unwrap();
|
||||
synonyms_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap()
|
||||
}
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
|
||||
main_store.put_synonyms_fst(writer, &synonyms)?;
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
use std::collections::BTreeMap;
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
use fst::SetBuilder;
|
||||
use sdset::SetBuf;
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
|
@ -9,20 +9,20 @@ use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
|||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult};
|
||||
|
||||
pub struct SynonymsAddition {
|
||||
pub struct SynonymsUpdate {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
synonyms: BTreeMap<String, Vec<String>>,
|
||||
}
|
||||
|
||||
impl SynonymsAddition {
|
||||
impl SynonymsUpdate {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> SynonymsAddition {
|
||||
SynonymsAddition {
|
||||
) -> SynonymsUpdate {
|
||||
SynonymsUpdate {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
|
@ -46,7 +46,7 @@ impl SynonymsAddition {
|
|||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_synonyms_addition(
|
||||
let update_id = push_synonyms_update(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
|
@ -56,7 +56,7 @@ impl SynonymsAddition {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn push_synonyms_addition(
|
||||
pub fn push_synonyms_update(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
|
@ -64,20 +64,20 @@ pub fn push_synonyms_addition(
|
|||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::synonyms_addition(addition);
|
||||
let update = Update::synonyms_update(addition);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_synonyms_addition(
|
||||
pub fn apply_synonyms_update(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
main_store: store::Main,
|
||||
synonyms_store: store::Synonyms,
|
||||
addition: BTreeMap<String, Vec<String>>,
|
||||
) -> MResult<()> {
|
||||
let mut synonyms_builder = SetBuilder::memory();
|
||||
|
||||
synonyms_store.clear(writer)?;
|
||||
for (word, alternatives) in addition {
|
||||
synonyms_builder.insert(&word).unwrap();
|
||||
|
||||
|
@ -92,28 +92,11 @@ pub fn apply_synonyms_addition(
|
|||
synonyms_store.put_synonyms(writer, word.as_bytes(), &alternatives)?;
|
||||
}
|
||||
|
||||
let delta_synonyms = synonyms_builder
|
||||
let synonyms = synonyms_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
let synonyms = match main_store.synonyms_fst(writer)? {
|
||||
Some(synonyms) => {
|
||||
let op = OpBuilder::new()
|
||||
.add(synonyms.stream())
|
||||
.add(delta_synonyms.stream())
|
||||
.r#union();
|
||||
|
||||
let mut synonyms_builder = SetBuilder::memory();
|
||||
synonyms_builder.extend_stream(op).unwrap();
|
||||
synonyms_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap()
|
||||
}
|
||||
None => delta_synonyms,
|
||||
};
|
||||
|
||||
main_store.put_synonyms_fst(writer, &synonyms)?;
|
||||
|
||||
Ok(())
|
Loading…
Add table
Add a link
Reference in a new issue