mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-15 13:58:36 +02:00
feat(search, update): synonyms
This commit is contained in:
parent
995d1a07d4
commit
e39aabbfe6
4 changed files with 132 additions and 26 deletions
|
@ -13,6 +13,7 @@ use crate::criterion::Criterion;
|
|||
use crate::facet::FacetType;
|
||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
||||
use crate::update::index_documents::{IndexDocumentsMethod, Transform};
|
||||
use meilisearch_tokenizer::{AnalyzerConfig, Analyzer};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Setting<T> {
|
||||
|
@ -71,6 +72,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||
criteria: Setting<Vec<String>>,
|
||||
stop_words: Setting<BTreeSet<String>>,
|
||||
distinct_attribute: Setting<String>,
|
||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||
}
|
||||
|
||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
|
@ -96,6 +98,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||
criteria: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
update_id,
|
||||
}
|
||||
}
|
||||
|
@ -144,12 +147,24 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn reset_distinct_attribute(&mut self) {
|
||||
self.distinct_attribute = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_distinct_attribute(&mut self, distinct_attribute: String) {
|
||||
self.distinct_attribute = Setting::Set(distinct_attribute);
|
||||
}
|
||||
|
||||
pub fn reset_distinct_attribute(&mut self) {
|
||||
self.distinct_attribute = Setting::Reset;
|
||||
pub fn reset_synonyms(&mut self) {
|
||||
self.synonyms = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_synonyms(&mut self, synonyms: HashMap<String, Vec<String>>) {
|
||||
self.synonyms = if synonyms.is_empty() {
|
||||
Setting::Reset
|
||||
} else {
|
||||
Setting::Set(synonyms)
|
||||
}
|
||||
}
|
||||
|
||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
|
||||
|
@ -294,7 +309,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||
let current = self.index.stop_words(self.wtxn)?;
|
||||
// since we can't compare a BTreeSet with an FST we are going to convert the
|
||||
// BTreeSet to an FST and then compare bytes per bytes the two FSTs.
|
||||
let fst = fst::Set::from_iter(&*stop_words)?;
|
||||
let fst = fst::Set::from_iter(stop_words)?;
|
||||
|
||||
// Does the new FST differ from the previous one?
|
||||
if current.map_or(true, |current| current.as_fst().as_bytes() != fst.as_fst().as_bytes()) {
|
||||
|
@ -310,6 +325,55 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||
}
|
||||
}
|
||||
|
||||
fn update_synonyms(&mut self) -> anyhow::Result<bool> {
|
||||
match self.synonyms {
|
||||
Setting::Set(ref synonyms) => {
|
||||
let old_synonyms = self.index.synonyms(self.wtxn)?.unwrap_or_default();
|
||||
|
||||
let mut config = AnalyzerConfig::default();
|
||||
|
||||
let stop_words = self.index.stop_words(self.wtxn)?;
|
||||
if let Some(stop_words) = &stop_words {
|
||||
config.stop_words(stop_words);
|
||||
}
|
||||
|
||||
let analyzer = Analyzer::new(config);
|
||||
|
||||
let normalize = |text: &String| {
|
||||
analyzer
|
||||
.analyze(text)
|
||||
.tokens()
|
||||
.filter_map(|token|
|
||||
if token.is_word() { Some(token.text().to_string()) } else { None }
|
||||
)
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
let new_synonyms = synonyms
|
||||
.iter()
|
||||
.map(|(word, synonyms)| {
|
||||
let normalized_word = normalize(word);
|
||||
let normalized_synonyms = synonyms.iter()
|
||||
.map(normalize)
|
||||
.unique()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
(normalized_word, normalized_synonyms)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if new_synonyms != old_synonyms {
|
||||
self.index.put_synonyms(self.wtxn, &new_synonyms)?;
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
Setting::Reset => Ok(self.index.delete_synonyms(self.wtxn)?),
|
||||
Setting::NotSet => Ok(false),
|
||||
}
|
||||
}
|
||||
|
||||
fn update_facets(&mut self) -> anyhow::Result<bool> {
|
||||
match self.faceted_fields {
|
||||
Setting::Set(ref fields) => {
|
||||
|
@ -359,9 +423,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||
// update_criteria MUST be called after update_facets, since criterion fields must be set
|
||||
// as facets.
|
||||
self.update_criteria()?;
|
||||
let synonyms_updated = self.update_synonyms()?;
|
||||
let searchable_updated = self.update_searchable()?;
|
||||
|
||||
if facets_updated || searchable_updated || stop_words_updated {
|
||||
if stop_words_updated || facets_updated || synonyms_updated || searchable_updated {
|
||||
self.reindex(&progress_callback, old_fields_ids_map)?;
|
||||
}
|
||||
Ok(())
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue