From e39aabbfe6e35b40261910d09c9b5b13cc2dfaa5 Mon Sep 17 00:00:00 2001
From: Alexey Shekhirin
Date: Wed, 7 Apr 2021 11:53:57 +0300
Subject: [PATCH 1/2] feat(search, update): synonyms

---
 http-ui/src/main.rs            | 12 ++++++
 milli/src/index.rs             | 52 +++++++++++++++++++-----
 milli/src/search/query_tree.rs | 21 +++++-----
 milli/src/update/settings.rs   | 73 ++++++++++++++++++++++++++++++++--
 4 files changed, 132 insertions(+), 26 deletions(-)

diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs
index 08e28be56..605b6a7ba 100644
--- a/http-ui/src/main.rs
+++ b/http-ui/src/main.rs
@@ -260,6 +260,9 @@ struct Settings {
 
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     stop_words: Setting<BTreeSet<String>>,
+
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    synonyms: Setting<HashMap<String, Vec<String>>>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -431,6 +434,13 @@ async fn main() -> anyhow::Result<()> {
             Setting::NotSet => ()
         }
 
+        // We transpose the settings JSON struct into a real setting update.
+        match settings.synonyms {
+            Setting::Set(synonyms) => builder.set_synonyms(synonyms),
+            Setting::Reset => builder.reset_synonyms(),
+            Setting::NotSet => ()
+        }
+
         let result = builder.execute(|indexing_step, update_id| {
             let (current, total) = match indexing_step {
                 TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None),
@@ -1011,6 +1021,7 @@ mod tests {
             faceted_attributes: Setting::Set(hashmap! { "age".into() => "integer".into() }),
             criteria: Setting::Set(vec!["asc(age)".to_string()]),
             stop_words: Setting::Set(btreeset! { "and".to_string() }),
+            synonyms: Setting::NotSet
         };
 
         assert_tokens(&settings, &[
@@ -1053,6 +1064,7 @@ mod tests {
            faceted_attributes: Setting::Reset,
             criteria: Setting::Reset,
             stop_words: Setting::Reset,
+            synonyms: Setting::NotSet
         };
 
         assert_tokens(&settings, &[
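Note: the `Settings` struct above tells "field absent" apart from "field set to null" through the three-state `Setting` type. A minimal sketch of that shape, assuming serde semantics consistent with the `#[serde(default, skip_serializing_if = "Setting::is_not_set")]` attributes (a sketch, not necessarily milli's exact definition):

    use serde::{Deserialize, Deserializer};

    #[derive(Debug, Clone, PartialEq)]
    pub enum Setting<T> {
        Set(T),
        Reset,
        NotSet,
    }

    impl<T> Default for Setting<T> {
        fn default() -> Self {
            Setting::NotSet
        }
    }

    impl<T> Setting<T> {
        pub fn is_not_set(&self) -> bool {
            matches!(self, Setting::NotSet)
        }
    }

    impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            // An absent field never reaches this point (serde falls back to
            // `Default`, i.e. NotSet), so here `null` means Reset and any
            // other value means Set.
            Deserialize::deserialize(deserializer)
                .map(|option: Option<T>| option.map_or(Setting::Reset, Setting::Set))
        }
    }

Under this encoding, omitting `synonyms` from the settings JSON leaves the stored synonyms untouched, while sending `"synonyms": null` resets them.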
diff --git a/milli/src/index.rs b/milli/src/index.rs
index 7be618789..d743445e3 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -3,19 +3,19 @@ use std::collections::HashMap;
 use std::path::Path;
 
 use anyhow::Context;
+use chrono::{DateTime, Utc};
+use heed::{Database, PolyDatabase, RoTxn, RwTxn};
 use heed::types::*;
-use heed::{PolyDatabase, Database, RwTxn, RoTxn};
 use roaring::RoaringBitmap;
-use chrono::{Utc, DateTime};
 
+use crate::{Criterion, default_criteria, FacetDistribution, FieldsDistribution, Search};
+use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId};
+use crate::{
+    BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
+    ObkvCodec, RoaringBitmapCodec, RoaringBitmapLenCodec, StrStrU8Codec,
+};
 use crate::facet::FacetType;
 use crate::fields_ids_map::FieldsIdsMap;
-use crate::{default_criteria, Criterion, Search, FacetDistribution, FieldsDistribution};
-use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds};
-use crate::{
-    RoaringBitmapCodec, RoaringBitmapLenCodec, BEU32StrCodec,
-    StrStrU8Codec, ObkvCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
-};
 
 pub const CRITERIA_KEY: &str = "criteria";
 pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
@@ -31,6 +31,7 @@ pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
 pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
 pub const WORDS_FST_KEY: &str = "words-fst";
 pub const STOP_WORDS_KEY: &str = "stop-words";
+pub const SYNONYMS_KEY: &str = "synonyms";
 pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
 const CREATED_AT_KEY: &str = "created-at";
 const UPDATED_AT_KEY: &str = "updated-at";
@@ -376,12 +377,12 @@ impl Index {
 
     /* words fst */
 
-    /// Writes the FST which is the words dictionnary of the engine.
+    /// Writes the FST which is the words dictionary of the engine.
     pub fn put_words_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
         self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_FST_KEY, fst.as_fst().as_bytes())
     }
 
-    /// Returns the FST which is the words dictionnary of the engine.
+    /// Returns the FST which is the words dictionary of the engine.
     pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<fst::Set<Cow<'t, [u8]>>> {
         match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? {
             Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
@@ -398,6 +399,7 @@ impl Index {
     pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
         self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY)
     }
+
     pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<Option<fst::Set<&'t [u8]>>> {
         match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? {
             Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
@@ -405,6 +407,34 @@ impl Index {
         }
     }
 
+    /* synonyms */
+
+    pub fn put_synonyms(&self, wtxn: &mut RwTxn, synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>) -> heed::Result<()> {
+        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, SYNONYMS_KEY, synonyms)
+    }
+
+    pub fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
+        self.main.delete::<_, Str>(wtxn, SYNONYMS_KEY)
+    }
+
+    pub fn synonyms(&self, rtxn: &RoTxn) -> anyhow::Result<Option<HashMap<Vec<String>, Vec<Vec<String>>>>> {
+        match self.main.get::<_, Str, SerdeBincode<HashMap<Vec<String>, Vec<Vec<String>>>>>(rtxn, SYNONYMS_KEY)? {
+            Some(synonyms) => Ok(Some(synonyms)),
+            None => Ok(None),
+        }
+    }
+
+    pub fn words_synonyms<S: AsRef<str>>(&self, rtxn: &RoTxn, words: &[S]) -> anyhow::Result<Option<Vec<Vec<String>>>> {
+        let words: Vec<_> = words.iter().map(|s| s.as_ref().to_string()).collect();
+
+        match self.synonyms(rtxn)? {
+            Some(synonyms) => Ok(Some(
+                synonyms.get(&words).cloned().unwrap_or(Vec::default())
+            )),
+            None => Ok(None)
+        }
+    }
+
     /* words prefixes fst */
 
     /// Writes the FST which is the words prefixes dictionnary of the engine.
@@ -536,7 +566,7 @@ pub(crate) mod tests {
         let rtxn = index.read_txn().unwrap();
 
         let fields_distribution = index.fields_distribution(&rtxn).unwrap();
-        assert_eq!(fields_distribution, hashmap!{
+        assert_eq!(fields_distribution, hashmap! {
             "name".to_string() => 2,
             "age".to_string() => 1,
         });
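Note: the storage added above keys everything by normalized word sequence on both sides of the map, `HashMap<Vec<String>, Vec<Vec<String>>>`, which is what lets a multi-word source like "super like" act as a synonym key. A hypothetical usage sketch of that layout, with a plain `HashMap` standing in for the bincode-encoded value stored under `SYNONYMS_KEY`:

    use std::collections::HashMap;

    fn main() {
        // Keys and values are normalized token sequences, so the two-word
        // source "super like" is a Vec of two Strings.
        let mut synonyms: HashMap<Vec<String>, Vec<Vec<String>>> = HashMap::new();
        synonyms.insert(
            vec!["super".to_string(), "like".to_string()],
            vec![vec!["love".to_string()]],
        );

        // This mirrors what words_synonyms() does once the map is loaded.
        let query: Vec<String> = vec!["super".into(), "like".into()];
        let alternatives = synonyms.get(&query).cloned().unwrap_or_default();
        assert_eq!(alternatives, vec![vec!["love".to_string()]]);
    }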
diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs
index 1941f0c6f..b2fd62771 100644
--- a/milli/src/search/query_tree.rs
+++ b/milli/src/search/query_tree.rs
@@ -155,7 +155,7 @@ impl fmt::Debug for Query {
 
 trait Context {
     fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
-    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>>;
+    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> anyhow::Result<Option<Vec<Vec<String>>>>;
     fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> {
         match self.word_docids(word)? {
             Some(rb) => Ok(Some(rb.len())),
@@ -177,12 +177,12 @@ impl<'a> Context for QueryTreeBuilder<'a> {
         self.index.word_docids.get(self.rtxn, word)
     }
 
-    fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> {
-        self.index.word_documents_count(self.rtxn, word)
+    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> anyhow::Result<Option<Vec<Vec<String>>>> {
+        self.index.words_synonyms(self.rtxn, words)
     }
 
-    fn synonyms<S: AsRef<str>>(&self, _words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> {
-        Ok(None)
+    fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> {
+        self.index.word_documents_count(self.rtxn, word)
     }
 }
 
@@ -270,10 +270,10 @@ fn typos(word: String, authorize_typos: bool) -> QueryKind {
     }
 }
 
-/// Fetch synonyms from the `Context` for the provided word
+/// Fetch synonyms from the `Context` for the provided words
 /// and create the list of operations for the query tree
-fn synonyms(ctx: &impl Context, word: &[&str]) -> heed::Result<Option<Vec<Operation>>> {
-    let synonyms = ctx.synonyms(word)?;
+fn synonyms(ctx: &impl Context, words: &[&str]) -> anyhow::Result<Option<Vec<Operation>>> {
+    let synonyms = ctx.synonyms(words)?;
 
     Ok(synonyms.map(|synonyms| {
         synonyms.into_iter().map(|synonym| {
@@ -581,14 +581,13 @@ mod test {
         Ok(self.postings.get(word).cloned())
     }
 
-    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> {
-        let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
+    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> anyhow::Result<Option<Vec<Vec<String>>>> {
+        let words: Vec<_> = words.iter().map(|s| s.as_ref().to_string()).collect();
         Ok(self.synonyms.get(&words).cloned())
     }
 }
 
 impl Default for TestContext {
-
     fn default() -> TestContext {
         let mut rng = StdRng::seed_from_u64(102);
         let rng = &mut rng;
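Note: once `Context::synonyms` hands back the alternatives, the `synonyms()` helper above maps each one to a query-tree operation. A simplified sketch of that expansion, using stand-in types (`Op`, `expand`) rather than milli's actual `Operation`/`Query`, and assuming alternatives are matched as exact words:

    // Stand-ins for milli's query-tree node types.
    #[derive(Debug, PartialEq)]
    enum Op {
        And(Vec<Op>),
        Exact(String),
    }

    // Each alternative word sequence becomes one branch to OR into the
    // query tree; a multi-word alternative must match all of its words.
    fn expand(alternatives: Vec<Vec<String>>) -> Vec<Op> {
        alternatives
            .into_iter()
            .map(|mut words| {
                if words.len() == 1 {
                    Op::Exact(words.remove(0))
                } else {
                    Op::And(words.into_iter().map(Op::Exact).collect())
                }
            })
            .collect()
    }

    fn main() {
        let branches = expand(vec![vec!["dogs".to_string()], vec!["doggos".to_string()]]);
        assert_eq!(
            branches,
            vec![Op::Exact("dogs".to_string()), Op::Exact("doggos".to_string())]
        );
    }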
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index e63948082..336c0e253 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -13,6 +13,7 @@ use crate::criterion::Criterion;
 use crate::facet::FacetType;
 use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
 use crate::update::index_documents::{IndexDocumentsMethod, Transform};
+use meilisearch_tokenizer::{AnalyzerConfig, Analyzer};
 
 #[derive(Debug, Clone, PartialEq)]
 pub enum Setting<T> {
@@ -71,6 +72,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
     criteria: Setting<Vec<String>>,
     stop_words: Setting<BTreeSet<String>>,
     distinct_attribute: Setting<String>,
+    synonyms: Setting<HashMap<String, Vec<String>>>,
 }
 
 impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
@@ -96,6 +98,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
             criteria: Setting::NotSet,
             stop_words: Setting::NotSet,
             distinct_attribute: Setting::NotSet,
+            synonyms: Setting::NotSet,
             update_id,
         }
     }
@@ -144,12 +147,24 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         }
     }
 
+    pub fn reset_distinct_attribute(&mut self) {
+        self.distinct_attribute = Setting::Reset;
+    }
+
     pub fn set_distinct_attribute(&mut self, distinct_attribute: String) {
         self.distinct_attribute = Setting::Set(distinct_attribute);
     }
 
-    pub fn reset_distinct_attribute(&mut self) {
-        self.distinct_attribute = Setting::Reset;
+    pub fn reset_synonyms(&mut self) {
+        self.synonyms = Setting::Reset;
+    }
+
+    pub fn set_synonyms(&mut self, synonyms: HashMap<String, Vec<String>>) {
+        self.synonyms = if synonyms.is_empty() {
+            Setting::Reset
+        } else {
+            Setting::Set(synonyms)
+        }
     }
 
     fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
@@ -294,7 +309,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                 let current = self.index.stop_words(self.wtxn)?;
                 // since we can't compare a BTreeSet with an FST we are going to convert the
                 // BTreeSet to an FST and then compare bytes per bytes the two FSTs.
-                let fst = fst::Set::from_iter(&*stop_words)?;
+                let fst = fst::Set::from_iter(stop_words)?;
 
                 // Does the new FST differ from the previous one?
                 if current.map_or(true, |current| current.as_fst().as_bytes() != fst.as_fst().as_bytes()) {
@@ -310,6 +325,55 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         }
     }
 
+    fn update_synonyms(&mut self) -> anyhow::Result<bool> {
+        match self.synonyms {
+            Setting::Set(ref synonyms) => {
+                let old_synonyms = self.index.synonyms(self.wtxn)?.unwrap_or_default();
+
+                let mut config = AnalyzerConfig::default();
+
+                let stop_words = self.index.stop_words(self.wtxn)?;
+                if let Some(stop_words) = &stop_words {
+                    config.stop_words(stop_words);
+                }
+
+                let analyzer = Analyzer::new(config);
+
+                let normalize = |text: &String| {
+                    analyzer
+                        .analyze(text)
+                        .tokens()
+                        .filter_map(|token|
+                            if token.is_word() { Some(token.text().to_string()) } else { None }
+                        )
+                        .collect::<Vec<_>>()
+                };
+
+                let new_synonyms = synonyms
+                    .iter()
+                    .map(|(word, synonyms)| {
+                        let normalized_word = normalize(word);
+                        let normalized_synonyms = synonyms.iter()
+                            .map(normalize)
+                            .unique()
+                            .collect::<Vec<_>>();
+
+                        (normalized_word, normalized_synonyms)
+                    })
+                    .collect();
+
+                if new_synonyms != old_synonyms {
+                    self.index.put_synonyms(self.wtxn, &new_synonyms)?;
+                    Ok(true)
+                } else {
+                    Ok(false)
+                }
+            }
+            Setting::Reset => Ok(self.index.delete_synonyms(self.wtxn)?),
+            Setting::NotSet => Ok(false),
+        }
+    }
+
     fn update_facets(&mut self) -> anyhow::Result<bool> {
         match self.faceted_fields {
             Setting::Set(ref fields) => {
@@ -359,9 +423,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         // update_criteria MUST be called after update_facets, since criterion fields must be set
         // as facets.
         self.update_criteria()?;
+        let synonyms_updated = self.update_synonyms()?;
         let searchable_updated = self.update_searchable()?;
 
-        if facets_updated || searchable_updated || stop_words_updated {
+        if stop_words_updated || facets_updated || synonyms_updated || searchable_updated {
            self.reindex(&progress_callback, old_fields_ids_map)?;
         }
         Ok(())
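Note: `update_synonyms` above pushes every user-provided word and synonym through the same `Analyzer` used at indexing time, so that entries differing only in case or punctuation collapse onto one key. A rough illustration of the effect, with simple lowercasing and punctuation splitting standing in for `meilisearch_tokenizer` (the real analyzer also honors the configured stop words):

    // A crude stand-in for the tokenizer-based `normalize` helper.
    fn normalize(text: &str) -> Vec<String> {
        text.split(|c: char| !c.is_alphanumeric())
            .filter(|word| !word.is_empty())
            .map(str::to_lowercase)
            .collect()
    }

    fn main() {
        // "Super Like!" and "super like" normalize to the same key, so
        // they would end up merged under one entry in the synonyms map.
        assert_eq!(normalize("Super Like!"), vec!["super", "like"]);
        assert_eq!(normalize("super like"), normalize("Super Like!"));
    }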
{ "alex".to_string() => vec!["alexey".to_string()] }) }; assert_tokens(&settings, &[ - Token::Struct { name: "Settings", len: 5 }, + Token::Struct { name: "Settings", len: 6 }, Token::Str("displayedAttributes"), Token::Some, Token::Seq { len: Some(1) }, @@ -1052,6 +1052,14 @@ mod tests { Token::Seq { len: Some(1) }, Token::Str("and"), Token::SeqEnd, + Token::Str("synonyms"), + Token::Some, + Token::Map { len: Some(1) }, + Token::Str("alex"), + Token::Seq {len: Some(1) }, + Token::Str("alexey"), + Token::SeqEnd, + Token::MapEnd, Token::StructEnd, ]); } @@ -1064,11 +1072,11 @@ mod tests { faceted_attributes: Setting::Reset, criteria: Setting::Reset, stop_words: Setting::Reset, - synonyms: Setting::NotSet + synonyms: Setting::Reset, }; assert_tokens(&settings, &[ - Token::Struct { name: "Settings", len: 5 }, + Token::Struct { name: "Settings", len: 6 }, Token::Str("displayedAttributes"), Token::None, Token::Str("searchableAttributes"), @@ -1079,6 +1087,8 @@ mod tests { Token::None, Token::Str("stopWords"), Token::None, + Token::Str("synonyms"), + Token::None, Token::StructEnd, ]); } @@ -1091,6 +1101,7 @@ mod tests { faceted_attributes: Setting::NotSet, criteria: Setting::NotSet, stop_words: Setting::NotSet, + synonyms: Setting::NotSet, }; assert_tokens(&settings, &[ diff --git a/milli/src/index.rs b/milli/src/index.rs index d743445e3..045eabc3c 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -417,22 +417,13 @@ impl Index { self.main.delete::<_, Str>(wtxn, SYNONYMS_KEY) } - pub fn synonyms(&self, rtxn: &RoTxn) -> anyhow::Result, Vec>>>> { - match self.main.get::<_, Str, SerdeBincode, Vec>>>>(rtxn, SYNONYMS_KEY)? { - Some(synonyms) => Ok(Some(synonyms)), - None => Ok(None), - } + pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result, Vec>>> { + Ok(self.main.get::<_, Str, SerdeBincode<_>>(rtxn, SYNONYMS_KEY)?.unwrap_or_default()) } - pub fn words_synonyms>(&self, rtxn: &RoTxn, words: &[S]) -> anyhow::Result>>> { - let words: Vec<_> = words.iter().map(|s| s.as_ref().to_string()).collect(); - - match self.synonyms(rtxn)? { - Some(synonyms) => Ok(Some( - synonyms.get(&words).cloned().unwrap_or(Vec::default()) - )), - None => Ok(None) - } + pub fn words_synonyms>(&self, rtxn: &RoTxn, words: &[S]) -> heed::Result>>> { + let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect(); + Ok(self.synonyms(rtxn)?.remove(&words)) } /* words prefixes fst */ diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index b2fd62771..d21227507 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -155,7 +155,7 @@ impl fmt::Debug for Query { trait Context { fn word_docids(&self, word: &str) -> heed::Result>; - fn synonyms>(&self, words: &[S]) -> anyhow::Result>>>; + fn synonyms>(&self, words: &[S]) -> heed::Result>>>; fn word_documents_count(&self, word: &str) -> heed::Result> { match self.word_docids(word)? 
diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs
index b2fd62771..d21227507 100644
--- a/milli/src/search/query_tree.rs
+++ b/milli/src/search/query_tree.rs
@@ -155,7 +155,7 @@ impl fmt::Debug for Query {
 
 trait Context {
     fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
-    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> anyhow::Result<Option<Vec<Vec<String>>>>;
+    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>>;
     fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> {
         match self.word_docids(word)? {
             Some(rb) => Ok(Some(rb.len())),
@@ -177,7 +177,7 @@ impl<'a> Context for QueryTreeBuilder<'a> {
         self.index.word_docids.get(self.rtxn, word)
     }
 
-    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> anyhow::Result<Option<Vec<Vec<String>>>> {
+    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> {
         self.index.words_synonyms(self.rtxn, words)
     }
 
@@ -270,10 +270,10 @@ fn typos(word: String, authorize_typos: bool) -> QueryKind {
     }
 }
 
-/// Fetch synonyms from the `Context` for the provided words
+/// Fetch synonyms from the `Context` for the provided word
 /// and create the list of operations for the query tree
-fn synonyms(ctx: &impl Context, words: &[&str]) -> anyhow::Result<Option<Vec<Operation>>> {
-    let synonyms = ctx.synonyms(words)?;
+fn synonyms(ctx: &impl Context, word: &[&str]) -> heed::Result<Option<Vec<Operation>>> {
+    let synonyms = ctx.synonyms(word)?;
 
     Ok(synonyms.map(|synonyms| {
         synonyms.into_iter().map(|synonym| {
@@ -581,8 +581,8 @@ mod test {
         Ok(self.postings.get(word).cloned())
     }
 
-    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> anyhow::Result<Option<Vec<Vec<String>>>> {
-        let words: Vec<_> = words.iter().map(|s| s.as_ref().to_string()).collect();
+    fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> {
+        let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
         Ok(self.synonyms.get(&words).cloned())
     }
 }
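Note: the settings.rs hunk below also reworks normalization so that two user entries normalizing to the same word get merged, and each merged list is sorted and deduplicated before comparison with the stored map. A self-contained sketch of that merge strategy (the sample data is made up):

    use std::collections::HashMap;

    fn main() {
        // Two entries that normalize to the same key merge their lists;
        // sort + dedup then drops the duplicate alternative.
        let entries = vec![
            (vec!["wolf".to_string()], vec![vec!["wolves".to_string()]]),
            (
                vec!["wolf".to_string()],
                vec![vec!["wolves".to_string()], vec!["canine".to_string()]],
            ),
        ];

        let mut merged: HashMap<Vec<String>, Vec<Vec<String>>> = HashMap::new();
        for (word, synonyms) in entries {
            merged.entry(word).or_insert_with(Vec::new).extend(synonyms);
        }
        for synonyms in merged.values_mut() {
            synonyms.sort_unstable();
            synonyms.dedup();
        }

        // "wolves" appears once despite being provided twice.
        assert_eq!(merged[&vec!["wolf".to_string()]].len(), 2);
    }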
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index 336c0e253..a0cfbd315 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -5,6 +5,7 @@ use anyhow::Context;
 use chrono::Utc;
 use grenad::CompressionType;
 use itertools::Itertools;
+use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
 use rayon::ThreadPool;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
 
@@ -13,7 +14,6 @@ use crate::criterion::Criterion;
 use crate::facet::FacetType;
 use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
 use crate::update::index_documents::{IndexDocumentsMethod, Transform};
-use meilisearch_tokenizer::{AnalyzerConfig, Analyzer};
 
 #[derive(Debug, Clone, PartialEq)]
 pub enum Setting<T> {
@@ -328,18 +328,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
     fn update_synonyms(&mut self) -> anyhow::Result<bool> {
         match self.synonyms {
             Setting::Set(ref synonyms) => {
-                let old_synonyms = self.index.synonyms(self.wtxn)?.unwrap_or_default();
-
-                let mut config = AnalyzerConfig::default();
-
-                let stop_words = self.index.stop_words(self.wtxn)?;
-                if let Some(stop_words) = &stop_words {
-                    config.stop_words(stop_words);
-                }
-
-                let analyzer = Analyzer::new(config);
-
-                let normalize = |text: &String| {
+                fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> Vec<String> {
@@ -347,20 +336,40 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                     analyzer
                         .analyze(text)
                         .tokens()
                         .filter_map(|token|
                             if token.is_word() { Some(token.text().to_string()) } else { None }
                         )
                         .collect::<Vec<_>>()
-                };
+                }
 
-                let new_synonyms = synonyms
-                    .iter()
-                    .map(|(word, synonyms)| {
-                        let normalized_word = normalize(word);
-                        let normalized_synonyms = synonyms.iter()
-                            .map(normalize)
-                            .unique()
-                            .collect::<Vec<_>>();
+                let mut config = AnalyzerConfig::default();
+                let stop_words = self.index.stop_words(self.wtxn)?;
+                if let Some(stop_words) = &stop_words {
+                    config.stop_words(stop_words);
+                }
+                let analyzer = Analyzer::new(config);
 
-                        (normalized_word, normalized_synonyms)
-                    })
-                    .collect();
+                let mut new_synonyms = HashMap::new();
+                for (word, synonyms) in synonyms {
+                    // Normalize both the word and associated synonyms.
+                    let normalized_word = normalize(&analyzer, word);
+                    let normalized_synonyms = synonyms
+                        .iter()
+                        .map(|synonym| normalize(&analyzer, synonym));
+
+                    // Store the normalized synonyms under the normalized word,
+                    // merging the possible duplicate words.
+                    let entry = new_synonyms
+                        .entry(normalized_word)
+                        .or_insert_with(Vec::new);
+                    entry.extend(normalized_synonyms);
+                }
+
+                // Make sure that we don't have duplicate synonyms.
+                new_synonyms
+                    .iter_mut()
+                    .for_each(|(_, synonyms)| {
+                        synonyms.sort_unstable();
+                        synonyms.dedup();
+                    });
+
+                let old_synonyms = self.index.synonyms(self.wtxn)?;
 
                 if new_synonyms != old_synonyms {
                     self.index.put_synonyms(self.wtxn, &new_synonyms)?;
                     Ok(true)
@@ -734,6 +743,64 @@ mod tests {
         assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data
     }
 
+    #[test]
+    fn set_and_reset_synonyms() {
+        let path = tempfile::tempdir().unwrap();
+        let mut options = EnvOpenOptions::new();
+        options.map_size(10 * 1024 * 1024); // 10 MB
+        let index = Index::new(options, &path).unwrap();
+
+        // Send 3 documents with ids from 1 to 3.
+        let mut wtxn = index.write_txn().unwrap();
+        let content = &b"name,age,maxim\nkevin,23,I love dogs\nkevina,21,Doggos are the best\nbenoit,34,The crepes are really good\n"[..];
+        let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
+        builder.update_format(UpdateFormat::Csv);
+        builder.execute(content, |_, _| ()).unwrap();
+
+        // In the same transaction provide some synonyms
+        let mut builder = Settings::new(&mut wtxn, &index, 0);
+        builder.set_synonyms(hashmap! {
+            "blini".to_string() => vec!["crepes".to_string()],
+            "super like".to_string() => vec!["love".to_string()],
+            "puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()]
+        });
+        builder.execute(|_, _| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        // Ensure synonyms are effectively stored
+        let rtxn = index.read_txn().unwrap();
+        let synonyms = index.synonyms(&rtxn).unwrap();
+        assert!(!synonyms.is_empty()); // at this point the index should return something
+
+        // Check that we can use synonyms
+        let result = index.search(&rtxn).query("blini").execute().unwrap();
+        assert_eq!(result.documents_ids.len(), 1);
+        let result = index.search(&rtxn).query("super like").execute().unwrap();
+        assert_eq!(result.documents_ids.len(), 1);
+        let result = index.search(&rtxn).query("puppies").execute().unwrap();
+        assert_eq!(result.documents_ids.len(), 2);
+
+        // Reset the synonyms
+        let mut wtxn = index.write_txn().unwrap();
+        let mut builder = Settings::new(&mut wtxn, &index, 0);
+        builder.reset_synonyms();
+        builder.execute(|_, _| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        // Ensure synonyms are reset
+        let rtxn = index.read_txn().unwrap();
+        let synonyms = index.synonyms(&rtxn).unwrap();
+        assert!(synonyms.is_empty());
+
+        // Check that synonyms no longer work
+        let result = index.search(&rtxn).query("blini").execute().unwrap();
+        assert!(result.documents_ids.is_empty());
+        let result = index.search(&rtxn).query("super like").execute().unwrap();
+        assert!(result.documents_ids.is_empty());
+        let result = index.search(&rtxn).query("puppies").execute().unwrap();
+        assert!(result.documents_ids.is_empty());
+    }
+
     #[test]
     fn setting_searchable_recomputes_other_settings() {
         let path = tempfile::tempdir().unwrap();
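Note: the whole synonyms map is persisted as a single `SerdeBincode` value under the "synonyms" key of the main database. A hedged sketch of the round-trip that encoding implies, using the `bincode` crate directly (which is what heed's `SerdeBincode` codec wraps):

    use std::collections::HashMap;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let mut synonyms: HashMap<Vec<String>, Vec<Vec<String>>> = HashMap::new();
        synonyms.insert(vec!["blini".to_string()], vec![vec!["crepes".to_string()]]);

        // The whole map serializes to one bincode blob, stored under the
        // SYNONYMS_KEY entry of the main database.
        let bytes = bincode::serialize(&synonyms)?;
        let decoded: HashMap<Vec<String>, Vec<Vec<String>>> = bincode::deserialize(&bytes)?;
        assert_eq!(synonyms, decoded);
        Ok(())
    }

Because the value is one blob, every `words_synonyms` lookup deserializes the full map; that is the trade-off the patch accepts in exchange for a simple schema.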