mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-11 23:48:56 +01:00
commit
1095874e7e
@ -260,6 +260,9 @@ struct Settings {
|
|||||||
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
stop_words: Setting<BTreeSet<String>>,
|
stop_words: Setting<BTreeSet<String>>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
|
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@ -431,6 +434,13 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
Setting::NotSet => ()
|
Setting::NotSet => ()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We transpose the settings JSON struct into a real setting update.
|
||||||
|
match settings.synonyms {
|
||||||
|
Setting::Set(synonyms) => builder.set_synonyms(synonyms),
|
||||||
|
Setting::Reset => builder.reset_synonyms(),
|
||||||
|
Setting::NotSet => ()
|
||||||
|
}
|
||||||
|
|
||||||
let result = builder.execute(|indexing_step, update_id| {
|
let result = builder.execute(|indexing_step, update_id| {
|
||||||
let (current, total) = match indexing_step {
|
let (current, total) = match indexing_step {
|
||||||
TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None),
|
TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None),
|
||||||
@ -1011,10 +1021,11 @@ mod tests {
|
|||||||
faceted_attributes: Setting::Set(hashmap! { "age".into() => "integer".into() }),
|
faceted_attributes: Setting::Set(hashmap! { "age".into() => "integer".into() }),
|
||||||
criteria: Setting::Set(vec!["asc(age)".to_string()]),
|
criteria: Setting::Set(vec!["asc(age)".to_string()]),
|
||||||
stop_words: Setting::Set(btreeset! { "and".to_string() }),
|
stop_words: Setting::Set(btreeset! { "and".to_string() }),
|
||||||
|
synonyms: Setting::Set(hashmap! { "alex".to_string() => vec!["alexey".to_string()] })
|
||||||
};
|
};
|
||||||
|
|
||||||
assert_tokens(&settings, &[
|
assert_tokens(&settings, &[
|
||||||
Token::Struct { name: "Settings", len: 5 },
|
Token::Struct { name: "Settings", len: 6 },
|
||||||
Token::Str("displayedAttributes"),
|
Token::Str("displayedAttributes"),
|
||||||
Token::Some,
|
Token::Some,
|
||||||
Token::Seq { len: Some(1) },
|
Token::Seq { len: Some(1) },
|
||||||
@ -1041,6 +1052,14 @@ mod tests {
|
|||||||
Token::Seq { len: Some(1) },
|
Token::Seq { len: Some(1) },
|
||||||
Token::Str("and"),
|
Token::Str("and"),
|
||||||
Token::SeqEnd,
|
Token::SeqEnd,
|
||||||
|
Token::Str("synonyms"),
|
||||||
|
Token::Some,
|
||||||
|
Token::Map { len: Some(1) },
|
||||||
|
Token::Str("alex"),
|
||||||
|
Token::Seq {len: Some(1) },
|
||||||
|
Token::Str("alexey"),
|
||||||
|
Token::SeqEnd,
|
||||||
|
Token::MapEnd,
|
||||||
Token::StructEnd,
|
Token::StructEnd,
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
@ -1053,10 +1072,11 @@ mod tests {
|
|||||||
faceted_attributes: Setting::Reset,
|
faceted_attributes: Setting::Reset,
|
||||||
criteria: Setting::Reset,
|
criteria: Setting::Reset,
|
||||||
stop_words: Setting::Reset,
|
stop_words: Setting::Reset,
|
||||||
|
synonyms: Setting::Reset,
|
||||||
};
|
};
|
||||||
|
|
||||||
assert_tokens(&settings, &[
|
assert_tokens(&settings, &[
|
||||||
Token::Struct { name: "Settings", len: 5 },
|
Token::Struct { name: "Settings", len: 6 },
|
||||||
Token::Str("displayedAttributes"),
|
Token::Str("displayedAttributes"),
|
||||||
Token::None,
|
Token::None,
|
||||||
Token::Str("searchableAttributes"),
|
Token::Str("searchableAttributes"),
|
||||||
@ -1067,6 +1087,8 @@ mod tests {
|
|||||||
Token::None,
|
Token::None,
|
||||||
Token::Str("stopWords"),
|
Token::Str("stopWords"),
|
||||||
Token::None,
|
Token::None,
|
||||||
|
Token::Str("synonyms"),
|
||||||
|
Token::None,
|
||||||
Token::StructEnd,
|
Token::StructEnd,
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
@ -1079,6 +1101,7 @@ mod tests {
|
|||||||
faceted_attributes: Setting::NotSet,
|
faceted_attributes: Setting::NotSet,
|
||||||
criteria: Setting::NotSet,
|
criteria: Setting::NotSet,
|
||||||
stop_words: Setting::NotSet,
|
stop_words: Setting::NotSet,
|
||||||
|
synonyms: Setting::NotSet,
|
||||||
};
|
};
|
||||||
|
|
||||||
assert_tokens(&settings, &[
|
assert_tokens(&settings, &[
|
||||||
|
@ -3,19 +3,19 @@ use std::collections::HashMap;
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use heed::{Database, PolyDatabase, RoTxn, RwTxn};
|
||||||
use heed::types::*;
|
use heed::types::*;
|
||||||
use heed::{PolyDatabase, Database, RwTxn, RoTxn};
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use chrono::{Utc, DateTime};
|
|
||||||
|
|
||||||
|
use crate::{Criterion, default_criteria, FacetDistribution, FieldsDistribution, Search};
|
||||||
|
use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId};
|
||||||
|
use crate::{
|
||||||
|
BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
||||||
|
ObkvCodec, RoaringBitmapCodec, RoaringBitmapLenCodec, StrStrU8Codec,
|
||||||
|
};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
use crate::{default_criteria, Criterion, Search, FacetDistribution, FieldsDistribution};
|
|
||||||
use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds};
|
|
||||||
use crate::{
|
|
||||||
RoaringBitmapCodec, RoaringBitmapLenCodec, BEU32StrCodec,
|
|
||||||
StrStrU8Codec, ObkvCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub const CRITERIA_KEY: &str = "criteria";
|
pub const CRITERIA_KEY: &str = "criteria";
|
||||||
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
||||||
@ -31,6 +31,7 @@ pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
|||||||
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
|
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
|
||||||
pub const WORDS_FST_KEY: &str = "words-fst";
|
pub const WORDS_FST_KEY: &str = "words-fst";
|
||||||
pub const STOP_WORDS_KEY: &str = "stop-words";
|
pub const STOP_WORDS_KEY: &str = "stop-words";
|
||||||
|
pub const SYNONYMS_KEY: &str = "synonyms";
|
||||||
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
||||||
const CREATED_AT_KEY: &str = "created-at";
|
const CREATED_AT_KEY: &str = "created-at";
|
||||||
const UPDATED_AT_KEY: &str = "updated-at";
|
const UPDATED_AT_KEY: &str = "updated-at";
|
||||||
@ -376,12 +377,12 @@ impl Index {
|
|||||||
|
|
||||||
/* words fst */
|
/* words fst */
|
||||||
|
|
||||||
/// Writes the FST which is the words dictionnary of the engine.
|
/// Writes the FST which is the words dictionary of the engine.
|
||||||
pub fn put_words_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
pub fn put_words_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_FST_KEY, fst.as_fst().as_bytes())
|
self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_FST_KEY, fst.as_fst().as_bytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the FST which is the words dictionnary of the engine.
|
/// Returns the FST which is the words dictionary of the engine.
|
||||||
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<fst::Set<Cow<'t, [u8]>>> {
|
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<fst::Set<Cow<'t, [u8]>>> {
|
||||||
match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? {
|
match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? {
|
||||||
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
|
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
|
||||||
@ -398,6 +399,7 @@ impl Index {
|
|||||||
pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY)
|
self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<Option<fst::Set<&'t [u8]>>> {
|
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<Option<fst::Set<&'t [u8]>>> {
|
||||||
match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? {
|
match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? {
|
||||||
Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
|
Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
|
||||||
@ -405,6 +407,25 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* synonyms */
|
||||||
|
|
||||||
|
/// Writes the synonyms map into the main database under `SYNONYMS_KEY`,
/// encoded with `SerdeBincode`.
///
/// Both the keys and the individual synonyms are sequences of words
/// (`Vec<String>`), not single strings.
pub fn put_synonyms(&self, wtxn: &mut RwTxn, synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>) -> heed::Result<()> {
|
||||||
|
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, SYNONYMS_KEY, synonyms)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deletes the entry stored under `SYNONYMS_KEY` in the main database and
/// forwards the `bool` returned by the database `delete` call.
///
/// NOTE(review): presumably `true` means an entry was actually removed;
/// confirm against the heed documentation.
pub fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
|
self.main.delete::<_, Str>(wtxn, SYNONYMS_KEY)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads and decodes the synonyms map stored under `SYNONYMS_KEY`.
///
/// Returns an empty map (through `unwrap_or_default`) when no entry is stored.
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
|
||||||
|
Ok(self.main.get::<_, Str, SerdeBincode<_>>(rtxn, SYNONYMS_KEY)?.unwrap_or_default())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the synonyms registered for the exact sequence `words`, or `None`
/// when the map has no entry for that sequence.
///
/// The whole synonyms map is decoded through `synonyms()` on every call; the
/// matching entry is then moved out of that temporary map with `remove`.
pub fn words_synonyms<S: AsRef<str>>(&self, rtxn: &RoTxn, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> {
|
||||||
|
// Build an owned key: the map is keyed by `Vec<String>`.
let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
|
||||||
|
Ok(self.synonyms(rtxn)?.remove(&words))
|
||||||
|
}
|
||||||
|
|
||||||
/* words prefixes fst */
|
/* words prefixes fst */
|
||||||
|
|
||||||
/// Writes the FST which is the words prefixes dictionary of the engine.
|
/// Writes the FST which is the words prefixes dictionary of the engine.
|
||||||
@ -536,7 +557,7 @@ pub(crate) mod tests {
|
|||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
let fields_distribution = index.fields_distribution(&rtxn).unwrap();
|
let fields_distribution = index.fields_distribution(&rtxn).unwrap();
|
||||||
assert_eq!(fields_distribution, hashmap!{
|
assert_eq!(fields_distribution, hashmap! {
|
||||||
"name".to_string() => 2,
|
"name".to_string() => 2,
|
||||||
"age".to_string() => 1,
|
"age".to_string() => 1,
|
||||||
});
|
});
|
||||||
|
@ -177,12 +177,12 @@ impl<'a> Context for QueryTreeBuilder<'a> {
|
|||||||
self.index.word_docids.get(self.rtxn, word)
|
self.index.word_docids.get(self.rtxn, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> {
|
fn synonyms<S: AsRef<str>>(&self, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> {
|
||||||
self.index.word_documents_count(self.rtxn, word)
|
self.index.words_synonyms(self.rtxn, words)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn synonyms<S: AsRef<str>>(&self, _words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> {
|
fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> {
|
||||||
Ok(None)
|
self.index.word_documents_count(self.rtxn, word)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -588,7 +588,6 @@ mod test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Default for TestContext {
|
impl Default for TestContext {
|
||||||
|
|
||||||
fn default() -> TestContext {
|
fn default() -> TestContext {
|
||||||
let mut rng = StdRng::seed_from_u64(102);
|
let mut rng = StdRng::seed_from_u64(102);
|
||||||
let rng = &mut rng;
|
let rng = &mut rng;
|
||||||
|
@ -5,6 +5,7 @@ use anyhow::Context;
|
|||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
||||||
use rayon::ThreadPool;
|
use rayon::ThreadPool;
|
||||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
|
|
||||||
@ -71,6 +72,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||||||
criteria: Setting<Vec<String>>,
|
criteria: Setting<Vec<String>>,
|
||||||
stop_words: Setting<BTreeSet<String>>,
|
stop_words: Setting<BTreeSet<String>>,
|
||||||
distinct_attribute: Setting<String>,
|
distinct_attribute: Setting<String>,
|
||||||
|
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||||
@ -96,6 +98,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
criteria: Setting::NotSet,
|
criteria: Setting::NotSet,
|
||||||
stop_words: Setting::NotSet,
|
stop_words: Setting::NotSet,
|
||||||
distinct_attribute: Setting::NotSet,
|
distinct_attribute: Setting::NotSet,
|
||||||
|
synonyms: Setting::NotSet,
|
||||||
update_id,
|
update_id,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -144,12 +147,24 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn reset_distinct_attribute(&mut self) {
|
||||||
|
self.distinct_attribute = Setting::Reset;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn set_distinct_attribute(&mut self, distinct_attribute: String) {
|
pub fn set_distinct_attribute(&mut self, distinct_attribute: String) {
|
||||||
self.distinct_attribute = Setting::Set(distinct_attribute);
|
self.distinct_attribute = Setting::Set(distinct_attribute);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn reset_distinct_attribute(&mut self) {
|
pub fn reset_synonyms(&mut self) {
|
||||||
self.distinct_attribute = Setting::Reset;
|
self.synonyms = Setting::Reset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Registers the synonyms to apply when the settings update is executed.
///
/// An empty map is stored as `Setting::Reset` instead of `Setting::Set`, so
/// passing no synonyms has the same effect as calling `reset_synonyms`.
pub fn set_synonyms(&mut self, synonyms: HashMap<String, Vec<String>>) {
|
||||||
|
self.synonyms = if synonyms.is_empty() {
|
||||||
|
Setting::Reset
|
||||||
|
} else {
|
||||||
|
Setting::Set(synonyms)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
|
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
|
||||||
@ -294,7 +309,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
let current = self.index.stop_words(self.wtxn)?;
|
let current = self.index.stop_words(self.wtxn)?;
|
||||||
// since we can't compare a BTreeSet with an FST we are going to convert the
|
// since we can't compare a BTreeSet with an FST we are going to convert the
|
||||||
// BTreeSet to an FST and then compare the two FSTs byte by byte.
|
// BTreeSet to an FST and then compare the two FSTs byte by byte.
|
||||||
let fst = fst::Set::from_iter(&*stop_words)?;
|
let fst = fst::Set::from_iter(stop_words)?;
|
||||||
|
|
||||||
// Does the new FST differ from the previous one?
|
// Does the new FST differ from the previous one?
|
||||||
if current.map_or(true, |current| current.as_fst().as_bytes() != fst.as_fst().as_bytes()) {
|
if current.map_or(true, |current| current.as_fst().as_bytes() != fst.as_fst().as_bytes()) {
|
||||||
@ -310,6 +325,64 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Applies the pending `synonyms` setting to the index.
///
/// - `Setting::Set`: every word and every synonym is normalized with an
///   `Analyzer` configured with the stop words currently stored in the index,
///   keeping only the tokens for which `is_word()` is true. Entries whose
///   normalized word is identical are merged, each synonyms list is sorted and
///   deduplicated, and the result is written only when it differs from the
///   synonyms already stored.
/// - `Setting::Reset`: deletes the stored synonyms.
/// - `Setting::NotSet`: does nothing.
///
/// Returns `Ok(true)` when the new synonyms were written (for `Reset`, the
/// value returned by `delete_synonyms`), `Ok(false)` otherwise.
///
/// NOTE(review): a word that yields no word token normalizes to an empty
/// `Vec`, which is then used as a map key; confirm this is intended.
fn update_synonyms(&mut self) -> anyhow::Result<bool> {
|
||||||
|
match self.synonyms {
|
||||||
|
Setting::Set(ref synonyms) => {
|
||||||
|
// Splits `text` into tokens and keeps the text of the word tokens only.
fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> Vec<String> {
|
||||||
|
analyzer
|
||||||
|
.analyze(text)
|
||||||
|
.tokens()
|
||||||
|
.filter_map(|token|
|
||||||
|
if token.is_word() { Some(token.text().to_string()) } else { None }
|
||||||
|
)
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut config = AnalyzerConfig::default();
|
||||||
|
// Configure the analyzer with the stop words stored in the index, if any.
let stop_words = self.index.stop_words(self.wtxn)?;
|
||||||
|
if let Some(stop_words) = &stop_words {
|
||||||
|
config.stop_words(stop_words);
|
||||||
|
}
|
||||||
|
let analyzer = Analyzer::new(config);
|
||||||
|
|
||||||
|
let mut new_synonyms = HashMap::new();
|
||||||
|
for (word, synonyms) in synonyms {
|
||||||
|
// Normalize both the word and associated synonyms.
|
||||||
|
let normalized_word = normalize(&analyzer, word);
|
||||||
|
let normalized_synonyms = synonyms
|
||||||
|
.iter()
|
||||||
|
.map(|synonym| normalize(&analyzer, synonym));
|
||||||
|
|
||||||
|
// Store the normalized synonyms under the normalized word,
|
||||||
|
// merging the possible duplicate words.
|
||||||
|
let entry = new_synonyms
|
||||||
|
.entry(normalized_word)
|
||||||
|
.or_insert_with(Vec::new);
|
||||||
|
entry.extend(normalized_synonyms);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure that we don't have duplicate synonyms.
|
||||||
|
new_synonyms
|
||||||
|
.iter_mut()
|
||||||
|
.for_each(|(_, synonyms)| {
|
||||||
|
synonyms.sort_unstable();
|
||||||
|
synonyms.dedup();
|
||||||
|
});
|
||||||
|
|
||||||
|
let old_synonyms = self.index.synonyms(self.wtxn)?;
|
||||||
|
|
||||||
|
// Only write (and report a change) when the normalized map differs.
if new_synonyms != old_synonyms {
|
||||||
|
self.index.put_synonyms(self.wtxn, &new_synonyms)?;
|
||||||
|
Ok(true)
|
||||||
|
} else {
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Setting::Reset => Ok(self.index.delete_synonyms(self.wtxn)?),
|
||||||
|
Setting::NotSet => Ok(false),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn update_facets(&mut self) -> anyhow::Result<bool> {
|
fn update_facets(&mut self) -> anyhow::Result<bool> {
|
||||||
match self.faceted_fields {
|
match self.faceted_fields {
|
||||||
Setting::Set(ref fields) => {
|
Setting::Set(ref fields) => {
|
||||||
@ -359,9 +432,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
// update_criteria MUST be called after update_facets, since criterion fields must be set
|
// update_criteria MUST be called after update_facets, since criterion fields must be set
|
||||||
// as facets.
|
// as facets.
|
||||||
self.update_criteria()?;
|
self.update_criteria()?;
|
||||||
|
let synonyms_updated = self.update_synonyms()?;
|
||||||
let searchable_updated = self.update_searchable()?;
|
let searchable_updated = self.update_searchable()?;
|
||||||
|
|
||||||
if facets_updated || searchable_updated || stop_words_updated {
|
if stop_words_updated || facets_updated || synonyms_updated || searchable_updated {
|
||||||
self.reindex(&progress_callback, old_fields_ids_map)?;
|
self.reindex(&progress_callback, old_fields_ids_map)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -669,6 +743,64 @@ mod tests {
|
|||||||
assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data
|
assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Indexes three documents, registers synonyms in the same write transaction,
// checks that searching by a synonym key finds the documents containing the
// associated words, then resets the synonyms and checks that the same
// queries return nothing.
#[test]
|
||||||
|
fn set_and_reset_synonyms() {
|
||||||
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
// Send 3 documents (kevin, kevina and benoit).
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let content = &b"name,age,maxim\nkevin,23,I love dogs\nkevina,21,Doggos are the best\nbenoit,34,The crepes are really good\n"[..];
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||||
|
builder.update_format(UpdateFormat::Csv);
|
||||||
|
builder.execute(content, |_, _| ()).unwrap();
|
||||||
|
|
||||||
|
// In the same transaction provide some synonyms
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
builder.set_synonyms(hashmap! {
|
||||||
|
"blini".to_string() => vec!["crepes".to_string()],
|
||||||
|
"super like".to_string() => vec!["love".to_string()],
|
||||||
|
"puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()]
|
||||||
|
});
|
||||||
|
builder.execute(|_, _| ()).unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
// Ensure synonyms are effectively stored
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let synonyms = index.synonyms(&rtxn).unwrap();
|
||||||
|
assert!(!synonyms.is_empty()); // at this point the index should return something
|
||||||
|
|
||||||
|
// Check that we can use synonyms
|
||||||
|
// "blini" maps to "crepes", which only appears in benoit's maxim.
let result = index.search(&rtxn).query("blini").execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
|
// "super like" is a two-word key mapping to "love" (kevin's maxim).
let result = index.search(&rtxn).query("super like").execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
|
// "puppies" maps to "dogs" and "doggos" (kevin's and kevina's maxims).
let result = index.search(&rtxn).query("puppies").execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 2);
|
||||||
|
|
||||||
|
// Reset the synonyms
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
builder.reset_synonyms();
|
||||||
|
builder.execute(|_, _| ()).unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
// Ensure synonyms are reset
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let synonyms = index.synonyms(&rtxn).unwrap();
|
||||||
|
assert!(synonyms.is_empty());
|
||||||
|
|
||||||
|
// Check that synonyms no longer work
|
||||||
|
let result = index.search(&rtxn).query("blini").execute().unwrap();
|
||||||
|
assert!(result.documents_ids.is_empty());
|
||||||
|
let result = index.search(&rtxn).query("super like").execute().unwrap();
|
||||||
|
assert!(result.documents_ids.is_empty());
|
||||||
|
let result = index.search(&rtxn).query("puppies").execute().unwrap();
|
||||||
|
assert!(result.documents_ids.is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn setting_searchable_recomputes_other_settings() {
|
fn setting_searchable_recomputes_other_settings() {
|
||||||
let path = tempfile::tempdir().unwrap();
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
Loading…
Reference in New Issue
Block a user