mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-05 20:48:58 +01:00
fix synonyms normalization
Synonyms need to be indexed in ascending order, and the new normalization step for synonyms can change this order, which breaks the indexing process because "Harry Potter" > "HP" but "harry potter" < "hp".
This commit is contained in:
parent
69c91d2b56
commit
1df0fdf3e2
@ -1,7 +1,7 @@
|
||||
use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}};
|
||||
|
||||
use heed::Result as ZResult;
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
use fst::{SetBuilder, set::OpBuilder};
|
||||
use sdset::SetBuf;
|
||||
use meilisearch_schema::Schema;
|
||||
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
|
||||
@ -298,16 +298,23 @@ pub fn apply_synonyms_update(
|
||||
.tokens()
|
||||
.fold(String::new(), |s, t| s + t.text())
|
||||
}
|
||||
|
||||
// normalize synonyms and reorder them creating a BTreeMap
|
||||
let synonyms: BTreeMap<String, Vec<String>> = synonyms.into_iter().map( |(word, alternatives)| {
|
||||
let word = normalize(&analyzer, &word);
|
||||
let alternatives = alternatives.into_iter().map(|text| normalize(&analyzer, &text)).collect();
|
||||
|
||||
(word, alternatives)
|
||||
}).collect();
|
||||
|
||||
// index synonyms,
|
||||
// synonyms have to be ordered by key before indexation
|
||||
let mut synonyms_builder = SetBuilder::memory();
|
||||
synonyms_store.clear(writer)?;
|
||||
for (word, alternatives) in synonyms {
|
||||
let word = normalize(&analyzer, &word);
|
||||
|
||||
synonyms_builder.insert(&word)?;
|
||||
|
||||
let alternatives = {
|
||||
let alternatives = alternatives.iter().map(|text| normalize(&analyzer, &text)).collect();
|
||||
let alternatives = SetBuf::from_dirty(alternatives);
|
||||
let mut alternatives_builder = SetBuilder::memory();
|
||||
alternatives_builder.extend_iter(alternatives)?;
|
||||
|
@ -171,6 +171,8 @@ async fn write_all_and_update() {
|
||||
"synonyms": {
|
||||
"road": ["street", "avenue"],
|
||||
"street": ["avenue"],
|
||||
"HP": ["Harry Potter"],
|
||||
"Harry Potter": ["HP"]
|
||||
},
|
||||
"attributesForFaceting": ["title"],
|
||||
});
|
||||
@ -208,6 +210,8 @@ async fn write_all_and_update() {
|
||||
"synonyms": {
|
||||
"road": ["street", "avenue"],
|
||||
"street": ["avenue"],
|
||||
"hp": ["harry potter"],
|
||||
"harry potter": ["hp"]
|
||||
},
|
||||
"attributesForFaceting": ["title"],
|
||||
});
|
||||
|
Loading…
Reference in New Issue
Block a user