1224: fix synonyms normalization r=MarinPostma a=LegendreM

Synonyms need to be indexed in ascending order,
and the new normalization step for synonyms can change this order,
which breaks the indexing process,
because "Harry Potter" > "HP" but "harry potter" < "hp" (byte-wise comparison: 'a' > 'P' but 'a' < 'p').

Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
bors[bot] 2021-02-04 15:37:33 +00:00 committed by GitHub
commit b1d9ad7134
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 4 deletions

View File

@ -1,7 +1,7 @@
use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}}; use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}};
use heed::Result as ZResult; use heed::Result as ZResult;
use fst::{set::OpBuilder, SetBuilder}; use fst::{SetBuilder, set::OpBuilder};
use sdset::SetBuf; use sdset::SetBuf;
use meilisearch_schema::Schema; use meilisearch_schema::Schema;
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig}; use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
@ -299,15 +299,22 @@ pub fn apply_synonyms_update(
.fold(String::new(), |s, t| s + t.text()) .fold(String::new(), |s, t| s + t.text())
} }
// normalize synonyms and reorder them creating a BTreeMap
let synonyms: BTreeMap<String, Vec<String>> = synonyms.into_iter().map( |(word, alternatives)| {
let word = normalize(&analyzer, &word);
let alternatives = alternatives.into_iter().map(|text| normalize(&analyzer, &text)).collect();
(word, alternatives)
}).collect();
// index synonyms,
// synonyms have to be ordered by key before indexing
let mut synonyms_builder = SetBuilder::memory(); let mut synonyms_builder = SetBuilder::memory();
synonyms_store.clear(writer)?; synonyms_store.clear(writer)?;
for (word, alternatives) in synonyms { for (word, alternatives) in synonyms {
let word = normalize(&analyzer, &word);
synonyms_builder.insert(&word)?; synonyms_builder.insert(&word)?;
let alternatives = { let alternatives = {
let alternatives = alternatives.iter().map(|text| normalize(&analyzer, &text)).collect();
let alternatives = SetBuf::from_dirty(alternatives); let alternatives = SetBuf::from_dirty(alternatives);
let mut alternatives_builder = SetBuilder::memory(); let mut alternatives_builder = SetBuilder::memory();
alternatives_builder.extend_iter(alternatives)?; alternatives_builder.extend_iter(alternatives)?;

View File

@ -171,6 +171,8 @@ async fn write_all_and_update() {
"synonyms": { "synonyms": {
"road": ["street", "avenue"], "road": ["street", "avenue"],
"street": ["avenue"], "street": ["avenue"],
"HP": ["Harry Potter"],
"Harry Potter": ["HP"]
}, },
"attributesForFaceting": ["title"], "attributesForFaceting": ["title"],
}); });
@ -208,6 +210,8 @@ async fn write_all_and_update() {
"synonyms": { "synonyms": {
"road": ["street", "avenue"], "road": ["street", "avenue"],
"street": ["avenue"], "street": ["avenue"],
"hp": ["harry potter"],
"harry potter": ["hp"]
}, },
"attributesForFaceting": ["title"], "attributesForFaceting": ["title"],
}); });