1037: Synonym unidecode r=Kerollmops a=MarinPostma

fix #964 

- unidecodes all synonyms before adding them to the synonyms fst
- stores a copy of the original (non-unidecoded) synonyms so they can be retrieved later

Co-authored-by: mpostma <postma.marin@protonmail.com>
This commit is contained in:
bors[bot] 2020-10-27 10:57:40 +00:00 committed by GitHub
commit 257f9fb2b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 50 additions and 7 deletions

View File

@ -12,18 +12,14 @@ pub struct Synonyms {
}
impl Synonyms {
// NOTE(review): two signature lines follow (`pub` and `pub(crate)`); this looks like the
// old and new lines of a diff rendered together — confirm which one is current.
/// Stores the bytes of the fst underlying `synonyms` under the key `word`.
///
/// The result of `self.synonyms.put` is returned unchanged.
pub fn put_synonyms<A>(self, writer: &mut heed::RwTxn<MainT>, word: &[u8], synonyms: &fst::Set<A>) -> ZResult<()>
pub(crate) fn put_synonyms<A>(self, writer: &mut heed::RwTxn<MainT>, word: &[u8], synonyms: &fst::Set<A>) -> ZResult<()>
where A: AsRef<[u8]>,
{
// The set is persisted as the raw bytes of its underlying fst.
let bytes = synonyms.as_fst().as_bytes();
self.synonyms.put(writer, word, bytes)
}
/// Deletes the entry stored under the key `word`.
///
/// The result of `self.synonyms.delete` is returned unchanged.
pub fn del_synonyms(self, writer: &mut heed::RwTxn<MainT>, word: &[u8]) -> ZResult<bool> {
self.synonyms.delete(writer, word)
}
// NOTE(review): two signature lines follow (`pub` and `pub(crate)`); this looks like the
// old and new lines of a diff rendered together — confirm which one is current.
/// Clears the synonyms store by delegating to `self.synonyms.clear`.
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
pub(crate) fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
self.synonyms.clear(writer)
}

View File

@ -126,7 +126,7 @@ pub fn apply_settings_update(
}
match settings.synonyms {
UpdateState::Update(synonyms) => apply_synonyms_update(writer, index, synonyms)?,
UpdateState::Update(synonyms) => apply_synonyms_update(writer, index, canonicalize_synonyms(synonyms))? ,
UpdateState::Clear => apply_synonyms_update(writer, index, BTreeMap::new())?,
UpdateState::Nothing => (),
}
@ -138,6 +138,18 @@ pub fn apply_settings_update(
Ok(())
}
/// Merges synonym entries whose keys become identical after `deunicode::deunicode`.
///
/// Every key of `synonyms` is passed through `deunicode::deunicode`. The value lists of
/// all keys that map to the same result are concatenated under that single key, in the
/// iteration order of the input map. The values themselves are left untouched and
/// duplicates are kept.
fn canonicalize_synonyms(synonyms: BTreeMap<String, Vec<String>>) -> BTreeMap<String, Vec<String>> {
    let mut canonicalized: BTreeMap<String, Vec<String>> = BTreeMap::new();
    for (key, values) in synonyms {
        // `values` is owned here, so move the strings into the merged list
        // instead of cloning each one.
        canonicalized
            .entry(deunicode::deunicode(&key))
            .or_default()
            .extend(values);
    }
    canonicalized
}
fn apply_attributes_for_faceting_update(
writer: &mut heed::RwTxn<MainT>,
index: &store::Index,

View File

@ -1829,3 +1829,38 @@ async fn update_documents_with_facet_distribution() {
let (response2, _) = server.search_post(search).await;
assert_json_eq!(expected_facet_distribution, response2["facetsDistribution"].clone());
}
/// Checks that synonym keys differing only by an accent are stored under one key,
/// that the merged synonyms are used by search, and that they can be deleted.
#[actix_rt::test]
async fn test_search_synonyms_unicased() {
    let mut server = common::Server::with_uid("test");
    server.create_index(json!({ "uid": "test" })).await;

    // Register synonyms for two keys that differ only by the accent on the "a".
    let synonyms_settings = json!({
        "synonyms": {
            "cáse": ["truc"],
            "case": ["machin"]
        }
    });
    server.update_all_settings(synonyms_settings).await;

    // Both value lists are expected back under the single key "case".
    let (stored_synonyms, _) = server.get_synonyms().await;
    assert_json_eq!(stored_synonyms, json!({"case":["machin", "truc"]}));

    // Index one document whose title is one of the synonyms.
    let documents = json!([
        {
            "id": "1",
            "title": "truc"
        },
    ]);
    server.add_or_update_multiple_documents(documents).await;

    // Searching for the key must return exactly one hit.
    let query = json!({
        "q": "case",
    });
    let (search_result, _) = server.search_post(query).await;
    assert_eq!(search_result["hits"].as_array().unwrap().len(), 1);

    // After deletion the synonyms route must return an empty object.
    server.delete_synonyms().await;
    let (remaining_synonyms, _) = server.get_synonyms().await;
    assert_json_eq!(remaining_synonyms, json!({}));
}