mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Merge #1045
1045: Revert "Merge #1037" r=MarinPostma a=MarinPostma This reverts commit 257f9fb2b2,
reversing changes made to 9bae7a35bf.
The reason for this is that de-unicoding is not always desirable (for example, in the case of CJK documents). This cannot be handled correctly for now, and will necessitate work on the tokenizer. Co-authored-by: mpostma <postma.marin@protonmail.com>
This commit is contained in:
commit
e9f9f270e1
@ -12,14 +12,18 @@ pub struct Synonyms {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Synonyms {
|
impl Synonyms {
|
||||||
pub(crate) fn put_synonyms<A>(self, writer: &mut heed::RwTxn<MainT>, word: &[u8], synonyms: &fst::Set<A>) -> ZResult<()>
|
pub fn put_synonyms<A>(self, writer: &mut heed::RwTxn<MainT>, word: &[u8], synonyms: &fst::Set<A>) -> ZResult<()>
|
||||||
where A: AsRef<[u8]>,
|
where A: AsRef<[u8]>,
|
||||||
{
|
{
|
||||||
let bytes = synonyms.as_fst().as_bytes();
|
let bytes = synonyms.as_fst().as_bytes();
|
||||||
self.synonyms.put(writer, word, bytes)
|
self.synonyms.put(writer, word, bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
pub fn del_synonyms(self, writer: &mut heed::RwTxn<MainT>, word: &[u8]) -> ZResult<bool> {
|
||||||
|
self.synonyms.delete(writer, word)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||||
self.synonyms.clear(writer)
|
self.synonyms.clear(writer)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -126,7 +126,7 @@ pub fn apply_settings_update(
|
|||||||
}
|
}
|
||||||
|
|
||||||
match settings.synonyms {
|
match settings.synonyms {
|
||||||
UpdateState::Update(synonyms) => apply_synonyms_update(writer, index, canonicalize_synonyms(synonyms))? ,
|
UpdateState::Update(synonyms) => apply_synonyms_update(writer, index, synonyms)?,
|
||||||
UpdateState::Clear => apply_synonyms_update(writer, index, BTreeMap::new())?,
|
UpdateState::Clear => apply_synonyms_update(writer, index, BTreeMap::new())?,
|
||||||
UpdateState::Nothing => (),
|
UpdateState::Nothing => (),
|
||||||
}
|
}
|
||||||
@ -138,18 +138,6 @@ pub fn apply_settings_update(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn canonicalize_synonyms(synonyms: BTreeMap<String, Vec<String>>) -> BTreeMap<String, Vec<String>> {
|
|
||||||
let mut canonicalized = BTreeMap::new();
|
|
||||||
for (key, values) in synonyms {
|
|
||||||
let deunicoded = deunicode::deunicode(&key);
|
|
||||||
canonicalized
|
|
||||||
.entry(deunicoded)
|
|
||||||
.or_insert_with(Vec::new)
|
|
||||||
.extend_from_slice(&values);
|
|
||||||
}
|
|
||||||
canonicalized
|
|
||||||
}
|
|
||||||
|
|
||||||
fn apply_attributes_for_faceting_update(
|
fn apply_attributes_for_faceting_update(
|
||||||
writer: &mut heed::RwTxn<MainT>,
|
writer: &mut heed::RwTxn<MainT>,
|
||||||
index: &store::Index,
|
index: &store::Index,
|
||||||
|
@ -1829,38 +1829,3 @@ async fn update_documents_with_facet_distribution() {
|
|||||||
let (response2, _) = server.search_post(search).await;
|
let (response2, _) = server.search_post(search).await;
|
||||||
assert_json_eq!(expected_facet_distribution, response2["facetsDistribution"].clone());
|
assert_json_eq!(expected_facet_distribution, response2["facetsDistribution"].clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
|
||||||
async fn test_search_synonyms_unicased() {
|
|
||||||
let mut server = common::Server::with_uid("test");
|
|
||||||
let body = json!({ "uid": "test" });
|
|
||||||
server.create_index(body).await;
|
|
||||||
let settings = json!({
|
|
||||||
"synonyms": {
|
|
||||||
"cáse": ["truc"],
|
|
||||||
"case": ["machin"]
|
|
||||||
}
|
|
||||||
});
|
|
||||||
server.update_all_settings(settings).await;
|
|
||||||
|
|
||||||
let (response, _) = server.get_synonyms().await;
|
|
||||||
assert_json_eq!(response, json!({"case":["machin", "truc"]}));
|
|
||||||
|
|
||||||
let update = json!([
|
|
||||||
{
|
|
||||||
"id": "1",
|
|
||||||
"title": "truc"
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
server.add_or_update_multiple_documents(update).await;
|
|
||||||
|
|
||||||
let search = json!({
|
|
||||||
"q": "case",
|
|
||||||
});
|
|
||||||
let (response, _) = server.search_post(search).await;
|
|
||||||
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
|
|
||||||
|
|
||||||
server.delete_synonyms().await;
|
|
||||||
let (response, _) = server.get_synonyms().await;
|
|
||||||
assert_json_eq!(response, json!({}));
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user