mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Merge #372
372: Fix Meilisearch 1714 r=Kerollmops a=ManyTheFish The bug comes from the typo tolerance: to know how many typos are accepted, we were counting bytes instead of characters in a word. With Chinese script characters, which each take 3 bytes in UTF-8, a 3-character word is 9 bytes long, so we were allowing 2 typos on 3-character words. We now count the number of chars instead of the number of bytes to assign the typo tolerance. Related to [Meilisearch#1714](https://github.com/meilisearch/MeiliSearch/issues/1714) Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
commit
b2a332599e
@ -262,7 +262,7 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
|
||||
/// and the provided word length.
|
||||
fn typos(word: String, authorize_typos: bool) -> QueryKind {
|
||||
if authorize_typos {
|
||||
match word.len() {
|
||||
match word.chars().count() {
|
||||
0..=4 => QueryKind::exact(word),
|
||||
5..=8 => QueryKind::tolerant(1, word),
|
||||
_ => QueryKind::tolerant(2, word),
|
||||
|
@ -981,4 +981,41 @@ mod tests {
|
||||
let count = index.number_of_documents(&rtxn).unwrap();
|
||||
assert_eq!(count, 4);
|
||||
}
|
||||
|
||||
/// Regression test for Meilisearch issue #1714.
///
/// Indexes two documents whose titles both end with the character "包" and
/// checks that a typo-tolerant search for the three-character word "化妆包"
/// returns exactly one document.
#[test]
|
||||
fn test_meilisearch_1714() {
|
||||
// Create an index backed by a temporary directory.
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Two documents: both titles end with "包", but only the first one
// contains the sequence "化妆包".
let content = documents!([
|
||||
{"id": "123", "title": "小化妆包" },
|
||||
{"id": "456", "title": "Ipad 包" }
|
||||
]);
|
||||
|
||||
// Index both documents within a single write transaction and commit it.
let mut wtxn = index.write_txn().unwrap();
|
||||
let builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.execute(content, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// All following checks read the committed state through a read transaction.
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
// Only the first document should match: exactly one document id is
// expected under the word "化妆包".
|
||||
let count = index.word_docids.get(&rtxn, "化妆包").unwrap().unwrap().len();
|
||||
assert_eq!(count, 1);
|
||||
|
||||
// Only the second document should match: exactly one document id is
// expected under the word "包".
|
||||
let count = index.word_docids.get(&rtxn, "包").unwrap().unwrap().len();
|
||||
assert_eq!(count, 1);
|
||||
|
||||
// Search for the three-character word with typo tolerance and optional
// words both enabled.
let mut search = crate::Search::new(&rtxn, &index);
|
||||
search.query("化妆包");
|
||||
search.authorize_typos(true);
|
||||
search.optional_words(true);
|
||||
|
||||
// Only 1 document should be returned.
// NOTE(review): presumably the second document ("包") would also be
// returned if typos were tolerated on this 3-character word — confirm
// against issue #1714.
|
||||
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||
assert_eq!(documents_ids.len(), 1);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user