mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Merge #372
372: Fix Meilisearch 1714 r=Kerollmops a=ManyTheFish The bug comes from the typo tolerance: to determine how many typos are accepted, we were counting bytes instead of characters in a word. With Chinese script characters, we were allowing 2 typos on 3-character words. We now count the number of characters instead of the number of bytes to assign the typo tolerance. Related to [Meilisearch#1714](https://github.com/meilisearch/MeiliSearch/issues/1714) Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
commit
b2a332599e
@ -262,7 +262,7 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
|
|||||||
/// and the provided word length.
|
/// and the provided word length.
|
||||||
fn typos(word: String, authorize_typos: bool) -> QueryKind {
|
fn typos(word: String, authorize_typos: bool) -> QueryKind {
|
||||||
if authorize_typos {
|
if authorize_typos {
|
||||||
match word.len() {
|
match word.chars().count() {
|
||||||
0..=4 => QueryKind::exact(word),
|
0..=4 => QueryKind::exact(word),
|
||||||
5..=8 => QueryKind::tolerant(1, word),
|
5..=8 => QueryKind::tolerant(1, word),
|
||||||
_ => QueryKind::tolerant(2, word),
|
_ => QueryKind::tolerant(2, word),
|
||||||
|
@ -981,4 +981,41 @@ mod tests {
|
|||||||
let count = index.number_of_documents(&rtxn).unwrap();
|
let count = index.number_of_documents(&rtxn).unwrap();
|
||||||
assert_eq!(count, 4);
|
assert_eq!(count, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Regression test for Meilisearch issue #1714.
///
/// The typo budget of a query word used to be derived from its byte length,
/// so a 3-character Chinese word (9 bytes in UTF-8) was granted 2 typos.
/// This test indexes two documents sharing the character "包" and checks
/// that searching for "化妆包" with typos authorized returns a single document.
#[test]
|
||||||
|
fn test_meilisearch_1714() {
|
||||||
|
// Create the index in a fresh temporary directory.
let path = tempfile::tempdir().unwrap();
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
// Two documents whose titles both end with the character "包".
let content = documents!([
|
||||||
|
{"id": "123", "title": "小化妆包" },
|
||||||
|
{"id": "456", "title": "Ipad 包" }
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Index both documents inside a single write transaction and commit it.
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||||
|
builder.execute(content, |_, _| ()).unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
// All the checks below read through this read transaction.
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
// Only the first document should match: the word "化妆包" must be
// stored in `word_docids` with exactly one document id.
|
||||||
|
let count = index.word_docids.get(&rtxn, "化妆包").unwrap().unwrap().len();
|
||||||
|
assert_eq!(count, 1);
|
||||||
|
|
||||||
|
// Only the second document should match: the standalone word "包" must
// also be stored with exactly one document id.
|
||||||
|
let count = index.word_docids.get(&rtxn, "包").unwrap().unwrap().len();
|
||||||
|
assert_eq!(count, 1);
|
||||||
|
|
||||||
|
// Search for the 3-character word with typos and optional words enabled,
// i.e. the configuration under which the bug was reported.
let mut search = crate::Search::new(&rtxn, &index);
|
||||||
|
search.query("化妆包");
|
||||||
|
search.authorize_typos(true);
|
||||||
|
search.optional_words(true);
|
||||||
|
|
||||||
|
// Only 1 document should be returned.
// NOTE(review): presumably the byte-based typo budget let "化妆包" also
// match "包" in the second document — confirm against the issue.
|
||||||
|
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
|
assert_eq!(documents_ids.len(), 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user