mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
feat: Correct the sum of typos criterion
Thanks to @tpayet!
This commit is contained in:
parent
d2fb2ff404
commit
6dcec4f473
@ -4,13 +4,18 @@ use crate::Match;
|
||||
use crate::rank::{match_query_index, Document};
|
||||
|
||||
#[inline]
|
||||
fn sum_matches_typos(matches: &[Match]) -> u8 {
|
||||
fn sum_matches_typos(matches: &[Match]) -> i8 {
|
||||
let mut sum_typos = 0;
|
||||
let mut number_words = 0;
|
||||
|
||||
// note that GroupBy will never return an empty group
|
||||
// so we can do this assumption safely
|
||||
// matches must and will never be empty
|
||||
GroupBy::new(matches, match_query_index).map(|group| unsafe {
|
||||
group.get_unchecked(0).distance
|
||||
}).min().unwrap()
|
||||
for group in GroupBy::new(matches, match_query_index) {
|
||||
sum_typos += unsafe { group.get_unchecked(0).distance } as i8;
|
||||
number_words += 1;
|
||||
}
|
||||
|
||||
sum_typos - number_words
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@ -20,3 +25,99 @@ pub fn sum_of_typos(lhs: &Document, rhs: &Document) -> Ordering {
|
||||
|
||||
lhs.cmp(&rhs)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// typing: "Geox CEO"
|
||||
//
|
||||
// doc0: "Geox SpA: CEO and Executive"
|
||||
// doc1: "Mt. Gox CEO Resigns From Bitcoin Foundation"
|
||||
#[test]
|
||||
fn one_typo_reference() {
|
||||
let doc0 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
||||
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
document_id: 0,
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
|
||||
let doc1 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 1, attribute: 0, attribute_index: 0, is_exact: false },
|
||||
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
document_id: 1,
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
|
||||
assert_eq!(sum_of_typos(&doc0, &doc1), Ordering::Less);
|
||||
}
|
||||
|
||||
// typing: "bouton manchette"
|
||||
//
|
||||
// doc0: "bouton manchette"
|
||||
// doc1: "bouton"
|
||||
#[test]
|
||||
fn no_typo() {
|
||||
let doc0 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
||||
Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 1, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
document_id: 0,
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
|
||||
let doc1 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
document_id: 1,
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
|
||||
assert_eq!(sum_of_typos(&doc0, &doc1), Ordering::Less);
|
||||
}
|
||||
|
||||
// typing: "bouton manchztte"
|
||||
//
|
||||
// doc0: "bouton manchette"
|
||||
// doc1: "bouton"
|
||||
#[test]
|
||||
fn one_typo() {
|
||||
let doc0 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
||||
Match { query_index: 1, distance: 1, attribute: 0, attribute_index: 1, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
document_id: 0,
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
|
||||
let doc1 = {
|
||||
let matches = vec![
|
||||
Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false },
|
||||
];
|
||||
Document {
|
||||
document_id: 1,
|
||||
matches: matches,
|
||||
}
|
||||
};
|
||||
|
||||
assert_eq!(sum_of_typos(&doc0, &doc1), Ordering::Equal);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user