feat: Optimize the SumOfTypos criterion

This commit is contained in:
Clément Renault 2019-02-23 18:36:45 +01:00
parent 64971de7ed
commit f8a743ee00
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE

View File

@ -5,22 +5,33 @@ use slice_group_by::GroupBy;
use crate::rank::criterion::Criterion; use crate::rank::criterion::Criterion;
use crate::rank::RawDocument; use crate::rank::RawDocument;
// This function is not a true base-10 logarithm: it returns a
// precomputed value of log10(n + 1) for the given number of typos.
// It is safe to panic on input numbers higher than 3,
// because the number of typos is never bigger than that.
#[inline] #[inline]
fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> isize { fn custom_log10(n: u8) -> f32 {
let mut number_words = 0.0; match n {
0 => 0.0, // log(1)
1 => 0.30102, // log(2)
2 => 0.47712, // log(3)
3 => 0.60205, // log(4)
_ => panic!("invalid number"),
}
}
#[inline]
fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize {
let mut number_words = 0;
let mut sum_typos = 0.0; let mut sum_typos = 0.0;
let mut index = 0; let mut index = 0;
for group in query_index.linear_group() { for group in query_index.linear_group() {
let typo = distance[index] as f32; sum_typos += custom_log10(distance[index]);
sum_typos += (typo + 1.0).log10(); number_words += 1;
number_words += 1.0_f32;
index += group.len(); index += group.len();
} }
let out = number_words / (sum_typos + 1.0); (number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
(out * 1000.0) as isize
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]