From f8a743ee00dc7f7db1a51b1ba617e056ca5c52bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 23 Feb 2019 18:36:45 +0100 Subject: [PATCH] feat: Optimize the SumOfTypos criterion --- src/rank/criterion/sum_of_typos.rs | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/rank/criterion/sum_of_typos.rs b/src/rank/criterion/sum_of_typos.rs index c218293e3..bbffec870 100644 --- a/src/rank/criterion/sum_of_typos.rs +++ b/src/rank/criterion/sum_of_typos.rs @@ -5,22 +5,33 @@ use slice_group_by::GroupBy; use crate::rank::criterion::Criterion; use crate::rank::RawDocument; +// This is not a true base-10 logarithm: it returns approximately log10(n + 1) from a lookup table. +// It is safe to panic on an input higher than 3, +// because the number of typos is never bigger than that. #[inline] -fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> isize { - let mut number_words = 0.0; +fn custom_log10(n: u8) -> f32 { + match n { + 0 => 0.0, // log(1) + 1 => 0.30102, // log(2) + 2 => 0.47712, // log(3) + 3 => 0.60205, // log(4) + _ => panic!("invalid number"), + } +} + +#[inline] +fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize { + let mut number_words = 0; let mut sum_typos = 0.0; let mut index = 0; for group in query_index.linear_group() { - let typo = distance[index] as f32; - sum_typos += (typo + 1.0).log10(); - number_words += 1.0_f32; + sum_typos += custom_log10(distance[index]); + number_words += 1; index += group.len(); } - let out = number_words / (sum_typos + 1.0); - - (out * 1000.0) as isize + (number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize } #[derive(Debug, Clone, Copy)]