MeiliSearch/meilisearch-core/src/criterion/typo.rs

56 lines
1.7 KiB
Rust
Raw Normal View History

2019-12-11 17:02:10 +01:00
use std::cmp::Ordering;
2019-12-13 11:14:12 +01:00
use crate::{RawDocument, MResult};
use super::{Criterion, Context, ContextMut, prepare_query_distances};
2019-12-11 17:02:10 +01:00
/// Ranking criterion registered under the name `"typo"`.
///
/// Its `Criterion` implementation orders documents by a score derived from the
/// per-query-word typo distances stored in each document's `processed_distances`.
pub struct Typo;
impl Criterion for Typo {
fn name(&self) -> &str { "typo" }
2019-12-13 11:14:12 +01:00
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'a, 'r>(
2019-12-11 17:02:10 +01:00
&self,
2019-12-13 11:14:12 +01:00
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
documents: &mut [RawDocument<'r, 'tag>],
2019-12-13 11:14:12 +01:00
) -> MResult<()>
{
prepare_query_distances(documents, ctx.query_enhancer, ctx.postings_lists);
Ok(())
2019-12-11 17:02:10 +01:00
}
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
2019-12-11 17:02:10 +01:00
// This function is a wrong logarithmic 10 function.
// It is safe to panic on input number higher than 3,
// the number of typos is never bigger than that.
#[inline]
fn custom_log10(n: u8) -> f32 {
match n {
0 => 0.0, // log(1)
1 => 0.30102, // log(2)
2 => 0.47712, // log(3)
3 => 0.60205, // log(4)
_ => panic!("invalid number"),
}
}
#[inline]
fn compute_typos(distances: &[Option<u8>]) -> usize {
let mut number_words: usize = 0;
let mut sum_typos = 0.0;
for distance in distances {
if let Some(distance) = distance {
sum_typos += custom_log10(*distance);
number_words += 1;
}
}
(number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
}
let lhs = compute_typos(&lhs.processed_distances);
let rhs = compute_typos(&rhs.processed_distances);
lhs.cmp(&rhs).reverse()
}
}