Update the exact criterion to use the documents fields counts

This commit is contained in:
Clément Renault 2019-10-14 18:48:54 +02:00
parent e629f51af4
commit 9ff92c5d15
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -1,16 +1,38 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use sdset::Set;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use meilidb_schema::SchemaAttr;
use crate::criterion::Criterion; use crate::criterion::Criterion;
use crate::RawDocument; use crate::RawDocument;
#[inline] #[inline]
fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize { fn number_exact_matches(
query_index: &[u32],
attribute: &[u16],
is_exact: &[bool],
fields_counts: &Set<(SchemaAttr, u64)>,
) -> usize
{
let mut count = 0; let mut count = 0;
let mut index = 0; let mut index = 0;
for group in query_index.linear_group() { for group in query_index.linear_group() {
let len = group.len(); let len = group.len();
count += is_exact[index..index + len].contains(&true) as usize;
let mut found_exact = false;
for (pos, _) in is_exact[index..index + len].iter().filter(|x| **x).enumerate() {
found_exact = true;
if let Ok(pos) = fields_counts.binary_search_by_key(&attribute[pos], |(a, _)| a.0) {
let (_, count) = fields_counts[pos];
if count == 1 {
return usize::max_value()
}
}
}
count += found_exact as usize;
index += len; index += len;
} }
@ -25,13 +47,19 @@ impl Criterion for Exact {
let lhs = { let lhs = {
let query_index = lhs.query_index(); let query_index = lhs.query_index();
let is_exact = lhs.is_exact(); let is_exact = lhs.is_exact();
number_exact_matches(query_index, is_exact) let attribute = lhs.attribute();
let fields_counts = &lhs.fields_counts;
number_exact_matches(query_index, attribute, is_exact, fields_counts)
}; };
let rhs = { let rhs = {
let query_index = rhs.query_index(); let query_index = rhs.query_index();
let is_exact = rhs.is_exact(); let is_exact = rhs.is_exact();
number_exact_matches(query_index, is_exact) let attribute = rhs.attribute();
let fields_counts = &rhs.fields_counts;
number_exact_matches(query_index, attribute, is_exact, fields_counts)
}; };
lhs.cmp(&rhs).reverse() lhs.cmp(&rhs).reverse()
@ -52,14 +80,51 @@ mod tests {
// doc1: "souliereres rouge" // doc1: "souliereres rouge"
#[test] #[test]
fn easy_case() { fn easy_case() {
let query_index0 = &[0]; let doc0 = {
let is_exact0 = &[true]; let query_index = &[0];
let attribute = &[0];
let is_exact = &[true];
let fields_counts = Set::new(&[(SchemaAttr(0), 2)]).unwrap();
let query_index1 = &[0]; number_exact_matches(query_index, attribute, is_exact, fields_counts)
let is_exact1 = &[false]; };
let doc1 = {
let query_index = &[0];
let attribute = &[0];
let is_exact = &[false];
let fields_counts = Set::new(&[(SchemaAttr(0), 2)]).unwrap();
number_exact_matches(query_index, attribute, is_exact, fields_counts)
};
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "soulier"
//
// doc0: { 0. "soulier" }
// doc1: { 0. "soulier bleu et blanc" }
#[test]
fn basic() {
let doc0 = {
let query_index = &[0];
let attribute = &[0];
let is_exact = &[true];
let fields_counts = Set::new(&[(SchemaAttr(0), 1)]).unwrap();
number_exact_matches(query_index, attribute, is_exact, fields_counts)
};
let doc1 = {
let query_index = &[0];
let attribute = &[0];
let is_exact = &[true];
let fields_counts = Set::new(&[(SchemaAttr(0), 4)]).unwrap();
number_exact_matches(query_index, attribute, is_exact, fields_counts)
};
let doc0 = number_exact_matches(query_index0, is_exact0);
let doc1 = number_exact_matches(query_index1, is_exact1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less); assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
} }
} }