From 9ff92c5d15c921d161159cfa2744d960998a331b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 14 Oct 2019 18:48:54 +0200 Subject: [PATCH] Update the exact criterion to use the documents fields counts --- meilidb-core/src/criterion/exact.rs | 85 +++++++++++++++++++++++++---- 1 file changed, 75 insertions(+), 10 deletions(-) diff --git a/meilidb-core/src/criterion/exact.rs b/meilidb-core/src/criterion/exact.rs index 820c35aa0..94c4d2746 100644 --- a/meilidb-core/src/criterion/exact.rs +++ b/meilidb-core/src/criterion/exact.rs @@ -1,16 +1,38 @@ use std::cmp::Ordering; + +use sdset::Set; use slice_group_by::GroupBy; +use meilidb_schema::SchemaAttr; + use crate::criterion::Criterion; use crate::RawDocument; #[inline] -fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize { +fn number_exact_matches( + query_index: &[u32], + attribute: &[u16], + is_exact: &[bool], + fields_counts: &Set<(SchemaAttr, u64)>, +) -> usize +{ let mut count = 0; let mut index = 0; for group in query_index.linear_group() { let len = group.len(); - count += is_exact[index..index + len].contains(&true) as usize; + + let mut found_exact = false; + for (pos, _) in is_exact[index..index + len].iter().filter(|x| **x).enumerate() { + found_exact = true; + if let Ok(pos) = fields_counts.binary_search_by_key(&attribute[pos], |(a, _)| a.0) { + let (_, count) = fields_counts[pos]; + if count == 1 { + return usize::max_value() + } + } + } + + count += found_exact as usize; index += len; } @@ -25,13 +47,19 @@ impl Criterion for Exact { let lhs = { let query_index = lhs.query_index(); let is_exact = lhs.is_exact(); - number_exact_matches(query_index, is_exact) + let attribute = lhs.attribute(); + let fields_counts = &lhs.fields_counts; + + number_exact_matches(query_index, attribute, is_exact, fields_counts) }; let rhs = { let query_index = rhs.query_index(); let is_exact = rhs.is_exact(); - number_exact_matches(query_index, is_exact) + let attribute = rhs.attribute(); + let fields_counts = &rhs.fields_counts; + + number_exact_matches(query_index, attribute, is_exact, fields_counts) }; lhs.cmp(&rhs).reverse() @@ -52,14 +80,51 @@ mod tests { // doc1: "souliereres rouge" #[test] fn easy_case() { - let query_index0 = &[0]; - let is_exact0 = &[true]; + let doc0 = { + let query_index = &[0]; + let attribute = &[0]; + let is_exact = &[true]; + let fields_counts = Set::new(&[(SchemaAttr(0), 2)]).unwrap(); - let query_index1 = &[0]; - let is_exact1 = &[false]; + number_exact_matches(query_index, attribute, is_exact, fields_counts) + }; + + let doc1 = { + let query_index = &[0]; + let attribute = &[0]; + let is_exact = &[false]; + let fields_counts = Set::new(&[(SchemaAttr(0), 2)]).unwrap(); + + number_exact_matches(query_index, attribute, is_exact, fields_counts) + }; + + assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less); + } + + // typing: "soulier" + // + // doc0: { 0. "soulier" } + // doc1: { 0. "soulier bleu et blanc" } + #[test] + fn basic() { + let doc0 = { + let query_index = &[0]; + let attribute = &[0]; + let is_exact = &[true]; + let fields_counts = Set::new(&[(SchemaAttr(0), 1)]).unwrap(); + + number_exact_matches(query_index, attribute, is_exact, fields_counts) + }; + + let doc1 = { + let query_index = &[0]; + let attribute = &[0]; + let is_exact = &[true]; + let fields_counts = Set::new(&[(SchemaAttr(0), 4)]).unwrap(); + + number_exact_matches(query_index, attribute, is_exact, fields_counts) + }; - let doc0 = number_exact_matches(query_index0, is_exact0); - let doc1 = number_exact_matches(query_index1, is_exact1); assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less); } }