2019-12-11 17:02:10 +01:00
|
|
|
use std::cmp::{Ordering, Reverse};
|
2019-12-13 11:33:22 +01:00
|
|
|
use std::collections::hash_map::{HashMap, Entry};
|
2020-01-13 19:10:58 +01:00
|
|
|
use meilisearch_schema::IndexedPos;
|
2019-10-02 17:34:32 +02:00
|
|
|
use slice_group_by::GroupBy;
|
2019-12-13 11:14:12 +01:00
|
|
|
use crate::{RawDocument, MResult};
|
2019-12-12 11:33:39 +01:00
|
|
|
use crate::bucket_sort::BareMatch;
|
|
|
|
use super::{Criterion, Context, ContextMut};
|
2019-10-02 17:34:32 +02:00
|
|
|
|
2020-01-31 11:45:57 +01:00
|
|
|
/// Ranking criterion that orders documents by how "exact" their matches are:
/// documents where an exact match fills an entire one-word field rank first,
/// then documents whose matches cover more query words exactly (see the
/// `Criterion` impl below).
pub struct Exactness;
|
2019-10-02 17:34:32 +02:00
|
|
|
|
2020-01-31 11:45:57 +01:00
|
|
|
impl Criterion for Exactness {
|
|
|
|
fn name(&self) -> &str { "exactness" }
|
2019-12-11 17:02:10 +01:00
|
|
|
|
2020-01-13 14:36:06 +01:00
|
|
|
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
2019-12-13 11:14:12 +01:00
|
|
|
&self,
|
2020-01-13 14:36:06 +01:00
|
|
|
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
2019-12-13 11:14:12 +01:00
|
|
|
documents: &mut [RawDocument<'r, 'tag>],
|
|
|
|
) -> MResult<()>
|
|
|
|
{
|
2019-12-13 11:33:22 +01:00
|
|
|
let store = ctx.documents_fields_counts_store;
|
|
|
|
let reader = ctx.reader;
|
|
|
|
|
|
|
|
'documents: for doc in documents {
|
2019-12-13 12:38:54 +01:00
|
|
|
doc.bare_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
|
2019-12-13 11:33:22 +01:00
|
|
|
|
|
|
|
// mark the document if we find a "one word field" that matches
|
|
|
|
let mut fields_counts = HashMap::new();
|
2019-12-13 12:38:54 +01:00
|
|
|
for group in doc.bare_matches.linear_group_by_key(|bm| bm.query_index) {
|
2019-12-13 11:33:22 +01:00
|
|
|
for group in group.linear_group_by_key(|bm| bm.is_exact) {
|
|
|
|
if !group[0].is_exact { break }
|
|
|
|
|
|
|
|
for bm in group {
|
|
|
|
for di in ctx.postings_lists[bm.postings_list].as_ref() {
|
|
|
|
|
2020-01-13 19:10:58 +01:00
|
|
|
let attr = IndexedPos(di.attribute);
|
2019-12-13 11:33:22 +01:00
|
|
|
let count = match fields_counts.entry(attr) {
|
|
|
|
Entry::Occupied(entry) => *entry.get(),
|
|
|
|
Entry::Vacant(entry) => {
|
|
|
|
let count = store.document_field_count(reader, doc.id, attr)?;
|
|
|
|
*entry.insert(count)
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
if count == Some(1) {
|
|
|
|
doc.contains_one_word_field = true;
|
|
|
|
continue 'documents
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-12-11 17:02:10 +01:00
|
|
|
}
|
2019-12-13 11:33:22 +01:00
|
|
|
|
2019-12-13 11:14:12 +01:00
|
|
|
Ok(())
|
2019-10-14 18:48:54 +02:00
|
|
|
}
|
|
|
|
|
2019-12-12 11:33:39 +01:00
|
|
|
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
2019-12-11 17:02:10 +01:00
|
|
|
#[inline]
|
|
|
|
fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
|
|
|
|
let mut sum_exact_query_words = 0;
|
|
|
|
|
|
|
|
for group in matches.linear_group_by_key(|bm| bm.query_index) {
|
|
|
|
sum_exact_query_words += group[0].is_exact as usize;
|
|
|
|
}
|
2019-10-14 18:48:54 +02:00
|
|
|
|
2019-12-11 17:02:10 +01:00
|
|
|
sum_exact_query_words
|
|
|
|
}
|
2019-10-14 18:48:54 +02:00
|
|
|
|
2019-12-13 11:33:22 +01:00
|
|
|
// does it contains a "one word field"
|
|
|
|
lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
|
|
|
|
// if not, with document contains the more exact words
|
|
|
|
.then_with(|| {
|
2019-12-13 12:38:54 +01:00
|
|
|
let lhs = sum_exact_query_words(&lhs.bare_matches);
|
|
|
|
let rhs = sum_exact_query_words(&rhs.bare_matches);
|
2019-12-13 11:33:22 +01:00
|
|
|
lhs.cmp(&rhs).reverse()
|
|
|
|
})
|
2019-10-02 17:34:32 +02:00
|
|
|
}
|
|
|
|
}
|