2019-12-11 17:02:10 +01:00
|
|
|
use std::cmp::{Ordering, Reverse};
|
2019-12-13 11:33:22 +01:00
|
|
|
use std::collections::hash_map::{HashMap, Entry};
|
2020-01-13 19:10:58 +01:00
|
|
|
use meilisearch_schema::IndexedPos;
|
2019-10-02 17:34:32 +02:00
|
|
|
use slice_group_by::GroupBy;
|
2019-12-13 11:14:12 +01:00
|
|
|
use crate::{RawDocument, MResult};
|
2019-12-12 11:33:39 +01:00
|
|
|
use crate::bucket_sort::BareMatch;
|
|
|
|
use super::{Criterion, Context, ContextMut};
|
2019-10-02 17:34:32 +02:00
|
|
|
|
2020-01-31 11:45:57 +01:00
|
|
|
/// Ranking criterion that orders documents by how "exact" their matches are:
/// documents where an exact match fills an entire one-word field rank first,
/// then documents whose matches cover more query words exactly (see the
/// `Criterion` impl below).
pub struct Exactness;
|
2019-10-02 17:34:32 +02:00
|
|
|
|
2020-01-31 11:45:57 +01:00
|
|
|
impl Criterion for Exactness {
|
|
|
|
fn name(&self) -> &str { "exactness" }
|
2019-12-11 17:02:10 +01:00
|
|
|
|
2020-01-13 14:36:06 +01:00
|
|
|
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
|
2019-12-13 11:14:12 +01:00
|
|
|
&self,
|
2020-01-13 14:36:06 +01:00
|
|
|
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
|
2019-12-13 11:14:12 +01:00
|
|
|
documents: &mut [RawDocument<'r, 'tag>],
|
|
|
|
) -> MResult<()>
|
|
|
|
{
|
2019-12-13 11:33:22 +01:00
|
|
|
let store = ctx.documents_fields_counts_store;
|
|
|
|
let reader = ctx.reader;
|
|
|
|
|
|
|
|
'documents: for doc in documents {
|
2019-12-13 12:38:54 +01:00
|
|
|
doc.bare_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
|
2019-12-13 11:33:22 +01:00
|
|
|
|
|
|
|
// mark the document if we find a "one word field" that matches
|
|
|
|
let mut fields_counts = HashMap::new();
|
2019-12-13 12:38:54 +01:00
|
|
|
for group in doc.bare_matches.linear_group_by_key(|bm| bm.query_index) {
|
2019-12-13 11:33:22 +01:00
|
|
|
for group in group.linear_group_by_key(|bm| bm.is_exact) {
|
|
|
|
if !group[0].is_exact { break }
|
|
|
|
|
|
|
|
for bm in group {
|
|
|
|
for di in ctx.postings_lists[bm.postings_list].as_ref() {
|
|
|
|
|
2020-01-13 19:10:58 +01:00
|
|
|
let attr = IndexedPos(di.attribute);
|
2019-12-13 11:33:22 +01:00
|
|
|
let count = match fields_counts.entry(attr) {
|
|
|
|
Entry::Occupied(entry) => *entry.get(),
|
|
|
|
Entry::Vacant(entry) => {
|
|
|
|
let count = store.document_field_count(reader, doc.id, attr)?;
|
|
|
|
*entry.insert(count)
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
if count == Some(1) {
|
|
|
|
doc.contains_one_word_field = true;
|
|
|
|
continue 'documents
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-12-11 17:02:10 +01:00
|
|
|
}
|
2019-12-13 11:33:22 +01:00
|
|
|
|
2019-12-13 11:14:12 +01:00
|
|
|
Ok(())
|
2019-10-14 18:48:54 +02:00
|
|
|
}
|
|
|
|
|
2019-12-12 11:33:39 +01:00
|
|
|
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
2019-12-11 17:02:10 +01:00
|
|
|
#[inline]
|
|
|
|
fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
|
|
|
|
let mut sum_exact_query_words = 0;
|
|
|
|
|
|
|
|
for group in matches.linear_group_by_key(|bm| bm.query_index) {
|
|
|
|
sum_exact_query_words += group[0].is_exact as usize;
|
|
|
|
}
|
2019-10-14 18:48:54 +02:00
|
|
|
|
2019-12-11 17:02:10 +01:00
|
|
|
sum_exact_query_words
|
|
|
|
}
|
2019-10-14 18:48:54 +02:00
|
|
|
|
2019-12-13 11:33:22 +01:00
|
|
|
// does it contains a "one word field"
|
|
|
|
lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
|
|
|
|
// if not, with document contains the more exact words
|
|
|
|
.then_with(|| {
|
2019-12-13 12:38:54 +01:00
|
|
|
let lhs = sum_exact_query_words(&lhs.bare_matches);
|
|
|
|
let rhs = sum_exact_query_words(&rhs.bare_matches);
|
2019-12-13 11:33:22 +01:00
|
|
|
lhs.cmp(&rhs).reverse()
|
|
|
|
})
|
2019-10-02 17:34:32 +02:00
|
|
|
}
|
|
|
|
}
|