From d93e35cace2218263780c6cf042101393a60b41c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 12 Dec 2019 11:33:39 +0100 Subject: [PATCH] Introduce ContextMut and Context structs --- meilisearch-core/src/bucket_sort.rs | 43 +++++++++++++++---- meilisearch-core/src/criterion/attribute.rs | 27 +++--------- meilisearch-core/src/criterion/document_id.rs | 24 +---------- meilisearch-core/src/criterion/exact.rs | 24 ++--------- meilisearch-core/src/criterion/mod.rs | 42 +++++++++++------- meilisearch-core/src/criterion/proximity.rs | 29 ++++--------- .../src/criterion/sort_by_attr.rs | 25 +---------- meilisearch-core/src/criterion/typo.rs | 20 +++------ meilisearch-core/src/criterion/words.rs | 26 +++-------- .../src/criterion/words_position.rs | 25 +++++------ 10 files changed, 106 insertions(+), 179 deletions(-) diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index 9502f2562..e61858d99 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -21,7 +21,7 @@ use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa}; use crate::automaton::normalize_str; use crate::automaton::{QueryEnhancer, QueryEnhancerBuilder}; -use crate::criterion::Criteria; +use crate::criterion::{Criteria, Context, ContextMut}; use crate::distinct_map::{BufferedDistinctMap, DistinctMap}; use crate::raw_document::RawDocument; use crate::{database::MainT, reordered_attrs::ReorderedAttrs}; @@ -61,7 +61,7 @@ where ); } - let (automatons, query_enhancer) = + let (mut automatons, mut query_enhancer) = construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?; debug!("{:?}", query_enhancer); @@ -102,14 +102,27 @@ where for mut group in tmp_groups { let before_criterion_preparation = Instant::now(); - criterion.prepare(&mut group, &mut arena, &query_enhancer, &automatons); + + let ctx = ContextMut { + postings_lists: &mut arena, + query_enhancer: &mut query_enhancer, + automatons: &mut automatons, + }; + + criterion.prepare(ctx, &mut group); debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed()); + let ctx = Context { + postings_lists: &arena, + query_enhancer: &query_enhancer, + automatons: &automatons, + }; + let before_criterion_sort = Instant::now(); - group.sort_unstable_by(|a, b| criterion.evaluate(a, b, &arena)); + group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b)); debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed()); - for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, &arena)) { + for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) { debug!("{:?} produced a group of size {}", criterion.name(), group.len()); documents_seen += group.len(); @@ -147,7 +160,7 @@ where FI: Fn(DocumentId) -> bool, FD: Fn(DocumentId) -> Option, { - let (automatons, query_enhancer) = + let (mut automatons, mut query_enhancer) = construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?; let before_postings_lists_fetching = Instant::now(); @@ -201,15 +214,27 @@ where continue; } + let ctx = ContextMut { + postings_lists: &mut arena, + query_enhancer: &mut query_enhancer, + automatons: &mut automatons, + }; + let before_criterion_preparation = Instant::now(); - criterion.prepare(&mut group, &mut arena, &query_enhancer, &automatons); + criterion.prepare(ctx, &mut group); debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed()); + let ctx = Context { + postings_lists: &arena, + query_enhancer: &query_enhancer, + automatons: &automatons, + }; + let before_criterion_sort = Instant::now(); - group.sort_unstable_by(|a, b| criterion.evaluate(a, b, &arena)); + group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b)); debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed()); - for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, &arena)) { + for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) { // we must compute the real distinguished len of this sub-group for document in group.iter() { let filter_accepted = match &filter { diff --git a/meilisearch-core/src/criterion/attribute.rs b/meilisearch-core/src/criterion/attribute.rs index cad5664c0..4baebf66a 100644 --- a/meilisearch-core/src/criterion/attribute.rs +++ b/meilisearch-core/src/criterion/attribute.rs @@ -1,36 +1,23 @@ use std::cmp::Ordering; - -use compact_arena::SmallArena; use slice_group_by::GroupBy; - -use crate::automaton::QueryEnhancer; -use crate::bucket_sort::{SimpleMatch, PostingsListView, QueryWordAutomaton}; use crate::RawDocument; - -use super::{Criterion, prepare_raw_matches}; +use crate::bucket_sort::SimpleMatch; +use super::{Criterion, Context, ContextMut, prepare_raw_matches}; pub struct Attribute; impl Criterion for Attribute { fn name(&self) -> &str { "attribute" } - fn prepare<'a, 'tag, 'txn>( + fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>( &self, - documents: &mut [RawDocument<'a, 'tag>], - postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, - query_enhancer: &QueryEnhancer, - automatons: &[QueryWordAutomaton], + ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>, + documents: &mut [RawDocument<'r, 'tag>], ) { - prepare_raw_matches(documents, postings_lists, query_enhancer, automatons); + prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons); } - fn evaluate<'a, 'tag, 'txn>( - &self, - lhs: &RawDocument<'a, 'tag>, - rhs: &RawDocument<'a, 'tag>, - postings_lists: &SmallArena<'tag, PostingsListView<'txn>>, - ) -> Ordering - { + fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { #[inline] fn sum_of_attribute(matches: &[SimpleMatch]) -> usize { let mut sum_of_attribute = 0; diff --git a/meilisearch-core/src/criterion/document_id.rs b/meilisearch-core/src/criterion/document_id.rs index 596194bca..f54a43779 100644 --- a/meilisearch-core/src/criterion/document_id.rs +++ b/meilisearch-core/src/criterion/document_id.rs @@ -1,34 +1,14 @@ use std::cmp::Ordering; - use compact_arena::SmallArena; - -use crate::automaton::QueryEnhancer; -use crate::bucket_sort::{PostingsListView, QueryWordAutomaton}; use crate::RawDocument; -use super::Criterion; +use super::{Criterion, Context}; pub struct DocumentId; impl Criterion for DocumentId { fn name(&self) -> &str { "stable document id" } - fn prepare( - &self, - documents: &mut [RawDocument], - postings_lists: &mut SmallArena, - query_enhancer: &QueryEnhancer, - automatons: &[QueryWordAutomaton], - ) { - // ... - } - - fn evaluate( - &self, - lhs: &RawDocument, - rhs: &RawDocument, - postings_lists: &SmallArena, - ) -> Ordering - { + fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { let lhs = &lhs.id; let rhs = &rhs.id; diff --git a/meilisearch-core/src/criterion/exact.rs b/meilisearch-core/src/criterion/exact.rs index d82f69462..56a81c9ee 100644 --- a/meilisearch-core/src/criterion/exact.rs +++ b/meilisearch-core/src/criterion/exact.rs @@ -1,37 +1,21 @@ use std::cmp::{Ordering, Reverse}; - -use compact_arena::SmallArena; use slice_group_by::GroupBy; - -use crate::automaton::QueryEnhancer; -use crate::bucket_sort::{PostingsListView, BareMatch, QueryWordAutomaton}; use crate::RawDocument; -use super::Criterion; +use crate::bucket_sort::BareMatch; +use super::{Criterion, Context, ContextMut}; pub struct Exact; impl Criterion for Exact { fn name(&self) -> &str { "exact" } - fn prepare( - &self, - documents: &mut [RawDocument], - postings_lists: &mut SmallArena, - query_enhancer: &QueryEnhancer, - automatons: &[QueryWordAutomaton], - ) { + fn prepare(&self, _ctx: ContextMut, documents: &mut [RawDocument]) { for document in documents { document.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact))); } } - fn evaluate( - &self, - lhs: &RawDocument, - rhs: &RawDocument, - postings_lists: &SmallArena, - ) -> Ordering - { + fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { #[inline] fn sum_exact_query_words(matches: &[BareMatch]) -> usize { let mut sum_exact_query_words = 0; diff --git a/meilisearch-core/src/criterion/mod.rs b/meilisearch-core/src/criterion/mod.rs index 0d54d89f2..40b75cf0d 100644 --- a/meilisearch-core/src/criterion/mod.rs +++ b/meilisearch-core/src/criterion/mod.rs @@ -29,33 +29,45 @@ pub use self::sort_by_attr::SortByAttr; pub trait Criterion { fn name(&self) -> &str; - fn prepare<'a, 'tag, 'txn>( + fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>( &self, - documents: &mut [RawDocument<'a, 'tag>], - postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, - query_enhancer: &QueryEnhancer, - automatons: &[QueryWordAutomaton], - ); + ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>, + documents: &mut [RawDocument<'r, 'tag>], + ) { + /* ... */ + } - fn evaluate<'a, 'tag, 'txn>( + fn evaluate<'p, 'tag, 'txn, 'q, 'a, 'r>( &self, - lhs: &RawDocument<'a, 'tag>, - rhs: &RawDocument<'a, 'tag>, - postings_lists: &SmallArena<'tag, PostingsListView<'txn>>, + ctx: &Context<'p, 'tag, 'txn, 'q, 'a>, + lhs: &RawDocument<'r, 'tag>, + rhs: &RawDocument<'r, 'tag>, ) -> Ordering; #[inline] - fn eq<'a, 'tag, 'txn>( + fn eq<'p, 'tag, 'txn, 'q, 'a, 'r>( &self, - lhs: &RawDocument<'a, 'tag>, - rhs: &RawDocument<'a, 'tag>, - postings_lists: &SmallArena<'tag, PostingsListView<'txn>>, + ctx: &Context<'p, 'tag, 'txn, 'q, 'a>, + lhs: &RawDocument<'r, 'tag>, + rhs: &RawDocument<'r, 'tag>, ) -> bool { - self.evaluate(lhs, rhs, postings_lists) == Ordering::Equal + self.evaluate(ctx, lhs, rhs) == Ordering::Equal } } +pub struct ContextMut<'p, 'tag, 'txn, 'q, 'a> { + pub postings_lists: &'p mut SmallArena<'tag, PostingsListView<'txn>>, + pub query_enhancer: &'q mut QueryEnhancer, + pub automatons: &'a mut [QueryWordAutomaton], +} + +pub struct Context<'p, 'tag, 'txn, 'q, 'a> { + pub postings_lists: &'p SmallArena<'tag, PostingsListView<'txn>>, + pub query_enhancer: &'q QueryEnhancer, + pub automatons: &'a [QueryWordAutomaton], +} + #[derive(Default)] pub struct CriteriaBuilder<'a> { inner: Vec>, diff --git a/meilisearch-core/src/criterion/proximity.rs b/meilisearch-core/src/criterion/proximity.rs index c9c534ca8..7437fb2c9 100644 --- a/meilisearch-core/src/criterion/proximity.rs +++ b/meilisearch-core/src/criterion/proximity.rs @@ -1,38 +1,25 @@ use std::cmp::{self, Ordering}; - -use compact_arena::SmallArena; use slice_group_by::GroupBy; - -use crate::automaton::QueryEnhancer; -use crate::bucket_sort::{PostingsListView, SimpleMatch, QueryWordAutomaton}; +use crate::bucket_sort::{SimpleMatch}; use crate::RawDocument; +use super::{Criterion, Context, ContextMut, prepare_raw_matches}; -use super::{Criterion, prepare_raw_matches}; +const MAX_DISTANCE: u16 = 8; pub struct Proximity; impl Criterion for Proximity { fn name(&self) -> &str { "proximity" } - fn prepare<'a, 'tag, 'txn>( + fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>( &self, - documents: &mut [RawDocument<'a, 'tag>], - postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, - query_enhancer: &QueryEnhancer, - automatons: &[QueryWordAutomaton], + ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>, + documents: &mut [RawDocument<'r, 'tag>], ) { - prepare_raw_matches(documents, postings_lists, query_enhancer, automatons); + prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons); } - fn evaluate<'a, 'tag, 'txn>( - &self, - lhs: &RawDocument<'a, 'tag>, - rhs: &RawDocument<'a, 'tag>, - postings_lists: &SmallArena<'tag, PostingsListView<'txn>>, - ) -> Ordering - { - const MAX_DISTANCE: u16 = 8; - + fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { fn index_proximity(lhs: u16, rhs: u16) -> u16 { if lhs < rhs { cmp::min(rhs - lhs, MAX_DISTANCE) diff --git a/meilisearch-core/src/criterion/sort_by_attr.rs b/meilisearch-core/src/criterion/sort_by_attr.rs index ea1c016da..3fd801550 100644 --- a/meilisearch-core/src/criterion/sort_by_attr.rs +++ b/meilisearch-core/src/criterion/sort_by_attr.rs @@ -1,14 +1,9 @@ use std::cmp::Ordering; use std::error::Error; use std::fmt; - -use compact_arena::SmallArena; use meilisearch_schema::{Schema, SchemaAttr}; - -use crate::automaton::QueryEnhancer; -use crate::bucket_sort::{PostingsListView, QueryWordAutomaton}; -use crate::criterion::Criterion; use crate::{RankedMap, RawDocument}; +use super::{Criterion, Context}; /// An helper struct that permit to sort documents by /// some of their stored attributes. @@ -95,23 +90,7 @@ impl Criterion for SortByAttr<'_> { "sort by attribute" } - fn prepare<'a, 'tag, 'txn>( - &self, - documents: &mut [RawDocument<'a, 'tag>], - postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, - query_enhancer: &QueryEnhancer, - automatons: &[QueryWordAutomaton], - ) { - // ... - } - - fn evaluate<'a, 'tag, 'txn>( - &self, - lhs: &RawDocument<'a, 'tag>, - rhs: &RawDocument<'a, 'tag>, - postings_lists: &SmallArena<'tag, PostingsListView<'txn>>, - ) -> Ordering - { + fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { let lhs = self.ranked_map.get(lhs.id, self.attr); let rhs = self.ranked_map.get(rhs.id, self.attr); diff --git a/meilisearch-core/src/criterion/typo.rs b/meilisearch-core/src/criterion/typo.rs index d7907700d..8dcf9b578 100644 --- a/meilisearch-core/src/criterion/typo.rs +++ b/meilisearch-core/src/criterion/typo.rs @@ -6,30 +6,22 @@ use crate::automaton::QueryEnhancer; use crate::bucket_sort::{PostingsListView, QueryWordAutomaton}; use crate::RawDocument; -use super::{Criterion, prepare_query_distances}; +use super::{Criterion, Context, ContextMut, prepare_query_distances}; pub struct Typo; impl Criterion for Typo { fn name(&self) -> &str { "typo" } - fn prepare<'a, 'tag, 'txn>( + fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>( &self, - documents: &mut [RawDocument<'a, 'tag>], - postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, - query_enhancer: &QueryEnhancer, - automatons: &[QueryWordAutomaton], + ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>, + documents: &mut [RawDocument<'r, 'tag>], ) { - prepare_query_distances(documents, query_enhancer, automatons, postings_lists); + prepare_query_distances(documents, ctx.query_enhancer, ctx.automatons, ctx.postings_lists); } - fn evaluate( - &self, - lhs: &RawDocument, - rhs: &RawDocument, - postings_lists: &SmallArena, - ) -> Ordering - { + fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { // This function is a wrong logarithmic 10 function. // It is safe to panic on input number higher than 3, // the number of typos is never bigger than that. diff --git a/meilisearch-core/src/criterion/words.rs b/meilisearch-core/src/criterion/words.rs index fbe3d9070..edfd3eb2f 100644 --- a/meilisearch-core/src/criterion/words.rs +++ b/meilisearch-core/src/criterion/words.rs @@ -1,35 +1,21 @@ use std::cmp::Ordering; - -use compact_arena::SmallArena; - -use crate::automaton::QueryEnhancer; -use crate::bucket_sort::{PostingsListView, QueryWordAutomaton}; use crate::RawDocument; - -use super::{Criterion, prepare_query_distances}; +use super::{Criterion, Context, ContextMut, prepare_query_distances}; pub struct Words; impl Criterion for Words { fn name(&self) -> &str { "words" } - fn prepare<'a, 'tag, 'txn>( + fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>( &self, - documents: &mut [RawDocument<'a, 'tag>], - postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, - query_enhancer: &QueryEnhancer, - automatons: &[QueryWordAutomaton], + ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>, + documents: &mut [RawDocument<'r, 'tag>], ) { - prepare_query_distances(documents, query_enhancer, automatons, postings_lists); + prepare_query_distances(documents, ctx.query_enhancer, ctx.automatons, ctx.postings_lists); } - fn evaluate( - &self, - lhs: &RawDocument, - rhs: &RawDocument, - postings_lists: &SmallArena, - ) -> Ordering - { + fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { #[inline] fn number_of_query_words(distances: &[Option]) -> usize { distances.iter().cloned().filter(Option::is_some).count() diff --git a/meilisearch-core/src/criterion/words_position.rs b/meilisearch-core/src/criterion/words_position.rs index 7df3e1fbd..cb9ec32f5 100644 --- a/meilisearch-core/src/criterion/words_position.rs +++ b/meilisearch-core/src/criterion/words_position.rs @@ -1,34 +1,29 @@ use std::cmp::Ordering; -use compact_arena::SmallArena; use slice_group_by::GroupBy; -use crate::automaton::QueryEnhancer; -use crate::bucket_sort::{PostingsListView, SimpleMatch, QueryWordAutomaton}; use crate::RawDocument; - -use super::{Criterion, prepare_raw_matches}; +use crate::bucket_sort::SimpleMatch; +use super::{Criterion, Context, ContextMut, prepare_raw_matches}; pub struct WordsPosition; impl Criterion for WordsPosition { fn name(&self) -> &str { "words position" } - fn prepare<'a, 'tag, 'txn>( + fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>( &self, - documents: &mut [RawDocument<'a, 'tag>], - postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, - query_enhancer: &QueryEnhancer, - automatons: &[QueryWordAutomaton], + ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>, + documents: &mut [RawDocument<'r, 'tag>], ) { - prepare_raw_matches(documents, postings_lists, query_enhancer, automatons); + prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons); } - fn evaluate<'a, 'tag, 'txn>( + fn evaluate<'p, 'tag, 'txn, 'q, 'a, 'r>( &self, - lhs: &RawDocument<'a, 'tag>, - rhs: &RawDocument<'a, 'tag>, - postings_lists: &SmallArena<'tag, PostingsListView<'txn>>, + ctx: &Context<'p, 'tag, 'txn, 'q, 'a>, + lhs: &RawDocument<'r, 'tag>, + rhs: &RawDocument<'r, 'tag>, ) -> Ordering { #[inline]