Introduce ContextMut and Context structs

This commit is contained in:
Clément Renault 2019-12-12 11:33:39 +01:00
parent d75339a271
commit d93e35cace
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
10 changed files with 106 additions and 179 deletions

View File

@ -21,7 +21,7 @@ use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa};
use crate::automaton::normalize_str; use crate::automaton::normalize_str;
use crate::automaton::{QueryEnhancer, QueryEnhancerBuilder}; use crate::automaton::{QueryEnhancer, QueryEnhancerBuilder};
use crate::criterion::Criteria; use crate::criterion::{Criteria, Context, ContextMut};
use crate::distinct_map::{BufferedDistinctMap, DistinctMap}; use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
use crate::raw_document::RawDocument; use crate::raw_document::RawDocument;
use crate::{database::MainT, reordered_attrs::ReorderedAttrs}; use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
@ -61,7 +61,7 @@ where
); );
} }
let (automatons, query_enhancer) = let (mut automatons, mut query_enhancer) =
construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?; construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?;
debug!("{:?}", query_enhancer); debug!("{:?}", query_enhancer);
@ -102,14 +102,27 @@ where
for mut group in tmp_groups { for mut group in tmp_groups {
let before_criterion_preparation = Instant::now(); let before_criterion_preparation = Instant::now();
criterion.prepare(&mut group, &mut arena, &query_enhancer, &automatons);
let ctx = ContextMut {
postings_lists: &mut arena,
query_enhancer: &mut query_enhancer,
automatons: &mut automatons,
};
criterion.prepare(ctx, &mut group);
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed()); debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
let ctx = Context {
postings_lists: &arena,
query_enhancer: &query_enhancer,
automatons: &automatons,
};
let before_criterion_sort = Instant::now(); let before_criterion_sort = Instant::now();
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, &arena)); group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed()); debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, &arena)) { for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
debug!("{:?} produced a group of size {}", criterion.name(), group.len()); debug!("{:?} produced a group of size {}", criterion.name(), group.len());
documents_seen += group.len(); documents_seen += group.len();
@ -147,7 +160,7 @@ where
FI: Fn(DocumentId) -> bool, FI: Fn(DocumentId) -> bool,
FD: Fn(DocumentId) -> Option<u64>, FD: Fn(DocumentId) -> Option<u64>,
{ {
let (automatons, query_enhancer) = let (mut automatons, mut query_enhancer) =
construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?; construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?;
let before_postings_lists_fetching = Instant::now(); let before_postings_lists_fetching = Instant::now();
@ -201,15 +214,27 @@ where
continue; continue;
} }
let ctx = ContextMut {
postings_lists: &mut arena,
query_enhancer: &mut query_enhancer,
automatons: &mut automatons,
};
let before_criterion_preparation = Instant::now(); let before_criterion_preparation = Instant::now();
criterion.prepare(&mut group, &mut arena, &query_enhancer, &automatons); criterion.prepare(ctx, &mut group);
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed()); debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
let ctx = Context {
postings_lists: &arena,
query_enhancer: &query_enhancer,
automatons: &automatons,
};
let before_criterion_sort = Instant::now(); let before_criterion_sort = Instant::now();
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, &arena)); group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed()); debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, &arena)) { for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
// we must compute the real distinguished len of this sub-group // we must compute the real distinguished len of this sub-group
for document in group.iter() { for document in group.iter() {
let filter_accepted = match &filter { let filter_accepted = match &filter {

View File

@ -1,36 +1,23 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use compact_arena::SmallArena;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{SimpleMatch, PostingsListView, QueryWordAutomaton};
use crate::RawDocument; use crate::RawDocument;
use crate::bucket_sort::SimpleMatch;
use super::{Criterion, prepare_raw_matches}; use super::{Criterion, Context, ContextMut, prepare_raw_matches};
pub struct Attribute; pub struct Attribute;
impl Criterion for Attribute { impl Criterion for Attribute {
fn name(&self) -> &str { "attribute" } fn name(&self) -> &str { "attribute" }
fn prepare<'a, 'tag, 'txn>( fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self, &self,
documents: &mut [RawDocument<'a, 'tag>], ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, documents: &mut [RawDocument<'r, 'tag>],
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) { ) {
prepare_raw_matches(documents, postings_lists, query_enhancer, automatons); prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons);
} }
fn evaluate<'a, 'tag, 'txn>( fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
&self,
lhs: &RawDocument<'a, 'tag>,
rhs: &RawDocument<'a, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
) -> Ordering
{
#[inline] #[inline]
fn sum_of_attribute(matches: &[SimpleMatch]) -> usize { fn sum_of_attribute(matches: &[SimpleMatch]) -> usize {
let mut sum_of_attribute = 0; let mut sum_of_attribute = 0;

View File

@ -1,34 +1,14 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use compact_arena::SmallArena; use compact_arena::SmallArena;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
use crate::RawDocument; use crate::RawDocument;
use super::Criterion; use super::{Criterion, Context};
pub struct DocumentId; pub struct DocumentId;
impl Criterion for DocumentId { impl Criterion for DocumentId {
fn name(&self) -> &str { "stable document id" } fn name(&self) -> &str { "stable document id" }
fn prepare( fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
&self,
documents: &mut [RawDocument],
postings_lists: &mut SmallArena<PostingsListView>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) {
// ...
}
fn evaluate(
&self,
lhs: &RawDocument,
rhs: &RawDocument,
postings_lists: &SmallArena<PostingsListView>,
) -> Ordering
{
let lhs = &lhs.id; let lhs = &lhs.id;
let rhs = &rhs.id; let rhs = &rhs.id;

View File

@ -1,37 +1,21 @@
use std::cmp::{Ordering, Reverse}; use std::cmp::{Ordering, Reverse};
use compact_arena::SmallArena;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, BareMatch, QueryWordAutomaton};
use crate::RawDocument; use crate::RawDocument;
use super::Criterion; use crate::bucket_sort::BareMatch;
use super::{Criterion, Context, ContextMut};
pub struct Exact; pub struct Exact;
impl Criterion for Exact { impl Criterion for Exact {
fn name(&self) -> &str { "exact" } fn name(&self) -> &str { "exact" }
fn prepare( fn prepare(&self, _ctx: ContextMut, documents: &mut [RawDocument]) {
&self,
documents: &mut [RawDocument],
postings_lists: &mut SmallArena<PostingsListView>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) {
for document in documents { for document in documents {
document.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact))); document.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
} }
} }
fn evaluate( fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
&self,
lhs: &RawDocument,
rhs: &RawDocument,
postings_lists: &SmallArena<PostingsListView>,
) -> Ordering
{
#[inline] #[inline]
fn sum_exact_query_words(matches: &[BareMatch]) -> usize { fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
let mut sum_exact_query_words = 0; let mut sum_exact_query_words = 0;

View File

@ -29,33 +29,45 @@ pub use self::sort_by_attr::SortByAttr;
pub trait Criterion { pub trait Criterion {
fn name(&self) -> &str; fn name(&self) -> &str;
fn prepare<'a, 'tag, 'txn>( fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self, &self,
documents: &mut [RawDocument<'a, 'tag>], ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, documents: &mut [RawDocument<'r, 'tag>],
query_enhancer: &QueryEnhancer, ) {
automatons: &[QueryWordAutomaton], /* ... */
); }
fn evaluate<'a, 'tag, 'txn>( fn evaluate<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self, &self,
lhs: &RawDocument<'a, 'tag>, ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
rhs: &RawDocument<'a, 'tag>, lhs: &RawDocument<'r, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>, rhs: &RawDocument<'r, 'tag>,
) -> Ordering; ) -> Ordering;
#[inline] #[inline]
fn eq<'a, 'tag, 'txn>( fn eq<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self, &self,
lhs: &RawDocument<'a, 'tag>, ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
rhs: &RawDocument<'a, 'tag>, lhs: &RawDocument<'r, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>, rhs: &RawDocument<'r, 'tag>,
) -> bool ) -> bool
{ {
self.evaluate(lhs, rhs, postings_lists) == Ordering::Equal self.evaluate(ctx, lhs, rhs) == Ordering::Equal
} }
} }
/// Mutable search context handed by value to `Criterion::prepare`.
///
/// Groups exclusive borrows of the postings-lists arena, the query enhancer
/// and the query-word automatons so they travel as a single argument.
/// Each borrow carries its own lifetime (`'p`, `'q`, `'a`); `'tag` and `'txn`
/// are the lifetime parameters of the arena and of the `PostingsListView`s
/// stored in it.
pub struct ContextMut<'p, 'tag, 'txn, 'q, 'a> {
/// Exclusive borrow of the arena holding the postings-list views.
pub postings_lists: &'p mut SmallArena<'tag, PostingsListView<'txn>>,
/// Exclusive borrow of the query enhancer.
pub query_enhancer: &'q mut QueryEnhancer,
/// Exclusive borrow of the slice of query-word automatons.
pub automatons: &'a mut [QueryWordAutomaton],
}
/// Read-only search context passed by reference to `Criterion::evaluate`
/// and `Criterion::eq`.
///
/// Groups shared borrows of the postings-lists arena, the query enhancer
/// and the query-word automatons so they travel as a single argument.
/// Each borrow carries its own lifetime (`'p`, `'q`, `'a`); `'tag` and `'txn`
/// are the lifetime parameters of the arena and of the `PostingsListView`s
/// stored in it.
pub struct Context<'p, 'tag, 'txn, 'q, 'a> {
/// Shared borrow of the arena holding the postings-list views.
pub postings_lists: &'p SmallArena<'tag, PostingsListView<'txn>>,
/// Shared borrow of the query enhancer.
pub query_enhancer: &'q QueryEnhancer,
/// Shared borrow of the slice of query-word automatons.
pub automatons: &'a [QueryWordAutomaton],
}
#[derive(Default)] #[derive(Default)]
pub struct CriteriaBuilder<'a> { pub struct CriteriaBuilder<'a> {
inner: Vec<Box<dyn Criterion + 'a>>, inner: Vec<Box<dyn Criterion + 'a>>,

View File

@ -1,38 +1,25 @@
use std::cmp::{self, Ordering}; use std::cmp::{self, Ordering};
use compact_arena::SmallArena;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use crate::bucket_sort::{SimpleMatch};
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, SimpleMatch, QueryWordAutomaton};
use crate::RawDocument; use crate::RawDocument;
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
use super::{Criterion, prepare_raw_matches}; const MAX_DISTANCE: u16 = 8;
pub struct Proximity; pub struct Proximity;
impl Criterion for Proximity { impl Criterion for Proximity {
fn name(&self) -> &str { "proximity" } fn name(&self) -> &str { "proximity" }
fn prepare<'a, 'tag, 'txn>( fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self, &self,
documents: &mut [RawDocument<'a, 'tag>], ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, documents: &mut [RawDocument<'r, 'tag>],
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) { ) {
prepare_raw_matches(documents, postings_lists, query_enhancer, automatons); prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons);
} }
fn evaluate<'a, 'tag, 'txn>( fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
&self,
lhs: &RawDocument<'a, 'tag>,
rhs: &RawDocument<'a, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
) -> Ordering
{
const MAX_DISTANCE: u16 = 8;
fn index_proximity(lhs: u16, rhs: u16) -> u16 { fn index_proximity(lhs: u16, rhs: u16) -> u16 {
if lhs < rhs { if lhs < rhs {
cmp::min(rhs - lhs, MAX_DISTANCE) cmp::min(rhs - lhs, MAX_DISTANCE)

View File

@ -1,14 +1,9 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use std::error::Error; use std::error::Error;
use std::fmt; use std::fmt;
use compact_arena::SmallArena;
use meilisearch_schema::{Schema, SchemaAttr}; use meilisearch_schema::{Schema, SchemaAttr};
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
use crate::criterion::Criterion;
use crate::{RankedMap, RawDocument}; use crate::{RankedMap, RawDocument};
use super::{Criterion, Context};
/// A helper struct that permits sorting documents by /// A helper struct that permits sorting documents by
/// some of their stored attributes. /// some of their stored attributes.
@ -95,23 +90,7 @@ impl Criterion for SortByAttr<'_> {
"sort by attribute" "sort by attribute"
} }
fn prepare<'a, 'tag, 'txn>( fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
&self,
documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) {
// ...
}
fn evaluate<'a, 'tag, 'txn>(
&self,
lhs: &RawDocument<'a, 'tag>,
rhs: &RawDocument<'a, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
) -> Ordering
{
let lhs = self.ranked_map.get(lhs.id, self.attr); let lhs = self.ranked_map.get(lhs.id, self.attr);
let rhs = self.ranked_map.get(rhs.id, self.attr); let rhs = self.ranked_map.get(rhs.id, self.attr);

View File

@ -6,30 +6,22 @@ use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton}; use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
use crate::RawDocument; use crate::RawDocument;
use super::{Criterion, prepare_query_distances}; use super::{Criterion, Context, ContextMut, prepare_query_distances};
pub struct Typo; pub struct Typo;
impl Criterion for Typo { impl Criterion for Typo {
fn name(&self) -> &str { "typo" } fn name(&self) -> &str { "typo" }
fn prepare<'a, 'tag, 'txn>( fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self, &self,
documents: &mut [RawDocument<'a, 'tag>], ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, documents: &mut [RawDocument<'r, 'tag>],
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) { ) {
prepare_query_distances(documents, query_enhancer, automatons, postings_lists); prepare_query_distances(documents, ctx.query_enhancer, ctx.automatons, ctx.postings_lists);
} }
fn evaluate( fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
&self,
lhs: &RawDocument,
rhs: &RawDocument,
postings_lists: &SmallArena<PostingsListView>,
) -> Ordering
{
// This function is a wrong logarithmic 10 function. // This function is a wrong logarithmic 10 function.
// It is safe to panic on input number higher than 3, // It is safe to panic on input number higher than 3,
// the number of typos is never bigger than that. // the number of typos is never bigger than that.

View File

@ -1,35 +1,21 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use compact_arena::SmallArena;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
use crate::RawDocument; use crate::RawDocument;
use super::{Criterion, Context, ContextMut, prepare_query_distances};
use super::{Criterion, prepare_query_distances};
pub struct Words; pub struct Words;
impl Criterion for Words { impl Criterion for Words {
fn name(&self) -> &str { "words" } fn name(&self) -> &str { "words" }
fn prepare<'a, 'tag, 'txn>( fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self, &self,
documents: &mut [RawDocument<'a, 'tag>], ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, documents: &mut [RawDocument<'r, 'tag>],
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) { ) {
prepare_query_distances(documents, query_enhancer, automatons, postings_lists); prepare_query_distances(documents, ctx.query_enhancer, ctx.automatons, ctx.postings_lists);
} }
fn evaluate( fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
&self,
lhs: &RawDocument,
rhs: &RawDocument,
postings_lists: &SmallArena<PostingsListView>,
) -> Ordering
{
#[inline] #[inline]
fn number_of_query_words(distances: &[Option<u8>]) -> usize { fn number_of_query_words(distances: &[Option<u8>]) -> usize {
distances.iter().cloned().filter(Option::is_some).count() distances.iter().cloned().filter(Option::is_some).count()

View File

@ -1,34 +1,29 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use compact_arena::SmallArena;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, SimpleMatch, QueryWordAutomaton};
use crate::RawDocument; use crate::RawDocument;
use crate::bucket_sort::SimpleMatch;
use super::{Criterion, prepare_raw_matches}; use super::{Criterion, Context, ContextMut, prepare_raw_matches};
pub struct WordsPosition; pub struct WordsPosition;
impl Criterion for WordsPosition { impl Criterion for WordsPosition {
fn name(&self) -> &str { "words position" } fn name(&self) -> &str { "words position" }
fn prepare<'a, 'tag, 'txn>( fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self, &self,
documents: &mut [RawDocument<'a, 'tag>], ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, documents: &mut [RawDocument<'r, 'tag>],
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) { ) {
prepare_raw_matches(documents, postings_lists, query_enhancer, automatons); prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons);
} }
fn evaluate<'a, 'tag, 'txn>( fn evaluate<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self, &self,
lhs: &RawDocument<'a, 'tag>, ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
rhs: &RawDocument<'a, 'tag>, lhs: &RawDocument<'r, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>, rhs: &RawDocument<'r, 'tag>,
) -> Ordering ) -> Ordering
{ {
#[inline] #[inline]