Introduce ContextMut and Context structs

This commit is contained in:
Clément Renault 2019-12-12 11:33:39 +01:00
parent d75339a271
commit d93e35cace
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
10 changed files with 106 additions and 179 deletions

View File

@ -21,7 +21,7 @@ use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa};
use crate::automaton::normalize_str;
use crate::automaton::{QueryEnhancer, QueryEnhancerBuilder};
use crate::criterion::Criteria;
use crate::criterion::{Criteria, Context, ContextMut};
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
use crate::raw_document::RawDocument;
use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
@ -61,7 +61,7 @@ where
);
}
let (automatons, query_enhancer) =
let (mut automatons, mut query_enhancer) =
construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?;
debug!("{:?}", query_enhancer);
@ -102,14 +102,27 @@ where
for mut group in tmp_groups {
let before_criterion_preparation = Instant::now();
criterion.prepare(&mut group, &mut arena, &query_enhancer, &automatons);
let ctx = ContextMut {
postings_lists: &mut arena,
query_enhancer: &mut query_enhancer,
automatons: &mut automatons,
};
criterion.prepare(ctx, &mut group);
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
let ctx = Context {
postings_lists: &arena,
query_enhancer: &query_enhancer,
automatons: &automatons,
};
let before_criterion_sort = Instant::now();
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, &arena));
group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, &arena)) {
for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
debug!("{:?} produced a group of size {}", criterion.name(), group.len());
documents_seen += group.len();
@ -147,7 +160,7 @@ where
FI: Fn(DocumentId) -> bool,
FD: Fn(DocumentId) -> Option<u64>,
{
let (automatons, query_enhancer) =
let (mut automatons, mut query_enhancer) =
construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?;
let before_postings_lists_fetching = Instant::now();
@ -201,15 +214,27 @@ where
continue;
}
let ctx = ContextMut {
postings_lists: &mut arena,
query_enhancer: &mut query_enhancer,
automatons: &mut automatons,
};
let before_criterion_preparation = Instant::now();
criterion.prepare(&mut group, &mut arena, &query_enhancer, &automatons);
criterion.prepare(ctx, &mut group);
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
let ctx = Context {
postings_lists: &arena,
query_enhancer: &query_enhancer,
automatons: &automatons,
};
let before_criterion_sort = Instant::now();
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, &arena));
group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, &arena)) {
for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
// we must compute the real distinguished len of this sub-group
for document in group.iter() {
let filter_accepted = match &filter {

View File

@ -1,36 +1,23 @@
use std::cmp::Ordering;
use compact_arena::SmallArena;
use slice_group_by::GroupBy;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{SimpleMatch, PostingsListView, QueryWordAutomaton};
use crate::RawDocument;
use super::{Criterion, prepare_raw_matches};
use crate::bucket_sort::SimpleMatch;
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
pub struct Attribute;
impl Criterion for Attribute {
fn name(&self) -> &str { "attribute" }
fn prepare<'a, 'tag, 'txn>(
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self,
documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
documents: &mut [RawDocument<'r, 'tag>],
) {
prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons);
}
fn evaluate<'a, 'tag, 'txn>(
&self,
lhs: &RawDocument<'a, 'tag>,
rhs: &RawDocument<'a, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
) -> Ordering
{
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
#[inline]
fn sum_of_attribute(matches: &[SimpleMatch]) -> usize {
let mut sum_of_attribute = 0;

View File

@ -1,34 +1,14 @@
use std::cmp::Ordering;
use compact_arena::SmallArena;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
use crate::RawDocument;
use super::Criterion;
use super::{Criterion, Context};
pub struct DocumentId;
impl Criterion for DocumentId {
fn name(&self) -> &str { "stable document id" }
fn prepare(
&self,
documents: &mut [RawDocument],
postings_lists: &mut SmallArena<PostingsListView>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) {
// ...
}
fn evaluate(
&self,
lhs: &RawDocument,
rhs: &RawDocument,
postings_lists: &SmallArena<PostingsListView>,
) -> Ordering
{
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = &lhs.id;
let rhs = &rhs.id;

View File

@ -1,37 +1,21 @@
use std::cmp::{Ordering, Reverse};
use compact_arena::SmallArena;
use slice_group_by::GroupBy;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, BareMatch, QueryWordAutomaton};
use crate::RawDocument;
use super::Criterion;
use crate::bucket_sort::BareMatch;
use super::{Criterion, Context, ContextMut};
pub struct Exact;
impl Criterion for Exact {
fn name(&self) -> &str { "exact" }
fn prepare(
&self,
documents: &mut [RawDocument],
postings_lists: &mut SmallArena<PostingsListView>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) {
fn prepare(&self, _ctx: ContextMut, documents: &mut [RawDocument]) {
for document in documents {
document.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
}
}
fn evaluate(
&self,
lhs: &RawDocument,
rhs: &RawDocument,
postings_lists: &SmallArena<PostingsListView>,
) -> Ordering
{
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
#[inline]
fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
let mut sum_exact_query_words = 0;

View File

@ -29,33 +29,45 @@ pub use self::sort_by_attr::SortByAttr;
pub trait Criterion {
fn name(&self) -> &str;
fn prepare<'a, 'tag, 'txn>(
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self,
documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
);
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
documents: &mut [RawDocument<'r, 'tag>],
) {
/* ... */
}
fn evaluate<'a, 'tag, 'txn>(
fn evaluate<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self,
lhs: &RawDocument<'a, 'tag>,
rhs: &RawDocument<'a, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
lhs: &RawDocument<'r, 'tag>,
rhs: &RawDocument<'r, 'tag>,
) -> Ordering;
#[inline]
fn eq<'a, 'tag, 'txn>(
fn eq<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self,
lhs: &RawDocument<'a, 'tag>,
rhs: &RawDocument<'a, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
lhs: &RawDocument<'r, 'tag>,
rhs: &RawDocument<'r, 'tag>,
) -> bool
{
self.evaluate(lhs, rhs, postings_lists) == Ordering::Equal
self.evaluate(ctx, lhs, rhs) == Ordering::Equal
}
}
pub struct ContextMut<'p, 'tag, 'txn, 'q, 'a> {
pub postings_lists: &'p mut SmallArena<'tag, PostingsListView<'txn>>,
pub query_enhancer: &'q mut QueryEnhancer,
pub automatons: &'a mut [QueryWordAutomaton],
}
pub struct Context<'p, 'tag, 'txn, 'q, 'a> {
pub postings_lists: &'p SmallArena<'tag, PostingsListView<'txn>>,
pub query_enhancer: &'q QueryEnhancer,
pub automatons: &'a [QueryWordAutomaton],
}
#[derive(Default)]
pub struct CriteriaBuilder<'a> {
inner: Vec<Box<dyn Criterion + 'a>>,

View File

@ -1,38 +1,25 @@
use std::cmp::{self, Ordering};
use compact_arena::SmallArena;
use slice_group_by::GroupBy;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, SimpleMatch, QueryWordAutomaton};
use crate::bucket_sort::{SimpleMatch};
use crate::RawDocument;
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
use super::{Criterion, prepare_raw_matches};
const MAX_DISTANCE: u16 = 8;
pub struct Proximity;
impl Criterion for Proximity {
fn name(&self) -> &str { "proximity" }
fn prepare<'a, 'tag, 'txn>(
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self,
documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
documents: &mut [RawDocument<'r, 'tag>],
) {
prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons);
}
fn evaluate<'a, 'tag, 'txn>(
&self,
lhs: &RawDocument<'a, 'tag>,
rhs: &RawDocument<'a, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
) -> Ordering
{
const MAX_DISTANCE: u16 = 8;
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
if lhs < rhs {
cmp::min(rhs - lhs, MAX_DISTANCE)

View File

@ -1,14 +1,9 @@
use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
use compact_arena::SmallArena;
use meilisearch_schema::{Schema, SchemaAttr};
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
use crate::criterion::Criterion;
use crate::{RankedMap, RawDocument};
use super::{Criterion, Context};
/// An helper struct that permit to sort documents by
/// some of their stored attributes.
@ -95,23 +90,7 @@ impl Criterion for SortByAttr<'_> {
"sort by attribute"
}
fn prepare<'a, 'tag, 'txn>(
&self,
documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
) {
// ...
}
fn evaluate<'a, 'tag, 'txn>(
&self,
lhs: &RawDocument<'a, 'tag>,
rhs: &RawDocument<'a, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
) -> Ordering
{
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = self.ranked_map.get(lhs.id, self.attr);
let rhs = self.ranked_map.get(rhs.id, self.attr);

View File

@ -6,30 +6,22 @@ use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
use crate::RawDocument;
use super::{Criterion, prepare_query_distances};
use super::{Criterion, Context, ContextMut, prepare_query_distances};
pub struct Typo;
impl Criterion for Typo {
fn name(&self) -> &str { "typo" }
fn prepare<'a, 'tag, 'txn>(
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self,
documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
documents: &mut [RawDocument<'r, 'tag>],
) {
prepare_query_distances(documents, query_enhancer, automatons, postings_lists);
prepare_query_distances(documents, ctx.query_enhancer, ctx.automatons, ctx.postings_lists);
}
fn evaluate(
&self,
lhs: &RawDocument,
rhs: &RawDocument,
postings_lists: &SmallArena<PostingsListView>,
) -> Ordering
{
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
// This function is a wrong logarithmic 10 function.
// It is safe to panic on input number higher than 3,
// the number of typos is never bigger than that.

View File

@ -1,35 +1,21 @@
use std::cmp::Ordering;
use compact_arena::SmallArena;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
use crate::RawDocument;
use super::{Criterion, prepare_query_distances};
use super::{Criterion, Context, ContextMut, prepare_query_distances};
pub struct Words;
impl Criterion for Words {
fn name(&self) -> &str { "words" }
fn prepare<'a, 'tag, 'txn>(
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self,
documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
documents: &mut [RawDocument<'r, 'tag>],
) {
prepare_query_distances(documents, query_enhancer, automatons, postings_lists);
prepare_query_distances(documents, ctx.query_enhancer, ctx.automatons, ctx.postings_lists);
}
fn evaluate(
&self,
lhs: &RawDocument,
rhs: &RawDocument,
postings_lists: &SmallArena<PostingsListView>,
) -> Ordering
{
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
#[inline]
fn number_of_query_words(distances: &[Option<u8>]) -> usize {
distances.iter().cloned().filter(Option::is_some).count()

View File

@ -1,34 +1,29 @@
use std::cmp::Ordering;
use compact_arena::SmallArena;
use slice_group_by::GroupBy;
use crate::automaton::QueryEnhancer;
use crate::bucket_sort::{PostingsListView, SimpleMatch, QueryWordAutomaton};
use crate::RawDocument;
use super::{Criterion, prepare_raw_matches};
use crate::bucket_sort::SimpleMatch;
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
pub struct WordsPosition;
impl Criterion for WordsPosition {
fn name(&self) -> &str { "words position" }
fn prepare<'a, 'tag, 'txn>(
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self,
documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_enhancer: &QueryEnhancer,
automatons: &[QueryWordAutomaton],
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
documents: &mut [RawDocument<'r, 'tag>],
) {
prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons);
}
fn evaluate<'a, 'tag, 'txn>(
fn evaluate<'p, 'tag, 'txn, 'q, 'a, 'r>(
&self,
lhs: &RawDocument<'a, 'tag>,
rhs: &RawDocument<'a, 'tag>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
lhs: &RawDocument<'r, 'tag>,
rhs: &RawDocument<'r, 'tag>,
) -> Ordering
{
#[inline]