mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 13:40:31 +01:00
Introduce the Initial Criterion
This commit is contained in:
parent
89ee2cf576
commit
658f316511
@ -57,9 +57,6 @@ criterion = "0.3.4"
|
||||
maplit = "1.0.2"
|
||||
rand = "0.8.3"
|
||||
|
||||
[build-dependencies]
|
||||
fst = "0.4.5"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
||||
|
@ -31,32 +31,10 @@ pub struct AscDesc<'t> {
|
||||
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
faceted_candidates: RoaringBitmap,
|
||||
parent: Option<Box<dyn Criterion + 't>>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
}
|
||||
|
||||
impl<'t> AscDesc<'t> {
|
||||
pub fn initial_asc(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
field_name: String,
|
||||
) -> anyhow::Result<Self>
|
||||
{
|
||||
Self::initial(index, rtxn, query_tree, candidates, field_name, true)
|
||||
}
|
||||
|
||||
pub fn initial_desc(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
field_name: String,
|
||||
) -> anyhow::Result<Self>
|
||||
{
|
||||
Self::initial(index, rtxn, query_tree, candidates, field_name, false)
|
||||
}
|
||||
|
||||
pub fn asc(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
@ -77,47 +55,6 @@ impl<'t> AscDesc<'t> {
|
||||
Self::new(index, rtxn, parent, field_name, false)
|
||||
}
|
||||
|
||||
fn initial(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
field_name: String,
|
||||
ascending: bool,
|
||||
) -> anyhow::Result<Self>
|
||||
{
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let faceted_fields = index.faceted_fields(rtxn)?;
|
||||
let (field_id, facet_type) = field_id_facet_type(&fields_ids_map, &faceted_fields, &field_name)?;
|
||||
|
||||
let faceted_candidates = index.faceted_documents_ids(rtxn, field_id)?;
|
||||
let candidates = match &query_tree {
|
||||
Some(qt) => {
|
||||
let context = CriteriaBuilder::new(rtxn, index)?;
|
||||
let mut qt_candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), &mut WordDerivationsCache::new())?;
|
||||
if let Some(candidates) = candidates {
|
||||
qt_candidates.intersect_with(&candidates);
|
||||
}
|
||||
qt_candidates
|
||||
},
|
||||
None => candidates.unwrap_or(faceted_candidates.clone()),
|
||||
};
|
||||
|
||||
Ok(AscDesc {
|
||||
index,
|
||||
rtxn,
|
||||
field_name,
|
||||
field_id,
|
||||
facet_type,
|
||||
ascending,
|
||||
query_tree,
|
||||
candidates: facet_ordered(index, rtxn, field_id, facet_type, ascending, candidates)?,
|
||||
faceted_candidates,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn new(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
@ -141,7 +78,7 @@ impl<'t> AscDesc<'t> {
|
||||
candidates: Box::new(std::iter::empty()),
|
||||
faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent: Some(parent),
|
||||
parent,
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -156,12 +93,9 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
|
||||
match self.candidates.next().transpose()? {
|
||||
None => {
|
||||
let query_tree = self.query_tree.take();
|
||||
let bucket_candidates = take(&mut self.bucket_candidates);
|
||||
match self.parent.as_mut() {
|
||||
Some(parent) => {
|
||||
match parent.next(wdcache)? {
|
||||
match self.parent.next(wdcache)? {
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
let candidates_is_some = candidates.is_some();
|
||||
self.query_tree = query_tree;
|
||||
let candidates = match (&self.query_tree, candidates) {
|
||||
(_, Some(mut candidates)) => {
|
||||
@ -176,11 +110,22 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
},
|
||||
(None, None) => take(&mut self.faceted_candidates),
|
||||
};
|
||||
if bucket_candidates.is_empty() {
|
||||
self.bucket_candidates.union_with(&candidates);
|
||||
} else {
|
||||
|
||||
// If our parent returns candidates it means that the bucket
|
||||
// candidates were already computed before and we can use them.
|
||||
//
|
||||
// If not, we must use the just computed candidates as our bucket
|
||||
// candidates.
|
||||
if candidates_is_some {
|
||||
self.bucket_candidates.union_with(&bucket_candidates);
|
||||
} else {
|
||||
self.bucket_candidates.union_with(&candidates);
|
||||
}
|
||||
|
||||
if candidates.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
self.candidates = facet_ordered(
|
||||
self.index,
|
||||
self.rtxn,
|
||||
@ -193,27 +138,11 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
None => if query_tree.is_none() && bucket_candidates.is_empty() {
|
||||
return Ok(None)
|
||||
},
|
||||
}
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree,
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
Some(candidates) => {
|
||||
let bucket_candidates = match self.parent {
|
||||
Some(_) => take(&mut self.bucket_candidates),
|
||||
None => candidates.clone(),
|
||||
};
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(candidates),
|
||||
bucket_candidates,
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
},
|
||||
}
|
||||
|
@ -14,36 +14,19 @@ pub struct Attribute<'t> {
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
parent: Option<Box<dyn Criterion + 't>>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
flattened_query_tree: Option<Vec<Vec<Query>>>,
|
||||
current_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
||||
}
|
||||
|
||||
impl<'t> Attribute<'t> {
|
||||
pub fn initial(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
) -> Self
|
||||
{
|
||||
Attribute {
|
||||
ctx,
|
||||
query_tree,
|
||||
candidates,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent: None,
|
||||
flattened_query_tree: None,
|
||||
current_buckets: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
|
||||
Attribute {
|
||||
ctx,
|
||||
query_tree: None,
|
||||
candidates: None,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent: Some(parent),
|
||||
parent,
|
||||
flattened_query_tree: None,
|
||||
current_buckets: None,
|
||||
}
|
||||
@ -63,34 +46,35 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
}));
|
||||
},
|
||||
(Some(qt), Some(candidates)) => {
|
||||
let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| flatten_query_tree(&qt));
|
||||
let current_buckets = if let Some(current_buckets) = self.current_buckets.as_mut() {
|
||||
current_buckets
|
||||
} else {
|
||||
let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| {
|
||||
flatten_query_tree(&qt)
|
||||
});
|
||||
|
||||
let current_buckets = match self.current_buckets.as_mut() {
|
||||
Some(current_buckets) => current_buckets,
|
||||
None => {
|
||||
let new_buckets = linear_compute_candidates(self.ctx, flattened_query_tree, candidates)?;
|
||||
self.current_buckets.get_or_insert(new_buckets.into_iter())
|
||||
},
|
||||
};
|
||||
|
||||
let found_candidates = if let Some((_score, candidates)) = current_buckets.next() {
|
||||
candidates
|
||||
} else {
|
||||
let found_candidates = match current_buckets.next() {
|
||||
Some((_score, candidates)) => candidates,
|
||||
None => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.take(),
|
||||
candidates: self.candidates.take(),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
},
|
||||
};
|
||||
candidates.difference_with(&found_candidates);
|
||||
|
||||
let bucket_candidates = match self.parent {
|
||||
Some(_) => take(&mut self.bucket_candidates),
|
||||
None => found_candidates.clone(),
|
||||
};
|
||||
candidates.difference_with(&found_candidates);
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(found_candidates),
|
||||
bucket_candidates: bucket_candidates,
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
},
|
||||
(Some(qt), None) => {
|
||||
@ -106,9 +90,14 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
}));
|
||||
},
|
||||
(None, None) => {
|
||||
match self.parent.as_mut() {
|
||||
Some(parent) => {
|
||||
match parent.next(wdcache)? {
|
||||
match self.parent.next(wdcache)? {
|
||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: None,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
self.query_tree = query_tree;
|
||||
self.candidates = candidates;
|
||||
@ -119,9 +108,6 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,135 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::mem::take;
|
||||
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::search::WordDerivationsCache;
|
||||
use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context};
|
||||
|
||||
/// The result of a call to the fetcher.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct FetcherResult {
|
||||
/// The query tree corresponding to the current bucket of the last criterion.
|
||||
pub query_tree: Option<Operation>,
|
||||
/// The candidates of the current bucket of the last criterion.
|
||||
pub candidates: RoaringBitmap,
|
||||
/// Candidates that comes from the current bucket of the initial criterion.
|
||||
pub bucket_candidates: RoaringBitmap,
|
||||
}
|
||||
|
||||
pub struct Fetcher<'t> {
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Candidates,
|
||||
parent: Option<Box<dyn Criterion + 't>>,
|
||||
should_get_documents_ids: bool,
|
||||
wdcache: WordDerivationsCache,
|
||||
}
|
||||
|
||||
impl<'t> Fetcher<'t> {
|
||||
pub fn initial(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
) -> Self
|
||||
{
|
||||
Fetcher {
|
||||
ctx,
|
||||
query_tree,
|
||||
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
|
||||
parent: None,
|
||||
should_get_documents_ids: true,
|
||||
wdcache: WordDerivationsCache::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
ctx: &'t dyn Context,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
) -> Self
|
||||
{
|
||||
Fetcher {
|
||||
ctx,
|
||||
query_tree: None,
|
||||
candidates: Candidates::default(),
|
||||
parent: Some(parent),
|
||||
should_get_documents_ids: true,
|
||||
wdcache: WordDerivationsCache::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[logging_timer::time("Fetcher::{}")]
|
||||
pub fn next(&mut self) -> anyhow::Result<Option<FetcherResult>> {
|
||||
use Candidates::{Allowed, Forbidden};
|
||||
loop {
|
||||
debug!("Fetcher iteration (should_get_documents_ids: {}) ({:?})",
|
||||
self.should_get_documents_ids, self.candidates,
|
||||
);
|
||||
|
||||
let should_get_documents_ids = take(&mut self.should_get_documents_ids);
|
||||
match &mut self.candidates {
|
||||
Allowed(_) => {
|
||||
let candidates = take(&mut self.candidates).into_inner();
|
||||
let candidates = match &self.query_tree {
|
||||
Some(qt) if should_get_documents_ids => {
|
||||
let mut docids = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?;
|
||||
docids.intersect_with(&candidates);
|
||||
docids
|
||||
},
|
||||
_ => candidates,
|
||||
};
|
||||
|
||||
return Ok(Some(FetcherResult {
|
||||
query_tree: self.query_tree.take(),
|
||||
candidates: candidates.clone(),
|
||||
bucket_candidates: candidates,
|
||||
}));
|
||||
},
|
||||
Forbidden(_) => {
|
||||
match self.parent.as_mut() {
|
||||
Some(parent) => {
|
||||
match parent.next(&mut self.wdcache)? {
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
let candidates = match (&query_tree, candidates) {
|
||||
(_, Some(candidates)) => candidates,
|
||||
(Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?,
|
||||
(None, None) => RoaringBitmap::new(),
|
||||
};
|
||||
|
||||
return Ok(Some(FetcherResult { query_tree, candidates, bucket_candidates }))
|
||||
},
|
||||
None => if should_get_documents_ids {
|
||||
let candidates = match &self.query_tree {
|
||||
Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?,
|
||||
None => self.ctx.documents_ids()?,
|
||||
};
|
||||
|
||||
return Ok(Some(FetcherResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: candidates.clone(),
|
||||
bucket_candidates: candidates,
|
||||
}));
|
||||
},
|
||||
}
|
||||
},
|
||||
None => if should_get_documents_ids {
|
||||
let candidates = match &self.query_tree {
|
||||
Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), &mut self.wdcache)?,
|
||||
None => self.ctx.documents_ids()?,
|
||||
};
|
||||
|
||||
return Ok(Some(FetcherResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: candidates.clone(),
|
||||
bucket_candidates: candidates,
|
||||
}));
|
||||
},
|
||||
}
|
||||
return Ok(None);
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
57
milli/src/search/criteria/final.rs
Normal file
57
milli/src/search/criteria/final.rs
Normal file
@ -0,0 +1,57 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::search::WordDerivationsCache;
|
||||
use super::{resolve_query_tree, Criterion, CriterionResult, Context};
|
||||
|
||||
/// The result of a call to the fetcher.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct FinalResult {
|
||||
/// The query tree corresponding to the current bucket of the last criterion.
|
||||
pub query_tree: Option<Operation>,
|
||||
/// The candidates of the current bucket of the last criterion.
|
||||
pub candidates: RoaringBitmap,
|
||||
/// Candidates that comes from the current bucket of the initial criterion.
|
||||
pub bucket_candidates: RoaringBitmap,
|
||||
}
|
||||
|
||||
pub struct Final<'t> {
|
||||
ctx: &'t dyn Context,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
wdcache: WordDerivationsCache,
|
||||
}
|
||||
|
||||
impl<'t> Final<'t> {
|
||||
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Final<'t> {
|
||||
Final { ctx, parent, wdcache: WordDerivationsCache::new() }
|
||||
}
|
||||
|
||||
#[logging_timer::time("Final::{}")]
|
||||
pub fn next(&mut self) -> anyhow::Result<Option<FinalResult>> {
|
||||
loop {
|
||||
debug!("Final iteration");
|
||||
|
||||
match self.parent.next(&mut self.wdcache)? {
|
||||
Some(CriterionResult { query_tree, candidates, mut bucket_candidates }) => {
|
||||
let candidates = match (&query_tree, candidates) {
|
||||
(_, Some(candidates)) => candidates,
|
||||
(Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?,
|
||||
(None, None) => self.ctx.documents_ids()?,
|
||||
};
|
||||
|
||||
bucket_candidates.union_with(&candidates);
|
||||
|
||||
return Ok(Some(FinalResult {
|
||||
query_tree,
|
||||
candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
28
milli/src/search/criteria/initial.rs
Normal file
28
milli/src/search/criteria/initial.rs
Normal file
@ -0,0 +1,28 @@
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::search::WordDerivationsCache;
|
||||
|
||||
use super::{Criterion, CriterionResult};
|
||||
|
||||
pub struct Initial {
|
||||
answer: Option<CriterionResult>
|
||||
}
|
||||
|
||||
impl Initial {
|
||||
pub fn new(query_tree: Option<Operation>, mut candidates: Option<RoaringBitmap>) -> Initial {
|
||||
let answer = CriterionResult {
|
||||
query_tree,
|
||||
candidates: candidates.clone(),
|
||||
bucket_candidates: candidates.take().unwrap_or_default(),
|
||||
};
|
||||
Initial { answer: Some(answer) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Criterion for Initial {
|
||||
#[logging_timer::time("Initial::{}")]
|
||||
fn next(&mut self, _: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> {
|
||||
Ok(self.answer.take())
|
||||
}
|
||||
}
|
@ -8,19 +8,21 @@ use crate::search::{word_derivations, WordDerivationsCache};
|
||||
use crate::{Index, DocumentId};
|
||||
|
||||
use super::query_tree::{Operation, Query, QueryKind};
|
||||
use self::asc_desc::AscDesc;
|
||||
use self::attribute::Attribute;
|
||||
use self::r#final::Final;
|
||||
use self::initial::Initial;
|
||||
use self::proximity::Proximity;
|
||||
use self::typo::Typo;
|
||||
use self::words::Words;
|
||||
use self::asc_desc::AscDesc;
|
||||
use self::proximity::Proximity;
|
||||
use self::attribute::Attribute;
|
||||
use self::fetcher::Fetcher;
|
||||
|
||||
mod asc_desc;
|
||||
mod attribute;
|
||||
mod initial;
|
||||
mod proximity;
|
||||
mod typo;
|
||||
mod words;
|
||||
mod asc_desc;
|
||||
mod proximity;
|
||||
mod attribute;
|
||||
pub mod fetcher;
|
||||
pub mod r#final;
|
||||
|
||||
pub trait Criterion {
|
||||
fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>>;
|
||||
@ -61,6 +63,7 @@ impl Default for Candidates {
|
||||
Self::Forbidden(RoaringBitmap::new())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Context {
|
||||
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
|
||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||
@ -128,44 +131,26 @@ impl<'t> CriteriaBuilder<'t> {
|
||||
|
||||
pub fn build(
|
||||
&'t self,
|
||||
mut query_tree: Option<Operation>,
|
||||
mut facet_candidates: Option<RoaringBitmap>,
|
||||
) -> anyhow::Result<Fetcher<'t>>
|
||||
query_tree: Option<Operation>,
|
||||
facet_candidates: Option<RoaringBitmap>,
|
||||
) -> anyhow::Result<Final<'t>>
|
||||
{
|
||||
use crate::criterion::Criterion as Name;
|
||||
|
||||
let mut criterion = None as Option<Box<dyn Criterion>>;
|
||||
let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box<dyn Criterion>;
|
||||
for name in self.index.criteria(&self.rtxn)? {
|
||||
criterion = Some(match criterion.take() {
|
||||
Some(father) => match name {
|
||||
Name::Typo => Box::new(Typo::new(self, father)),
|
||||
Name::Words => Box::new(Words::new(self, father)),
|
||||
Name::Proximity => Box::new(Proximity::new(self, father)),
|
||||
Name::Attribute => Box::new(Attribute::new(self, father)),
|
||||
Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, father, field)?),
|
||||
Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, father, field)?),
|
||||
_otherwise => father,
|
||||
},
|
||||
None => match name {
|
||||
Name::Typo => Box::new(Typo::initial(self, query_tree.take(), facet_candidates.take())),
|
||||
Name::Words => Box::new(Words::initial(self, query_tree.take(), facet_candidates.take())),
|
||||
Name::Proximity => Box::new(Proximity::initial(self, query_tree.take(), facet_candidates.take())),
|
||||
Name::Attribute => Box::new(Attribute::initial(self, query_tree.take(), facet_candidates.take())),
|
||||
Name::Asc(field) => {
|
||||
Box::new(AscDesc::initial_asc(&self.index, &self.rtxn, query_tree.take(), facet_candidates.take(), field)?)
|
||||
},
|
||||
Name::Desc(field) => {
|
||||
Box::new(AscDesc::initial_desc(&self.index, &self.rtxn, query_tree.take(), facet_candidates.take(), field)?)
|
||||
},
|
||||
_otherwise => continue,
|
||||
},
|
||||
});
|
||||
criterion = match name {
|
||||
Name::Typo => Box::new(Typo::new(self, criterion)),
|
||||
Name::Words => Box::new(Words::new(self, criterion)),
|
||||
Name::Proximity => Box::new(Proximity::new(self, criterion)),
|
||||
Name::Attribute => Box::new(Attribute::new(self, criterion)),
|
||||
Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?),
|
||||
Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?),
|
||||
_otherwise => criterion,
|
||||
};
|
||||
}
|
||||
|
||||
match criterion {
|
||||
Some(criterion) => Ok(Fetcher::new(self, criterion)),
|
||||
None => Ok(Fetcher::initial(self, query_tree, facet_candidates)),
|
||||
}
|
||||
Ok(Final::new(self, criterion))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,48 +8,29 @@ use log::debug;
|
||||
use crate::{DocumentId, Position, search::{query_tree::QueryKind}};
|
||||
use crate::search::query_tree::{maximum_proximity, Operation, Query};
|
||||
use crate::search::{build_dfa, WordDerivationsCache};
|
||||
use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree};
|
||||
use super::{Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree};
|
||||
|
||||
type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>;
|
||||
|
||||
pub struct Proximity<'t> {
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: Option<(usize, Operation)>,
|
||||
/// ((max_proximity, query_tree), allowed_candidates)
|
||||
state: Option<(Option<(usize, Operation)>, RoaringBitmap)>,
|
||||
proximity: u8,
|
||||
candidates: Candidates,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
parent: Option<Box<dyn Criterion + 't>>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
candidates_cache: Cache,
|
||||
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
|
||||
}
|
||||
|
||||
impl<'t> Proximity<'t> {
|
||||
pub fn initial(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
) -> Self
|
||||
{
|
||||
Proximity {
|
||||
ctx,
|
||||
query_tree: query_tree.map(|op| (maximum_proximity(&op), op)),
|
||||
proximity: 0,
|
||||
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent: None,
|
||||
candidates_cache: Cache::new(),
|
||||
plane_sweep_cache: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
|
||||
Proximity {
|
||||
ctx,
|
||||
query_tree: None,
|
||||
state: None,
|
||||
proximity: 0,
|
||||
candidates: Candidates::default(),
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent: Some(parent),
|
||||
parent: parent,
|
||||
candidates_cache: Cache::new(),
|
||||
plane_sweep_cache: None,
|
||||
}
|
||||
@ -59,27 +40,20 @@ impl<'t> Proximity<'t> {
|
||||
impl<'t> Criterion for Proximity<'t> {
|
||||
#[logging_timer::time("Proximity::{}")]
|
||||
fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> {
|
||||
use Candidates::{Allowed, Forbidden};
|
||||
loop {
|
||||
debug!("Proximity at iteration {} (max {:?}) ({:?})",
|
||||
debug!("Proximity at iteration {} (max prox {:?}) ({:?})",
|
||||
self.proximity,
|
||||
self.query_tree.as_ref().map(|(mp, _)| mp),
|
||||
self.candidates,
|
||||
self.state.as_ref().map(|(qt, _)| qt.as_ref().map(|(mp, _)| mp)),
|
||||
self.state.as_ref().map(|(_, cd)| cd),
|
||||
);
|
||||
|
||||
match (&mut self.query_tree, &mut self.candidates) {
|
||||
(_, Allowed(candidates)) if candidates.is_empty() => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.take().map(|(_, qt)| qt),
|
||||
candidates: Some(take(&mut self.candidates).into_inner()),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
match &mut self.state {
|
||||
Some((_, candidates)) if candidates.is_empty() => {
|
||||
self.state = None; // reset state
|
||||
},
|
||||
(Some((max_prox, query_tree)), Allowed(candidates)) => {
|
||||
Some((Some((max_prox, query_tree)), candidates)) => {
|
||||
if self.proximity as usize > *max_prox {
|
||||
// reset state to (None, Forbidden(_))
|
||||
self.query_tree = None;
|
||||
self.candidates = Candidates::default();
|
||||
self.state = None; // reset state
|
||||
} else {
|
||||
let mut new_candidates = if candidates.len() <= 1000 {
|
||||
if let Some(cache) = self.plane_sweep_cache.as_mut() {
|
||||
@ -89,9 +63,7 @@ impl<'t> Criterion for Proximity<'t> {
|
||||
candidates
|
||||
},
|
||||
None => {
|
||||
// reset state to (None, Forbidden(_))
|
||||
self.query_tree = None;
|
||||
self.candidates = Candidates::default();
|
||||
self.state = None; // reset state
|
||||
continue
|
||||
},
|
||||
}
|
||||
@ -120,83 +92,58 @@ impl<'t> Criterion for Proximity<'t> {
|
||||
candidates.difference_with(&new_candidates);
|
||||
self.proximity += 1;
|
||||
|
||||
let bucket_candidates = match self.parent {
|
||||
Some(_) => take(&mut self.bucket_candidates),
|
||||
None => new_candidates.clone(),
|
||||
};
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree.clone()),
|
||||
candidates: Some(new_candidates),
|
||||
bucket_candidates,
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
}
|
||||
},
|
||||
(Some((max_prox, query_tree)), Forbidden(candidates)) => {
|
||||
if self.proximity as usize > *max_prox {
|
||||
self.query_tree = None;
|
||||
self.candidates = Candidates::default();
|
||||
} else {
|
||||
let mut new_candidates = resolve_candidates(
|
||||
self.ctx,
|
||||
&query_tree,
|
||||
self.proximity,
|
||||
&mut self.candidates_cache,
|
||||
wdcache,
|
||||
)?;
|
||||
|
||||
new_candidates.difference_with(&candidates);
|
||||
candidates.union_with(&new_candidates);
|
||||
self.proximity += 1;
|
||||
|
||||
let bucket_candidates = match self.parent {
|
||||
Some(_) => take(&mut self.bucket_candidates),
|
||||
None => new_candidates.clone(),
|
||||
};
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree.clone()),
|
||||
candidates: Some(new_candidates),
|
||||
bucket_candidates,
|
||||
}));
|
||||
}
|
||||
},
|
||||
(None, Allowed(_)) => {
|
||||
let candidates = take(&mut self.candidates).into_inner();
|
||||
Some((None, candidates)) => {
|
||||
let candidates = take(candidates);
|
||||
self.state = None; // reset state
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: Some(candidates.clone()),
|
||||
bucket_candidates: candidates,
|
||||
}));
|
||||
},
|
||||
(None, Forbidden(_)) => {
|
||||
match self.parent.as_mut() {
|
||||
Some(parent) => {
|
||||
match parent.next(wdcache)? {
|
||||
None => {
|
||||
match self.parent.next(wdcache)? {
|
||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: None,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
let candidates_is_some = candidates.is_some();
|
||||
let candidates = match (&query_tree, candidates) {
|
||||
(_, Some(candidates)) => candidates,
|
||||
(Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), wdcache)?,
|
||||
(None, None) => RoaringBitmap::new(),
|
||||
};
|
||||
|
||||
if bucket_candidates.is_empty() {
|
||||
self.bucket_candidates.union_with(&candidates);
|
||||
} else {
|
||||
// If our parent returns candidates it means that the bucket
|
||||
// candidates were already computed before and we can use them.
|
||||
//
|
||||
// If not, we must use the just computed candidates as our bucket
|
||||
// candidates.
|
||||
if candidates_is_some {
|
||||
self.bucket_candidates.union_with(&bucket_candidates);
|
||||
} else {
|
||||
self.bucket_candidates.union_with(&candidates);
|
||||
}
|
||||
|
||||
self.query_tree = query_tree.map(|op| (maximum_proximity(&op), op));
|
||||
let query_tree = query_tree.map(|op| (maximum_proximity(&op), op));
|
||||
self.state = Some((query_tree, candidates));
|
||||
self.proximity = 0;
|
||||
self.candidates = Candidates::Allowed(candidates);
|
||||
self.plane_sweep_cache = None;
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -14,28 +14,11 @@ pub struct Typo<'t> {
|
||||
number_typos: u8,
|
||||
candidates: Candidates,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
parent: Option<Box<dyn Criterion + 't>>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
|
||||
}
|
||||
|
||||
impl<'t> Typo<'t> {
|
||||
pub fn initial(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
) -> Self
|
||||
{
|
||||
Typo {
|
||||
ctx,
|
||||
query_tree: query_tree.map(|op| (maximum_typo(&op), op)),
|
||||
number_typos: 0,
|
||||
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent: None,
|
||||
candidates_cache: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
|
||||
Typo {
|
||||
ctx,
|
||||
@ -43,7 +26,7 @@ impl<'t> Typo<'t> {
|
||||
number_typos: 0,
|
||||
candidates: Candidates::default(),
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent: Some(parent),
|
||||
parent,
|
||||
candidates_cache: HashMap::new(),
|
||||
}
|
||||
}
|
||||
@ -90,15 +73,10 @@ impl<'t> Criterion for Typo<'t> {
|
||||
candidates.difference_with(&new_candidates);
|
||||
self.number_typos += 1;
|
||||
|
||||
let bucket_candidates = match self.parent {
|
||||
Some(_) => take(&mut self.bucket_candidates),
|
||||
None => new_candidates.clone(),
|
||||
};
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(new_query_tree),
|
||||
candidates: Some(new_candidates),
|
||||
bucket_candidates,
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
}
|
||||
},
|
||||
@ -145,9 +123,14 @@ impl<'t> Criterion for Typo<'t> {
|
||||
}));
|
||||
},
|
||||
(None, Forbidden(_)) => {
|
||||
match self.parent.as_mut() {
|
||||
Some(parent) => {
|
||||
match parent.next(wdcache)? {
|
||||
match self.parent.next(wdcache)? {
|
||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: None,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
self.query_tree = query_tree.map(|op| (maximum_typo(&op), op));
|
||||
self.number_typos = 0;
|
||||
@ -157,9 +140,6 @@ impl<'t> Criterion for Typo<'t> {
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -334,8 +314,8 @@ fn resolve_candidates<'t>(
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
use super::*;
|
||||
use super::super::initial::Initial;
|
||||
use super::super::test::TestContext;
|
||||
|
||||
#[test]
|
||||
@ -345,7 +325,8 @@ mod test {
|
||||
let facet_candidates = None;
|
||||
|
||||
let mut wdcache = WordDerivationsCache::new();
|
||||
let mut criteria = Typo::initial(&context, query_tree, facet_candidates);
|
||||
let parent = Initial::new(query_tree, facet_candidates);
|
||||
let mut criteria = Typo::new(&context, Box::new(parent));
|
||||
|
||||
assert!(criteria.next(&mut wdcache).unwrap().is_none());
|
||||
}
|
||||
@ -364,7 +345,8 @@ mod test {
|
||||
let facet_candidates = None;
|
||||
|
||||
let mut wdcache = WordDerivationsCache::new();
|
||||
let mut criteria = Typo::initial(&context, Some(query_tree), facet_candidates);
|
||||
let parent = Initial::new(Some(query_tree), facet_candidates);
|
||||
let mut criteria = Typo::new(&context, Box::new(parent));
|
||||
|
||||
let candidates_1 = context.word_docids("split").unwrap().unwrap()
|
||||
& context.word_docids("this").unwrap().unwrap()
|
||||
@ -413,7 +395,8 @@ mod test {
|
||||
let facet_candidates = context.word_docids("earth").unwrap().unwrap();
|
||||
|
||||
let mut wdcache = WordDerivationsCache::new();
|
||||
let mut criteria = Typo::initial(&context, query_tree, Some(facet_candidates.clone()));
|
||||
let parent = Initial::new(query_tree, Some(facet_candidates.clone()));
|
||||
let mut criteria = Typo::new(&context, Box::new(parent));
|
||||
|
||||
let expected = CriterionResult {
|
||||
query_tree: None,
|
||||
@ -442,7 +425,8 @@ mod test {
|
||||
let facet_candidates = context.word_docids("earth").unwrap().unwrap();
|
||||
|
||||
let mut wdcache = WordDerivationsCache::new();
|
||||
let mut criteria = Typo::initial(&context, Some(query_tree), Some(facet_candidates.clone()));
|
||||
let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone()));
|
||||
let mut criteria = Typo::new(&context, Box::new(parent));
|
||||
|
||||
let candidates_1 = context.word_docids("split").unwrap().unwrap()
|
||||
& context.word_docids("this").unwrap().unwrap()
|
||||
|
@ -12,34 +12,18 @@ pub struct Words<'t> {
|
||||
query_trees: Vec<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
parent: Option<Box<dyn Criterion + 't>>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
|
||||
}
|
||||
|
||||
impl<'t> Words<'t> {
|
||||
pub fn initial(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
) -> Self
|
||||
{
|
||||
Words {
|
||||
ctx,
|
||||
query_trees: query_tree.map(explode_query_tree).unwrap_or_default(),
|
||||
candidates,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent: None,
|
||||
candidates_cache: HashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
|
||||
Words {
|
||||
ctx,
|
||||
query_trees: Vec::default(),
|
||||
candidates: None,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent: Some(parent),
|
||||
parent,
|
||||
candidates_cache: HashMap::default(),
|
||||
}
|
||||
}
|
||||
@ -65,27 +49,17 @@ impl<'t> Criterion for Words<'t> {
|
||||
found_candidates.intersect_with(&candidates);
|
||||
candidates.difference_with(&found_candidates);
|
||||
|
||||
let bucket_candidates = match self.parent {
|
||||
Some(_) => take(&mut self.bucket_candidates),
|
||||
None => found_candidates.clone(),
|
||||
};
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(qt),
|
||||
candidates: Some(found_candidates),
|
||||
bucket_candidates,
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
},
|
||||
(Some(qt), None) => {
|
||||
let bucket_candidates = match self.parent {
|
||||
Some(_) => take(&mut self.bucket_candidates),
|
||||
None => RoaringBitmap::new(),
|
||||
};
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(qt),
|
||||
candidates: None,
|
||||
bucket_candidates,
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
},
|
||||
(None, Some(_)) => {
|
||||
@ -97,9 +71,14 @@ impl<'t> Criterion for Words<'t> {
|
||||
}));
|
||||
},
|
||||
(None, None) => {
|
||||
match self.parent.as_mut() {
|
||||
Some(parent) => {
|
||||
match parent.next(wdcache)? {
|
||||
match self.parent.next(wdcache)? {
|
||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: None,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default();
|
||||
self.candidates = candidates;
|
||||
@ -108,9 +87,6 @@ impl<'t> Criterion for Words<'t> {
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -13,9 +13,8 @@ use once_cell::sync::Lazy;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
|
||||
use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct};
|
||||
|
||||
use crate::search::criteria::fetcher::{Fetcher, FetcherResult};
|
||||
use crate::{DocumentId, Index};
|
||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||
use crate::{Index, DocumentId};
|
||||
|
||||
pub use self::facet::{
|
||||
FacetCondition, FacetDistribution, FacetIter, FacetNumberOperator, FacetStringOperator,
|
||||
@ -162,14 +161,14 @@ impl<'a> Search<'a> {
|
||||
&self,
|
||||
mut distinct: impl for<'c> Distinct<'c>,
|
||||
matching_words: MatchingWords,
|
||||
mut criteria: Fetcher,
|
||||
mut criteria: Final,
|
||||
) -> anyhow::Result<SearchResult> {
|
||||
let mut offset = self.offset;
|
||||
let mut initial_candidates = RoaringBitmap::new();
|
||||
let mut excluded_documents = RoaringBitmap::new();
|
||||
let mut documents_ids = Vec::with_capacity(self.limit);
|
||||
|
||||
while let Some(FetcherResult { candidates, bucket_candidates, .. }) = criteria.next()? {
|
||||
while let Some(FinalResult { candidates, bucket_candidates, .. }) = criteria.next()? {
|
||||
debug!("Number of candidates found {}", candidates.len());
|
||||
|
||||
let excluded = take(&mut excluded_documents);
|
||||
|
Loading…
x
Reference in New Issue
Block a user