mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Introduce the final Fetcher criterion
This commit is contained in:
parent
7ac09d7b7c
commit
daf126a638
107
milli/src/search/criteria/fetcher.rs
Normal file
107
milli/src/search/criteria/fetcher.rs
Normal file
@ -0,0 +1,107 @@
|
||||
use std::collections::HashMap;
|
||||
use std::mem::take;
|
||||
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::search::query_tree::Operation;
|
||||
use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context};
|
||||
|
||||
pub struct Fetcher<'t> {
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Candidates,
|
||||
parent: Option<Box<dyn Criterion + 't>>,
|
||||
should_get_documents_ids: bool,
|
||||
}
|
||||
|
||||
impl<'t> Fetcher<'t> {
|
||||
pub fn initial(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
) -> Self
|
||||
{
|
||||
Fetcher {
|
||||
ctx,
|
||||
query_tree,
|
||||
candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
|
||||
parent: None,
|
||||
should_get_documents_ids: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
ctx: &'t dyn Context,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
) -> Self
|
||||
{
|
||||
Fetcher {
|
||||
ctx,
|
||||
query_tree: None,
|
||||
candidates: Candidates::default(),
|
||||
parent: Some(parent),
|
||||
should_get_documents_ids: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Criterion for Fetcher<'t> {
|
||||
fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> {
|
||||
use Candidates::{Allowed, Forbidden};
|
||||
loop {
|
||||
debug!("Fetcher iteration (should_get_documents_ids: {}) ({:?})",
|
||||
self.should_get_documents_ids, self.candidates,
|
||||
);
|
||||
|
||||
match &mut self.candidates {
|
||||
Allowed(candidates) => if candidates.is_empty() {
|
||||
self.candidates = Candidates::default();
|
||||
} else {
|
||||
self.should_get_documents_ids = false;
|
||||
let candidates = take(&mut self.candidates).into_inner();
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: candidates.clone(),
|
||||
bucket_candidates: Some(candidates),
|
||||
}));
|
||||
},
|
||||
Forbidden(_) => {
|
||||
let should_get_documents_ids = take(&mut self.should_get_documents_ids);
|
||||
match self.parent.as_mut() {
|
||||
Some(parent) => {
|
||||
match parent.next()? {
|
||||
Some(result) => return Ok(Some(result)),
|
||||
None => if should_get_documents_ids {
|
||||
let candidates = match &self.query_tree {
|
||||
Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new())?,
|
||||
None => self.ctx.documents_ids()?,
|
||||
};
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: candidates.clone(),
|
||||
bucket_candidates: Some(candidates),
|
||||
}));
|
||||
},
|
||||
}
|
||||
},
|
||||
None => if should_get_documents_ids {
|
||||
let candidates = match &self.query_tree {
|
||||
Some(qt) => resolve_query_tree(self.ctx, &qt, &mut HashMap::new())?,
|
||||
None => self.ctx.documents_ids()?,
|
||||
};
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: candidates.clone(),
|
||||
bucket_candidates: Some(candidates),
|
||||
}));
|
||||
},
|
||||
}
|
||||
return Ok(None);
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,16 +1,19 @@
|
||||
use std::collections::HashMap;
|
||||
use std::borrow::Cow;
|
||||
|
||||
use anyhow::bail;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::Index;
|
||||
use crate::search::word_derivations;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::query_tree::{Operation, Query, QueryKind};
|
||||
|
||||
pub mod typo;
|
||||
pub mod words;
|
||||
pub mod asc_desc;
|
||||
pub mod proximity;
|
||||
pub mod fetcher;
|
||||
|
||||
pub trait Criterion {
|
||||
fn next(&mut self) -> anyhow::Result<Option<CriterionResult>>;
|
||||
@ -51,6 +54,7 @@ impl Default for Candidates {
|
||||
}
|
||||
}
|
||||
pub trait Context {
|
||||
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
|
||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>>;
|
||||
@ -66,6 +70,10 @@ pub struct HeedContext<'t> {
|
||||
}
|
||||
|
||||
impl<'a> Context for HeedContext<'a> {
|
||||
fn documents_ids(&self) -> heed::Result<RoaringBitmap> {
|
||||
self.index.documents_ids(self.rtxn)
|
||||
}
|
||||
|
||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||
self.index.word_docids.get(self.rtxn, &word)
|
||||
}
|
||||
@ -107,6 +115,80 @@ impl<'t> HeedContext<'t> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn resolve_query_tree<'t>(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: &Operation,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
{
|
||||
fn resolve_operation<'t>(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: &Operation,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
{
|
||||
use Operation::{And, Consecutive, Or, Query};
|
||||
|
||||
match query_tree {
|
||||
And(ops) => {
|
||||
let mut ops = ops.iter().map(|op| {
|
||||
resolve_operation(ctx, op, cache)
|
||||
}).collect::<anyhow::Result<Vec<_>>>()?;
|
||||
|
||||
ops.sort_unstable_by_key(|cds| cds.len());
|
||||
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let mut first_loop = true;
|
||||
for docids in ops {
|
||||
if first_loop {
|
||||
candidates = docids;
|
||||
first_loop = false;
|
||||
} else {
|
||||
candidates.intersect_with(&docids);
|
||||
}
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
Consecutive(ops) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let mut first_loop = true;
|
||||
for slice in ops.windows(2) {
|
||||
match (&slice[0], &slice[1]) {
|
||||
(Operation::Query(left), Operation::Query(right)) => {
|
||||
match query_pair_proximity_docids(ctx, left, right, 1)? {
|
||||
pair_docids if pair_docids.is_empty() => {
|
||||
return Ok(RoaringBitmap::new())
|
||||
},
|
||||
pair_docids if first_loop => {
|
||||
candidates = pair_docids;
|
||||
first_loop = false;
|
||||
},
|
||||
pair_docids => {
|
||||
candidates.intersect_with(&pair_docids);
|
||||
},
|
||||
}
|
||||
},
|
||||
_ => bail!("invalid consecutive query type"),
|
||||
}
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
Or(_, ops) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
for op in ops {
|
||||
let docids = resolve_operation(ctx, op, cache)?;
|
||||
candidates.union_with(&docids);
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
Query(q) => Ok(query_docids(ctx, q)?),
|
||||
}
|
||||
}
|
||||
|
||||
resolve_operation(ctx, query_tree, cache)
|
||||
}
|
||||
|
||||
|
||||
fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
|
||||
ctx: &dyn Context,
|
||||
left_words: &[(T, u8)],
|
||||
@ -218,6 +300,10 @@ pub mod test {
|
||||
}
|
||||
|
||||
impl<'a> Context for TestContext<'a> {
|
||||
fn documents_ids(&self) -> heed::Result<RoaringBitmap> {
|
||||
Ok(self.word_docids.iter().fold(RoaringBitmap::new(), |acc, (_, docids)| acc | docids))
|
||||
}
|
||||
|
||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||
Ok(self.word_docids.get(&word.to_string()).cloned())
|
||||
}
|
||||
|
@ -1,12 +1,11 @@
|
||||
use std::collections::HashMap;
|
||||
use std::mem::take;
|
||||
|
||||
use anyhow::bail;
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::search::query_tree::Operation;
|
||||
use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids};
|
||||
use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context};
|
||||
|
||||
pub struct Words<'t> {
|
||||
ctx: &'t dyn Context,
|
||||
@ -62,7 +61,7 @@ impl<'t> Criterion for Words<'t> {
|
||||
self.candidates = Candidates::default();
|
||||
},
|
||||
(Some(qt), Allowed(candidates)) => {
|
||||
let mut found_candidates = resolve_candidates(self.ctx, &qt, &mut self.candidates_cache)?;
|
||||
let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache)?;
|
||||
found_candidates.intersect_with(&candidates);
|
||||
candidates.difference_with(&found_candidates);
|
||||
|
||||
@ -78,7 +77,7 @@ impl<'t> Criterion for Words<'t> {
|
||||
}));
|
||||
},
|
||||
(Some(qt), Forbidden(candidates)) => {
|
||||
let mut found_candidates = resolve_candidates(self.ctx, &qt, &mut self.candidates_cache)?;
|
||||
let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache)?;
|
||||
found_candidates.difference_with(&candidates);
|
||||
candidates.union_with(&found_candidates);
|
||||
|
||||
@ -127,76 +126,3 @@ fn explode_query_tree(query_tree: Operation) -> Vec<Operation> {
|
||||
otherwise => vec![otherwise],
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_candidates<'t>(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: &Operation,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
{
|
||||
fn resolve_operation<'t>(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: &Operation,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
{
|
||||
use Operation::{And, Consecutive, Or, Query};
|
||||
|
||||
match query_tree {
|
||||
And(ops) => {
|
||||
let mut ops = ops.iter().map(|op| {
|
||||
resolve_operation(ctx, op, cache)
|
||||
}).collect::<anyhow::Result<Vec<_>>>()?;
|
||||
|
||||
ops.sort_unstable_by_key(|cds| cds.len());
|
||||
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let mut first_loop = true;
|
||||
for docids in ops {
|
||||
if first_loop {
|
||||
candidates = docids;
|
||||
first_loop = false;
|
||||
} else {
|
||||
candidates.intersect_with(&docids);
|
||||
}
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
Consecutive(ops) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let mut first_loop = true;
|
||||
for slice in ops.windows(2) {
|
||||
match (&slice[0], &slice[1]) {
|
||||
(Operation::Query(left), Operation::Query(right)) => {
|
||||
match query_pair_proximity_docids(ctx, left, right, 1)? {
|
||||
pair_docids if pair_docids.is_empty() => {
|
||||
return Ok(RoaringBitmap::new())
|
||||
},
|
||||
pair_docids if first_loop => {
|
||||
candidates = pair_docids;
|
||||
first_loop = false;
|
||||
},
|
||||
pair_docids => {
|
||||
candidates.intersect_with(&pair_docids);
|
||||
},
|
||||
}
|
||||
},
|
||||
_ => bail!("invalid consecutive query type"),
|
||||
}
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
Or(_, ops) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
for op in ops {
|
||||
let docids = resolve_operation(ctx, op, cache)?;
|
||||
candidates.union_with(&docids);
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
Query(q) => Ok(query_docids(ctx, q)?),
|
||||
}
|
||||
}
|
||||
|
||||
resolve_operation(ctx, query_tree, cache)
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ use once_cell::sync::Lazy;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
|
||||
use crate::search::criteria::{Criterion, CriterionResult};
|
||||
use crate::search::criteria::{typo::Typo, words::Words, proximity::Proximity};
|
||||
use crate::search::criteria::{typo::Typo, words::Words, proximity::Proximity, fetcher::Fetcher};
|
||||
use crate::{Index, DocumentId};
|
||||
|
||||
pub use self::facet::FacetIter;
|
||||
@ -92,13 +92,12 @@ impl<'a> Search<'a> {
|
||||
None => MatchingWords::default(),
|
||||
};
|
||||
|
||||
// We are testing the typo criteria but there will be more of them soon.
|
||||
let criteria_ctx = criteria::HeedContext::new(self.rtxn, self.index)?;
|
||||
let typo_criterion = Typo::initial(&criteria_ctx, query_tree, facet_candidates)?;
|
||||
let words_criterion = Words::new(&criteria_ctx, Box::new(typo_criterion))?;
|
||||
let proximity_criterion = Proximity::new(&criteria_ctx, Box::new(words_criterion))?;
|
||||
// let proximity_criterion = Proximity::initial(&criteria_ctx, query_tree, facet_candidates)?;
|
||||
let mut criteria = proximity_criterion;
|
||||
let fetcher_criterion = Fetcher::new(&criteria_ctx, Box::new(proximity_criterion));
|
||||
let mut criteria = fetcher_criterion;
|
||||
|
||||
// // We sort in descending order on a specific field *by hand*, don't do that at home.
|
||||
// let attr_name = "released-timestamp";
|
||||
|
Loading…
Reference in New Issue
Block a user