mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
format the whole project
This commit is contained in:
parent
ba30cef987
commit
9716fb3b36
68 changed files with 3327 additions and 2336 deletions
|
@ -5,12 +5,12 @@ use log::debug;
|
|||
use ordered_float::OrderedFloat;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::error::FieldIdMapMissingEntry;
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||
use crate::search::facet::FacetIter;
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::{FieldId, Index, Result};
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
|
||||
/// Threshold on the number of candidates that will make
|
||||
/// the system choose between one algorithm or another.
|
||||
|
@ -57,9 +57,8 @@ impl<'t> AscDesc<'t> {
|
|||
ascending: bool,
|
||||
) -> Result<Self> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let field_id = fields_ids_map
|
||||
.id(&field_name)
|
||||
.ok_or_else(|| FieldIdMapMissingEntry::FieldName {
|
||||
let field_id =
|
||||
fields_ids_map.id(&field_name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
|
||||
field_name: field_name.clone(),
|
||||
process: "AscDesc::new",
|
||||
})?;
|
||||
|
@ -101,44 +100,47 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
|
||||
self.query_tree = query_tree;
|
||||
let mut candidates = match (&self.query_tree, candidates) {
|
||||
(_, Some(candidates)) => candidates,
|
||||
(Some(qt), None) => {
|
||||
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
|
||||
resolve_query_tree(&context, qt, params.wdcache)?
|
||||
},
|
||||
(None, None) => self.index.documents_ids(self.rtxn)?,
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
self.query_tree = query_tree;
|
||||
let mut candidates = match (&self.query_tree, candidates) {
|
||||
(_, Some(candidates)) => candidates,
|
||||
(Some(qt), None) => {
|
||||
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
|
||||
resolve_query_tree(&context, qt, params.wdcache)?
|
||||
}
|
||||
(None, None) => self.index.documents_ids(self.rtxn)?,
|
||||
};
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
if candidates.is_empty() {
|
||||
continue;
|
||||
}
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
|
||||
self.allowed_candidates = &candidates - params.excluded_candidates;
|
||||
self.candidates = facet_ordered(
|
||||
self.index,
|
||||
self.rtxn,
|
||||
self.field_id,
|
||||
self.ascending,
|
||||
candidates & &self.faceted_candidates,
|
||||
)?;
|
||||
},
|
||||
None => return Ok(None),
|
||||
if candidates.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
self.allowed_candidates = &candidates - params.excluded_candidates;
|
||||
self.candidates = facet_ordered(
|
||||
self.index,
|
||||
self.rtxn,
|
||||
self.field_id,
|
||||
self.ascending,
|
||||
candidates & &self.faceted_candidates,
|
||||
)?;
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
Some(mut candidates) => {
|
||||
candidates -= params.excluded_candidates;
|
||||
|
@ -170,11 +172,8 @@ fn facet_ordered<'t>(
|
|||
let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
|
||||
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||
} else {
|
||||
let facet_fn = if ascending {
|
||||
FacetIter::new_reducing
|
||||
} else {
|
||||
FacetIter::new_reverse_reducing
|
||||
};
|
||||
let facet_fn =
|
||||
if ascending { FacetIter::new_reducing } else { FacetIter::new_reverse_reducing };
|
||||
let iter = facet_fn(rtxn, index, field_id, candidates)?;
|
||||
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
|
||||
}
|
||||
|
@ -194,9 +193,7 @@ fn iterative_facet_ordered_iter<'t>(
|
|||
for docid in candidates.iter() {
|
||||
let left = (field_id, docid, f64::MIN);
|
||||
let right = (field_id, docid, f64::MAX);
|
||||
let mut iter = index
|
||||
.field_id_docid_facet_f64s
|
||||
.range(rtxn, &(left..=right))?;
|
||||
let mut iter = index.field_id_docid_facet_f64s.range(rtxn, &(left..=right))?;
|
||||
let entry = if ascending { iter.next() } else { iter.last() };
|
||||
if let Some(((_, _, value), ())) = entry.transpose()? {
|
||||
docids_values.push((docid, OrderedFloat(value)));
|
||||
|
|
|
@ -1,15 +1,16 @@
|
|||
use std::{borrow::Cow, cmp::{self, Ordering}, collections::BinaryHeap};
|
||||
use std::collections::{BTreeMap, HashMap, btree_map};
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::{self, Ordering};
|
||||
use std::collections::binary_heap::PeekMut;
|
||||
use std::collections::{btree_map, BTreeMap, BinaryHeap, HashMap};
|
||||
use std::mem::take;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{TreeLevel, Result, search::build_dfa};
|
||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::Query;
|
||||
use crate::search::query_tree::{Operation, QueryKind};
|
||||
use crate::search::{word_derivations, WordDerivationsCache};
|
||||
use super::{Criterion, CriterionParameters, CriterionResult, Context, resolve_query_tree};
|
||||
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
|
||||
use crate::{Result, TreeLevel};
|
||||
|
||||
/// To be able to divide integers by the number of words in the query
|
||||
/// we want to find a multiplier that allows us to divide by any number between 1 and 10.
|
||||
|
@ -63,15 +64,19 @@ impl<'t> Criterion for Attribute<'t> {
|
|||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
}
|
||||
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
|
||||
let found_candidates = if allowed_candidates.len() < CANDIDATES_THRESHOLD {
|
||||
let current_buckets = match self.current_buckets.as_mut() {
|
||||
Some(current_buckets) => current_buckets,
|
||||
None => {
|
||||
let new_buckets = linear_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates)?;
|
||||
let new_buckets = linear_compute_candidates(
|
||||
self.ctx,
|
||||
&flattened_query_tree,
|
||||
&allowed_candidates,
|
||||
)?;
|
||||
self.current_buckets.get_or_insert(new_buckets.into_iter())
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
match current_buckets.next() {
|
||||
|
@ -83,10 +88,15 @@ impl<'t> Criterion for Attribute<'t> {
|
|||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
}
|
||||
}
|
||||
} else {
|
||||
match set_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates, params.wdcache)? {
|
||||
match set_compute_candidates(
|
||||
self.ctx,
|
||||
&flattened_query_tree,
|
||||
&allowed_candidates,
|
||||
params.wdcache,
|
||||
)? {
|
||||
Some(candidates) => candidates,
|
||||
None => {
|
||||
return Ok(Some(CriterionResult {
|
||||
|
@ -95,13 +105,14 @@ impl<'t> Criterion for Attribute<'t> {
|
|||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
allowed_candidates -= &found_candidates;
|
||||
|
||||
self.state = Some((query_tree.clone(), flattened_query_tree, allowed_candidates));
|
||||
self.state =
|
||||
Some((query_tree.clone(), flattened_query_tree, allowed_candidates));
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
|
@ -109,39 +120,50 @@ impl<'t> Criterion for Attribute<'t> {
|
|||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => {
|
||||
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
|
||||
- params.excluded_candidates
|
||||
}
|
||||
};
|
||||
|
||||
let flattened_query_tree = flatten_query_tree(&query_tree);
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
let flattened_query_tree = flatten_query_tree(&query_tree);
|
||||
|
||||
self.state = Some((query_tree, flattened_query_tree, candidates));
|
||||
self.current_buckets = None;
|
||||
},
|
||||
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
None => return Ok(None),
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
|
||||
self.state = Some((query_tree, flattened_query_tree, candidates));
|
||||
self.current_buckets = None;
|
||||
}
|
||||
Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -152,7 +174,9 @@ impl<'t> Criterion for Attribute<'t> {
|
|||
/// it will begin at the first non-empty interval and will return every interval without
|
||||
/// jumping over empty intervals.
|
||||
struct WordLevelIterator<'t, 'q> {
|
||||
inner: Box<dyn Iterator<Item =heed::Result<((&'t str, TreeLevel, u32, u32), RoaringBitmap)>> + 't>,
|
||||
inner: Box<
|
||||
dyn Iterator<Item = heed::Result<((&'t str, TreeLevel, u32, u32), RoaringBitmap)>> + 't,
|
||||
>,
|
||||
level: TreeLevel,
|
||||
interval_size: u32,
|
||||
word: Cow<'q, str>,
|
||||
|
@ -162,49 +186,80 @@ struct WordLevelIterator<'t, 'q> {
|
|||
}
|
||||
|
||||
impl<'t, 'q> WordLevelIterator<'t, 'q> {
|
||||
fn new(ctx: &'t dyn Context<'t>, word: Cow<'q, str>, in_prefix_cache: bool) -> heed::Result<Option<Self>> {
|
||||
fn new(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
word: Cow<'q, str>,
|
||||
in_prefix_cache: bool,
|
||||
) -> heed::Result<Option<Self>> {
|
||||
match ctx.word_position_last_level(&word, in_prefix_cache)? {
|
||||
Some(level) => {
|
||||
Some(level) => {
|
||||
let interval_size = LEVEL_EXPONENTIATION_BASE.pow(Into::<u8>::into(level) as u32);
|
||||
let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, None, None)?;
|
||||
Ok(Some(Self { inner, level, interval_size, word, in_prefix_cache, inner_next: None, current_interval: None }))
|
||||
},
|
||||
let inner =
|
||||
ctx.word_position_iterator(&word, level, in_prefix_cache, None, None)?;
|
||||
Ok(Some(Self {
|
||||
inner,
|
||||
level,
|
||||
interval_size,
|
||||
word,
|
||||
in_prefix_cache,
|
||||
inner_next: None,
|
||||
current_interval: None,
|
||||
}))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn dig(&self, ctx: &'t dyn Context<'t>, level: &TreeLevel, left_interval: Option<u32>) -> heed::Result<Self> {
|
||||
fn dig(
|
||||
&self,
|
||||
ctx: &'t dyn Context<'t>,
|
||||
level: &TreeLevel,
|
||||
left_interval: Option<u32>,
|
||||
) -> heed::Result<Self> {
|
||||
let level = *level.min(&self.level);
|
||||
let interval_size = LEVEL_EXPONENTIATION_BASE.pow(Into::<u8>::into(level) as u32);
|
||||
let word = self.word.clone();
|
||||
let in_prefix_cache = self.in_prefix_cache;
|
||||
let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, left_interval, None)?;
|
||||
let inner =
|
||||
ctx.word_position_iterator(&word, level, in_prefix_cache, left_interval, None)?;
|
||||
|
||||
Ok(Self {inner, level, interval_size, word, in_prefix_cache, inner_next: None, current_interval: None})
|
||||
Ok(Self {
|
||||
inner,
|
||||
level,
|
||||
interval_size,
|
||||
word,
|
||||
in_prefix_cache,
|
||||
inner_next: None,
|
||||
current_interval: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn next(&mut self) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
|
||||
fn is_next_interval(last_right: u32, next_left: u32) -> bool { last_right + 1 == next_left }
|
||||
fn is_next_interval(last_right: u32, next_left: u32) -> bool {
|
||||
last_right + 1 == next_left
|
||||
}
|
||||
|
||||
let inner_next = match self.inner_next.take() {
|
||||
Some(inner_next) => Some(inner_next),
|
||||
None => self.inner.next().transpose()?.map(|((_, _, left, right), docids)| (left, right, docids)),
|
||||
None => self
|
||||
.inner
|
||||
.next()
|
||||
.transpose()?
|
||||
.map(|((_, _, left, right), docids)| (left, right, docids)),
|
||||
};
|
||||
|
||||
match inner_next {
|
||||
Some((left, right, docids)) => {
|
||||
match self.current_interval {
|
||||
Some((last_left, last_right)) if !is_next_interval(last_right, left) => {
|
||||
let blank_left = last_left + self.interval_size;
|
||||
let blank_right = last_right + self.interval_size;
|
||||
self.current_interval = Some((blank_left, blank_right));
|
||||
self.inner_next = Some((left, right, docids));
|
||||
Ok(Some((blank_left, blank_right, RoaringBitmap::new())))
|
||||
},
|
||||
_ => {
|
||||
self.current_interval = Some((left, right));
|
||||
Ok(Some((left, right, docids)))
|
||||
}
|
||||
Some((left, right, docids)) => match self.current_interval {
|
||||
Some((last_left, last_right)) if !is_next_interval(last_right, left) => {
|
||||
let blank_left = last_left + self.interval_size;
|
||||
let blank_right = last_right + self.interval_size;
|
||||
self.current_interval = Some((blank_left, blank_right));
|
||||
self.inner_next = Some((left, right, docids));
|
||||
Ok(Some((blank_left, blank_right, RoaringBitmap::new())))
|
||||
}
|
||||
_ => {
|
||||
self.current_interval = Some((left, right));
|
||||
Ok(Some((left, right, docids)))
|
||||
}
|
||||
},
|
||||
None => Ok(None),
|
||||
|
@ -228,30 +283,37 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
|||
ctx: &'t dyn Context<'t>,
|
||||
queries: &'q [Query],
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<Option<Self>>
|
||||
{
|
||||
) -> Result<Option<Self>> {
|
||||
let mut inner = Vec::with_capacity(queries.len());
|
||||
for query in queries {
|
||||
match &query.kind {
|
||||
QueryKind::Exact { word, .. } => {
|
||||
if !query.prefix || ctx.in_prefix_cache(&word) {
|
||||
let word = Cow::Borrowed(query.kind.word());
|
||||
if let Some(word_level_iterator) = WordLevelIterator::new(ctx, word, query.prefix)? {
|
||||
if let Some(word_level_iterator) =
|
||||
WordLevelIterator::new(ctx, word, query.prefix)?
|
||||
{
|
||||
inner.push(word_level_iterator);
|
||||
}
|
||||
} else {
|
||||
for (word, _) in word_derivations(&word, true, 0, ctx.words_fst(), wdcache)? {
|
||||
for (word, _) in word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?
|
||||
{
|
||||
let word = Cow::Owned(word.to_owned());
|
||||
if let Some(word_level_iterator) = WordLevelIterator::new(ctx, word, false)? {
|
||||
if let Some(word_level_iterator) =
|
||||
WordLevelIterator::new(ctx, word, false)?
|
||||
{
|
||||
inner.push(word_level_iterator);
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
QueryKind::Tolerant { typo, word } => {
|
||||
for (word, _) in word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)? {
|
||||
for (word, _) in
|
||||
word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?
|
||||
{
|
||||
let word = Cow::Owned(word.to_owned());
|
||||
if let Some(word_level_iterator) = WordLevelIterator::new(ctx, word, false)? {
|
||||
if let Some(word_level_iterator) = WordLevelIterator::new(ctx, word, false)?
|
||||
{
|
||||
inner.push(word_level_iterator);
|
||||
}
|
||||
}
|
||||
|
@ -284,17 +346,28 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
|||
Some(parent) => {
|
||||
let parent = parent.dig(ctx)?;
|
||||
(parent.level.min(self.level), Some(Box::new(parent)))
|
||||
},
|
||||
}
|
||||
None => (self.level.saturating_sub(1), None),
|
||||
};
|
||||
|
||||
let left_interval = self.accumulator.get(self.interval_to_skip).map(|opt| opt.as_ref().map(|(left, _, _)| *left)).flatten();
|
||||
let left_interval = self
|
||||
.accumulator
|
||||
.get(self.interval_to_skip)
|
||||
.map(|opt| opt.as_ref().map(|(left, _, _)| *left))
|
||||
.flatten();
|
||||
let mut inner = Vec::with_capacity(self.inner.len());
|
||||
for word_level_iterator in self.inner.iter() {
|
||||
inner.push(word_level_iterator.dig(ctx, &level, left_interval)?);
|
||||
}
|
||||
|
||||
Ok(Self {parent, inner, level, accumulator: vec![], parent_accumulator: vec![], interval_to_skip: 0})
|
||||
Ok(Self {
|
||||
parent,
|
||||
inner,
|
||||
level,
|
||||
accumulator: vec![],
|
||||
parent_accumulator: vec![],
|
||||
interval_to_skip: 0,
|
||||
})
|
||||
}
|
||||
|
||||
fn inner_next(&mut self, level: TreeLevel) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
|
||||
|
@ -305,12 +378,12 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
|||
let wli_u8_level = Into::<u8>::into(wli.level);
|
||||
let accumulated_count = LEVEL_EXPONENTIATION_BASE.pow((u8_level - wli_u8_level) as u32);
|
||||
for _ in 0..accumulated_count {
|
||||
if let Some((next_left, _, next_docids)) = wli.next()? {
|
||||
accumulated = match accumulated.take(){
|
||||
if let Some((next_left, _, next_docids)) = wli.next()? {
|
||||
accumulated = match accumulated.take() {
|
||||
Some((acc_left, acc_right, mut acc_docids)) => {
|
||||
acc_docids |= next_docids;
|
||||
Some((acc_left, acc_right, acc_docids))
|
||||
},
|
||||
}
|
||||
None => Some((next_left, next_left + interval_size, next_docids)),
|
||||
};
|
||||
}
|
||||
|
@ -322,7 +395,11 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
|||
|
||||
/// return the next meta-interval created from inner WordLevelIterators,
|
||||
/// and from the chained QueryLevelIterator, if any.
|
||||
fn next(&mut self, allowed_candidates: &RoaringBitmap, tree_level: TreeLevel) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
|
||||
fn next(
|
||||
&mut self,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
tree_level: TreeLevel,
|
||||
) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
|
||||
let parent_result = match self.parent.as_mut() {
|
||||
Some(parent) => Some(parent.next(allowed_candidates, tree_level)?),
|
||||
None => None,
|
||||
|
@ -335,22 +412,30 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
|||
&self.parent_accumulator,
|
||||
&self.accumulator,
|
||||
self.interval_to_skip,
|
||||
allowed_candidates
|
||||
allowed_candidates,
|
||||
);
|
||||
self.accumulator.push(inner_next);
|
||||
self.parent_accumulator.push(parent_next);
|
||||
let mut merged_interval: Option<(u32, u32, RoaringBitmap)> = None;
|
||||
|
||||
for current in self.accumulator.iter().rev().zip(self.parent_accumulator.iter()).skip(self.interval_to_skip) {
|
||||
for current in self
|
||||
.accumulator
|
||||
.iter()
|
||||
.rev()
|
||||
.zip(self.parent_accumulator.iter())
|
||||
.skip(self.interval_to_skip)
|
||||
{
|
||||
if let (Some((left_a, right_a, a)), Some((left_b, right_b, b))) = current {
|
||||
match merged_interval.as_mut() {
|
||||
Some((_, _, merged_docids)) => *merged_docids |= a & b,
|
||||
None => merged_interval = Some((left_a + left_b, right_a + right_b, a & b)),
|
||||
None => {
|
||||
merged_interval = Some((left_a + left_b, right_a + right_b, a & b))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(merged_interval)
|
||||
},
|
||||
}
|
||||
None => {
|
||||
let level = self.level;
|
||||
match self.inner_next(level)? {
|
||||
|
@ -358,12 +443,11 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
|||
self.accumulator = vec![Some((left, right, RoaringBitmap::new()))];
|
||||
candidates &= allowed_candidates;
|
||||
Ok(Some((left, right, candidates)))
|
||||
|
||||
},
|
||||
}
|
||||
None => {
|
||||
self.accumulator = vec![None];
|
||||
Ok(None)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -379,16 +463,18 @@ fn interval_to_skip(
|
|||
already_skiped: usize,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
) -> usize {
|
||||
parent_accumulator.iter()
|
||||
parent_accumulator
|
||||
.iter()
|
||||
.zip(current_accumulator.iter())
|
||||
.skip(already_skiped)
|
||||
.take_while(|(parent, current)| {
|
||||
let skip_parent = parent.as_ref().map_or(true, |(_, _, docids)| docids.is_empty());
|
||||
let skip_current = current.as_ref().map_or(true, |(_, _, docids)| docids.is_disjoint(allowed_candidates));
|
||||
let skip_current = current
|
||||
.as_ref()
|
||||
.map_or(true, |(_, _, docids)| docids.is_disjoint(allowed_candidates));
|
||||
skip_parent && skip_current
|
||||
})
|
||||
.count()
|
||||
|
||||
}
|
||||
|
||||
/// A Branch represents a possible alternative of the original query and is built with the Query Tree,
|
||||
|
@ -410,7 +496,7 @@ impl<'t, 'q> Branch<'t, 'q> {
|
|||
self.last_result = last_result;
|
||||
self.tree_level = tree_level;
|
||||
Ok(true)
|
||||
},
|
||||
}
|
||||
None => Ok(false),
|
||||
}
|
||||
}
|
||||
|
@ -429,7 +515,7 @@ impl<'t, 'q> Branch<'t, 'q> {
|
|||
let interval_size = LEVEL_EXPONENTIATION_BASE.pow(u8_level as u32);
|
||||
let (left, right, _) = self.last_result;
|
||||
|
||||
self.last_result = (left + interval_size, right + interval_size, RoaringBitmap::new());
|
||||
self.last_result = (left + interval_size, right + interval_size, RoaringBitmap::new());
|
||||
}
|
||||
|
||||
/// return the score of the current inner interval.
|
||||
|
@ -477,31 +563,31 @@ fn initialize_query_level_iterators<'t, 'q>(
|
|||
allowed_candidates: &RoaringBitmap,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<BinaryHeap<Branch<'t, 'q>>> {
|
||||
|
||||
let mut positions = BinaryHeap::with_capacity(branches.len());
|
||||
for branch in branches {
|
||||
let mut branch_positions = Vec::with_capacity(branch.len());
|
||||
for queries in branch {
|
||||
for queries in branch {
|
||||
match QueryLevelIterator::new(ctx, queries, wdcache)? {
|
||||
Some(qli) => branch_positions.push(qli),
|
||||
None => {
|
||||
// the branch seems to be invalid, so we skip it.
|
||||
branch_positions.clear();
|
||||
break;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
// QueryLevelIterators need to be sorted by level and folded in descending order.
|
||||
branch_positions.sort_unstable_by_key(|qli| qli.level);
|
||||
let folded_query_level_iterators = branch_positions
|
||||
.into_iter()
|
||||
.fold(None, |fold: Option<QueryLevelIterator>, mut qli| match fold {
|
||||
Some(fold) => {
|
||||
qli.parent(fold);
|
||||
Some(qli)
|
||||
},
|
||||
None => Some(qli),
|
||||
});
|
||||
let folded_query_level_iterators =
|
||||
branch_positions.into_iter().fold(None, |fold: Option<QueryLevelIterator>, mut qli| {
|
||||
match fold {
|
||||
Some(fold) => {
|
||||
qli.parent(fold);
|
||||
Some(qli)
|
||||
}
|
||||
None => Some(qli),
|
||||
}
|
||||
});
|
||||
|
||||
if let Some(mut folded_query_level_iterators) = folded_query_level_iterators {
|
||||
let tree_level = folded_query_level_iterators.level;
|
||||
|
@ -526,9 +612,9 @@ fn set_compute_candidates<'t>(
|
|||
branches: &FlattenedQueryTree,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<Option<RoaringBitmap>>
|
||||
{
|
||||
let mut branches_heap = initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?;
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
let mut branches_heap =
|
||||
initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?;
|
||||
let lowest_level = TreeLevel::min_value();
|
||||
let mut final_candidates: Option<(u32, RoaringBitmap)> = None;
|
||||
let mut allowed_candidates = allowed_candidates.clone();
|
||||
|
@ -539,15 +625,18 @@ fn set_compute_candidates<'t>(
|
|||
// if the current rank is worse than the best one, we break to return
|
||||
// candidates that correspond to the best rank
|
||||
if let Some((best_rank, _)) = final_candidates {
|
||||
if branch_rank > best_rank { break }
|
||||
if branch_rank > best_rank {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let _left = branch.last_result.0;
|
||||
let candidates = take(&mut branch.last_result.2);
|
||||
if candidates.is_empty() {
|
||||
// we don't have candidates, get next interval.
|
||||
if !branch.next(&allowed_candidates)? { PeekMut::pop(branch); }
|
||||
}
|
||||
else if is_lowest_level {
|
||||
if !branch.next(&allowed_candidates)? {
|
||||
PeekMut::pop(branch);
|
||||
}
|
||||
} else if is_lowest_level {
|
||||
// we have candidates, but we can't dig deeper.
|
||||
allowed_candidates -= &candidates;
|
||||
final_candidates = match final_candidates.take() {
|
||||
|
@ -556,19 +645,20 @@ fn set_compute_candidates<'t>(
|
|||
best_candidates |= candidates;
|
||||
branch.lazy_next();
|
||||
Some((best_rank, best_candidates))
|
||||
},
|
||||
}
|
||||
// we take current candidates as best candidates
|
||||
None => {
|
||||
branch.lazy_next();
|
||||
Some((branch_rank, candidates))
|
||||
},
|
||||
}
|
||||
};
|
||||
} else {
|
||||
// we have candidates, let's dig deeper in levels.
|
||||
branch.dig(ctx)?;
|
||||
if !branch.next(&allowed_candidates)? { PeekMut::pop(branch); }
|
||||
if !branch.next(&allowed_candidates)? {
|
||||
PeekMut::pop(branch);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Ok(final_candidates.map(|(_rank, candidates)| candidates))
|
||||
|
@ -578,9 +668,11 @@ fn linear_compute_candidates(
|
|||
ctx: &dyn Context,
|
||||
branches: &FlattenedQueryTree,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
) -> Result<BTreeMap<u64, RoaringBitmap>>
|
||||
{
|
||||
fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
|
||||
) -> Result<BTreeMap<u64, RoaringBitmap>> {
|
||||
fn compute_candidate_rank(
|
||||
branches: &FlattenedQueryTree,
|
||||
words_positions: HashMap<String, RoaringBitmap>,
|
||||
) -> u64 {
|
||||
let mut min_rank = u64::max_value();
|
||||
for branch in branches {
|
||||
let branch_len = branch.len();
|
||||
|
@ -593,17 +685,20 @@ fn linear_compute_candidates(
|
|||
QueryKind::Exact { word, .. } => {
|
||||
if *prefix {
|
||||
word_derivations(word, true, 0, &words_positions)
|
||||
.flat_map(|positions| positions.iter().next()).min()
|
||||
.flat_map(|positions| positions.iter().next())
|
||||
.min()
|
||||
} else {
|
||||
words_positions.get(word)
|
||||
words_positions
|
||||
.get(word)
|
||||
.map(|positions| positions.iter().next())
|
||||
.flatten()
|
||||
}
|
||||
},
|
||||
}
|
||||
QueryKind::Tolerant { typo, word } => {
|
||||
word_derivations(word, *prefix, *typo, &words_positions)
|
||||
.flat_map(|positions| positions.iter().next()).min()
|
||||
},
|
||||
.flat_map(|positions| positions.iter().next())
|
||||
.min()
|
||||
}
|
||||
};
|
||||
|
||||
match (position, current_position) {
|
||||
|
@ -627,9 +722,11 @@ fn linear_compute_candidates(
|
|||
branch_rank.sort_unstable();
|
||||
// because several words in the same query can't all match at position 0,
|
||||
// we subtract the word index from the position.
|
||||
let branch_rank: u64 = branch_rank.into_iter().enumerate().map(|(i, r)| r - i as u64).sum();
|
||||
let branch_rank: u64 =
|
||||
branch_rank.into_iter().enumerate().map(|(i, r)| r - i as u64).sum();
|
||||
// here we compute the mean of the words of the branch
|
||||
min_rank = min_rank.min(branch_rank * LCM_10_FIRST_NUMBERS as u64 / branch_len as u64);
|
||||
min_rank =
|
||||
min_rank.min(branch_rank * LCM_10_FIRST_NUMBERS as u64 / branch_len as u64);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -641,8 +738,7 @@ fn linear_compute_candidates(
|
|||
is_prefix: bool,
|
||||
max_typo: u8,
|
||||
words_positions: &'a HashMap<String, RoaringBitmap>,
|
||||
) -> impl Iterator<Item = &'a RoaringBitmap>
|
||||
{
|
||||
) -> impl Iterator<Item = &'a RoaringBitmap> {
|
||||
let dfa = build_dfa(word, max_typo, is_prefix);
|
||||
words_positions.iter().filter_map(move |(document_word, positions)| {
|
||||
use levenshtein_automata::Distance;
|
||||
|
@ -680,25 +776,26 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
|||
}
|
||||
}
|
||||
out
|
||||
},
|
||||
}
|
||||
None => recurse(head),
|
||||
}
|
||||
}
|
||||
|
||||
fn recurse(op: &Operation) -> FlattenedQueryTree {
|
||||
match op {
|
||||
And(ops) => {
|
||||
ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t))
|
||||
},
|
||||
Or(_, ops) => if ops.iter().all(|op| op.query().is_some()) {
|
||||
vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
|
||||
} else {
|
||||
ops.iter().map(recurse).flatten().collect()
|
||||
},
|
||||
And(ops) => ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t)),
|
||||
Or(_, ops) => {
|
||||
if ops.iter().all(|op| op.query().is_some()) {
|
||||
vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
|
||||
} else {
|
||||
ops.iter().map(recurse).flatten().collect()
|
||||
}
|
||||
}
|
||||
Phrase(words) => {
|
||||
let queries = words.iter().map(|word| {
|
||||
vec![Query {prefix: false, kind: QueryKind::exact(word.clone())}]
|
||||
}).collect();
|
||||
let queries = words
|
||||
.iter()
|
||||
.map(|word| vec![Query { prefix: false, kind: QueryKind::exact(word.clone()) }])
|
||||
.collect();
|
||||
vec![queries]
|
||||
}
|
||||
Operation::Query(query) => vec![vec![vec![query.clone()]]],
|
||||
|
@ -712,28 +809,43 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
|||
mod tests {
|
||||
use big_s::S;
|
||||
|
||||
use crate::search::criteria::QueryKind;
|
||||
use super::*;
|
||||
use crate::search::criteria::QueryKind;
|
||||
|
||||
#[test]
|
||||
fn simple_flatten_query_tree() {
|
||||
let query_tree = Operation::Or(false, vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }),
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythe")) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
|
||||
]),
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("many")) }),
|
||||
Operation::Or(false, vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("thefish")) }),
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("the")) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
|
||||
]),
|
||||
let query_tree = Operation::Or(
|
||||
false,
|
||||
vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }),
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythe")) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
|
||||
]),
|
||||
]),
|
||||
]);
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("many")) }),
|
||||
Operation::Or(
|
||||
false,
|
||||
vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact(S("thefish")),
|
||||
}),
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact(S("the")),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact(S("fish")),
|
||||
}),
|
||||
]),
|
||||
],
|
||||
),
|
||||
]),
|
||||
],
|
||||
);
|
||||
|
||||
let expected = vec![
|
||||
vec![vec![Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }]],
|
||||
|
|
|
@ -2,19 +2,15 @@ use std::convert::TryFrom;
|
|||
use std::mem::take;
|
||||
use std::ops::BitOr;
|
||||
|
||||
use itertools::Itertools;
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
use itertools::Itertools;
|
||||
|
||||
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
|
||||
use crate::search::criteria::{
|
||||
Context,
|
||||
Criterion,
|
||||
CriterionParameters,
|
||||
CriterionResult,
|
||||
resolve_query_tree,
|
||||
resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
|
||||
};
|
||||
use crate::{TreeLevel, Result};
|
||||
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
|
||||
use crate::{Result, TreeLevel};
|
||||
|
||||
pub struct Exactness<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
|
@ -26,7 +22,11 @@ pub struct Exactness<'t> {
|
|||
}
|
||||
|
||||
impl<'t> Exactness<'t> {
|
||||
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>, primitive_query: &[PrimitiveQueryPart]) -> heed::Result<Self> {
|
||||
pub fn new(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
primitive_query: &[PrimitiveQueryPart],
|
||||
) -> heed::Result<Self> {
|
||||
let mut query: Vec<_> = Vec::with_capacity(primitive_query.len());
|
||||
for part in primitive_query {
|
||||
query.push(ExactQueryPart::from_primitive_query_part(ctx, part)?);
|
||||
|
@ -59,7 +59,7 @@ impl<'t> Criterion for Exactness<'t> {
|
|||
// reset state
|
||||
self.state = None;
|
||||
self.query_tree = None;
|
||||
},
|
||||
}
|
||||
Some(state) => {
|
||||
let (candidates, state) = resolve_state(self.ctx, take(state), &self.query)?;
|
||||
self.state = state;
|
||||
|
@ -70,40 +70,51 @@ impl<'t> Criterion for Exactness<'t> {
|
|||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => {
|
||||
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
|
||||
- params.excluded_candidates
|
||||
}
|
||||
};
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
self.state = Some(State::new(candidates));
|
||||
self.query_tree = Some(query_tree);
|
||||
},
|
||||
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
None => return Ok(None),
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
|
||||
self.state = Some(State::new(candidates));
|
||||
self.query_tree = Some(query_tree);
|
||||
}
|
||||
Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -125,9 +136,9 @@ impl State {
|
|||
|
||||
fn difference_with(&mut self, lhs: &RoaringBitmap) {
|
||||
match self {
|
||||
Self::ExactAttribute(candidates) |
|
||||
Self::AttributeStartsWith(candidates) |
|
||||
Self::ExactWords(candidates) => *candidates -= lhs,
|
||||
Self::ExactAttribute(candidates)
|
||||
| Self::AttributeStartsWith(candidates)
|
||||
| Self::ExactWords(candidates) => *candidates -= lhs,
|
||||
Self::Remainings(candidates_array) => {
|
||||
candidates_array.iter_mut().for_each(|candidates| *candidates -= lhs);
|
||||
candidates_array.retain(|candidates| !candidates.is_empty());
|
||||
|
@ -137,9 +148,9 @@ impl State {
|
|||
|
||||
fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
Self::ExactAttribute(candidates) |
|
||||
Self::AttributeStartsWith(candidates) |
|
||||
Self::ExactWords(candidates) => candidates.is_empty(),
|
||||
Self::ExactAttribute(candidates)
|
||||
| Self::AttributeStartsWith(candidates)
|
||||
| Self::ExactWords(candidates) => candidates.is_empty(),
|
||||
Self::Remainings(candidates_array) => {
|
||||
candidates_array.iter().all(RoaringBitmap::is_empty)
|
||||
}
|
||||
|
@ -158,8 +169,7 @@ fn resolve_state(
|
|||
ctx: &dyn Context,
|
||||
state: State,
|
||||
query: &[ExactQueryPart],
|
||||
) -> Result<(RoaringBitmap, Option<State>)>
|
||||
{
|
||||
) -> Result<(RoaringBitmap, Option<State>)> {
|
||||
use State::*;
|
||||
match state {
|
||||
ExactAttribute(mut allowed_candidates) => {
|
||||
|
@ -167,8 +177,11 @@ fn resolve_state(
|
|||
if let Ok(query_len) = u8::try_from(query.len()) {
|
||||
let attributes_ids = ctx.searchable_fields_ids()?;
|
||||
for id in attributes_ids {
|
||||
if let Some(attribute_allowed_docids) = ctx.field_id_word_count_docids(id, query_len)? {
|
||||
let mut attribute_candidates_array = attribute_start_with_docids(ctx, id as u32, query)?;
|
||||
if let Some(attribute_allowed_docids) =
|
||||
ctx.field_id_word_count_docids(id, query_len)?
|
||||
{
|
||||
let mut attribute_candidates_array =
|
||||
attribute_start_with_docids(ctx, id as u32, query)?;
|
||||
attribute_candidates_array.push(attribute_allowed_docids);
|
||||
candidates |= intersection_of(attribute_candidates_array.iter().collect());
|
||||
}
|
||||
|
@ -181,12 +194,13 @@ fn resolve_state(
|
|||
}
|
||||
|
||||
Ok((candidates, Some(AttributeStartsWith(allowed_candidates))))
|
||||
},
|
||||
}
|
||||
AttributeStartsWith(mut allowed_candidates) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let attributes_ids = ctx.searchable_fields_ids()?;
|
||||
for id in attributes_ids {
|
||||
let attribute_candidates_array = attribute_start_with_docids(ctx, id as u32, query)?;
|
||||
let attribute_candidates_array =
|
||||
attribute_start_with_docids(ctx, id as u32, query)?;
|
||||
candidates |= intersection_of(attribute_candidates_array.iter().collect());
|
||||
}
|
||||
|
||||
|
@ -195,7 +209,7 @@ fn resolve_state(
|
|||
// remove current candidates from allowed candidates
|
||||
allowed_candidates -= &candidates;
|
||||
Ok((candidates, Some(ExactWords(allowed_candidates))))
|
||||
},
|
||||
}
|
||||
ExactWords(mut allowed_candidates) => {
|
||||
let number_of_part = query.len();
|
||||
let mut parts_candidates_array = Vec::with_capacity(number_of_part);
|
||||
|
@ -210,7 +224,7 @@ fn resolve_state(
|
|||
candidates |= synonym_candidates;
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
// compute intersection on pair of words with a proximity of 0.
|
||||
Phrase(phrase) => {
|
||||
let mut bitmaps = Vec::with_capacity(phrase.len().saturating_sub(1));
|
||||
|
@ -220,8 +234,8 @@ fn resolve_state(
|
|||
Some(docids) => bitmaps.push(docids),
|
||||
None => {
|
||||
bitmaps.clear();
|
||||
break
|
||||
},
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -247,7 +261,7 @@ fn resolve_state(
|
|||
// intersect each word candidates in combinations
|
||||
.map(intersection_of)
|
||||
// union combinations of `c_count` exact words
|
||||
.fold(RoaringBitmap::new(), RoaringBitmap::bitor);
|
||||
.fold(RoaringBitmap::new(), RoaringBitmap::bitor);
|
||||
// only keep allowed candidates
|
||||
combinations_candidates &= &allowed_candidates;
|
||||
// remove current candidates from allowed candidates
|
||||
|
@ -261,7 +275,7 @@ fn resolve_state(
|
|||
candidates_array.reverse();
|
||||
|
||||
Ok((all_exact_candidates, Some(Remainings(candidates_array))))
|
||||
},
|
||||
}
|
||||
// pop remainings candidates until the emptiness
|
||||
Remainings(mut candidates_array) => {
|
||||
let candidates = candidates_array.pop().unwrap_or_default();
|
||||
|
@ -270,12 +284,15 @@ fn resolve_state(
|
|||
} else {
|
||||
Ok((candidates, None))
|
||||
}
|
||||
},
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn attribute_start_with_docids(ctx: &dyn Context, attribute_id: u32, query: &[ExactQueryPart]) -> heed::Result<Vec<RoaringBitmap>> {
|
||||
fn attribute_start_with_docids(
|
||||
ctx: &dyn Context,
|
||||
attribute_id: u32,
|
||||
query: &[ExactQueryPart],
|
||||
) -> heed::Result<Vec<RoaringBitmap>> {
|
||||
let lowest_level = TreeLevel::min_value();
|
||||
let mut attribute_candidates_array = Vec::new();
|
||||
// start from attribute first position
|
||||
|
@ -293,7 +310,7 @@ fn attribute_start_with_docids(ctx: &dyn Context, attribute_id: u32, query: &[Ex
|
|||
}
|
||||
attribute_candidates_array.push(synonyms_candidates);
|
||||
pos += 1;
|
||||
},
|
||||
}
|
||||
Phrase(phrase) => {
|
||||
for word in phrase {
|
||||
let wc = ctx.word_level_position_docids(word, lowest_level, pos, pos)?;
|
||||
|
@ -325,24 +342,30 @@ pub enum ExactQueryPart {
|
|||
}
|
||||
|
||||
impl ExactQueryPart {
|
||||
fn from_primitive_query_part(ctx: &dyn Context, part: &PrimitiveQueryPart) -> heed::Result<Self> {
|
||||
fn from_primitive_query_part(
|
||||
ctx: &dyn Context,
|
||||
part: &PrimitiveQueryPart,
|
||||
) -> heed::Result<Self> {
|
||||
let part = match part {
|
||||
PrimitiveQueryPart::Word(word, _) => {
|
||||
match ctx.synonyms(word)? {
|
||||
Some(synonyms) => {
|
||||
let mut synonyms: Vec<_> = synonyms.into_iter().filter_map(|mut array| {
|
||||
// keep 1 word synonyms only.
|
||||
match array.pop() {
|
||||
Some(word) if array.is_empty() => Some(word),
|
||||
_ => None,
|
||||
}
|
||||
}).collect();
|
||||
let mut synonyms: Vec<_> = synonyms
|
||||
.into_iter()
|
||||
.filter_map(|mut array| {
|
||||
// keep 1 word synonyms only.
|
||||
match array.pop() {
|
||||
Some(word) if array.is_empty() => Some(word),
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
synonyms.push(word.clone());
|
||||
ExactQueryPart::Synonyms(synonyms)
|
||||
},
|
||||
}
|
||||
None => ExactQueryPart::Synonyms(vec![word.clone()]),
|
||||
}
|
||||
},
|
||||
}
|
||||
PrimitiveQueryPart::Phrase(phrase) => ExactQueryPart::Phrase(phrase.clone()),
|
||||
};
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::Result;
|
||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::search::WordDerivationsCache;
|
||||
use super::{resolve_query_tree, Criterion, CriterionResult, CriterionParameters, Context};
|
||||
use crate::Result;
|
||||
|
||||
/// The result of a call to the fetcher.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
|
@ -26,7 +26,12 @@ pub struct Final<'t> {
|
|||
|
||||
impl<'t> Final<'t> {
|
||||
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Final<'t> {
|
||||
Final { ctx, parent, wdcache: WordDerivationsCache::new(), returned_candidates: RoaringBitmap::new() }
|
||||
Final {
|
||||
ctx,
|
||||
parent,
|
||||
wdcache: WordDerivationsCache::new(),
|
||||
returned_candidates: RoaringBitmap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[logging_timer::time("Final::{}")]
|
||||
|
@ -40,10 +45,17 @@ impl<'t> Final<'t> {
|
|||
};
|
||||
|
||||
match self.parent.next(&mut criterion_parameters)? {
|
||||
Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
|
||||
Some(CriterionResult {
|
||||
query_tree,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
let mut candidates = match (candidates, query_tree.as_ref()) {
|
||||
(Some(candidates), _) => candidates,
|
||||
(None, Some(qt)) => resolve_query_tree(self.ctx, qt, &mut self.wdcache)? - excluded_candidates,
|
||||
(None, Some(qt)) => {
|
||||
resolve_query_tree(self.ctx, qt, &mut self.wdcache)? - excluded_candidates
|
||||
}
|
||||
(None, None) => self.ctx.documents_ids()? - excluded_candidates,
|
||||
};
|
||||
|
||||
|
@ -56,7 +68,7 @@ impl<'t> Final<'t> {
|
|||
self.returned_candidates |= &candidates;
|
||||
|
||||
Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }))
|
||||
},
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,15 +1,18 @@
|
|||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::Result;
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::query_tree::Operation;
|
||||
use super::{Criterion, CriterionResult, CriterionParameters};
|
||||
use crate::Result;
|
||||
|
||||
pub struct Initial {
|
||||
answer: Option<CriterionResult>
|
||||
answer: Option<CriterionResult>,
|
||||
}
|
||||
|
||||
impl Initial {
|
||||
pub fn new(query_tree: Option<Operation>, filtered_candidates: Option<RoaringBitmap>) -> Initial {
|
||||
pub fn new(
|
||||
query_tree: Option<Operation>,
|
||||
filtered_candidates: Option<RoaringBitmap>,
|
||||
) -> Initial {
|
||||
let answer = CriterionResult {
|
||||
query_tree,
|
||||
candidates: None,
|
||||
|
|
|
@ -1,29 +1,28 @@
|
|||
use std::collections::HashMap;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}};
|
||||
use crate::{Index, DocumentId, Result};
|
||||
|
||||
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
||||
use self::asc_desc::AscDesc;
|
||||
use self::attribute::Attribute;
|
||||
use self::exactness::Exactness;
|
||||
use self::r#final::Final;
|
||||
use self::initial::Initial;
|
||||
use self::proximity::Proximity;
|
||||
use self::r#final::Final;
|
||||
use self::typo::Typo;
|
||||
use self::words::Words;
|
||||
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
||||
use crate::search::{word_derivations, WordDerivationsCache};
|
||||
use crate::{DocumentId, FieldId, Index, Result, TreeLevel};
|
||||
|
||||
mod asc_desc;
|
||||
mod attribute;
|
||||
mod exactness;
|
||||
pub mod r#final;
|
||||
mod initial;
|
||||
mod proximity;
|
||||
mod typo;
|
||||
mod words;
|
||||
pub mod r#final;
|
||||
|
||||
pub trait Criterion {
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>>;
|
||||
|
@ -55,7 +54,7 @@ pub struct CriterionParameters<'a> {
|
|||
#[derive(Debug)]
|
||||
enum Candidates {
|
||||
Allowed(RoaringBitmap),
|
||||
Forbidden(RoaringBitmap)
|
||||
Forbidden(RoaringBitmap),
|
||||
}
|
||||
|
||||
impl Default for Candidates {
|
||||
|
@ -68,17 +67,55 @@ pub trait Context<'c> {
|
|||
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
|
||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_pair_proximity_docids(
|
||||
&self,
|
||||
left: &str,
|
||||
right: &str,
|
||||
proximity: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_prefix_pair_proximity_docids(
|
||||
&self,
|
||||
left: &str,
|
||||
right: &str,
|
||||
proximity: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn words_fst<'t>(&self) -> &'t fst::Set<Cow<[u8]>>;
|
||||
fn in_prefix_cache(&self, word: &str) -> bool;
|
||||
fn docid_words_positions(&self, docid: DocumentId) -> heed::Result<HashMap<String, RoaringBitmap>>;
|
||||
fn word_position_iterator(&self, word: &str, level: TreeLevel, in_prefix_cache: bool, left: Option<u32>, right: Option<u32>) -> heed::Result<Box<dyn Iterator<Item =heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>>;
|
||||
fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>>;
|
||||
fn docid_words_positions(
|
||||
&self,
|
||||
docid: DocumentId,
|
||||
) -> heed::Result<HashMap<String, RoaringBitmap>>;
|
||||
fn word_position_iterator(
|
||||
&self,
|
||||
word: &str,
|
||||
level: TreeLevel,
|
||||
in_prefix_cache: bool,
|
||||
left: Option<u32>,
|
||||
right: Option<u32>,
|
||||
) -> heed::Result<
|
||||
Box<
|
||||
dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c,
|
||||
>,
|
||||
>;
|
||||
fn word_position_last_level(
|
||||
&self,
|
||||
word: &str,
|
||||
in_prefix_cache: bool,
|
||||
) -> heed::Result<Option<TreeLevel>>;
|
||||
fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>>;
|
||||
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>>;
|
||||
fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>>;
|
||||
fn field_id_word_count_docids(
|
||||
&self,
|
||||
field_id: FieldId,
|
||||
word_count: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_level_position_docids(
|
||||
&self,
|
||||
word: &str,
|
||||
level: TreeLevel,
|
||||
left: u32,
|
||||
right: u32,
|
||||
) -> heed::Result<Option<RoaringBitmap>>;
|
||||
}
|
||||
|
||||
pub struct CriteriaBuilder<'t> {
|
||||
|
@ -101,12 +138,22 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||
self.index.word_prefix_docids.get(self.rtxn, &word)
|
||||
}
|
||||
|
||||
fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
|
||||
fn word_pair_proximity_docids(
|
||||
&self,
|
||||
left: &str,
|
||||
right: &str,
|
||||
proximity: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
let key = (left, right, proximity);
|
||||
self.index.word_pair_proximity_docids.get(self.rtxn, &key)
|
||||
}
|
||||
|
||||
fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
|
||||
fn word_prefix_pair_proximity_docids(
|
||||
&self,
|
||||
left: &str,
|
||||
right: &str,
|
||||
proximity: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
let key = (left, right, proximity);
|
||||
self.index.word_prefix_pair_proximity_docids.get(self.rtxn, &key)
|
||||
}
|
||||
|
@ -119,7 +166,10 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||
self.words_prefixes_fst.contains(word)
|
||||
}
|
||||
|
||||
fn docid_words_positions(&self, docid: DocumentId) -> heed::Result<HashMap<String, RoaringBitmap>> {
|
||||
fn docid_words_positions(
|
||||
&self,
|
||||
docid: DocumentId,
|
||||
) -> heed::Result<HashMap<String, RoaringBitmap>> {
|
||||
let mut words_positions = HashMap::new();
|
||||
for result in self.index.docid_word_positions.prefix_iter(self.rtxn, &(docid, ""))? {
|
||||
let ((_, word), positions) = result?;
|
||||
|
@ -134,9 +184,12 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||
level: TreeLevel,
|
||||
in_prefix_cache: bool,
|
||||
left: Option<u32>,
|
||||
right: Option<u32>
|
||||
) -> heed::Result<Box<dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>>
|
||||
{
|
||||
right: Option<u32>,
|
||||
) -> heed::Result<
|
||||
Box<
|
||||
dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c,
|
||||
>,
|
||||
> {
|
||||
let range = {
|
||||
let left = left.unwrap_or(u32::min_value());
|
||||
let right = right.unwrap_or(u32::max_value());
|
||||
|
@ -152,7 +205,11 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||
Ok(Box::new(db.range(self.rtxn, &range)?))
|
||||
}
|
||||
|
||||
fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>> {
|
||||
fn word_position_last_level(
|
||||
&self,
|
||||
word: &str,
|
||||
in_prefix_cache: bool,
|
||||
) -> heed::Result<Option<TreeLevel>> {
|
||||
let range = {
|
||||
let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
|
||||
let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
|
||||
|
@ -164,7 +221,9 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||
};
|
||||
let last_level = db
|
||||
.remap_data_type::<heed::types::DecodeIgnore>()
|
||||
.range(self.rtxn, &range)?.last().transpose()?
|
||||
.range(self.rtxn, &range)?
|
||||
.last()
|
||||
.transpose()?
|
||||
.map(|((_, level, _, _), _)| level);
|
||||
|
||||
Ok(last_level)
|
||||
|
@ -181,12 +240,22 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||
}
|
||||
}
|
||||
|
||||
fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result<Option<RoaringBitmap>> {
|
||||
fn field_id_word_count_docids(
|
||||
&self,
|
||||
field_id: FieldId,
|
||||
word_count: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
let key = (field_id, word_count);
|
||||
self.index.field_id_word_count_docids.get(self.rtxn, &key)
|
||||
}
|
||||
|
||||
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result<Option<RoaringBitmap>> {
|
||||
fn word_level_position_docids(
|
||||
&self,
|
||||
word: &str,
|
||||
level: TreeLevel,
|
||||
left: u32,
|
||||
right: u32,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
let key = (word, level, left, right);
|
||||
self.index.word_level_position_docids.get(self.rtxn, &key)
|
||||
}
|
||||
|
@ -204,13 +273,13 @@ impl<'t> CriteriaBuilder<'t> {
|
|||
query_tree: Option<Operation>,
|
||||
primitive_query: Option<Vec<PrimitiveQueryPart>>,
|
||||
filtered_candidates: Option<RoaringBitmap>,
|
||||
) -> Result<Final<'t>>
|
||||
{
|
||||
) -> Result<Final<'t>> {
|
||||
use crate::criterion::Criterion as Name;
|
||||
|
||||
let primitive_query = primitive_query.unwrap_or_default();
|
||||
|
||||
let mut criterion = Box::new(Initial::new(query_tree, filtered_candidates)) as Box<dyn Criterion>;
|
||||
let mut criterion =
|
||||
Box::new(Initial::new(query_tree, filtered_candidates)) as Box<dyn Criterion>;
|
||||
for name in self.index.criteria(&self.rtxn)? {
|
||||
criterion = match name {
|
||||
Name::Typo => Box::new(Typo::new(self, criterion)),
|
||||
|
@ -218,8 +287,12 @@ impl<'t> CriteriaBuilder<'t> {
|
|||
Name::Proximity => Box::new(Proximity::new(self, criterion)),
|
||||
Name::Attribute => Box::new(Attribute::new(self, criterion)),
|
||||
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
|
||||
Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?),
|
||||
Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?),
|
||||
Name::Asc(field) => {
|
||||
Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?)
|
||||
}
|
||||
Name::Desc(field) => {
|
||||
Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -231,21 +304,20 @@ pub fn resolve_query_tree<'t>(
|
|||
ctx: &'t dyn Context,
|
||||
query_tree: &Operation,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
) -> Result<RoaringBitmap> {
|
||||
fn resolve_operation<'t>(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: &Operation,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
use Operation::{And, Phrase, Or, Query};
|
||||
) -> Result<RoaringBitmap> {
|
||||
use Operation::{And, Or, Phrase, Query};
|
||||
|
||||
match query_tree {
|
||||
And(ops) => {
|
||||
let mut ops = ops.iter().map(|op| {
|
||||
resolve_operation(ctx, op, wdcache)
|
||||
}).collect::<Result<Vec<_>>>()?;
|
||||
let mut ops = ops
|
||||
.iter()
|
||||
.map(|op| resolve_operation(ctx, op, wdcache))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
ops.sort_unstable_by_key(|cds| cds.len());
|
||||
|
||||
|
@ -260,7 +332,7 @@ pub fn resolve_query_tree<'t>(
|
|||
}
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
}
|
||||
Phrase(words) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let mut first_loop = true;
|
||||
|
@ -276,12 +348,12 @@ pub fn resolve_query_tree<'t>(
|
|||
} else {
|
||||
candidates &= pair_docids;
|
||||
}
|
||||
},
|
||||
None => return Ok(RoaringBitmap::new())
|
||||
}
|
||||
None => return Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
}
|
||||
Or(_, ops) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
for op in ops {
|
||||
|
@ -289,7 +361,7 @@ pub fn resolve_query_tree<'t>(
|
|||
candidates.union_with(&docids);
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
}
|
||||
Query(q) => Ok(query_docids(ctx, q, wdcache)?),
|
||||
}
|
||||
}
|
||||
|
@ -297,18 +369,18 @@ pub fn resolve_query_tree<'t>(
|
|||
resolve_operation(ctx, query_tree, wdcache)
|
||||
}
|
||||
|
||||
|
||||
fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
|
||||
ctx: &dyn Context,
|
||||
left_words: &[(T, u8)],
|
||||
right_words: &[(U, u8)],
|
||||
proximity: u8
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
proximity: u8,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let mut docids = RoaringBitmap::new();
|
||||
for (left, _l_typo) in left_words {
|
||||
for (right, _r_typo) in right_words {
|
||||
let current_docids = ctx.word_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?.unwrap_or_default();
|
||||
let current_docids = ctx
|
||||
.word_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?
|
||||
.unwrap_or_default();
|
||||
docids.union_with(&current_docids);
|
||||
}
|
||||
}
|
||||
|
@ -319,8 +391,7 @@ fn query_docids(
|
|||
ctx: &dyn Context,
|
||||
query: &Query,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
) -> Result<RoaringBitmap> {
|
||||
match &query.kind {
|
||||
QueryKind::Exact { word, .. } => {
|
||||
if query.prefix && ctx.in_prefix_cache(&word) {
|
||||
|
@ -336,7 +407,7 @@ fn query_docids(
|
|||
} else {
|
||||
Ok(ctx.word_docids(&word)?.unwrap_or_default())
|
||||
}
|
||||
},
|
||||
}
|
||||
QueryKind::Tolerant { typo, word } => {
|
||||
let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?;
|
||||
let mut docids = RoaringBitmap::new();
|
||||
|
@ -345,7 +416,7 @@ fn query_docids(
|
|||
docids.union_with(&current_docids);
|
||||
}
|
||||
Ok(docids)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -355,8 +426,7 @@ fn query_pair_proximity_docids(
|
|||
right: &Query,
|
||||
proximity: u8,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
) -> Result<RoaringBitmap> {
|
||||
if proximity >= 8 {
|
||||
let mut candidates = query_docids(ctx, left, wdcache)?;
|
||||
let right_candidates = query_docids(ctx, right, wdcache)?;
|
||||
|
@ -368,20 +438,31 @@ fn query_pair_proximity_docids(
|
|||
match (&left.kind, &right.kind) {
|
||||
(QueryKind::Exact { word: left, .. }, QueryKind::Exact { word: right, .. }) => {
|
||||
if prefix && ctx.in_prefix_cache(&right) {
|
||||
Ok(ctx.word_prefix_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?.unwrap_or_default())
|
||||
Ok(ctx
|
||||
.word_prefix_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?
|
||||
.unwrap_or_default())
|
||||
} else if prefix {
|
||||
let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
|
||||
all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
|
||||
} else {
|
||||
Ok(ctx.word_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?.unwrap_or_default())
|
||||
Ok(ctx
|
||||
.word_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
},
|
||||
}
|
||||
(QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. }) => {
|
||||
let l_words = word_derivations(&left, false, *typo, ctx.words_fst(), wdcache)?.to_owned();
|
||||
let l_words =
|
||||
word_derivations(&left, false, *typo, ctx.words_fst(), wdcache)?.to_owned();
|
||||
if prefix && ctx.in_prefix_cache(&right) {
|
||||
let mut docids = RoaringBitmap::new();
|
||||
for (left, _) in l_words {
|
||||
let current_docids = ctx.word_prefix_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?.unwrap_or_default();
|
||||
let current_docids = ctx
|
||||
.word_prefix_pair_proximity_docids(
|
||||
left.as_ref(),
|
||||
right.as_ref(),
|
||||
proximity,
|
||||
)?
|
||||
.unwrap_or_default();
|
||||
docids.union_with(&current_docids);
|
||||
}
|
||||
Ok(docids)
|
||||
|
@ -391,28 +472,36 @@ fn query_pair_proximity_docids(
|
|||
} else {
|
||||
all_word_pair_proximity_docids(ctx, &l_words, &[(right, 0)], proximity)
|
||||
}
|
||||
},
|
||||
}
|
||||
(QueryKind::Exact { word: left, .. }, QueryKind::Tolerant { typo, word: right }) => {
|
||||
let r_words = word_derivations(&right, prefix, *typo, ctx.words_fst(), wdcache)?;
|
||||
all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
|
||||
},
|
||||
(QueryKind::Tolerant { typo: l_typo, word: left }, QueryKind::Tolerant { typo: r_typo, word: right }) => {
|
||||
let l_words = word_derivations(&left, false, *l_typo, ctx.words_fst(), wdcache)?.to_owned();
|
||||
}
|
||||
(
|
||||
QueryKind::Tolerant { typo: l_typo, word: left },
|
||||
QueryKind::Tolerant { typo: r_typo, word: right },
|
||||
) => {
|
||||
let l_words =
|
||||
word_derivations(&left, false, *l_typo, ctx.words_fst(), wdcache)?.to_owned();
|
||||
let r_words = word_derivations(&right, prefix, *r_typo, ctx.words_fst(), wdcache)?;
|
||||
all_word_pair_proximity_docids(ctx, &l_words, &r_words, proximity)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use maplit::hashmap;
|
||||
use rand::{Rng, SeedableRng, rngs::StdRng};
|
||||
|
||||
use super::*;
|
||||
use std::collections::HashMap;
|
||||
|
||||
fn s(s: &str) -> String { s.to_string() }
|
||||
use maplit::hashmap;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn s(s: &str) -> String {
|
||||
s.to_string()
|
||||
}
|
||||
pub struct TestContext<'t> {
|
||||
words_fst: fst::Set<Cow<'t, [u8]>>,
|
||||
word_docids: HashMap<String, RoaringBitmap>,
|
||||
|
@ -435,12 +524,22 @@ pub mod test {
|
|||
Ok(self.word_prefix_docids.get(&word.to_string()).cloned())
|
||||
}
|
||||
|
||||
fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
|
||||
fn word_pair_proximity_docids(
|
||||
&self,
|
||||
left: &str,
|
||||
right: &str,
|
||||
proximity: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
let key = (left.to_string(), right.to_string(), proximity.into());
|
||||
Ok(self.word_pair_proximity_docids.get(&key).cloned())
|
||||
}
|
||||
|
||||
fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
|
||||
fn word_prefix_pair_proximity_docids(
|
||||
&self,
|
||||
left: &str,
|
||||
right: &str,
|
||||
proximity: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
let key = (left.to_string(), right.to_string(), proximity.into());
|
||||
Ok(self.word_prefix_pair_proximity_docids.get(&key).cloned())
|
||||
}
|
||||
|
@ -453,24 +552,44 @@ pub mod test {
|
|||
self.word_prefix_docids.contains_key(&word.to_string())
|
||||
}
|
||||
|
||||
fn docid_words_positions(&self, docid: DocumentId) -> heed::Result<HashMap<String, RoaringBitmap>> {
|
||||
fn docid_words_positions(
|
||||
&self,
|
||||
docid: DocumentId,
|
||||
) -> heed::Result<HashMap<String, RoaringBitmap>> {
|
||||
if let Some(docid_words) = self.docid_words.get(&docid) {
|
||||
Ok(docid_words
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i,w)| (w.clone(), RoaringBitmap::from_sorted_iter(std::iter::once(i as u32))))
|
||||
.collect()
|
||||
)
|
||||
.map(|(i, w)| {
|
||||
(w.clone(), RoaringBitmap::from_sorted_iter(std::iter::once(i as u32)))
|
||||
})
|
||||
.collect())
|
||||
} else {
|
||||
Ok(HashMap::new())
|
||||
}
|
||||
}
|
||||
|
||||
fn word_position_iterator(&self, _word: &str, _level: TreeLevel, _in_prefix_cache: bool, _left: Option<u32>, _right: Option<u32>) -> heed::Result<Box<dyn Iterator<Item =heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>> {
|
||||
fn word_position_iterator(
|
||||
&self,
|
||||
_word: &str,
|
||||
_level: TreeLevel,
|
||||
_in_prefix_cache: bool,
|
||||
_left: Option<u32>,
|
||||
_right: Option<u32>,
|
||||
) -> heed::Result<
|
||||
Box<
|
||||
dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>>
|
||||
+ 'c,
|
||||
>,
|
||||
> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn word_position_last_level(&self, _word: &str, _in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>> {
|
||||
fn word_position_last_level(
|
||||
&self,
|
||||
_word: &str,
|
||||
_in_prefix_cache: bool,
|
||||
) -> heed::Result<Option<TreeLevel>> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
|
@ -478,15 +597,25 @@ pub mod test {
|
|||
todo!()
|
||||
}
|
||||
|
||||
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>> {
|
||||
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> heed::Result<Option<RoaringBitmap>> {
|
||||
fn word_level_position_docids(
|
||||
&self,
|
||||
_word: &str,
|
||||
_level: TreeLevel,
|
||||
_left: u32,
|
||||
_right: u32,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn field_id_word_count_docids(&self, _field_id: FieldId, _word_count: u8) -> heed::Result<Option<RoaringBitmap>> {
|
||||
fn field_id_word_count_docids(
|
||||
&self,
|
||||
_field_id: FieldId,
|
||||
_word_count: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
@ -506,7 +635,7 @@ pub mod test {
|
|||
RoaringBitmap::from_sorted_iter(values.into_iter())
|
||||
}
|
||||
|
||||
let word_docids = hashmap!{
|
||||
let word_docids = hashmap! {
|
||||
s("hello") => random_postings(rng, 1500),
|
||||
s("hi") => random_postings(rng, 4000),
|
||||
s("word") => random_postings(rng, 2500),
|
||||
|
@ -530,7 +659,7 @@ pub mod test {
|
|||
}
|
||||
}
|
||||
|
||||
let word_prefix_docids = hashmap!{
|
||||
let word_prefix_docids = hashmap! {
|
||||
s("h") => &word_docids[&s("hello")] | &word_docids[&s("hi")],
|
||||
s("wor") => &word_docids[&s("word")] | &word_docids[&s("world")],
|
||||
s("20") => &word_docids[&s("2020")] | &word_docids[&s("2021")],
|
||||
|
@ -540,7 +669,9 @@ pub mod test {
|
|||
let mut word_prefix_pair_proximity_docids = HashMap::new();
|
||||
for (lword, lcandidates) in &word_docids {
|
||||
for (rword, rcandidates) in &word_docids {
|
||||
if lword == rword { continue }
|
||||
if lword == rword {
|
||||
continue;
|
||||
}
|
||||
let candidates = lcandidates & rcandidates;
|
||||
for candidate in candidates {
|
||||
if let Some(docid_words) = docid_words.get(&candidate) {
|
||||
|
@ -551,24 +682,31 @@ pub mod test {
|
|||
} else {
|
||||
(s(lword), s(rword), (lposition - rposition + 1) as i32)
|
||||
};
|
||||
let docids = word_pair_proximity_docids.entry(key).or_insert(RoaringBitmap::new());
|
||||
let docids = word_pair_proximity_docids
|
||||
.entry(key)
|
||||
.or_insert(RoaringBitmap::new());
|
||||
docids.push(candidate);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (pword, pcandidates) in &word_prefix_docids {
|
||||
if lword.starts_with(pword) { continue }
|
||||
if lword.starts_with(pword) {
|
||||
continue;
|
||||
}
|
||||
let candidates = lcandidates & pcandidates;
|
||||
for candidate in candidates {
|
||||
if let Some(docid_words) = docid_words.get(&candidate) {
|
||||
let lposition = docid_words.iter().position(|w| w == lword).unwrap();
|
||||
let rposition = docid_words.iter().position(|w| w.starts_with(pword)).unwrap();
|
||||
let rposition =
|
||||
docid_words.iter().position(|w| w.starts_with(pword)).unwrap();
|
||||
let key = if lposition < rposition {
|
||||
(s(lword), s(pword), (rposition - lposition) as i32)
|
||||
} else {
|
||||
(s(lword), s(pword), (lposition - rposition + 1) as i32)
|
||||
};
|
||||
let docids = word_prefix_pair_proximity_docids.entry(key).or_insert(RoaringBitmap::new());
|
||||
let docids = word_prefix_pair_proximity_docids
|
||||
.entry(key)
|
||||
.or_insert(RoaringBitmap::new());
|
||||
docids.push(candidate);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,22 +2,16 @@ use std::collections::btree_map::{self, BTreeMap};
|
|||
use std::collections::hash_map::HashMap;
|
||||
use std::mem::take;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::search::query_tree::{maximum_proximity, Operation, Query};
|
||||
use crate::search::{build_dfa, WordDerivationsCache};
|
||||
use crate::search::{query_tree::QueryKind};
|
||||
use crate::{DocumentId, Position, Result};
|
||||
use super::{
|
||||
Context,
|
||||
Criterion,
|
||||
CriterionParameters,
|
||||
CriterionResult,
|
||||
query_docids,
|
||||
query_pair_proximity_docids,
|
||||
resolve_query_tree,
|
||||
query_docids, query_pair_proximity_docids, resolve_query_tree, Context, Criterion,
|
||||
CriterionParameters, CriterionResult,
|
||||
};
|
||||
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
|
||||
use crate::search::{build_dfa, WordDerivationsCache};
|
||||
use crate::{DocumentId, Position, Result};
|
||||
|
||||
type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>;
|
||||
|
||||
|
@ -63,28 +57,33 @@ impl<'t> Criterion for Proximity<'t> {
|
|||
}
|
||||
|
||||
loop {
|
||||
debug!("Proximity at iteration {} (max prox {:?}) ({:?})",
|
||||
debug!(
|
||||
"Proximity at iteration {} (max prox {:?}) ({:?})",
|
||||
self.proximity,
|
||||
self.state.as_ref().map(|(mp, _, _)| mp),
|
||||
self.state.as_ref().map(|(_, _, cd)| cd),
|
||||
);
|
||||
|
||||
match &mut self.state {
|
||||
Some((max_prox, _, allowed_candidates)) if allowed_candidates.is_empty() || self.proximity > *max_prox => {
|
||||
Some((max_prox, _, allowed_candidates))
|
||||
if allowed_candidates.is_empty() || self.proximity > *max_prox =>
|
||||
{
|
||||
self.state = None; // reset state
|
||||
},
|
||||
}
|
||||
Some((_, query_tree, allowed_candidates)) => {
|
||||
let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD {
|
||||
let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD
|
||||
&& self.proximity > PROXIMITY_THRESHOLD
|
||||
{
|
||||
if let Some(cache) = self.plane_sweep_cache.as_mut() {
|
||||
match cache.next() {
|
||||
Some((p, candidates)) => {
|
||||
self.proximity = p;
|
||||
candidates
|
||||
},
|
||||
}
|
||||
None => {
|
||||
self.state = None; // reset state
|
||||
continue
|
||||
},
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let cache = resolve_plane_sweep_candidates(
|
||||
|
@ -95,9 +94,10 @@ impl<'t> Criterion for Proximity<'t> {
|
|||
)?;
|
||||
self.plane_sweep_cache = Some(cache.into_iter());
|
||||
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
} else { // use set theory based algorithm
|
||||
} else {
|
||||
// use set theory based algorithm
|
||||
resolve_candidates(
|
||||
self.ctx,
|
||||
&query_tree,
|
||||
|
@ -117,39 +117,50 @@ impl<'t> Criterion for Proximity<'t> {
|
|||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => {
|
||||
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
|
||||
- params.excluded_candidates
|
||||
}
|
||||
};
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
let maximum_proximity = maximum_proximity(&query_tree);
|
||||
self.state = Some((maximum_proximity as u8, query_tree, candidates));
|
||||
self.proximity = 0;
|
||||
self.plane_sweep_cache = None;
|
||||
},
|
||||
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
None => return Ok(None),
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
|
||||
let maximum_proximity = maximum_proximity(&query_tree);
|
||||
self.state = Some((maximum_proximity as u8, query_tree, candidates));
|
||||
self.proximity = 0;
|
||||
self.plane_sweep_cache = None;
|
||||
}
|
||||
Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -162,46 +173,48 @@ fn resolve_candidates<'t>(
|
|||
proximity: u8,
|
||||
cache: &mut Cache,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
) -> Result<RoaringBitmap> {
|
||||
fn resolve_operation<'t>(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: &Operation,
|
||||
proximity: u8,
|
||||
cache: &mut Cache,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<Vec<(Query, Query, RoaringBitmap)>>
|
||||
{
|
||||
use Operation::{And, Phrase, Or};
|
||||
) -> Result<Vec<(Query, Query, RoaringBitmap)>> {
|
||||
use Operation::{And, Or, Phrase};
|
||||
|
||||
let result = match query_tree {
|
||||
And(ops) => mdfs(ctx, ops, proximity, cache, wdcache)?,
|
||||
Phrase(words) => if proximity == 0 {
|
||||
let most_left = words.first().map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||
let most_right = words.last().map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||
let mut candidates = None;
|
||||
for slice in words.windows(2) {
|
||||
let (left, right) = (&slice[0], &slice[1]);
|
||||
match ctx.word_pair_proximity_docids(left, right, 1)? {
|
||||
Some(pair_docids) => {
|
||||
match candidates.as_mut() {
|
||||
Phrase(words) => {
|
||||
if proximity == 0 {
|
||||
let most_left = words
|
||||
.first()
|
||||
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||
let most_right = words
|
||||
.last()
|
||||
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||
let mut candidates = None;
|
||||
for slice in words.windows(2) {
|
||||
let (left, right) = (&slice[0], &slice[1]);
|
||||
match ctx.word_pair_proximity_docids(left, right, 1)? {
|
||||
Some(pair_docids) => match candidates.as_mut() {
|
||||
Some(candidates) => *candidates &= pair_docids,
|
||||
None => candidates = Some(pair_docids),
|
||||
},
|
||||
None => {
|
||||
candidates = None;
|
||||
break;
|
||||
}
|
||||
},
|
||||
None => {
|
||||
candidates = None;
|
||||
break;
|
||||
}
|
||||
}
|
||||
match (most_left, most_right, candidates) {
|
||||
(Some(l), Some(r), Some(c)) => vec![(l, r, c)],
|
||||
_otherwise => Default::default(),
|
||||
}
|
||||
} else {
|
||||
Default::default()
|
||||
}
|
||||
match (most_left, most_right, candidates) {
|
||||
(Some(l), Some(r), Some(c)) => vec![(l, r, c)],
|
||||
_otherwise => Default::default(),
|
||||
}
|
||||
} else {
|
||||
Default::default()
|
||||
},
|
||||
}
|
||||
Or(_, ops) => {
|
||||
let mut output = Vec::new();
|
||||
for op in ops {
|
||||
|
@ -209,13 +222,15 @@ fn resolve_candidates<'t>(
|
|||
output.extend(result);
|
||||
}
|
||||
output
|
||||
},
|
||||
Operation::Query(q) => if proximity == 0 {
|
||||
let candidates = query_docids(ctx, q, wdcache)?;
|
||||
vec![(q.clone(), q.clone(), candidates)]
|
||||
} else {
|
||||
Default::default()
|
||||
},
|
||||
}
|
||||
Operation::Query(q) => {
|
||||
if proximity == 0 {
|
||||
let candidates = query_docids(ctx, q, wdcache)?;
|
||||
vec![(q.clone(), q.clone(), candidates)]
|
||||
} else {
|
||||
Default::default()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(result)
|
||||
|
@ -228,8 +243,7 @@ fn resolve_candidates<'t>(
|
|||
proximity: u8,
|
||||
cache: &mut Cache,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<Vec<(Query, Query, RoaringBitmap)>>
|
||||
{
|
||||
) -> Result<Vec<(Query, Query, RoaringBitmap)>> {
|
||||
fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator<Item = (u8, u8)> {
|
||||
(0..=mana.min(left_max)).map(move |m| (m, mana - m))
|
||||
}
|
||||
|
@ -257,7 +271,8 @@ fn resolve_candidates<'t>(
|
|||
|
||||
for (ll, lr, lcandidates) in lefts {
|
||||
for (rl, rr, rcandidates) in rights {
|
||||
let mut candidates = query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?;
|
||||
let mut candidates =
|
||||
query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?;
|
||||
if lcandidates.len() < rcandidates.len() {
|
||||
candidates.intersect_with(lcandidates);
|
||||
candidates.intersect_with(rcandidates);
|
||||
|
@ -282,22 +297,26 @@ fn resolve_candidates<'t>(
|
|||
proximity: u8,
|
||||
cache: &mut Cache,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<Vec<(Query, Query, RoaringBitmap)>>
|
||||
{
|
||||
) -> Result<Vec<(Query, Query, RoaringBitmap)>> {
|
||||
// Extract the first two elements but give the tail
|
||||
// that is just after the first element.
|
||||
let next = branches.split_first().map(|(h1, t)| {
|
||||
(h1, t.split_first().map(|(h2, _)| (h2, t)))
|
||||
});
|
||||
let next =
|
||||
branches.split_first().map(|(h1, t)| (h1, t.split_first().map(|(h2, _)| (h2, t))));
|
||||
|
||||
match next {
|
||||
Some((head1, Some((head2, [_])))) => mdfs_pair(ctx, head1, head2, proximity, cache, wdcache),
|
||||
Some((head1, Some((head2, [_])))) => {
|
||||
mdfs_pair(ctx, head1, head2, proximity, cache, wdcache)
|
||||
}
|
||||
Some((head1, Some((head2, tail)))) => {
|
||||
let mut output = Vec::new();
|
||||
for p in 0..=proximity {
|
||||
for (lhead, _, head_candidates) in mdfs_pair(ctx, head1, head2, p, cache, wdcache)? {
|
||||
for (lhead, _, head_candidates) in
|
||||
mdfs_pair(ctx, head1, head2, p, cache, wdcache)?
|
||||
{
|
||||
if !head_candidates.is_empty() {
|
||||
for (_, rtail, mut candidates) in mdfs(ctx, tail, proximity - p, cache, wdcache)? {
|
||||
for (_, rtail, mut candidates) in
|
||||
mdfs(ctx, tail, proximity - p, cache, wdcache)?
|
||||
{
|
||||
candidates.intersect_with(&head_candidates);
|
||||
if !candidates.is_empty() {
|
||||
output.push((lhead.clone(), rtail, candidates));
|
||||
|
@ -307,7 +326,7 @@ fn resolve_candidates<'t>(
|
|||
}
|
||||
}
|
||||
Ok(output)
|
||||
},
|
||||
}
|
||||
Some((head1, None)) => resolve_operation(ctx, head1, proximity, cache, wdcache),
|
||||
None => Ok(Default::default()),
|
||||
}
|
||||
|
@ -325,47 +344,48 @@ fn resolve_plane_sweep_candidates(
|
|||
query_tree: &Operation,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<BTreeMap<u8, RoaringBitmap>>
|
||||
{
|
||||
) -> Result<BTreeMap<u8, RoaringBitmap>> {
|
||||
/// FIXME may be buggy with queries like "new new york"
|
||||
fn plane_sweep(
|
||||
groups_positions: Vec<Vec<(Position, u8, Position)>>,
|
||||
consecutive: bool,
|
||||
) -> Result<Vec<(Position, u8, Position)>>
|
||||
{
|
||||
) -> Result<Vec<(Position, u8, Position)>> {
|
||||
fn compute_groups_proximity(
|
||||
groups: &[(usize, (Position, u8, Position))],
|
||||
consecutive: bool,
|
||||
) -> Option<(Position, u8, Position)>
|
||||
{
|
||||
) -> Option<(Position, u8, Position)> {
|
||||
// take the inner proximity of the first group as initial
|
||||
let (_, (_, mut proximity, _)) = groups.first()?;
|
||||
let (_, (left_most_pos, _, _)) = groups.first()?;
|
||||
let (_, (_, _, right_most_pos)) = groups.iter().max_by_key(|(_, (_, _, right_most_pos))| right_most_pos)?;
|
||||
let (_, (_, _, right_most_pos)) =
|
||||
groups.iter().max_by_key(|(_, (_, _, right_most_pos))| right_most_pos)?;
|
||||
|
||||
for pair in groups.windows(2) {
|
||||
if let [(i1, (lpos1, _, rpos1)), (i2, (lpos2, prox2, rpos2))] = pair {
|
||||
// if two positions are equal, meaning that they share at least a word, we return None
|
||||
if rpos1 == rpos2 || lpos1 == lpos2 || rpos1 == lpos2 || lpos1 == rpos2 {
|
||||
return None
|
||||
return None;
|
||||
}
|
||||
|
||||
let pair_proximity = {
|
||||
// if intervals are disjoint [..].(..)
|
||||
if lpos2 > rpos1 { lpos2 - rpos1 }
|
||||
if lpos2 > rpos1 {
|
||||
lpos2 - rpos1
|
||||
}
|
||||
// if the second interval is a subset of the first [.(..).]
|
||||
else if rpos2 < rpos1 { (lpos2 - lpos1).min(rpos1 - rpos2) }
|
||||
else if rpos2 < rpos1 {
|
||||
(lpos2 - lpos1).min(rpos1 - rpos2)
|
||||
}
|
||||
// if intervals overlap [.(..].)
|
||||
else { (lpos2 - lpos1).min(rpos2 - rpos1) }
|
||||
else {
|
||||
(lpos2 - lpos1).min(rpos2 - rpos1)
|
||||
}
|
||||
};
|
||||
|
||||
// if groups are in the right order (query order) we subtract 1 from the proximity
|
||||
// the proximity is clamped to 7
|
||||
let pair_proximity = if i1 < i2 {
|
||||
(pair_proximity - 1).min(7)
|
||||
} else {
|
||||
pair_proximity.min(7)
|
||||
};
|
||||
let pair_proximity =
|
||||
if i1 < i2 { (pair_proximity - 1).min(7) } else { pair_proximity.min(7) };
|
||||
|
||||
proximity += pair_proximity as u8 + prox2;
|
||||
}
|
||||
|
@ -381,7 +401,8 @@ fn resolve_plane_sweep_candidates(
|
|||
|
||||
let groups_len = groups_positions.len();
|
||||
|
||||
let mut groups_positions: Vec<_> = groups_positions.into_iter().map(|pos| pos.into_iter()).collect();
|
||||
let mut groups_positions: Vec<_> =
|
||||
groups_positions.into_iter().map(|pos| pos.into_iter()).collect();
|
||||
|
||||
// Pop top elements of each list.
|
||||
let mut current = Vec::with_capacity(groups_len);
|
||||
|
@ -452,9 +473,8 @@ fn resolve_plane_sweep_candidates(
|
|||
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
|
||||
words_positions: &HashMap<String, RoaringBitmap>,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<Vec<(Position, u8, Position)>>
|
||||
{
|
||||
use Operation::{And, Phrase, Or};
|
||||
) -> Result<Vec<(Position, u8, Position)>> {
|
||||
use Operation::{And, Or, Phrase};
|
||||
|
||||
if let Some(result) = rocache.get(query_tree) {
|
||||
return Ok(result.clone());
|
||||
|
@ -462,13 +482,20 @@ fn resolve_plane_sweep_candidates(
|
|||
|
||||
let result = match query_tree {
|
||||
And(ops) => {
|
||||
let mut groups_positions = Vec::with_capacity(ops.len());
|
||||
let mut groups_positions = Vec::with_capacity(ops.len());
|
||||
for operation in ops {
|
||||
let positions = resolve_operation(ctx, operation, docid, rocache, words_positions, wdcache)?;
|
||||
let positions = resolve_operation(
|
||||
ctx,
|
||||
operation,
|
||||
docid,
|
||||
rocache,
|
||||
words_positions,
|
||||
wdcache,
|
||||
)?;
|
||||
groups_positions.push(positions);
|
||||
}
|
||||
plane_sweep(groups_positions, false)?
|
||||
},
|
||||
}
|
||||
Phrase(words) => {
|
||||
let mut groups_positions = Vec::with_capacity(words.len());
|
||||
for word in words {
|
||||
|
@ -479,16 +506,23 @@ fn resolve_plane_sweep_candidates(
|
|||
groups_positions.push(positions);
|
||||
}
|
||||
plane_sweep(groups_positions, true)?
|
||||
},
|
||||
}
|
||||
Or(_, ops) => {
|
||||
let mut result = Vec::new();
|
||||
for op in ops {
|
||||
result.extend(resolve_operation(ctx, op, docid, rocache, words_positions, wdcache)?)
|
||||
result.extend(resolve_operation(
|
||||
ctx,
|
||||
op,
|
||||
docid,
|
||||
rocache,
|
||||
words_positions,
|
||||
wdcache,
|
||||
)?)
|
||||
}
|
||||
|
||||
result.sort_unstable();
|
||||
result
|
||||
},
|
||||
}
|
||||
Operation::Query(Query { prefix, kind }) => {
|
||||
let mut result = Vec::new();
|
||||
match kind {
|
||||
|
@ -498,9 +532,9 @@ fn resolve_plane_sweep_candidates(
|
|||
.flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
|
||||
result.extend(iter);
|
||||
} else if let Some(positions) = words_positions.get(word) {
|
||||
result.extend(positions.iter().map(|p| (p, 0, p)));
|
||||
result.extend(positions.iter().map(|p| (p, 0, p)));
|
||||
}
|
||||
},
|
||||
}
|
||||
QueryKind::Tolerant { typo, word } => {
|
||||
let iter = word_derivations(word, *prefix, *typo, &words_positions)
|
||||
.flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
|
||||
|
@ -522,8 +556,7 @@ fn resolve_plane_sweep_candidates(
|
|||
is_prefix: bool,
|
||||
max_typo: u8,
|
||||
words_positions: &'a HashMap<String, RoaringBitmap>,
|
||||
) -> impl Iterator<Item = &'a RoaringBitmap>
|
||||
{
|
||||
) -> impl Iterator<Item = &'a RoaringBitmap> {
|
||||
let dfa = build_dfa(word, max_typo, is_prefix);
|
||||
words_positions.iter().filter_map(move |(document_word, positions)| {
|
||||
use levenshtein_automata::Distance;
|
||||
|
@ -539,7 +572,7 @@ fn resolve_plane_sweep_candidates(
|
|||
for docid in allowed_candidates {
|
||||
let words_positions = ctx.docid_words_positions(docid)?;
|
||||
resolve_operation_cache.clear();
|
||||
let positions = resolve_operation(
|
||||
let positions = resolve_operation(
|
||||
ctx,
|
||||
query_tree,
|
||||
docid,
|
||||
|
|
|
@ -1,20 +1,17 @@
|
|||
use std::{borrow::Cow, collections::HashMap, mem::take};
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::mem::take;
|
||||
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{
|
||||
query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters,
|
||||
CriterionResult,
|
||||
};
|
||||
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
|
||||
use crate::search::{word_derivations, WordDerivationsCache};
|
||||
use crate::Result;
|
||||
use super::{
|
||||
Candidates,
|
||||
Context,
|
||||
Criterion,
|
||||
CriterionParameters,
|
||||
CriterionResult,
|
||||
query_docids,
|
||||
resolve_query_tree,
|
||||
};
|
||||
|
||||
/// Maximum number of typos for a word of any length.
|
||||
const MAX_TYPOS_PER_WORD: u8 = 2;
|
||||
|
@ -54,7 +51,8 @@ impl<'t> Criterion for Typo<'t> {
|
|||
}
|
||||
|
||||
loop {
|
||||
debug!("Typo at iteration {} (max typos {:?}) ({:?})",
|
||||
debug!(
|
||||
"Typo at iteration {} (max typos {:?}) ({:?})",
|
||||
self.typos,
|
||||
self.state.as_ref().map(|(mt, _, _)| mt),
|
||||
self.state.as_ref().map(|(_, _, cd)| cd),
|
||||
|
@ -63,29 +61,42 @@ impl<'t> Criterion for Typo<'t> {
|
|||
match self.state.as_mut() {
|
||||
Some((max_typos, _, _)) if self.typos > *max_typos => {
|
||||
self.state = None; // reset state
|
||||
},
|
||||
}
|
||||
Some((_, _, Allowed(allowed_candidates))) if allowed_candidates.is_empty() => {
|
||||
self.state = None; // reset state
|
||||
},
|
||||
}
|
||||
Some((_, query_tree, candidates_authorization)) => {
|
||||
let fst = self.ctx.words_fst();
|
||||
let new_query_tree = match self.typos {
|
||||
typos if typos < MAX_TYPOS_PER_WORD => {
|
||||
alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?
|
||||
},
|
||||
typos if typos < MAX_TYPOS_PER_WORD => alterate_query_tree(
|
||||
&fst,
|
||||
query_tree.clone(),
|
||||
self.typos,
|
||||
params.wdcache,
|
||||
)?,
|
||||
MAX_TYPOS_PER_WORD => {
|
||||
// When typos >= MAX_TYPOS_PER_WORD, no more alteration of the query tree is possible,
|
||||
// we keep the altered query tree
|
||||
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?;
|
||||
*query_tree = alterate_query_tree(
|
||||
&fst,
|
||||
query_tree.clone(),
|
||||
self.typos,
|
||||
params.wdcache,
|
||||
)?;
|
||||
// we compute the allowed candidates
|
||||
let query_tree_allowed_candidates = resolve_query_tree(self.ctx, query_tree, params.wdcache)?;
|
||||
let query_tree_allowed_candidates =
|
||||
resolve_query_tree(self.ctx, query_tree, params.wdcache)?;
|
||||
// we assign the allowed candidates to the candidates authorization.
|
||||
*candidates_authorization = match take(candidates_authorization) {
|
||||
Allowed(allowed_candidates) => Allowed(query_tree_allowed_candidates & allowed_candidates),
|
||||
Forbidden(forbidden_candidates) => Allowed(query_tree_allowed_candidates - forbidden_candidates),
|
||||
Allowed(allowed_candidates) => {
|
||||
Allowed(query_tree_allowed_candidates & allowed_candidates)
|
||||
}
|
||||
Forbidden(forbidden_candidates) => {
|
||||
Allowed(query_tree_allowed_candidates - forbidden_candidates)
|
||||
}
|
||||
};
|
||||
query_tree.clone()
|
||||
},
|
||||
}
|
||||
_otherwise => query_tree.clone(),
|
||||
};
|
||||
|
||||
|
@ -101,11 +112,11 @@ impl<'t> Criterion for Typo<'t> {
|
|||
Allowed(allowed_candidates) => {
|
||||
candidates &= &*allowed_candidates;
|
||||
*allowed_candidates -= &candidates;
|
||||
},
|
||||
}
|
||||
Forbidden(forbidden_candidates) => {
|
||||
candidates -= &*forbidden_candidates;
|
||||
*forbidden_candidates |= &candidates;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
let bucket_candidates = match self.bucket_candidates.as_mut() {
|
||||
|
@ -121,35 +132,45 @@ impl<'t> Criterion for Typo<'t> {
|
|||
filtered_candidates: None,
|
||||
bucket_candidates: Some(bucket_candidates),
|
||||
}));
|
||||
},
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
|
||||
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
self.bucket_candidates =
|
||||
match (self.bucket_candidates.take(), bucket_candidates) {
|
||||
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
||||
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
||||
};
|
||||
|
||||
let candidates = match candidates.or(filtered_candidates) {
|
||||
Some(candidates) => Candidates::Allowed(candidates - params.excluded_candidates),
|
||||
None => Candidates::Forbidden(params.excluded_candidates.clone()),
|
||||
};
|
||||
let candidates = match candidates.or(filtered_candidates) {
|
||||
Some(candidates) => {
|
||||
Candidates::Allowed(candidates - params.excluded_candidates)
|
||||
}
|
||||
None => Candidates::Forbidden(params.excluded_candidates.clone()),
|
||||
};
|
||||
|
||||
let maximum_typos = maximum_typo(&query_tree) as u8;
|
||||
self.state = Some((maximum_typos, query_tree, candidates));
|
||||
self.typos = 0;
|
||||
|
||||
},
|
||||
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
None => return Ok(None),
|
||||
let maximum_typos = maximum_typo(&query_tree) as u8;
|
||||
self.state = Some((maximum_typos, query_tree, candidates));
|
||||
self.typos = 0;
|
||||
}
|
||||
Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -164,21 +185,19 @@ fn alterate_query_tree(
|
|||
mut query_tree: Operation,
|
||||
number_typos: u8,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<Operation>
|
||||
{
|
||||
) -> Result<Operation> {
|
||||
fn recurse(
|
||||
words_fst: &fst::Set<Cow<[u8]>>,
|
||||
operation: &mut Operation,
|
||||
number_typos: u8,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<()>
|
||||
{
|
||||
use Operation::{And, Phrase, Or};
|
||||
) -> Result<()> {
|
||||
use Operation::{And, Or, Phrase};
|
||||
|
||||
match operation {
|
||||
And(ops) | Or(_, ops) => {
|
||||
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
|
||||
},
|
||||
}
|
||||
// Because Phrases don't allow typos, no alteration can be done.
|
||||
Phrase(_words) => return Ok(()),
|
||||
Operation::Query(q) => {
|
||||
|
@ -193,19 +212,25 @@ fn alterate_query_tree(
|
|||
} else {
|
||||
let typo = *typo.min(&number_typos);
|
||||
let words = word_derivations(word, q.prefix, typo, words_fst, wdcache)?;
|
||||
let queries = words.iter().map(|(word, typo)| {
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::Exact { original_typo: *typo, word: word.to_string() },
|
||||
let queries = words
|
||||
.iter()
|
||||
.map(|(word, typo)| {
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::Exact {
|
||||
original_typo: *typo,
|
||||
word: word.to_string(),
|
||||
},
|
||||
})
|
||||
})
|
||||
}).collect();
|
||||
.collect();
|
||||
|
||||
*operation = Operation::or(false, queries);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -219,22 +244,18 @@ fn resolve_candidates<'t>(
|
|||
number_typos: u8,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
) -> Result<RoaringBitmap> {
|
||||
fn resolve_operation<'t>(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: &Operation,
|
||||
number_typos: u8,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
use Operation::{And, Phrase, Or, Query};
|
||||
) -> Result<RoaringBitmap> {
|
||||
use Operation::{And, Or, Phrase, Query};
|
||||
|
||||
match query_tree {
|
||||
And(ops) => {
|
||||
mdfs(ctx, ops, number_typos, cache, wdcache)
|
||||
},
|
||||
And(ops) => mdfs(ctx, ops, number_typos, cache, wdcache),
|
||||
Phrase(words) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let mut first_loop = true;
|
||||
|
@ -250,12 +271,12 @@ fn resolve_candidates<'t>(
|
|||
} else {
|
||||
candidates &= pair_docids;
|
||||
}
|
||||
},
|
||||
None => return Ok(RoaringBitmap::new())
|
||||
}
|
||||
None => return Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
}
|
||||
Or(_, ops) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
for op in ops {
|
||||
|
@ -263,12 +284,14 @@ fn resolve_candidates<'t>(
|
|||
candidates.union_with(&docids);
|
||||
}
|
||||
Ok(candidates)
|
||||
},
|
||||
Query(q) => if q.kind.typo() == number_typos {
|
||||
Ok(query_docids(ctx, q, wdcache)?)
|
||||
} else {
|
||||
Ok(RoaringBitmap::new())
|
||||
},
|
||||
}
|
||||
Query(q) => {
|
||||
if q.kind.typo() == number_typos {
|
||||
Ok(query_docids(ctx, q, wdcache)?)
|
||||
} else {
|
||||
Ok(RoaringBitmap::new())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -278,8 +301,7 @@ fn resolve_candidates<'t>(
|
|||
mana: u8,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
) -> Result<RoaringBitmap> {
|
||||
match branches.split_first() {
|
||||
Some((head, [])) => {
|
||||
let cache_key = (head.clone(), mana);
|
||||
|
@ -290,7 +312,7 @@ fn resolve_candidates<'t>(
|
|||
cache.insert(cache_key, candidates.clone());
|
||||
Ok(candidates)
|
||||
}
|
||||
},
|
||||
}
|
||||
Some((head, tail)) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
|
||||
|
@ -313,7 +335,7 @@ fn resolve_candidates<'t>(
|
|||
}
|
||||
|
||||
Ok(candidates)
|
||||
},
|
||||
}
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
|
@ -323,9 +345,9 @@ fn resolve_candidates<'t>(
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use super::super::initial::Initial;
|
||||
use super::super::test::TestContext;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn initial_placeholder_no_facets() {
|
||||
|
@ -348,13 +370,23 @@ mod test {
|
|||
#[test]
|
||||
fn initial_query_tree_no_facets() {
|
||||
let context = TestContext::default();
|
||||
let query_tree = Operation::Or(false, vec![
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(1, "world".to_string()) }),
|
||||
])
|
||||
]);
|
||||
let query_tree = Operation::Or(
|
||||
false,
|
||||
vec![Operation::And(vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("split".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("this".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::tolerant(1, "world".to_string()),
|
||||
}),
|
||||
])],
|
||||
);
|
||||
|
||||
let facet_candidates = None;
|
||||
|
||||
|
@ -369,13 +401,23 @@ mod test {
|
|||
& context.word_docids("this").unwrap().unwrap()
|
||||
& context.word_docids("world").unwrap().unwrap();
|
||||
let expected_1 = CriterionResult {
|
||||
query_tree: Some(Operation::Or(false, vec![
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
|
||||
]),
|
||||
])),
|
||||
query_tree: Some(Operation::Or(
|
||||
false,
|
||||
vec![Operation::And(vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("split".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("this".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("world".to_string()),
|
||||
}),
|
||||
])],
|
||||
)),
|
||||
candidates: Some(candidates_1.clone()),
|
||||
bucket_candidates: Some(candidates_1),
|
||||
filtered_candidates: None,
|
||||
|
@ -383,22 +425,37 @@ mod test {
|
|||
|
||||
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
|
||||
|
||||
let candidates_2 = (
|
||||
context.word_docids("split").unwrap().unwrap()
|
||||
& context.word_docids("this").unwrap().unwrap()
|
||||
& context.word_docids("word").unwrap().unwrap()
|
||||
) - context.word_docids("world").unwrap().unwrap();
|
||||
let candidates_2 = (context.word_docids("split").unwrap().unwrap()
|
||||
& context.word_docids("this").unwrap().unwrap()
|
||||
& context.word_docids("word").unwrap().unwrap())
|
||||
- context.word_docids("world").unwrap().unwrap();
|
||||
let expected_2 = CriterionResult {
|
||||
query_tree: Some(Operation::Or(false, vec![
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
|
||||
Operation::Or(false, vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact_with_typo(1, "word".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
|
||||
]),
|
||||
]),
|
||||
])),
|
||||
query_tree: Some(Operation::Or(
|
||||
false,
|
||||
vec![Operation::And(vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("split".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("this".to_string()),
|
||||
}),
|
||||
Operation::Or(
|
||||
false,
|
||||
vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact_with_typo(1, "word".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("world".to_string()),
|
||||
}),
|
||||
],
|
||||
),
|
||||
])],
|
||||
)),
|
||||
candidates: Some(candidates_2.clone()),
|
||||
bucket_candidates: Some(candidates_2),
|
||||
filtered_candidates: None,
|
||||
|
@ -437,17 +494,26 @@ mod test {
|
|||
#[test]
|
||||
fn initial_query_tree_with_facets() {
|
||||
let context = TestContext::default();
|
||||
let query_tree = Operation::Or(false, vec![
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(1, "world".to_string()) }),
|
||||
])
|
||||
]);
|
||||
let query_tree = Operation::Or(
|
||||
false,
|
||||
vec![Operation::And(vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("split".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("this".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::tolerant(1, "world".to_string()),
|
||||
}),
|
||||
])],
|
||||
);
|
||||
|
||||
let facet_candidates = context.word_docids("earth").unwrap().unwrap();
|
||||
|
||||
|
||||
let mut criterion_parameters = CriterionParameters {
|
||||
wdcache: &mut WordDerivationsCache::new(),
|
||||
excluded_candidates: &RoaringBitmap::new(),
|
||||
|
@ -459,13 +525,23 @@ mod test {
|
|||
& context.word_docids("this").unwrap().unwrap()
|
||||
& context.word_docids("world").unwrap().unwrap();
|
||||
let expected_1 = CriterionResult {
|
||||
query_tree: Some(Operation::Or(false, vec![
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
|
||||
]),
|
||||
])),
|
||||
query_tree: Some(Operation::Or(
|
||||
false,
|
||||
vec![Operation::And(vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("split".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("this".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("world".to_string()),
|
||||
}),
|
||||
])],
|
||||
)),
|
||||
candidates: Some(&candidates_1 & &facet_candidates),
|
||||
bucket_candidates: Some(&candidates_1 & &facet_candidates),
|
||||
filtered_candidates: None,
|
||||
|
@ -473,22 +549,37 @@ mod test {
|
|||
|
||||
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
|
||||
|
||||
let candidates_2 = (
|
||||
context.word_docids("split").unwrap().unwrap()
|
||||
& context.word_docids("this").unwrap().unwrap()
|
||||
& context.word_docids("word").unwrap().unwrap()
|
||||
) - context.word_docids("world").unwrap().unwrap();
|
||||
let candidates_2 = (context.word_docids("split").unwrap().unwrap()
|
||||
& context.word_docids("this").unwrap().unwrap()
|
||||
& context.word_docids("word").unwrap().unwrap())
|
||||
- context.word_docids("world").unwrap().unwrap();
|
||||
let expected_2 = CriterionResult {
|
||||
query_tree: Some(Operation::Or(false, vec![
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
|
||||
Operation::Or(false, vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact_with_typo(1, "word".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
|
||||
]),
|
||||
]),
|
||||
])),
|
||||
query_tree: Some(Operation::Or(
|
||||
false,
|
||||
vec![Operation::And(vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("split".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("this".to_string()),
|
||||
}),
|
||||
Operation::Or(
|
||||
false,
|
||||
vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact_with_typo(1, "word".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("world".to_string()),
|
||||
}),
|
||||
],
|
||||
),
|
||||
])],
|
||||
)),
|
||||
candidates: Some(&candidates_2 & &facet_candidates),
|
||||
bucket_candidates: Some(&candidates_2 & &facet_candidates),
|
||||
filtered_candidates: None,
|
||||
|
|
|
@ -3,9 +3,9 @@ use std::mem::take;
|
|||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::Result;
|
||||
use super::{Context, Criterion, CriterionParameters, CriterionResult, resolve_query_tree};
|
||||
|
||||
pub struct Words<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
|
@ -44,11 +44,12 @@ impl<'t> Criterion for Words<'t> {
|
|||
Some(query_tree) => {
|
||||
let candidates = match self.candidates.as_mut() {
|
||||
Some(allowed_candidates) => {
|
||||
let mut candidates = resolve_query_tree(self.ctx, &query_tree, params.wdcache)?;
|
||||
let mut candidates =
|
||||
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?;
|
||||
candidates &= &*allowed_candidates;
|
||||
*allowed_candidates -= &candidates;
|
||||
Some(candidates)
|
||||
},
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
|
@ -63,29 +64,38 @@ impl<'t> Criterion for Words<'t> {
|
|||
filtered_candidates: self.filtered_candidates.clone(),
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
|
||||
self.query_trees = explode_query_tree(query_tree);
|
||||
self.candidates = candidates;
|
||||
self.filtered_candidates = filtered_candidates;
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
self.query_trees = explode_query_tree(query_tree);
|
||||
self.candidates = candidates;
|
||||
self.filtered_candidates = filtered_candidates;
|
||||
|
||||
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
|
||||
self.bucket_candidates =
|
||||
match (self.bucket_candidates.take(), bucket_candidates) {
|
||||
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
||||
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
||||
};
|
||||
},
|
||||
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,11 +3,11 @@ use std::mem::size_of;
|
|||
use heed::types::ByteSlice;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{Distinct, DocIter};
|
||||
use crate::error::InternalError;
|
||||
use crate::heed_codec::facet::*;
|
||||
use crate::index::db_name;
|
||||
use crate::{DocumentId, FieldId, Index, Result};
|
||||
use super::{Distinct, DocIter};
|
||||
|
||||
const FID_SIZE: usize = size_of::<FieldId>();
|
||||
const DOCID_SIZE: usize = size_of::<DocumentId>();
|
||||
|
@ -28,11 +28,7 @@ pub struct FacetDistinct<'a> {
|
|||
|
||||
impl<'a> FacetDistinct<'a> {
|
||||
pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self {
|
||||
Self {
|
||||
distinct,
|
||||
index,
|
||||
txn,
|
||||
}
|
||||
Self { distinct, index, txn }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,16 +43,12 @@ pub struct FacetDistinctIter<'a> {
|
|||
|
||||
impl<'a> FacetDistinctIter<'a> {
|
||||
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||
self.index
|
||||
.facet_id_string_docids
|
||||
.get(self.txn, &(self.distinct, key))
|
||||
self.index.facet_id_string_docids.get(self.txn, &(self.distinct, key))
|
||||
}
|
||||
|
||||
fn facet_number_docids(&self, key: f64) -> heed::Result<Option<RoaringBitmap>> {
|
||||
// get facet docids on level 0
|
||||
self.index
|
||||
.facet_id_f64_docids
|
||||
.get(self.txn, &(self.distinct, 0, key, key))
|
||||
self.index.facet_id_f64_docids.get(self.txn, &(self.distinct, 0, key, key))
|
||||
}
|
||||
|
||||
fn distinct_string(&mut self, id: DocumentId) -> Result<()> {
|
||||
|
@ -64,9 +56,8 @@ impl<'a> FacetDistinctIter<'a> {
|
|||
|
||||
for item in iter {
|
||||
let ((_, _, value), _) = item?;
|
||||
let facet_docids = self
|
||||
.facet_string_docids(value)?
|
||||
.ok_or(InternalError::DatabaseMissingEntry {
|
||||
let facet_docids =
|
||||
self.facet_string_docids(value)?.ok_or(InternalError::DatabaseMissingEntry {
|
||||
db_name: db_name::FACET_ID_STRING_DOCIDS,
|
||||
key: None,
|
||||
})?;
|
||||
|
@ -83,9 +74,8 @@ impl<'a> FacetDistinctIter<'a> {
|
|||
|
||||
for item in iter {
|
||||
let ((_, _, value), _) = item?;
|
||||
let facet_docids = self
|
||||
.facet_number_docids(value)?
|
||||
.ok_or(InternalError::DatabaseMissingEntry {
|
||||
let facet_docids =
|
||||
self.facet_number_docids(value)?.ok_or(InternalError::DatabaseMissingEntry {
|
||||
db_name: db_name::FACET_ID_F64_DOCIDS,
|
||||
key: None,
|
||||
})?;
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
mod facet_distinct;
|
||||
mod noop_distinct;
|
||||
|
||||
pub use facet_distinct::FacetDistinct;
|
||||
pub use noop_distinct::NoopDistinct;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{DocumentId, Result};
|
||||
pub use facet_distinct::FacetDistinct;
|
||||
pub use noop_distinct::NoopDistinct;
|
||||
|
||||
/// A trait implemented by document interators that are returned by calls to `Distinct::distinct`.
|
||||
/// It provides a way to get back the ownership to the excluded set.
|
||||
|
@ -29,13 +29,15 @@ mod test {
|
|||
use std::collections::HashSet;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::{seq::SliceRandom, Rng};
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::Rng;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::index::{Index, tests::TempIndex};
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::index::Index;
|
||||
use crate::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
|
||||
use crate::{BEU32, FieldId, DocumentId};
|
||||
use crate::{DocumentId, FieldId, BEU32};
|
||||
|
||||
static JSON: Lazy<Value> = Lazy::new(generate_json);
|
||||
|
||||
|
@ -89,9 +91,7 @@ mod test {
|
|||
addition.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
||||
addition.update_format(UpdateFormat::Json);
|
||||
|
||||
addition
|
||||
.execute(JSON.to_string().as_bytes(), |_, _| ())
|
||||
.unwrap();
|
||||
addition.execute(JSON.to_string().as_bytes(), |_, _| ()).unwrap();
|
||||
|
||||
let fields_map = index.fields_ids_map(&txn).unwrap();
|
||||
let fid = fields_map.id(&distinct).unwrap();
|
||||
|
@ -103,13 +103,12 @@ mod test {
|
|||
(index, fid, map)
|
||||
}
|
||||
|
||||
|
||||
/// Checks that all the candidates are distinct, and returns the candidates number.
|
||||
pub(crate) fn validate_distinct_candidates(
|
||||
candidates: impl Iterator<Item = crate::Result<DocumentId>>,
|
||||
distinct: FieldId,
|
||||
index: &Index,
|
||||
) -> usize {
|
||||
) -> usize {
|
||||
fn test(seen: &mut HashSet<String>, value: &Value) {
|
||||
match value {
|
||||
Value::Null | Value::Object(_) | Value::Bool(_) => (),
|
||||
|
@ -117,7 +116,7 @@ mod test {
|
|||
let s = value.to_string();
|
||||
assert!(seen.insert(s));
|
||||
}
|
||||
Value::Array(values) => {values.into_iter().for_each(|value| test(seen, value))}
|
||||
Value::Array(values) => values.into_iter().for_each(|value| test(seen, value)),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
use roaring::{RoaringBitmap, bitmap::IntoIter};
|
||||
use roaring::bitmap::IntoIter;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{Distinct, DocIter};
|
||||
use crate::{DocumentId, Result};
|
||||
use super::{DocIter, Distinct};
|
||||
|
||||
/// A distinct implementer that does not perform any distinct,
|
||||
/// and simply returns an iterator to the candidates.
|
||||
|
@ -30,10 +31,7 @@ impl Distinct for NoopDistinct {
|
|||
type Iter = NoopDistinctIter;
|
||||
|
||||
fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
|
||||
NoopDistinctIter {
|
||||
candidates: candidates.into_iter(),
|
||||
excluded,
|
||||
}
|
||||
NoopDistinctIter { candidates: candidates.into_iter(), excluded }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,16 +1,16 @@
|
|||
use std::collections::{HashSet, BTreeMap};
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::ops::Bound::Unbounded;
|
||||
use std::{cmp, fmt};
|
||||
|
||||
use heed::{Database, BytesDecode};
|
||||
use heed::types::{ByteSlice, Unit};
|
||||
use heed::{BytesDecode, Database};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::error::FieldIdMapMissingEntry;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::FacetValueStringCodec;
|
||||
use crate::search::facet::{FacetIter, FacetRange};
|
||||
use crate::{Index, FieldId, DocumentId, Result};
|
||||
use crate::{DocumentId, FieldId, Index, Result};
|
||||
|
||||
/// The default number of values by facets that will
|
||||
/// be fetched from the key-value store.
|
||||
|
@ -43,7 +43,7 @@ impl<'a> FacetDistribution<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn facets<I: IntoIterator<Item=A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self {
|
||||
pub fn facets<I: IntoIterator<Item = A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self {
|
||||
self.facets = Some(names.into_iter().map(|s| s.as_ref().to_string()).collect());
|
||||
self
|
||||
}
|
||||
|
@ -66,8 +66,7 @@ impl<'a> FacetDistribution<'a> {
|
|||
facet_type: FacetType,
|
||||
candidates: &RoaringBitmap,
|
||||
distribution: &mut BTreeMap<String, u64>,
|
||||
) -> heed::Result<()>
|
||||
{
|
||||
) -> heed::Result<()> {
|
||||
fn fetch_facet_values<'t, KC, K: 't>(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
db: Database<KC, Unit>,
|
||||
|
@ -102,7 +101,7 @@ impl<'a> FacetDistribution<'a> {
|
|||
FacetType::Number => {
|
||||
let db = self.index.field_id_docid_facet_f64s;
|
||||
fetch_facet_values(self.rtxn, db, field_id, candidates, distribution)
|
||||
},
|
||||
}
|
||||
FacetType::String => {
|
||||
let db = self.index.field_id_docid_facet_strings;
|
||||
fetch_facet_values(self.rtxn, db, field_id, candidates, distribution)
|
||||
|
@ -117,11 +116,9 @@ impl<'a> FacetDistribution<'a> {
|
|||
field_id: FieldId,
|
||||
candidates: &RoaringBitmap,
|
||||
distribution: &mut BTreeMap<String, u64>,
|
||||
) -> heed::Result<()>
|
||||
{
|
||||
let iter = FacetIter::new_non_reducing(
|
||||
self.rtxn, self.index, field_id, candidates.clone(),
|
||||
)?;
|
||||
) -> heed::Result<()> {
|
||||
let iter =
|
||||
FacetIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
|
||||
|
||||
for result in iter {
|
||||
let (value, mut docids) = result?;
|
||||
|
@ -142,8 +139,7 @@ impl<'a> FacetDistribution<'a> {
|
|||
fn facet_values_from_raw_facet_database(
|
||||
&self,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<BTreeMap<String, u64>>
|
||||
{
|
||||
) -> heed::Result<BTreeMap<String, u64>> {
|
||||
let mut distribution = BTreeMap::new();
|
||||
|
||||
let db = self.index.facet_id_f64_docids;
|
||||
|
@ -157,7 +153,8 @@ impl<'a> FacetDistribution<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
let iter = self.index
|
||||
let iter = self
|
||||
.index
|
||||
.facet_id_string_docids
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.prefix_iter(self.rtxn, &[field_id])?
|
||||
|
@ -182,11 +179,30 @@ impl<'a> FacetDistribution<'a> {
|
|||
// to those candidates. We also enter here for facet strings for performance reasons.
|
||||
let mut distribution = BTreeMap::new();
|
||||
if candidates.len() <= CANDIDATES_THRESHOLD {
|
||||
self.facet_distribution_from_documents(field_id, Number, candidates, &mut distribution)?;
|
||||
self.facet_distribution_from_documents(field_id, String, candidates, &mut distribution)?;
|
||||
self.facet_distribution_from_documents(
|
||||
field_id,
|
||||
Number,
|
||||
candidates,
|
||||
&mut distribution,
|
||||
)?;
|
||||
self.facet_distribution_from_documents(
|
||||
field_id,
|
||||
String,
|
||||
candidates,
|
||||
&mut distribution,
|
||||
)?;
|
||||
} else {
|
||||
self.facet_numbers_distribution_from_facet_levels(field_id, candidates, &mut distribution)?;
|
||||
self.facet_distribution_from_documents(field_id, String, candidates, &mut distribution)?;
|
||||
self.facet_numbers_distribution_from_facet_levels(
|
||||
field_id,
|
||||
candidates,
|
||||
&mut distribution,
|
||||
)?;
|
||||
self.facet_distribution_from_documents(
|
||||
field_id,
|
||||
String,
|
||||
candidates,
|
||||
&mut distribution,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(distribution)
|
||||
|
@ -201,10 +217,11 @@ impl<'a> FacetDistribution<'a> {
|
|||
|
||||
let mut distribution = BTreeMap::new();
|
||||
for name in filterable_fields {
|
||||
let fid = fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
|
||||
field_name: name.clone(),
|
||||
process: "FacetDistribution::execute",
|
||||
})?;
|
||||
let fid =
|
||||
fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
|
||||
field_name: name.clone(),
|
||||
process: "FacetDistribution::execute",
|
||||
})?;
|
||||
let values = self.facet_values(fid)?;
|
||||
distribution.insert(name, values);
|
||||
}
|
||||
|
@ -215,13 +232,7 @@ impl<'a> FacetDistribution<'a> {
|
|||
|
||||
impl fmt::Debug for FacetDistribution<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let FacetDistribution {
|
||||
facets,
|
||||
candidates,
|
||||
max_values_by_facet,
|
||||
rtxn: _,
|
||||
index: _,
|
||||
} = self;
|
||||
let FacetDistribution { facets, candidates, max_values_by_facet, rtxn: _, index: _ } = self;
|
||||
|
||||
f.debug_struct("FacetDistribution")
|
||||
.field("facets", facets)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::collections::HashSet;
|
||||
use std::fmt::Debug;
|
||||
use std::ops::Bound::{self, Included, Excluded};
|
||||
use std::ops::Bound::{self, Excluded, Included};
|
||||
use std::result::Result as StdResult;
|
||||
use std::str::FromStr;
|
||||
|
||||
|
@ -12,16 +12,13 @@ use pest::iterators::{Pair, Pairs};
|
|||
use pest::Parser;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
|
||||
use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec, Result};
|
||||
|
||||
use super::FacetRange;
|
||||
use super::parser::Rule;
|
||||
use super::parser::{PREC_CLIMBER, FilterParser};
|
||||
|
||||
use self::FilterCondition::*;
|
||||
use self::Operator::*;
|
||||
use super::parser::{FilterParser, Rule, PREC_CLIMBER};
|
||||
use super::FacetRange;
|
||||
use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetValueStringCodec};
|
||||
use crate::{CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Operator {
|
||||
|
@ -39,13 +36,13 @@ impl Operator {
|
|||
/// an OR operation for the between case (i.e. `TO`).
|
||||
fn negate(self) -> (Self, Option<Self>) {
|
||||
match self {
|
||||
GreaterThan(n) => (LowerThanOrEqual(n), None),
|
||||
GreaterThan(n) => (LowerThanOrEqual(n), None),
|
||||
GreaterThanOrEqual(n) => (LowerThan(n), None),
|
||||
Equal(n, s) => (NotEqual(n, s), None),
|
||||
NotEqual(n, s) => (Equal(n, s), None),
|
||||
LowerThan(n) => (GreaterThanOrEqual(n), None),
|
||||
LowerThanOrEqual(n) => (GreaterThan(n), None),
|
||||
Between(n, m) => (LowerThan(n), Some(GreaterThan(m))),
|
||||
Equal(n, s) => (NotEqual(n, s), None),
|
||||
NotEqual(n, s) => (Equal(n, s), None),
|
||||
LowerThan(n) => (GreaterThanOrEqual(n), None),
|
||||
LowerThanOrEqual(n) => (GreaterThan(n), None),
|
||||
Between(n, m) => (LowerThan(n), Some(GreaterThan(m))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -63,10 +60,11 @@ impl FilterCondition {
|
|||
index: &Index,
|
||||
array: I,
|
||||
) -> Result<Option<FilterCondition>>
|
||||
where I: IntoIterator<Item=Either<J, B>>,
|
||||
J: IntoIterator<Item=A>,
|
||||
A: AsRef<str>,
|
||||
B: AsRef<str>,
|
||||
where
|
||||
I: IntoIterator<Item = Either<J, B>>,
|
||||
J: IntoIterator<Item = A>,
|
||||
A: AsRef<str>,
|
||||
B: AsRef<str>,
|
||||
{
|
||||
let mut ands = None;
|
||||
|
||||
|
@ -88,7 +86,7 @@ impl FilterCondition {
|
|||
None => Some(rule),
|
||||
};
|
||||
}
|
||||
},
|
||||
}
|
||||
Either::Right(rule) => {
|
||||
let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?;
|
||||
ands = match ands.take() {
|
||||
|
@ -106,11 +104,11 @@ impl FilterCondition {
|
|||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
expression: &str,
|
||||
) -> Result<FilterCondition>
|
||||
{
|
||||
) -> Result<FilterCondition> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let filterable_fields = index.filterable_fields_ids(rtxn)?;
|
||||
let lexed = FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?;
|
||||
let lexed =
|
||||
FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?;
|
||||
FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed)
|
||||
}
|
||||
|
||||
|
@ -118,8 +116,7 @@ impl FilterCondition {
|
|||
fim: &FieldsIdsMap,
|
||||
ff: &HashSet<FieldId>,
|
||||
expression: Pairs<Rule>,
|
||||
) -> Result<Self>
|
||||
{
|
||||
) -> Result<Self> {
|
||||
PREC_CLIMBER.climb(
|
||||
expression,
|
||||
|pair: Pair<Rule>| match pair.as_rule() {
|
||||
|
@ -135,12 +132,10 @@ impl FilterCondition {
|
|||
Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
|lhs: Result<Self>, op: Pair<Rule>, rhs: Result<Self>| {
|
||||
match op.as_rule() {
|
||||
Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))),
|
||||
Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|lhs: Result<Self>, op: Pair<Rule>, rhs: Result<Self>| match op.as_rule() {
|
||||
Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))),
|
||||
Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
@ -160,8 +155,7 @@ impl FilterCondition {
|
|||
fields_ids_map: &FieldsIdsMap,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> Result<FilterCondition>
|
||||
{
|
||||
) -> Result<FilterCondition> {
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||
.map_err(UserError::InvalidFilterAttribute)?;
|
||||
|
@ -179,8 +173,7 @@ impl FilterCondition {
|
|||
fields_ids_map: &FieldsIdsMap,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> Result<FilterCondition>
|
||||
{
|
||||
) -> Result<FilterCondition> {
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||
.map_err(UserError::InvalidFilterAttribute)?;
|
||||
|
@ -196,8 +189,7 @@ impl FilterCondition {
|
|||
fields_ids_map: &FieldsIdsMap,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> Result<FilterCondition>
|
||||
{
|
||||
) -> Result<FilterCondition> {
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||
.map_err(UserError::InvalidFilterAttribute)?;
|
||||
|
@ -213,8 +205,7 @@ impl FilterCondition {
|
|||
fields_ids_map: &FieldsIdsMap,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> Result<FilterCondition>
|
||||
{
|
||||
) -> Result<FilterCondition> {
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||
.map_err(UserError::InvalidFilterAttribute)?;
|
||||
|
@ -230,8 +221,7 @@ impl FilterCondition {
|
|||
fields_ids_map: &FieldsIdsMap,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> Result<FilterCondition>
|
||||
{
|
||||
) -> Result<FilterCondition> {
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||
.map_err(UserError::InvalidFilterAttribute)?;
|
||||
|
@ -247,8 +237,7 @@ impl FilterCondition {
|
|||
fields_ids_map: &FieldsIdsMap,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> Result<FilterCondition>
|
||||
{
|
||||
) -> Result<FilterCondition> {
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
|
||||
.map_err(UserError::InvalidFilterAttribute)?;
|
||||
|
@ -272,13 +261,14 @@ impl FilterCondition {
|
|||
left: Bound<f64>,
|
||||
right: Bound<f64>,
|
||||
output: &mut RoaringBitmap,
|
||||
) -> Result<()>
|
||||
{
|
||||
) -> Result<()> {
|
||||
match (left, right) {
|
||||
// If the request is an exact value we must go directly to the deepest level.
|
||||
(Included(l), Included(r)) if l == r && level > 0 => {
|
||||
return Self::explore_facet_number_levels(rtxn, db, field_id, 0, left, right, output);
|
||||
},
|
||||
return Self::explore_facet_number_levels(
|
||||
rtxn, db, field_id, 0, left, right, output,
|
||||
);
|
||||
}
|
||||
// lower TO upper when lower > upper must return no result
|
||||
(Included(l), Included(r)) if l > r => return Ok(()),
|
||||
(Included(l), Excluded(r)) if l >= r => return Ok(()),
|
||||
|
@ -301,7 +291,9 @@ impl FilterCondition {
|
|||
debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
|
||||
output.union_with(&docids);
|
||||
// We save the leftmost and rightmost bounds we actually found at this level.
|
||||
if i == 0 { left_found = Some(l); }
|
||||
if i == 0 {
|
||||
left_found = Some(l);
|
||||
}
|
||||
right_found = Some(r);
|
||||
}
|
||||
|
||||
|
@ -318,20 +310,50 @@ impl FilterCondition {
|
|||
// If the bound is satisfied we avoid calling this function again.
|
||||
if !matches!(left, Included(l) if l == left_found) {
|
||||
let sub_right = Excluded(left_found);
|
||||
debug!("calling left with {:?} to {:?} (level {})", left, sub_right, deeper_level);
|
||||
Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, left, sub_right, output)?;
|
||||
debug!(
|
||||
"calling left with {:?} to {:?} (level {})",
|
||||
left, sub_right, deeper_level
|
||||
);
|
||||
Self::explore_facet_number_levels(
|
||||
rtxn,
|
||||
db,
|
||||
field_id,
|
||||
deeper_level,
|
||||
left,
|
||||
sub_right,
|
||||
output,
|
||||
)?;
|
||||
}
|
||||
if !matches!(right, Included(r) if r == right_found) {
|
||||
let sub_left = Excluded(right_found);
|
||||
debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level);
|
||||
Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, sub_left, right, output)?;
|
||||
debug!(
|
||||
"calling right with {:?} to {:?} (level {})",
|
||||
sub_left, right, deeper_level
|
||||
);
|
||||
Self::explore_facet_number_levels(
|
||||
rtxn,
|
||||
db,
|
||||
field_id,
|
||||
deeper_level,
|
||||
sub_left,
|
||||
right,
|
||||
output,
|
||||
)?;
|
||||
}
|
||||
},
|
||||
}
|
||||
None => {
|
||||
// If we found nothing at this level it means that we must find
|
||||
// the same bounds but at a deeper, more precise level.
|
||||
Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, left, right, output)?;
|
||||
},
|
||||
Self::explore_facet_number_levels(
|
||||
rtxn,
|
||||
db,
|
||||
field_id,
|
||||
deeper_level,
|
||||
left,
|
||||
right,
|
||||
output,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -344,27 +366,34 @@ impl FilterCondition {
|
|||
strings_db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
operator: &Operator,
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
) -> Result<RoaringBitmap> {
|
||||
// Make sure we always bound the ranges with the field id and the level,
|
||||
// as the facets values are all in the same database and prefixed by the
|
||||
// field id and the level.
|
||||
let (left, right) = match operator {
|
||||
GreaterThan(val) => (Excluded(*val), Included(f64::MAX)),
|
||||
GreaterThan(val) => (Excluded(*val), Included(f64::MAX)),
|
||||
GreaterThanOrEqual(val) => (Included(*val), Included(f64::MAX)),
|
||||
Equal(number, string) => {
|
||||
Equal(number, string) => {
|
||||
let string_docids = strings_db.get(rtxn, &(field_id, &string))?.unwrap_or_default();
|
||||
let number_docids = match number {
|
||||
Some(n) => {
|
||||
let n = Included(*n);
|
||||
let mut output = RoaringBitmap::new();
|
||||
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, 0, n, n, &mut output)?;
|
||||
Self::explore_facet_number_levels(
|
||||
rtxn,
|
||||
numbers_db,
|
||||
field_id,
|
||||
0,
|
||||
n,
|
||||
n,
|
||||
&mut output,
|
||||
)?;
|
||||
output
|
||||
},
|
||||
}
|
||||
None => RoaringBitmap::new(),
|
||||
};
|
||||
return Ok(string_docids | number_docids);
|
||||
},
|
||||
}
|
||||
NotEqual(number, string) => {
|
||||
let all_numbers_ids = if number.is_some() {
|
||||
index.number_faceted_documents_ids(rtxn, field_id)?
|
||||
|
@ -373,12 +402,14 @@ impl FilterCondition {
|
|||
};
|
||||
let all_strings_ids = index.string_faceted_documents_ids(rtxn, field_id)?;
|
||||
let operator = Equal(*number, string.clone());
|
||||
let docids = Self::evaluate_operator(rtxn, index, numbers_db, strings_db, field_id, &operator)?;
|
||||
let docids = Self::evaluate_operator(
|
||||
rtxn, index, numbers_db, strings_db, field_id, &operator,
|
||||
)?;
|
||||
return Ok((all_numbers_ids | all_strings_ids) - docids);
|
||||
},
|
||||
LowerThan(val) => (Included(f64::MIN), Excluded(*val)),
|
||||
}
|
||||
LowerThan(val) => (Included(f64::MIN), Excluded(*val)),
|
||||
LowerThanOrEqual(val) => (Included(f64::MIN), Included(*val)),
|
||||
Between(left, right) => (Included(*left), Included(*right)),
|
||||
Between(left, right) => (Included(*left), Included(*right)),
|
||||
};
|
||||
|
||||
// Ask for the biggest value that can exist for this specific field, if it exists
|
||||
|
@ -391,36 +422,39 @@ impl FilterCondition {
|
|||
match biggest_level {
|
||||
Some(level) => {
|
||||
let mut output = RoaringBitmap::new();
|
||||
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, level, left, right, &mut output)?;
|
||||
Self::explore_facet_number_levels(
|
||||
rtxn,
|
||||
numbers_db,
|
||||
field_id,
|
||||
level,
|
||||
left,
|
||||
right,
|
||||
&mut output,
|
||||
)?;
|
||||
Ok(output)
|
||||
},
|
||||
}
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn evaluate(
|
||||
&self,
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
) -> Result<RoaringBitmap>
|
||||
{
|
||||
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
||||
let numbers_db = index.facet_id_f64_docids;
|
||||
let strings_db = index.facet_id_string_docids;
|
||||
|
||||
match self {
|
||||
Operator(fid, op) => {
|
||||
Self::evaluate_operator(rtxn, index, numbers_db, strings_db, *fid, op)
|
||||
},
|
||||
}
|
||||
Or(lhs, rhs) => {
|
||||
let lhs = lhs.evaluate(rtxn, index)?;
|
||||
let rhs = rhs.evaluate(rtxn, index)?;
|
||||
Ok(lhs | rhs)
|
||||
},
|
||||
}
|
||||
And(lhs, rhs) => {
|
||||
let lhs = lhs.evaluate(rtxn, index)?;
|
||||
let rhs = rhs.evaluate(rtxn, index)?;
|
||||
Ok(lhs & rhs)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -434,23 +468,24 @@ fn field_id(
|
|||
fields_ids_map: &FieldsIdsMap,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
items: &mut Pairs<Rule>,
|
||||
) -> StdResult<FieldId, PestError<Rule>>
|
||||
{
|
||||
) -> StdResult<FieldId, PestError<Rule>> {
|
||||
// lexing ensures that we at least have a key
|
||||
let key = items.next().unwrap();
|
||||
|
||||
let field_id = match fields_ids_map.id(key.as_str()) {
|
||||
Some(field_id) => field_id,
|
||||
None => return Err(PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` not found, available attributes are: {}",
|
||||
key.as_str(),
|
||||
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", "),
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
)),
|
||||
None => {
|
||||
return Err(PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` not found, available attributes are: {}",
|
||||
key.as_str(),
|
||||
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", "),
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
if !filterable_fields.contains(&field_id) {
|
||||
|
@ -459,9 +494,11 @@ fn field_id(
|
|||
message: format!(
|
||||
"attribute `{}` is not filterable, available filterable attributes are: {}",
|
||||
key.as_str(),
|
||||
filterable_fields.iter().flat_map(|id| {
|
||||
fields_ids_map.name(*id)
|
||||
}).collect::<Vec<_>>().join(", "),
|
||||
filterable_fields
|
||||
.iter()
|
||||
.flat_map(|id| { fields_ids_map.name(*id) })
|
||||
.collect::<Vec<_>>()
|
||||
.join(", "),
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
|
@ -476,8 +513,9 @@ fn field_id(
|
|||
///
|
||||
/// Returns the parsing error associated with the span if the conversion fails.
|
||||
fn pest_parse<T>(pair: Pair<Rule>) -> (StdResult<T, pest::error::Error<Rule>>, String)
|
||||
where T: FromStr,
|
||||
T::Err: ToString,
|
||||
where
|
||||
T: FromStr,
|
||||
T::Err: ToString,
|
||||
{
|
||||
let result = match pair.as_str().parse::<T>() {
|
||||
Ok(value) => Ok(value),
|
||||
|
@ -492,11 +530,12 @@ where T: FromStr,
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::update::Settings;
|
||||
use big_s::S;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashset;
|
||||
use big_s::S;
|
||||
|
||||
use super::*;
|
||||
use crate::update::Settings;
|
||||
|
||||
#[test]
|
||||
fn string() {
|
||||
|
@ -508,7 +547,7 @@ mod tests {
|
|||
// Set the filterable fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_filterable_fields(hashset!{ S("channel") });
|
||||
builder.set_filterable_fields(hashset! { S("channel") });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
|
@ -537,7 +576,7 @@ mod tests {
|
|||
// Set the filterable fields to be the timestamp.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_filterable_fields(hashset!{ "timestamp".into() });
|
||||
builder.set_filterable_fields(hashset! { "timestamp".into() });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
|
@ -548,10 +587,8 @@ mod tests {
|
|||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
|
||||
let expected = Or(
|
||||
Box::new(Operator(0, LowerThan(22.0))),
|
||||
Box::new(Operator(0, GreaterThan(44.0))),
|
||||
);
|
||||
let expected =
|
||||
Or(Box::new(Operator(0, LowerThan(22.0))), Box::new(Operator(0, GreaterThan(44.0))));
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
|
||||
|
@ -566,29 +603,33 @@ mod tests {
|
|||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order
|
||||
builder.set_filterable_fields(hashset!{ S("channel"), S("timestamp") });
|
||||
builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FilterCondition::from_str(
|
||||
&rtxn, &index,
|
||||
&rtxn,
|
||||
&index,
|
||||
"channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)",
|
||||
).unwrap();
|
||||
)
|
||||
.unwrap();
|
||||
let expected = Or(
|
||||
Box::new(Operator(0, Operator::Equal(None, S("gotaga")))),
|
||||
Box::new(And(
|
||||
Box::new(Operator(1, Between(22.0, 44.0))),
|
||||
Box::new(Operator(0, Operator::NotEqual(None, S("ponce")))),
|
||||
))
|
||||
)),
|
||||
);
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FilterCondition::from_str(
|
||||
&rtxn, &index,
|
||||
&rtxn,
|
||||
&index,
|
||||
"channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)",
|
||||
).unwrap();
|
||||
)
|
||||
.unwrap();
|
||||
let expected = Or(
|
||||
Box::new(Operator(0, Operator::Equal(None, S("gotaga")))),
|
||||
Box::new(Or(
|
||||
|
@ -613,20 +654,28 @@ mod tests {
|
|||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order
|
||||
builder.set_filterable_fields(hashset!{ S("channel"), S("timestamp") });
|
||||
builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FilterCondition::from_array(
|
||||
&rtxn, &index,
|
||||
vec![Either::Right("channel = gotaga"), Either::Left(vec!["timestamp = 44", "channel != ponce"])],
|
||||
).unwrap().unwrap();
|
||||
&rtxn,
|
||||
&index,
|
||||
vec![
|
||||
Either::Right("channel = gotaga"),
|
||||
Either::Left(vec!["timestamp = 44", "channel != ponce"]),
|
||||
],
|
||||
)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let expected = FilterCondition::from_str(
|
||||
&rtxn, &index,
|
||||
&rtxn,
|
||||
&index,
|
||||
"channel = gotaga AND (timestamp = 44 OR channel != ponce)",
|
||||
).unwrap();
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,20 +1,19 @@
|
|||
use std::ops::Bound::{self, Included, Excluded, Unbounded};
|
||||
use std::ops::Bound::{self, Excluded, Included, Unbounded};
|
||||
|
||||
use either::Either::{self, Left, Right};
|
||||
use heed::types::{DecodeIgnore, ByteSlice};
|
||||
use heed::{Database, RoRange, RoRevRange, LazyDecode};
|
||||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use heed::{Database, LazyDecode, RoRange, RoRevRange};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
||||
use crate::{Index, FieldId};
|
||||
|
||||
pub use self::facet_distribution::FacetDistribution;
|
||||
pub use self::filter_condition::{FilterCondition, Operator};
|
||||
pub(crate) use self::parser::Rule as ParserRule;
|
||||
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::{FieldId, Index};
|
||||
|
||||
mod filter_condition;
|
||||
mod facet_distribution;
|
||||
mod filter_condition;
|
||||
mod parser;
|
||||
|
||||
pub struct FacetRange<'t> {
|
||||
|
@ -30,8 +29,7 @@ impl<'t> FacetRange<'t> {
|
|||
level: u8,
|
||||
left: Bound<f64>,
|
||||
right: Bound<f64>,
|
||||
) -> heed::Result<FacetRange<'t>>
|
||||
{
|
||||
) -> heed::Result<FacetRange<'t>> {
|
||||
let left_bound = match left {
|
||||
Included(left) => Included((field_id, level, left, f64::MIN)),
|
||||
Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
|
||||
|
@ -62,7 +60,7 @@ impl<'t> Iterator for FacetRange<'t> {
|
|||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
|
@ -82,8 +80,7 @@ impl<'t> FacetRevRange<'t> {
|
|||
level: u8,
|
||||
left: Bound<f64>,
|
||||
right: Bound<f64>,
|
||||
) -> heed::Result<FacetRevRange<'t>>
|
||||
{
|
||||
) -> heed::Result<FacetRevRange<'t>> {
|
||||
let left_bound = match left {
|
||||
Included(left) => Included((field_id, level, left, f64::MIN)),
|
||||
Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
|
||||
|
@ -114,7 +111,7 @@ impl<'t> Iterator for FacetRevRange<'t> {
|
|||
}
|
||||
}
|
||||
continue;
|
||||
},
|
||||
}
|
||||
Some(Err(e)) => return Some(Err(e)),
|
||||
None => return None,
|
||||
}
|
||||
|
@ -139,11 +136,11 @@ impl<'t> FacetIter<'t> {
|
|||
index: &'t Index,
|
||||
field_id: FieldId,
|
||||
documents_ids: RoaringBitmap,
|
||||
) -> heed::Result<FacetIter<'t>>
|
||||
{
|
||||
) -> heed::Result<FacetIter<'t>> {
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
|
||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||
let highest_iter =
|
||||
FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||
let level_iters = vec![(documents_ids, Left(highest_iter))];
|
||||
Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true })
|
||||
}
|
||||
|
@ -156,11 +153,11 @@ impl<'t> FacetIter<'t> {
|
|||
index: &'t Index,
|
||||
field_id: FieldId,
|
||||
documents_ids: RoaringBitmap,
|
||||
) -> heed::Result<FacetIter<'t>>
|
||||
{
|
||||
) -> heed::Result<FacetIter<'t>> {
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
|
||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||
let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||
let highest_iter =
|
||||
FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||
let level_iters = vec![(documents_ids, Right(highest_iter))];
|
||||
Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true })
|
||||
}
|
||||
|
@ -174,11 +171,11 @@ impl<'t> FacetIter<'t> {
|
|||
index: &'t Index,
|
||||
field_id: FieldId,
|
||||
documents_ids: RoaringBitmap,
|
||||
) -> heed::Result<FacetIter<'t>>
|
||||
{
|
||||
) -> heed::Result<FacetIter<'t>> {
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
|
||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||
let highest_iter =
|
||||
FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||
let level_iters = vec![(documents_ids, Left(highest_iter))];
|
||||
Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: false })
|
||||
}
|
||||
|
@ -187,12 +184,13 @@ impl<'t> FacetIter<'t> {
|
|||
rtxn: &'t heed::RoTxn,
|
||||
db: Database<FacetLevelValueF64Codec, X>,
|
||||
fid: FieldId,
|
||||
) -> heed::Result<Option<u8>>
|
||||
{
|
||||
let level = db.remap_types::<ByteSlice, DecodeIgnore>()
|
||||
) -> heed::Result<Option<u8>> {
|
||||
let level = db
|
||||
.remap_types::<ByteSlice, DecodeIgnore>()
|
||||
.prefix_iter(rtxn, &[fid][..])?
|
||||
.remap_key_type::<FacetLevelValueF64Codec>()
|
||||
.last().transpose()?
|
||||
.last()
|
||||
.transpose()?
|
||||
.map(|((_, level, _, _), _)| level);
|
||||
Ok(level)
|
||||
}
|
||||
|
@ -215,7 +213,6 @@ impl<'t> Iterator for FacetIter<'t> {
|
|||
|
||||
match result {
|
||||
Ok(((_fid, level, left, right), mut docids)) => {
|
||||
|
||||
docids.intersect_with(&documents_ids);
|
||||
if !docids.is_empty() {
|
||||
if self.must_reduce {
|
||||
|
@ -242,11 +239,11 @@ impl<'t> Iterator for FacetIter<'t> {
|
|||
Ok(iter) => {
|
||||
self.level_iters.push((docids, iter));
|
||||
continue 'outer;
|
||||
},
|
||||
}
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use once_cell::sync::Lazy;
|
||||
use pest::prec_climber::{Operator, Assoc, PrecClimber};
|
||||
use pest::prec_climber::{Assoc, Operator, PrecClimber};
|
||||
|
||||
pub static PREC_CLIMBER: Lazy<PrecClimber<Rule>> = Lazy::new(|| {
|
||||
use Assoc::*;
|
||||
|
|
|
@ -1,13 +1,11 @@
|
|||
use std::collections::HashSet;
|
||||
use std::cmp::{min, Reverse};
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::ops::{Index, IndexMut};
|
||||
|
||||
use levenshtein_automata::{DFA, Distance};
|
||||
|
||||
use crate::search::query_tree::{Operation, Query};
|
||||
use levenshtein_automata::{Distance, DFA};
|
||||
|
||||
use super::build_dfa;
|
||||
use crate::search::query_tree::{Operation, Query};
|
||||
|
||||
type IsPrefix = bool;
|
||||
|
||||
|
@ -28,7 +26,9 @@ impl MatchingWords {
|
|||
.collect();
|
||||
// Sort words by length in DESC order, prioritizing the longest word,
|
||||
// in order to highlight the longest part of the matched word.
|
||||
dfas.sort_unstable_by_key(|(_dfa, query_word, _typo, _is_prefix)| Reverse(query_word.len()));
|
||||
dfas.sort_unstable_by_key(|(_dfa, query_word, _typo, _is_prefix)| {
|
||||
Reverse(query_word.len())
|
||||
});
|
||||
Self { dfas }
|
||||
}
|
||||
|
||||
|
@ -37,12 +37,13 @@ impl MatchingWords {
|
|||
self.dfas.iter().find_map(|(dfa, query_word, typo, is_prefix)| match dfa.eval(word) {
|
||||
Distance::Exact(t) if t <= *typo => {
|
||||
if *is_prefix {
|
||||
let (_dist, len) = prefix_damerau_levenshtein(query_word.as_bytes(), word.as_bytes());
|
||||
let (_dist, len) =
|
||||
prefix_damerau_levenshtein(query_word.as_bytes(), word.as_bytes());
|
||||
Some(len)
|
||||
} else {
|
||||
Some(word.len())
|
||||
}
|
||||
},
|
||||
}
|
||||
_otherwise => None,
|
||||
})
|
||||
}
|
||||
|
@ -54,11 +55,11 @@ fn fetch_queries(tree: &Operation) -> HashSet<(&str, u8, IsPrefix)> {
|
|||
match tree {
|
||||
Operation::Or(_, ops) | Operation::And(ops) => {
|
||||
ops.as_slice().iter().for_each(|op| resolve_ops(op, out));
|
||||
},
|
||||
}
|
||||
Operation::Query(Query { prefix, kind }) => {
|
||||
let typo = if kind.is_exact() { 0 } else { kind.typo() };
|
||||
out.insert((kind.word(), typo, *prefix));
|
||||
},
|
||||
}
|
||||
Operation::Phrase(words) => {
|
||||
for word in words {
|
||||
out.insert((word, 0, false));
|
||||
|
@ -80,10 +81,7 @@ struct N2Array<T> {
|
|||
|
||||
impl<T: Clone> N2Array<T> {
|
||||
fn new(x: usize, y: usize, value: T) -> N2Array<T> {
|
||||
N2Array {
|
||||
y_size: y,
|
||||
buf: vec![value; x * y],
|
||||
}
|
||||
N2Array { y_size: y, buf: vec![value; x * y] }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -178,9 +176,8 @@ fn prefix_damerau_levenshtein(source: &[u8], target: &[u8]) -> (u32, usize) {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use crate::MatchingWords;
|
||||
use crate::search::query_tree::{Operation, Query, QueryKind};
|
||||
use crate::MatchingWords;
|
||||
|
||||
#[test]
|
||||
fn matched_length() {
|
||||
|
@ -194,13 +191,23 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn matching_words() {
|
||||
let query_tree = Operation::Or(false, vec![
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: true, kind: QueryKind::exact("split".to_string()) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
|
||||
Operation::Query(Query { prefix: true, kind: QueryKind::tolerant(1, "world".to_string()) }),
|
||||
]),
|
||||
]);
|
||||
let query_tree = Operation::Or(
|
||||
false,
|
||||
vec![Operation::And(vec![
|
||||
Operation::Query(Query {
|
||||
prefix: true,
|
||||
kind: QueryKind::exact("split".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact("this".to_string()),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: true,
|
||||
kind: QueryKind::tolerant(1, "world".to_string()),
|
||||
}),
|
||||
])],
|
||||
);
|
||||
|
||||
let matching_words = MatchingWords::from_query_tree(&query_tree);
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ use std::result::Result as StdResult;
|
|||
use std::str::Utf8Error;
|
||||
use std::time::Instant;
|
||||
|
||||
use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
||||
use log::debug;
|
||||
|
@ -13,16 +14,13 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
|||
use once_cell::sync::Lazy;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
|
||||
pub(crate) use self::facet::ParserRule;
|
||||
pub use self::facet::{FacetDistribution, FacetIter, FilterCondition, Operator};
|
||||
pub use self::matching_words::MatchingWords;
|
||||
use self::query_tree::QueryTreeBuilder;
|
||||
use crate::error::FieldIdMapMissingEntry;
|
||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||
use crate::{Index, DocumentId, Result};
|
||||
|
||||
pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator};
|
||||
pub use self::matching_words::MatchingWords;
|
||||
pub(crate) use self::facet::ParserRule;
|
||||
use self::query_tree::QueryTreeBuilder;
|
||||
|
||||
use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
|
||||
use crate::{DocumentId, Index, Result};
|
||||
|
||||
// Building these factories is not free.
|
||||
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
||||
|
@ -32,8 +30,8 @@ static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
|
|||
mod criteria;
|
||||
mod distinct;
|
||||
mod facet;
|
||||
mod query_tree;
|
||||
mod matching_words;
|
||||
mod query_tree;
|
||||
|
||||
pub struct Search<'a> {
|
||||
query: Option<String>,
|
||||
|
@ -117,7 +115,7 @@ impl<'a> Search<'a> {
|
|||
let result = analyzer.analyze(query);
|
||||
let tokens = result.tokens();
|
||||
builder.build(tokens)?.map_or((None, None), |(qt, pq)| (Some(qt), Some(pq)))
|
||||
},
|
||||
}
|
||||
None => (None, None),
|
||||
};
|
||||
|
||||
|
@ -144,10 +142,11 @@ impl<'a> Search<'a> {
|
|||
None => self.perform_sort(NoopDistinct, matching_words, criteria),
|
||||
Some(name) => {
|
||||
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
let id = field_ids_map.id(name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
|
||||
field_name: name.to_string(),
|
||||
process: "distinct attribute",
|
||||
})?;
|
||||
let id =
|
||||
field_ids_map.id(name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
|
||||
field_name: name.to_string(),
|
||||
process: "distinct attribute",
|
||||
})?;
|
||||
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
|
||||
self.perform_sort(distinct, matching_words, criteria)
|
||||
}
|
||||
|
@ -159,14 +158,15 @@ impl<'a> Search<'a> {
|
|||
mut distinct: D,
|
||||
matching_words: MatchingWords,
|
||||
mut criteria: Final,
|
||||
) -> Result<SearchResult>
|
||||
{
|
||||
) -> Result<SearchResult> {
|
||||
let mut offset = self.offset;
|
||||
let mut initial_candidates = RoaringBitmap::new();
|
||||
let mut excluded_candidates = RoaringBitmap::new();
|
||||
let mut documents_ids = Vec::with_capacity(self.limit);
|
||||
|
||||
while let Some(FinalResult { candidates, bucket_candidates, .. }) = criteria.next(&excluded_candidates)? {
|
||||
while let Some(FinalResult { candidates, bucket_candidates, .. }) =
|
||||
criteria.next(&excluded_candidates)?
|
||||
{
|
||||
debug!("Number of candidates found {}", candidates.len());
|
||||
|
||||
let excluded = take(&mut excluded_candidates);
|
||||
|
@ -183,7 +183,9 @@ impl<'a> Search<'a> {
|
|||
for candidate in candidates.by_ref().take(self.limit - documents_ids.len()) {
|
||||
documents_ids.push(candidate?);
|
||||
}
|
||||
if documents_ids.len() == self.limit { break }
|
||||
if documents_ids.len() == self.limit {
|
||||
break;
|
||||
}
|
||||
excluded_candidates = candidates.into_excluded();
|
||||
}
|
||||
|
||||
|
@ -247,7 +249,7 @@ pub fn word_derivations<'c>(
|
|||
}
|
||||
|
||||
Ok(entry.insert(derived_words))
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue