format the whole project

This commit is contained in:
Tamo 2021-06-16 18:33:33 +02:00
parent ba30cef987
commit 9716fb3b36
No known key found for this signature in database
GPG key ID: 20CD8020AFA88D69
68 changed files with 3327 additions and 2336 deletions

View file

@ -5,12 +5,12 @@ use log::debug;
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::error::FieldIdMapMissingEntry;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::FacetIter;
use crate::search::query_tree::Operation;
use crate::{FieldId, Index, Result};
use super::{Criterion, CriterionParameters, CriterionResult};
/// Threshold on the number of candidates that will make
/// the system to choose between one algorithm or another.
@ -57,9 +57,8 @@ impl<'t> AscDesc<'t> {
ascending: bool,
) -> Result<Self> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let field_id = fields_ids_map
.id(&field_name)
.ok_or_else(|| FieldIdMapMissingEntry::FieldName {
let field_id =
fields_ids_map.id(&field_name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
field_name: field_name.clone(),
process: "AscDesc::new",
})?;
@ -101,44 +100,47 @@ impl<'t> Criterion for AscDesc<'t> {
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
self.query_tree = query_tree;
let mut candidates = match (&self.query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
resolve_query_tree(&context, qt, params.wdcache)?
},
(None, None) => self.index.documents_ids(self.rtxn)?,
};
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
None => match self.parent.next(params)? {
Some(CriterionResult {
query_tree,
candidates,
filtered_candidates,
bucket_candidates,
}) => {
self.query_tree = query_tree;
let mut candidates = match (&self.query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
resolve_query_tree(&context, qt, params.wdcache)?
}
(None, None) => self.index.documents_ids(self.rtxn)?,
};
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
if candidates.is_empty() {
continue;
}
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
self.allowed_candidates = &candidates - params.excluded_candidates;
self.candidates = facet_ordered(
self.index,
self.rtxn,
self.field_id,
self.ascending,
candidates & &self.faceted_candidates,
)?;
},
None => return Ok(None),
if candidates.is_empty() {
continue;
}
self.allowed_candidates = &candidates - params.excluded_candidates;
self.candidates = facet_ordered(
self.index,
self.rtxn,
self.field_id,
self.ascending,
candidates & &self.faceted_candidates,
)?;
}
None => return Ok(None),
},
Some(mut candidates) => {
candidates -= params.excluded_candidates;
@ -170,11 +172,8 @@ fn facet_ordered<'t>(
let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
} else {
let facet_fn = if ascending {
FacetIter::new_reducing
} else {
FacetIter::new_reverse_reducing
};
let facet_fn =
if ascending { FacetIter::new_reducing } else { FacetIter::new_reverse_reducing };
let iter = facet_fn(rtxn, index, field_id, candidates)?;
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
}
@ -194,9 +193,7 @@ fn iterative_facet_ordered_iter<'t>(
for docid in candidates.iter() {
let left = (field_id, docid, f64::MIN);
let right = (field_id, docid, f64::MAX);
let mut iter = index
.field_id_docid_facet_f64s
.range(rtxn, &(left..=right))?;
let mut iter = index.field_id_docid_facet_f64s.range(rtxn, &(left..=right))?;
let entry = if ascending { iter.next() } else { iter.last() };
if let Some(((_, _, value), ())) = entry.transpose()? {
docids_values.push((docid, OrderedFloat(value)));

View file

@ -1,15 +1,16 @@
use std::{borrow::Cow, cmp::{self, Ordering}, collections::BinaryHeap};
use std::collections::{BTreeMap, HashMap, btree_map};
use std::borrow::Cow;
use std::cmp::{self, Ordering};
use std::collections::binary_heap::PeekMut;
use std::collections::{btree_map, BTreeMap, BinaryHeap, HashMap};
use std::mem::take;
use roaring::RoaringBitmap;
use crate::{TreeLevel, Result, search::build_dfa};
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::Query;
use crate::search::query_tree::{Operation, QueryKind};
use crate::search::{word_derivations, WordDerivationsCache};
use super::{Criterion, CriterionParameters, CriterionResult, Context, resolve_query_tree};
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
use crate::{Result, TreeLevel};
/// To be able to divide integers by the number of words in the query
/// we want to find a multiplier that allow us to divide by any number between 1 and 10.
@ -63,15 +64,19 @@ impl<'t> Criterion for Attribute<'t> {
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
}
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
let found_candidates = if allowed_candidates.len() < CANDIDATES_THRESHOLD {
let current_buckets = match self.current_buckets.as_mut() {
Some(current_buckets) => current_buckets,
None => {
let new_buckets = linear_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates)?;
let new_buckets = linear_compute_candidates(
self.ctx,
&flattened_query_tree,
&allowed_candidates,
)?;
self.current_buckets.get_or_insert(new_buckets.into_iter())
},
}
};
match current_buckets.next() {
@ -83,10 +88,15 @@ impl<'t> Criterion for Attribute<'t> {
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
}
}
} else {
match set_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates, params.wdcache)? {
match set_compute_candidates(
self.ctx,
&flattened_query_tree,
&allowed_candidates,
params.wdcache,
)? {
Some(candidates) => candidates,
None => {
return Ok(Some(CriterionResult {
@ -95,13 +105,14 @@ impl<'t> Criterion for Attribute<'t> {
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
}
}
};
allowed_candidates -= &found_candidates;
self.state = Some((query_tree.clone(), flattened_query_tree, allowed_candidates));
self.state =
Some((query_tree.clone(), flattened_query_tree, allowed_candidates));
return Ok(Some(CriterionResult {
query_tree: Some(query_tree),
@ -109,39 +120,50 @@ impl<'t> Criterion for Attribute<'t> {
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
let mut candidates = match candidates {
Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
};
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
None => match self.parent.next(params)? {
Some(CriterionResult {
query_tree: Some(query_tree),
candidates,
filtered_candidates,
bucket_candidates,
}) => {
let mut candidates = match candidates {
Some(candidates) => candidates,
None => {
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
- params.excluded_candidates
}
};
let flattened_query_tree = flatten_query_tree(&query_tree);
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
let flattened_query_tree = flatten_query_tree(&query_tree);
self.state = Some((query_tree, flattened_query_tree, candidates));
self.current_buckets = None;
},
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
},
None => return Ok(None),
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
self.state = Some((query_tree, flattened_query_tree, candidates));
self.current_buckets = None;
}
Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
}
None => return Ok(None),
},
}
}
@ -152,7 +174,9 @@ impl<'t> Criterion for Attribute<'t> {
/// it will begin at the first non-empty interval and will return every interval without
/// jumping over empty intervals.
struct WordLevelIterator<'t, 'q> {
inner: Box<dyn Iterator<Item =heed::Result<((&'t str, TreeLevel, u32, u32), RoaringBitmap)>> + 't>,
inner: Box<
dyn Iterator<Item = heed::Result<((&'t str, TreeLevel, u32, u32), RoaringBitmap)>> + 't,
>,
level: TreeLevel,
interval_size: u32,
word: Cow<'q, str>,
@ -162,49 +186,80 @@ struct WordLevelIterator<'t, 'q> {
}
impl<'t, 'q> WordLevelIterator<'t, 'q> {
fn new(ctx: &'t dyn Context<'t>, word: Cow<'q, str>, in_prefix_cache: bool) -> heed::Result<Option<Self>> {
fn new(
ctx: &'t dyn Context<'t>,
word: Cow<'q, str>,
in_prefix_cache: bool,
) -> heed::Result<Option<Self>> {
match ctx.word_position_last_level(&word, in_prefix_cache)? {
Some(level) => {
Some(level) => {
let interval_size = LEVEL_EXPONENTIATION_BASE.pow(Into::<u8>::into(level) as u32);
let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, None, None)?;
Ok(Some(Self { inner, level, interval_size, word, in_prefix_cache, inner_next: None, current_interval: None }))
},
let inner =
ctx.word_position_iterator(&word, level, in_prefix_cache, None, None)?;
Ok(Some(Self {
inner,
level,
interval_size,
word,
in_prefix_cache,
inner_next: None,
current_interval: None,
}))
}
None => Ok(None),
}
}
fn dig(&self, ctx: &'t dyn Context<'t>, level: &TreeLevel, left_interval: Option<u32>) -> heed::Result<Self> {
fn dig(
&self,
ctx: &'t dyn Context<'t>,
level: &TreeLevel,
left_interval: Option<u32>,
) -> heed::Result<Self> {
let level = *level.min(&self.level);
let interval_size = LEVEL_EXPONENTIATION_BASE.pow(Into::<u8>::into(level) as u32);
let word = self.word.clone();
let in_prefix_cache = self.in_prefix_cache;
let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, left_interval, None)?;
let inner =
ctx.word_position_iterator(&word, level, in_prefix_cache, left_interval, None)?;
Ok(Self {inner, level, interval_size, word, in_prefix_cache, inner_next: None, current_interval: None})
Ok(Self {
inner,
level,
interval_size,
word,
in_prefix_cache,
inner_next: None,
current_interval: None,
})
}
fn next(&mut self) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
fn is_next_interval(last_right: u32, next_left: u32) -> bool { last_right + 1 == next_left }
fn is_next_interval(last_right: u32, next_left: u32) -> bool {
last_right + 1 == next_left
}
let inner_next = match self.inner_next.take() {
Some(inner_next) => Some(inner_next),
None => self.inner.next().transpose()?.map(|((_, _, left, right), docids)| (left, right, docids)),
None => self
.inner
.next()
.transpose()?
.map(|((_, _, left, right), docids)| (left, right, docids)),
};
match inner_next {
Some((left, right, docids)) => {
match self.current_interval {
Some((last_left, last_right)) if !is_next_interval(last_right, left) => {
let blank_left = last_left + self.interval_size;
let blank_right = last_right + self.interval_size;
self.current_interval = Some((blank_left, blank_right));
self.inner_next = Some((left, right, docids));
Ok(Some((blank_left, blank_right, RoaringBitmap::new())))
},
_ => {
self.current_interval = Some((left, right));
Ok(Some((left, right, docids)))
}
Some((left, right, docids)) => match self.current_interval {
Some((last_left, last_right)) if !is_next_interval(last_right, left) => {
let blank_left = last_left + self.interval_size;
let blank_right = last_right + self.interval_size;
self.current_interval = Some((blank_left, blank_right));
self.inner_next = Some((left, right, docids));
Ok(Some((blank_left, blank_right, RoaringBitmap::new())))
}
_ => {
self.current_interval = Some((left, right));
Ok(Some((left, right, docids)))
}
},
None => Ok(None),
@ -228,30 +283,37 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
ctx: &'t dyn Context<'t>,
queries: &'q [Query],
wdcache: &mut WordDerivationsCache,
) -> Result<Option<Self>>
{
) -> Result<Option<Self>> {
let mut inner = Vec::with_capacity(queries.len());
for query in queries {
match &query.kind {
QueryKind::Exact { word, .. } => {
if !query.prefix || ctx.in_prefix_cache(&word) {
let word = Cow::Borrowed(query.kind.word());
if let Some(word_level_iterator) = WordLevelIterator::new(ctx, word, query.prefix)? {
if let Some(word_level_iterator) =
WordLevelIterator::new(ctx, word, query.prefix)?
{
inner.push(word_level_iterator);
}
} else {
for (word, _) in word_derivations(&word, true, 0, ctx.words_fst(), wdcache)? {
for (word, _) in word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?
{
let word = Cow::Owned(word.to_owned());
if let Some(word_level_iterator) = WordLevelIterator::new(ctx, word, false)? {
if let Some(word_level_iterator) =
WordLevelIterator::new(ctx, word, false)?
{
inner.push(word_level_iterator);
}
}
}
},
}
QueryKind::Tolerant { typo, word } => {
for (word, _) in word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)? {
for (word, _) in
word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?
{
let word = Cow::Owned(word.to_owned());
if let Some(word_level_iterator) = WordLevelIterator::new(ctx, word, false)? {
if let Some(word_level_iterator) = WordLevelIterator::new(ctx, word, false)?
{
inner.push(word_level_iterator);
}
}
@ -284,17 +346,28 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
Some(parent) => {
let parent = parent.dig(ctx)?;
(parent.level.min(self.level), Some(Box::new(parent)))
},
}
None => (self.level.saturating_sub(1), None),
};
let left_interval = self.accumulator.get(self.interval_to_skip).map(|opt| opt.as_ref().map(|(left, _, _)| *left)).flatten();
let left_interval = self
.accumulator
.get(self.interval_to_skip)
.map(|opt| opt.as_ref().map(|(left, _, _)| *left))
.flatten();
let mut inner = Vec::with_capacity(self.inner.len());
for word_level_iterator in self.inner.iter() {
inner.push(word_level_iterator.dig(ctx, &level, left_interval)?);
}
Ok(Self {parent, inner, level, accumulator: vec![], parent_accumulator: vec![], interval_to_skip: 0})
Ok(Self {
parent,
inner,
level,
accumulator: vec![],
parent_accumulator: vec![],
interval_to_skip: 0,
})
}
fn inner_next(&mut self, level: TreeLevel) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
@ -305,12 +378,12 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
let wli_u8_level = Into::<u8>::into(wli.level);
let accumulated_count = LEVEL_EXPONENTIATION_BASE.pow((u8_level - wli_u8_level) as u32);
for _ in 0..accumulated_count {
if let Some((next_left, _, next_docids)) = wli.next()? {
accumulated = match accumulated.take(){
if let Some((next_left, _, next_docids)) = wli.next()? {
accumulated = match accumulated.take() {
Some((acc_left, acc_right, mut acc_docids)) => {
acc_docids |= next_docids;
Some((acc_left, acc_right, acc_docids))
},
}
None => Some((next_left, next_left + interval_size, next_docids)),
};
}
@ -322,7 +395,11 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
/// return the next meta-interval created from inner WordLevelIterators,
/// and from eventual chainned QueryLevelIterator.
fn next(&mut self, allowed_candidates: &RoaringBitmap, tree_level: TreeLevel) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
fn next(
&mut self,
allowed_candidates: &RoaringBitmap,
tree_level: TreeLevel,
) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
let parent_result = match self.parent.as_mut() {
Some(parent) => Some(parent.next(allowed_candidates, tree_level)?),
None => None,
@ -335,22 +412,30 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
&self.parent_accumulator,
&self.accumulator,
self.interval_to_skip,
allowed_candidates
allowed_candidates,
);
self.accumulator.push(inner_next);
self.parent_accumulator.push(parent_next);
let mut merged_interval: Option<(u32, u32, RoaringBitmap)> = None;
for current in self.accumulator.iter().rev().zip(self.parent_accumulator.iter()).skip(self.interval_to_skip) {
for current in self
.accumulator
.iter()
.rev()
.zip(self.parent_accumulator.iter())
.skip(self.interval_to_skip)
{
if let (Some((left_a, right_a, a)), Some((left_b, right_b, b))) = current {
match merged_interval.as_mut() {
Some((_, _, merged_docids)) => *merged_docids |= a & b,
None => merged_interval = Some((left_a + left_b, right_a + right_b, a & b)),
None => {
merged_interval = Some((left_a + left_b, right_a + right_b, a & b))
}
}
}
}
Ok(merged_interval)
},
}
None => {
let level = self.level;
match self.inner_next(level)? {
@ -358,12 +443,11 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
self.accumulator = vec![Some((left, right, RoaringBitmap::new()))];
candidates &= allowed_candidates;
Ok(Some((left, right, candidates)))
},
}
None => {
self.accumulator = vec![None];
Ok(None)
},
}
}
}
}
@ -379,16 +463,18 @@ fn interval_to_skip(
already_skiped: usize,
allowed_candidates: &RoaringBitmap,
) -> usize {
parent_accumulator.iter()
parent_accumulator
.iter()
.zip(current_accumulator.iter())
.skip(already_skiped)
.take_while(|(parent, current)| {
let skip_parent = parent.as_ref().map_or(true, |(_, _, docids)| docids.is_empty());
let skip_current = current.as_ref().map_or(true, |(_, _, docids)| docids.is_disjoint(allowed_candidates));
let skip_current = current
.as_ref()
.map_or(true, |(_, _, docids)| docids.is_disjoint(allowed_candidates));
skip_parent && skip_current
})
.count()
}
/// A Branch is represent a possible alternative of the original query and is build with the Query Tree,
@ -410,7 +496,7 @@ impl<'t, 'q> Branch<'t, 'q> {
self.last_result = last_result;
self.tree_level = tree_level;
Ok(true)
},
}
None => Ok(false),
}
}
@ -429,7 +515,7 @@ impl<'t, 'q> Branch<'t, 'q> {
let interval_size = LEVEL_EXPONENTIATION_BASE.pow(u8_level as u32);
let (left, right, _) = self.last_result;
self.last_result = (left + interval_size, right + interval_size, RoaringBitmap::new());
self.last_result = (left + interval_size, right + interval_size, RoaringBitmap::new());
}
/// return the score of the current inner interval.
@ -477,31 +563,31 @@ fn initialize_query_level_iterators<'t, 'q>(
allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache,
) -> Result<BinaryHeap<Branch<'t, 'q>>> {
let mut positions = BinaryHeap::with_capacity(branches.len());
for branch in branches {
let mut branch_positions = Vec::with_capacity(branch.len());
for queries in branch {
for queries in branch {
match QueryLevelIterator::new(ctx, queries, wdcache)? {
Some(qli) => branch_positions.push(qli),
None => {
// the branch seems to be invalid, so we skip it.
branch_positions.clear();
break;
},
}
}
}
// QueryLevelIterator need to be sorted by level and folded in descending order.
branch_positions.sort_unstable_by_key(|qli| qli.level);
let folded_query_level_iterators = branch_positions
.into_iter()
.fold(None, |fold: Option<QueryLevelIterator>, mut qli| match fold {
Some(fold) => {
qli.parent(fold);
Some(qli)
},
None => Some(qli),
});
let folded_query_level_iterators =
branch_positions.into_iter().fold(None, |fold: Option<QueryLevelIterator>, mut qli| {
match fold {
Some(fold) => {
qli.parent(fold);
Some(qli)
}
None => Some(qli),
}
});
if let Some(mut folded_query_level_iterators) = folded_query_level_iterators {
let tree_level = folded_query_level_iterators.level;
@ -526,9 +612,9 @@ fn set_compute_candidates<'t>(
branches: &FlattenedQueryTree,
allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache,
) -> Result<Option<RoaringBitmap>>
{
let mut branches_heap = initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?;
) -> Result<Option<RoaringBitmap>> {
let mut branches_heap =
initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?;
let lowest_level = TreeLevel::min_value();
let mut final_candidates: Option<(u32, RoaringBitmap)> = None;
let mut allowed_candidates = allowed_candidates.clone();
@ -539,15 +625,18 @@ fn set_compute_candidates<'t>(
// if current is worst than best we break to return
// candidates that correspond to the best rank
if let Some((best_rank, _)) = final_candidates {
if branch_rank > best_rank { break }
if branch_rank > best_rank {
break;
}
}
let _left = branch.last_result.0;
let candidates = take(&mut branch.last_result.2);
if candidates.is_empty() {
// we don't have candidates, get next interval.
if !branch.next(&allowed_candidates)? { PeekMut::pop(branch); }
}
else if is_lowest_level {
if !branch.next(&allowed_candidates)? {
PeekMut::pop(branch);
}
} else if is_lowest_level {
// we have candidates, but we can't dig deeper.
allowed_candidates -= &candidates;
final_candidates = match final_candidates.take() {
@ -556,19 +645,20 @@ fn set_compute_candidates<'t>(
best_candidates |= candidates;
branch.lazy_next();
Some((best_rank, best_candidates))
},
}
// we take current candidates as best candidates
None => {
branch.lazy_next();
Some((branch_rank, candidates))
},
}
};
} else {
// we have candidates, lets dig deeper in levels.
branch.dig(ctx)?;
if !branch.next(&allowed_candidates)? { PeekMut::pop(branch); }
if !branch.next(&allowed_candidates)? {
PeekMut::pop(branch);
}
}
}
Ok(final_candidates.map(|(_rank, candidates)| candidates))
@ -578,9 +668,11 @@ fn linear_compute_candidates(
ctx: &dyn Context,
branches: &FlattenedQueryTree,
allowed_candidates: &RoaringBitmap,
) -> Result<BTreeMap<u64, RoaringBitmap>>
{
fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
) -> Result<BTreeMap<u64, RoaringBitmap>> {
fn compute_candidate_rank(
branches: &FlattenedQueryTree,
words_positions: HashMap<String, RoaringBitmap>,
) -> u64 {
let mut min_rank = u64::max_value();
for branch in branches {
let branch_len = branch.len();
@ -593,17 +685,20 @@ fn linear_compute_candidates(
QueryKind::Exact { word, .. } => {
if *prefix {
word_derivations(word, true, 0, &words_positions)
.flat_map(|positions| positions.iter().next()).min()
.flat_map(|positions| positions.iter().next())
.min()
} else {
words_positions.get(word)
words_positions
.get(word)
.map(|positions| positions.iter().next())
.flatten()
}
},
}
QueryKind::Tolerant { typo, word } => {
word_derivations(word, *prefix, *typo, &words_positions)
.flat_map(|positions| positions.iter().next()).min()
},
.flat_map(|positions| positions.iter().next())
.min()
}
};
match (position, current_position) {
@ -627,9 +722,11 @@ fn linear_compute_candidates(
branch_rank.sort_unstable();
// because several words in same query can't match all a the position 0,
// we substract the word index to the position.
let branch_rank: u64 = branch_rank.into_iter().enumerate().map(|(i, r)| r - i as u64).sum();
let branch_rank: u64 =
branch_rank.into_iter().enumerate().map(|(i, r)| r - i as u64).sum();
// here we do the means of the words of the branch
min_rank = min_rank.min(branch_rank * LCM_10_FIRST_NUMBERS as u64 / branch_len as u64);
min_rank =
min_rank.min(branch_rank * LCM_10_FIRST_NUMBERS as u64 / branch_len as u64);
}
}
@ -641,8 +738,7 @@ fn linear_compute_candidates(
is_prefix: bool,
max_typo: u8,
words_positions: &'a HashMap<String, RoaringBitmap>,
) -> impl Iterator<Item = &'a RoaringBitmap>
{
) -> impl Iterator<Item = &'a RoaringBitmap> {
let dfa = build_dfa(word, max_typo, is_prefix);
words_positions.iter().filter_map(move |(document_word, positions)| {
use levenshtein_automata::Distance;
@ -680,25 +776,26 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
}
}
out
},
}
None => recurse(head),
}
}
fn recurse(op: &Operation) -> FlattenedQueryTree {
match op {
And(ops) => {
ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t))
},
Or(_, ops) => if ops.iter().all(|op| op.query().is_some()) {
vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
} else {
ops.iter().map(recurse).flatten().collect()
},
And(ops) => ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t)),
Or(_, ops) => {
if ops.iter().all(|op| op.query().is_some()) {
vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
} else {
ops.iter().map(recurse).flatten().collect()
}
}
Phrase(words) => {
let queries = words.iter().map(|word| {
vec![Query {prefix: false, kind: QueryKind::exact(word.clone())}]
}).collect();
let queries = words
.iter()
.map(|word| vec![Query { prefix: false, kind: QueryKind::exact(word.clone()) }])
.collect();
vec![queries]
}
Operation::Query(query) => vec![vec![vec![query.clone()]]],
@ -712,28 +809,43 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
mod tests {
use big_s::S;
use crate::search::criteria::QueryKind;
use super::*;
use crate::search::criteria::QueryKind;
#[test]
fn simple_flatten_query_tree() {
let query_tree = Operation::Or(false, vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }),
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythe")) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
]),
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("many")) }),
Operation::Or(false, vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("thefish")) }),
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("the")) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
]),
let query_tree = Operation::Or(
false,
vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }),
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythe")) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
]),
]),
]);
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("many")) }),
Operation::Or(
false,
vec![
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact(S("thefish")),
}),
Operation::And(vec![
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact(S("the")),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact(S("fish")),
}),
]),
],
),
]),
],
);
let expected = vec![
vec![vec![Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }]],

View file

@ -2,19 +2,15 @@ use std::convert::TryFrom;
use std::mem::take;
use std::ops::BitOr;
use itertools::Itertools;
use log::debug;
use roaring::RoaringBitmap;
use itertools::Itertools;
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
use crate::search::criteria::{
Context,
Criterion,
CriterionParameters,
CriterionResult,
resolve_query_tree,
resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
};
use crate::{TreeLevel, Result};
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
use crate::{Result, TreeLevel};
pub struct Exactness<'t> {
ctx: &'t dyn Context<'t>,
@ -26,7 +22,11 @@ pub struct Exactness<'t> {
}
impl<'t> Exactness<'t> {
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>, primitive_query: &[PrimitiveQueryPart]) -> heed::Result<Self> {
pub fn new(
ctx: &'t dyn Context<'t>,
parent: Box<dyn Criterion + 't>,
primitive_query: &[PrimitiveQueryPart],
) -> heed::Result<Self> {
let mut query: Vec<_> = Vec::with_capacity(primitive_query.len());
for part in primitive_query {
query.push(ExactQueryPart::from_primitive_query_part(ctx, part)?);
@ -59,7 +59,7 @@ impl<'t> Criterion for Exactness<'t> {
// reset state
self.state = None;
self.query_tree = None;
},
}
Some(state) => {
let (candidates, state) = resolve_state(self.ctx, take(state), &self.query)?;
self.state = state;
@ -70,40 +70,51 @@ impl<'t> Criterion for Exactness<'t> {
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
let mut candidates = match candidates {
Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
};
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
None => match self.parent.next(params)? {
Some(CriterionResult {
query_tree: Some(query_tree),
candidates,
filtered_candidates,
bucket_candidates,
}) => {
let mut candidates = match candidates {
Some(candidates) => candidates,
None => {
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
- params.excluded_candidates
}
};
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
self.state = Some(State::new(candidates));
self.query_tree = Some(query_tree);
},
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
},
None => return Ok(None),
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
self.state = Some(State::new(candidates));
self.query_tree = Some(query_tree);
}
Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
}
None => return Ok(None),
},
}
}
}
}
}
@ -125,9 +136,9 @@ impl State {
fn difference_with(&mut self, lhs: &RoaringBitmap) {
match self {
Self::ExactAttribute(candidates) |
Self::AttributeStartsWith(candidates) |
Self::ExactWords(candidates) => *candidates -= lhs,
Self::ExactAttribute(candidates)
| Self::AttributeStartsWith(candidates)
| Self::ExactWords(candidates) => *candidates -= lhs,
Self::Remainings(candidates_array) => {
candidates_array.iter_mut().for_each(|candidates| *candidates -= lhs);
candidates_array.retain(|candidates| !candidates.is_empty());
@ -137,9 +148,9 @@ impl State {
fn is_empty(&self) -> bool {
match self {
Self::ExactAttribute(candidates) |
Self::AttributeStartsWith(candidates) |
Self::ExactWords(candidates) => candidates.is_empty(),
Self::ExactAttribute(candidates)
| Self::AttributeStartsWith(candidates)
| Self::ExactWords(candidates) => candidates.is_empty(),
Self::Remainings(candidates_array) => {
candidates_array.iter().all(RoaringBitmap::is_empty)
}
@ -158,8 +169,7 @@ fn resolve_state(
ctx: &dyn Context,
state: State,
query: &[ExactQueryPart],
) -> Result<(RoaringBitmap, Option<State>)>
{
) -> Result<(RoaringBitmap, Option<State>)> {
use State::*;
match state {
ExactAttribute(mut allowed_candidates) => {
@ -167,8 +177,11 @@ fn resolve_state(
if let Ok(query_len) = u8::try_from(query.len()) {
let attributes_ids = ctx.searchable_fields_ids()?;
for id in attributes_ids {
if let Some(attribute_allowed_docids) = ctx.field_id_word_count_docids(id, query_len)? {
let mut attribute_candidates_array = attribute_start_with_docids(ctx, id as u32, query)?;
if let Some(attribute_allowed_docids) =
ctx.field_id_word_count_docids(id, query_len)?
{
let mut attribute_candidates_array =
attribute_start_with_docids(ctx, id as u32, query)?;
attribute_candidates_array.push(attribute_allowed_docids);
candidates |= intersection_of(attribute_candidates_array.iter().collect());
}
@ -181,12 +194,13 @@ fn resolve_state(
}
Ok((candidates, Some(AttributeStartsWith(allowed_candidates))))
},
}
AttributeStartsWith(mut allowed_candidates) => {
let mut candidates = RoaringBitmap::new();
let attributes_ids = ctx.searchable_fields_ids()?;
for id in attributes_ids {
let attribute_candidates_array = attribute_start_with_docids(ctx, id as u32, query)?;
let attribute_candidates_array =
attribute_start_with_docids(ctx, id as u32, query)?;
candidates |= intersection_of(attribute_candidates_array.iter().collect());
}
@ -195,7 +209,7 @@ fn resolve_state(
// remove current candidates from allowed candidates
allowed_candidates -= &candidates;
Ok((candidates, Some(ExactWords(allowed_candidates))))
},
}
ExactWords(mut allowed_candidates) => {
let number_of_part = query.len();
let mut parts_candidates_array = Vec::with_capacity(number_of_part);
@ -210,7 +224,7 @@ fn resolve_state(
candidates |= synonym_candidates;
}
}
},
}
// compute intersection on pair of words with a proximity of 0.
Phrase(phrase) => {
let mut bitmaps = Vec::with_capacity(phrase.len().saturating_sub(1));
@ -220,8 +234,8 @@ fn resolve_state(
Some(docids) => bitmaps.push(docids),
None => {
bitmaps.clear();
break
},
break;
}
}
}
}
@ -247,7 +261,7 @@ fn resolve_state(
// intersect each word candidates in combinations
.map(intersection_of)
// union combinations of `c_count` exact words
.fold(RoaringBitmap::new(), RoaringBitmap::bitor);
.fold(RoaringBitmap::new(), RoaringBitmap::bitor);
// only keep allowed candidates
combinations_candidates &= &allowed_candidates;
// remove current candidates from allowed candidates
@ -261,7 +275,7 @@ fn resolve_state(
candidates_array.reverse();
Ok((all_exact_candidates, Some(Remainings(candidates_array))))
},
}
// pop remainings candidates until the emptiness
Remainings(mut candidates_array) => {
let candidates = candidates_array.pop().unwrap_or_default();
@ -270,12 +284,15 @@ fn resolve_state(
} else {
Ok((candidates, None))
}
},
}
}
}
fn attribute_start_with_docids(ctx: &dyn Context, attribute_id: u32, query: &[ExactQueryPart]) -> heed::Result<Vec<RoaringBitmap>> {
fn attribute_start_with_docids(
ctx: &dyn Context,
attribute_id: u32,
query: &[ExactQueryPart],
) -> heed::Result<Vec<RoaringBitmap>> {
let lowest_level = TreeLevel::min_value();
let mut attribute_candidates_array = Vec::new();
// start from attribute first position
@ -293,7 +310,7 @@ fn attribute_start_with_docids(ctx: &dyn Context, attribute_id: u32, query: &[Ex
}
attribute_candidates_array.push(synonyms_candidates);
pos += 1;
},
}
Phrase(phrase) => {
for word in phrase {
let wc = ctx.word_level_position_docids(word, lowest_level, pos, pos)?;
@ -325,24 +342,30 @@ pub enum ExactQueryPart {
}
impl ExactQueryPart {
fn from_primitive_query_part(ctx: &dyn Context, part: &PrimitiveQueryPart) -> heed::Result<Self> {
fn from_primitive_query_part(
ctx: &dyn Context,
part: &PrimitiveQueryPart,
) -> heed::Result<Self> {
let part = match part {
PrimitiveQueryPart::Word(word, _) => {
match ctx.synonyms(word)? {
Some(synonyms) => {
let mut synonyms: Vec<_> = synonyms.into_iter().filter_map(|mut array| {
// keep 1 word synonyms only.
match array.pop() {
Some(word) if array.is_empty() => Some(word),
_ => None,
}
}).collect();
let mut synonyms: Vec<_> = synonyms
.into_iter()
.filter_map(|mut array| {
// keep 1 word synonyms only.
match array.pop() {
Some(word) if array.is_empty() => Some(word),
_ => None,
}
})
.collect();
synonyms.push(word.clone());
ExactQueryPart::Synonyms(synonyms)
},
}
None => ExactQueryPart::Synonyms(vec![word.clone()]),
}
},
}
PrimitiveQueryPart::Phrase(phrase) => ExactQueryPart::Phrase(phrase.clone()),
};

View file

@ -1,10 +1,10 @@
use log::debug;
use roaring::RoaringBitmap;
use crate::Result;
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache;
use super::{resolve_query_tree, Criterion, CriterionResult, CriterionParameters, Context};
use crate::Result;
/// The result of a call to the fetcher.
#[derive(Debug, Clone, PartialEq)]
@ -26,7 +26,12 @@ pub struct Final<'t> {
impl<'t> Final<'t> {
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Final<'t> {
Final { ctx, parent, wdcache: WordDerivationsCache::new(), returned_candidates: RoaringBitmap::new() }
Final {
ctx,
parent,
wdcache: WordDerivationsCache::new(),
returned_candidates: RoaringBitmap::new(),
}
}
#[logging_timer::time("Final::{}")]
@ -40,10 +45,17 @@ impl<'t> Final<'t> {
};
match self.parent.next(&mut criterion_parameters)? {
Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
Some(CriterionResult {
query_tree,
candidates,
filtered_candidates,
bucket_candidates,
}) => {
let mut candidates = match (candidates, query_tree.as_ref()) {
(Some(candidates), _) => candidates,
(None, Some(qt)) => resolve_query_tree(self.ctx, qt, &mut self.wdcache)? - excluded_candidates,
(None, Some(qt)) => {
resolve_query_tree(self.ctx, qt, &mut self.wdcache)? - excluded_candidates
}
(None, None) => self.ctx.documents_ids()? - excluded_candidates,
};
@ -56,7 +68,7 @@ impl<'t> Final<'t> {
self.returned_candidates |= &candidates;
Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }))
},
}
None => Ok(None),
}
}

View file

@ -1,15 +1,18 @@
use roaring::RoaringBitmap;
use crate::Result;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::query_tree::Operation;
use super::{Criterion, CriterionResult, CriterionParameters};
use crate::Result;
pub struct Initial {
answer: Option<CriterionResult>
answer: Option<CriterionResult>,
}
impl Initial {
pub fn new(query_tree: Option<Operation>, filtered_candidates: Option<RoaringBitmap>) -> Initial {
pub fn new(
query_tree: Option<Operation>,
filtered_candidates: Option<RoaringBitmap>,
) -> Initial {
let answer = CriterionResult {
query_tree,
candidates: None,

View file

@ -1,29 +1,28 @@
use std::collections::HashMap;
use std::borrow::Cow;
use std::collections::HashMap;
use roaring::RoaringBitmap;
use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}};
use crate::{Index, DocumentId, Result};
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
use self::asc_desc::AscDesc;
use self::attribute::Attribute;
use self::exactness::Exactness;
use self::r#final::Final;
use self::initial::Initial;
use self::proximity::Proximity;
use self::r#final::Final;
use self::typo::Typo;
use self::words::Words;
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
use crate::search::{word_derivations, WordDerivationsCache};
use crate::{DocumentId, FieldId, Index, Result, TreeLevel};
mod asc_desc;
mod attribute;
mod exactness;
pub mod r#final;
mod initial;
mod proximity;
mod typo;
mod words;
pub mod r#final;
pub trait Criterion {
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>>;
@ -55,7 +54,7 @@ pub struct CriterionParameters<'a> {
#[derive(Debug)]
enum Candidates {
Allowed(RoaringBitmap),
Forbidden(RoaringBitmap)
Forbidden(RoaringBitmap),
}
impl Default for Candidates {
@ -68,17 +67,55 @@ pub trait Context<'c> {
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>>;
fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>>;
fn word_pair_proximity_docids(
&self,
left: &str,
right: &str,
proximity: u8,
) -> heed::Result<Option<RoaringBitmap>>;
fn word_prefix_pair_proximity_docids(
&self,
left: &str,
right: &str,
proximity: u8,
) -> heed::Result<Option<RoaringBitmap>>;
fn words_fst<'t>(&self) -> &'t fst::Set<Cow<[u8]>>;
fn in_prefix_cache(&self, word: &str) -> bool;
fn docid_words_positions(&self, docid: DocumentId) -> heed::Result<HashMap<String, RoaringBitmap>>;
fn word_position_iterator(&self, word: &str, level: TreeLevel, in_prefix_cache: bool, left: Option<u32>, right: Option<u32>) -> heed::Result<Box<dyn Iterator<Item =heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>>;
fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>>;
fn docid_words_positions(
&self,
docid: DocumentId,
) -> heed::Result<HashMap<String, RoaringBitmap>>;
fn word_position_iterator(
&self,
word: &str,
level: TreeLevel,
in_prefix_cache: bool,
left: Option<u32>,
right: Option<u32>,
) -> heed::Result<
Box<
dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c,
>,
>;
fn word_position_last_level(
&self,
word: &str,
in_prefix_cache: bool,
) -> heed::Result<Option<TreeLevel>>;
fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>>;
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>>;
fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result<Option<RoaringBitmap>>;
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result<Option<RoaringBitmap>>;
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>>;
fn field_id_word_count_docids(
&self,
field_id: FieldId,
word_count: u8,
) -> heed::Result<Option<RoaringBitmap>>;
fn word_level_position_docids(
&self,
word: &str,
level: TreeLevel,
left: u32,
right: u32,
) -> heed::Result<Option<RoaringBitmap>>;
}
pub struct CriteriaBuilder<'t> {
@ -101,12 +138,22 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
self.index.word_prefix_docids.get(self.rtxn, &word)
}
fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
fn word_pair_proximity_docids(
&self,
left: &str,
right: &str,
proximity: u8,
) -> heed::Result<Option<RoaringBitmap>> {
let key = (left, right, proximity);
self.index.word_pair_proximity_docids.get(self.rtxn, &key)
}
fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
fn word_prefix_pair_proximity_docids(
&self,
left: &str,
right: &str,
proximity: u8,
) -> heed::Result<Option<RoaringBitmap>> {
let key = (left, right, proximity);
self.index.word_prefix_pair_proximity_docids.get(self.rtxn, &key)
}
@ -119,7 +166,10 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
self.words_prefixes_fst.contains(word)
}
fn docid_words_positions(&self, docid: DocumentId) -> heed::Result<HashMap<String, RoaringBitmap>> {
fn docid_words_positions(
&self,
docid: DocumentId,
) -> heed::Result<HashMap<String, RoaringBitmap>> {
let mut words_positions = HashMap::new();
for result in self.index.docid_word_positions.prefix_iter(self.rtxn, &(docid, ""))? {
let ((_, word), positions) = result?;
@ -134,9 +184,12 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
level: TreeLevel,
in_prefix_cache: bool,
left: Option<u32>,
right: Option<u32>
) -> heed::Result<Box<dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>>
{
right: Option<u32>,
) -> heed::Result<
Box<
dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c,
>,
> {
let range = {
let left = left.unwrap_or(u32::min_value());
let right = right.unwrap_or(u32::max_value());
@ -152,7 +205,11 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
Ok(Box::new(db.range(self.rtxn, &range)?))
}
fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>> {
fn word_position_last_level(
&self,
word: &str,
in_prefix_cache: bool,
) -> heed::Result<Option<TreeLevel>> {
let range = {
let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
@ -164,7 +221,9 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
};
let last_level = db
.remap_data_type::<heed::types::DecodeIgnore>()
.range(self.rtxn, &range)?.last().transpose()?
.range(self.rtxn, &range)?
.last()
.transpose()?
.map(|((_, level, _, _), _)| level);
Ok(last_level)
@ -181,12 +240,22 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
}
}
fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result<Option<RoaringBitmap>> {
fn field_id_word_count_docids(
&self,
field_id: FieldId,
word_count: u8,
) -> heed::Result<Option<RoaringBitmap>> {
let key = (field_id, word_count);
self.index.field_id_word_count_docids.get(self.rtxn, &key)
}
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result<Option<RoaringBitmap>> {
fn word_level_position_docids(
&self,
word: &str,
level: TreeLevel,
left: u32,
right: u32,
) -> heed::Result<Option<RoaringBitmap>> {
let key = (word, level, left, right);
self.index.word_level_position_docids.get(self.rtxn, &key)
}
@ -204,13 +273,13 @@ impl<'t> CriteriaBuilder<'t> {
query_tree: Option<Operation>,
primitive_query: Option<Vec<PrimitiveQueryPart>>,
filtered_candidates: Option<RoaringBitmap>,
) -> Result<Final<'t>>
{
) -> Result<Final<'t>> {
use crate::criterion::Criterion as Name;
let primitive_query = primitive_query.unwrap_or_default();
let mut criterion = Box::new(Initial::new(query_tree, filtered_candidates)) as Box<dyn Criterion>;
let mut criterion =
Box::new(Initial::new(query_tree, filtered_candidates)) as Box<dyn Criterion>;
for name in self.index.criteria(&self.rtxn)? {
criterion = match name {
Name::Typo => Box::new(Typo::new(self, criterion)),
@ -218,8 +287,12 @@ impl<'t> CriteriaBuilder<'t> {
Name::Proximity => Box::new(Proximity::new(self, criterion)),
Name::Attribute => Box::new(Attribute::new(self, criterion)),
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?),
Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?),
Name::Asc(field) => {
Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?)
}
Name::Desc(field) => {
Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?)
}
};
}
@ -231,21 +304,20 @@ pub fn resolve_query_tree<'t>(
ctx: &'t dyn Context,
query_tree: &Operation,
wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap>
{
) -> Result<RoaringBitmap> {
fn resolve_operation<'t>(
ctx: &'t dyn Context,
query_tree: &Operation,
wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap>
{
use Operation::{And, Phrase, Or, Query};
) -> Result<RoaringBitmap> {
use Operation::{And, Or, Phrase, Query};
match query_tree {
And(ops) => {
let mut ops = ops.iter().map(|op| {
resolve_operation(ctx, op, wdcache)
}).collect::<Result<Vec<_>>>()?;
let mut ops = ops
.iter()
.map(|op| resolve_operation(ctx, op, wdcache))
.collect::<Result<Vec<_>>>()?;
ops.sort_unstable_by_key(|cds| cds.len());
@ -260,7 +332,7 @@ pub fn resolve_query_tree<'t>(
}
}
Ok(candidates)
},
}
Phrase(words) => {
let mut candidates = RoaringBitmap::new();
let mut first_loop = true;
@ -276,12 +348,12 @@ pub fn resolve_query_tree<'t>(
} else {
candidates &= pair_docids;
}
},
None => return Ok(RoaringBitmap::new())
}
None => return Ok(RoaringBitmap::new()),
}
}
Ok(candidates)
},
}
Or(_, ops) => {
let mut candidates = RoaringBitmap::new();
for op in ops {
@ -289,7 +361,7 @@ pub fn resolve_query_tree<'t>(
candidates.union_with(&docids);
}
Ok(candidates)
},
}
Query(q) => Ok(query_docids(ctx, q, wdcache)?),
}
}
@ -297,18 +369,18 @@ pub fn resolve_query_tree<'t>(
resolve_operation(ctx, query_tree, wdcache)
}
fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
ctx: &dyn Context,
left_words: &[(T, u8)],
right_words: &[(U, u8)],
proximity: u8
) -> Result<RoaringBitmap>
{
proximity: u8,
) -> Result<RoaringBitmap> {
let mut docids = RoaringBitmap::new();
for (left, _l_typo) in left_words {
for (right, _r_typo) in right_words {
let current_docids = ctx.word_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?.unwrap_or_default();
let current_docids = ctx
.word_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?
.unwrap_or_default();
docids.union_with(&current_docids);
}
}
@ -319,8 +391,7 @@ fn query_docids(
ctx: &dyn Context,
query: &Query,
wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap>
{
) -> Result<RoaringBitmap> {
match &query.kind {
QueryKind::Exact { word, .. } => {
if query.prefix && ctx.in_prefix_cache(&word) {
@ -336,7 +407,7 @@ fn query_docids(
} else {
Ok(ctx.word_docids(&word)?.unwrap_or_default())
}
},
}
QueryKind::Tolerant { typo, word } => {
let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?;
let mut docids = RoaringBitmap::new();
@ -345,7 +416,7 @@ fn query_docids(
docids.union_with(&current_docids);
}
Ok(docids)
},
}
}
}
@ -355,8 +426,7 @@ fn query_pair_proximity_docids(
right: &Query,
proximity: u8,
wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap>
{
) -> Result<RoaringBitmap> {
if proximity >= 8 {
let mut candidates = query_docids(ctx, left, wdcache)?;
let right_candidates = query_docids(ctx, right, wdcache)?;
@ -368,20 +438,31 @@ fn query_pair_proximity_docids(
match (&left.kind, &right.kind) {
(QueryKind::Exact { word: left, .. }, QueryKind::Exact { word: right, .. }) => {
if prefix && ctx.in_prefix_cache(&right) {
Ok(ctx.word_prefix_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?.unwrap_or_default())
Ok(ctx
.word_prefix_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?
.unwrap_or_default())
} else if prefix {
let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
} else {
Ok(ctx.word_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?.unwrap_or_default())
Ok(ctx
.word_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?
.unwrap_or_default())
}
},
}
(QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. }) => {
let l_words = word_derivations(&left, false, *typo, ctx.words_fst(), wdcache)?.to_owned();
let l_words =
word_derivations(&left, false, *typo, ctx.words_fst(), wdcache)?.to_owned();
if prefix && ctx.in_prefix_cache(&right) {
let mut docids = RoaringBitmap::new();
for (left, _) in l_words {
let current_docids = ctx.word_prefix_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?.unwrap_or_default();
let current_docids = ctx
.word_prefix_pair_proximity_docids(
left.as_ref(),
right.as_ref(),
proximity,
)?
.unwrap_or_default();
docids.union_with(&current_docids);
}
Ok(docids)
@ -391,28 +472,36 @@ fn query_pair_proximity_docids(
} else {
all_word_pair_proximity_docids(ctx, &l_words, &[(right, 0)], proximity)
}
},
}
(QueryKind::Exact { word: left, .. }, QueryKind::Tolerant { typo, word: right }) => {
let r_words = word_derivations(&right, prefix, *typo, ctx.words_fst(), wdcache)?;
all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
},
(QueryKind::Tolerant { typo: l_typo, word: left }, QueryKind::Tolerant { typo: r_typo, word: right }) => {
let l_words = word_derivations(&left, false, *l_typo, ctx.words_fst(), wdcache)?.to_owned();
}
(
QueryKind::Tolerant { typo: l_typo, word: left },
QueryKind::Tolerant { typo: r_typo, word: right },
) => {
let l_words =
word_derivations(&left, false, *l_typo, ctx.words_fst(), wdcache)?.to_owned();
let r_words = word_derivations(&right, prefix, *r_typo, ctx.words_fst(), wdcache)?;
all_word_pair_proximity_docids(ctx, &l_words, &r_words, proximity)
},
}
}
}
#[cfg(test)]
pub mod test {
use maplit::hashmap;
use rand::{Rng, SeedableRng, rngs::StdRng};
use super::*;
use std::collections::HashMap;
fn s(s: &str) -> String { s.to_string() }
use maplit::hashmap;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use super::*;
fn s(s: &str) -> String {
s.to_string()
}
pub struct TestContext<'t> {
words_fst: fst::Set<Cow<'t, [u8]>>,
word_docids: HashMap<String, RoaringBitmap>,
@ -435,12 +524,22 @@ pub mod test {
Ok(self.word_prefix_docids.get(&word.to_string()).cloned())
}
fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
fn word_pair_proximity_docids(
&self,
left: &str,
right: &str,
proximity: u8,
) -> heed::Result<Option<RoaringBitmap>> {
let key = (left.to_string(), right.to_string(), proximity.into());
Ok(self.word_pair_proximity_docids.get(&key).cloned())
}
fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
fn word_prefix_pair_proximity_docids(
&self,
left: &str,
right: &str,
proximity: u8,
) -> heed::Result<Option<RoaringBitmap>> {
let key = (left.to_string(), right.to_string(), proximity.into());
Ok(self.word_prefix_pair_proximity_docids.get(&key).cloned())
}
@ -453,24 +552,44 @@ pub mod test {
self.word_prefix_docids.contains_key(&word.to_string())
}
fn docid_words_positions(&self, docid: DocumentId) -> heed::Result<HashMap<String, RoaringBitmap>> {
fn docid_words_positions(
&self,
docid: DocumentId,
) -> heed::Result<HashMap<String, RoaringBitmap>> {
if let Some(docid_words) = self.docid_words.get(&docid) {
Ok(docid_words
.iter()
.enumerate()
.map(|(i,w)| (w.clone(), RoaringBitmap::from_sorted_iter(std::iter::once(i as u32))))
.collect()
)
.map(|(i, w)| {
(w.clone(), RoaringBitmap::from_sorted_iter(std::iter::once(i as u32)))
})
.collect())
} else {
Ok(HashMap::new())
}
}
fn word_position_iterator(&self, _word: &str, _level: TreeLevel, _in_prefix_cache: bool, _left: Option<u32>, _right: Option<u32>) -> heed::Result<Box<dyn Iterator<Item =heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>> {
fn word_position_iterator(
&self,
_word: &str,
_level: TreeLevel,
_in_prefix_cache: bool,
_left: Option<u32>,
_right: Option<u32>,
) -> heed::Result<
Box<
dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>>
+ 'c,
>,
> {
todo!()
}
fn word_position_last_level(&self, _word: &str, _in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>> {
fn word_position_last_level(
&self,
_word: &str,
_in_prefix_cache: bool,
) -> heed::Result<Option<TreeLevel>> {
todo!()
}
@ -478,15 +597,25 @@ pub mod test {
todo!()
}
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>> {
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>> {
todo!()
}
fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> heed::Result<Option<RoaringBitmap>> {
fn word_level_position_docids(
&self,
_word: &str,
_level: TreeLevel,
_left: u32,
_right: u32,
) -> heed::Result<Option<RoaringBitmap>> {
todo!()
}
fn field_id_word_count_docids(&self, _field_id: FieldId, _word_count: u8) -> heed::Result<Option<RoaringBitmap>> {
fn field_id_word_count_docids(
&self,
_field_id: FieldId,
_word_count: u8,
) -> heed::Result<Option<RoaringBitmap>> {
todo!()
}
}
@ -506,7 +635,7 @@ pub mod test {
RoaringBitmap::from_sorted_iter(values.into_iter())
}
let word_docids = hashmap!{
let word_docids = hashmap! {
s("hello") => random_postings(rng, 1500),
s("hi") => random_postings(rng, 4000),
s("word") => random_postings(rng, 2500),
@ -530,7 +659,7 @@ pub mod test {
}
}
let word_prefix_docids = hashmap!{
let word_prefix_docids = hashmap! {
s("h") => &word_docids[&s("hello")] | &word_docids[&s("hi")],
s("wor") => &word_docids[&s("word")] | &word_docids[&s("world")],
s("20") => &word_docids[&s("2020")] | &word_docids[&s("2021")],
@ -540,7 +669,9 @@ pub mod test {
let mut word_prefix_pair_proximity_docids = HashMap::new();
for (lword, lcandidates) in &word_docids {
for (rword, rcandidates) in &word_docids {
if lword == rword { continue }
if lword == rword {
continue;
}
let candidates = lcandidates & rcandidates;
for candidate in candidates {
if let Some(docid_words) = docid_words.get(&candidate) {
@ -551,24 +682,31 @@ pub mod test {
} else {
(s(lword), s(rword), (lposition - rposition + 1) as i32)
};
let docids = word_pair_proximity_docids.entry(key).or_insert(RoaringBitmap::new());
let docids = word_pair_proximity_docids
.entry(key)
.or_insert(RoaringBitmap::new());
docids.push(candidate);
}
}
}
for (pword, pcandidates) in &word_prefix_docids {
if lword.starts_with(pword) { continue }
if lword.starts_with(pword) {
continue;
}
let candidates = lcandidates & pcandidates;
for candidate in candidates {
if let Some(docid_words) = docid_words.get(&candidate) {
let lposition = docid_words.iter().position(|w| w == lword).unwrap();
let rposition = docid_words.iter().position(|w| w.starts_with(pword)).unwrap();
let rposition =
docid_words.iter().position(|w| w.starts_with(pword)).unwrap();
let key = if lposition < rposition {
(s(lword), s(pword), (rposition - lposition) as i32)
} else {
(s(lword), s(pword), (lposition - rposition + 1) as i32)
};
let docids = word_prefix_pair_proximity_docids.entry(key).or_insert(RoaringBitmap::new());
let docids = word_prefix_pair_proximity_docids
.entry(key)
.or_insert(RoaringBitmap::new());
docids.push(candidate);
}
}

View file

@ -2,22 +2,16 @@ use std::collections::btree_map::{self, BTreeMap};
use std::collections::hash_map::HashMap;
use std::mem::take;
use roaring::RoaringBitmap;
use log::debug;
use roaring::RoaringBitmap;
use crate::search::query_tree::{maximum_proximity, Operation, Query};
use crate::search::{build_dfa, WordDerivationsCache};
use crate::search::{query_tree::QueryKind};
use crate::{DocumentId, Position, Result};
use super::{
Context,
Criterion,
CriterionParameters,
CriterionResult,
query_docids,
query_pair_proximity_docids,
resolve_query_tree,
query_docids, query_pair_proximity_docids, resolve_query_tree, Context, Criterion,
CriterionParameters, CriterionResult,
};
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
use crate::search::{build_dfa, WordDerivationsCache};
use crate::{DocumentId, Position, Result};
type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>;
@ -63,28 +57,33 @@ impl<'t> Criterion for Proximity<'t> {
}
loop {
debug!("Proximity at iteration {} (max prox {:?}) ({:?})",
debug!(
"Proximity at iteration {} (max prox {:?}) ({:?})",
self.proximity,
self.state.as_ref().map(|(mp, _, _)| mp),
self.state.as_ref().map(|(_, _, cd)| cd),
);
match &mut self.state {
Some((max_prox, _, allowed_candidates)) if allowed_candidates.is_empty() || self.proximity > *max_prox => {
Some((max_prox, _, allowed_candidates))
if allowed_candidates.is_empty() || self.proximity > *max_prox =>
{
self.state = None; // reset state
},
}
Some((_, query_tree, allowed_candidates)) => {
let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD {
let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD
&& self.proximity > PROXIMITY_THRESHOLD
{
if let Some(cache) = self.plane_sweep_cache.as_mut() {
match cache.next() {
Some((p, candidates)) => {
self.proximity = p;
candidates
},
}
None => {
self.state = None; // reset state
continue
},
continue;
}
}
} else {
let cache = resolve_plane_sweep_candidates(
@ -95,9 +94,10 @@ impl<'t> Criterion for Proximity<'t> {
)?;
self.plane_sweep_cache = Some(cache.into_iter());
continue
continue;
}
} else { // use set theory based algorithm
} else {
// use set theory based algorithm
resolve_candidates(
self.ctx,
&query_tree,
@ -117,39 +117,50 @@ impl<'t> Criterion for Proximity<'t> {
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
let mut candidates = match candidates {
Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
};
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
None => match self.parent.next(params)? {
Some(CriterionResult {
query_tree: Some(query_tree),
candidates,
filtered_candidates,
bucket_candidates,
}) => {
let mut candidates = match candidates {
Some(candidates) => candidates,
None => {
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
- params.excluded_candidates
}
};
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
let maximum_proximity = maximum_proximity(&query_tree);
self.state = Some((maximum_proximity as u8, query_tree, candidates));
self.proximity = 0;
self.plane_sweep_cache = None;
},
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
},
None => return Ok(None),
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
let maximum_proximity = maximum_proximity(&query_tree);
self.state = Some((maximum_proximity as u8, query_tree, candidates));
self.proximity = 0;
self.plane_sweep_cache = None;
}
Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
}
None => return Ok(None),
},
}
}
@ -162,46 +173,48 @@ fn resolve_candidates<'t>(
proximity: u8,
cache: &mut Cache,
wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap>
{
) -> Result<RoaringBitmap> {
fn resolve_operation<'t>(
ctx: &'t dyn Context,
query_tree: &Operation,
proximity: u8,
cache: &mut Cache,
wdcache: &mut WordDerivationsCache,
) -> Result<Vec<(Query, Query, RoaringBitmap)>>
{
use Operation::{And, Phrase, Or};
) -> Result<Vec<(Query, Query, RoaringBitmap)>> {
use Operation::{And, Or, Phrase};
let result = match query_tree {
And(ops) => mdfs(ctx, ops, proximity, cache, wdcache)?,
Phrase(words) => if proximity == 0 {
let most_left = words.first().map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
let most_right = words.last().map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
let mut candidates = None;
for slice in words.windows(2) {
let (left, right) = (&slice[0], &slice[1]);
match ctx.word_pair_proximity_docids(left, right, 1)? {
Some(pair_docids) => {
match candidates.as_mut() {
Phrase(words) => {
if proximity == 0 {
let most_left = words
.first()
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
let most_right = words
.last()
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
let mut candidates = None;
for slice in words.windows(2) {
let (left, right) = (&slice[0], &slice[1]);
match ctx.word_pair_proximity_docids(left, right, 1)? {
Some(pair_docids) => match candidates.as_mut() {
Some(candidates) => *candidates &= pair_docids,
None => candidates = Some(pair_docids),
},
None => {
candidates = None;
break;
}
},
None => {
candidates = None;
break;
}
}
match (most_left, most_right, candidates) {
(Some(l), Some(r), Some(c)) => vec![(l, r, c)],
_otherwise => Default::default(),
}
} else {
Default::default()
}
match (most_left, most_right, candidates) {
(Some(l), Some(r), Some(c)) => vec![(l, r, c)],
_otherwise => Default::default(),
}
} else {
Default::default()
},
}
Or(_, ops) => {
let mut output = Vec::new();
for op in ops {
@ -209,13 +222,15 @@ fn resolve_candidates<'t>(
output.extend(result);
}
output
},
Operation::Query(q) => if proximity == 0 {
let candidates = query_docids(ctx, q, wdcache)?;
vec![(q.clone(), q.clone(), candidates)]
} else {
Default::default()
},
}
Operation::Query(q) => {
if proximity == 0 {
let candidates = query_docids(ctx, q, wdcache)?;
vec![(q.clone(), q.clone(), candidates)]
} else {
Default::default()
}
}
};
Ok(result)
@ -228,8 +243,7 @@ fn resolve_candidates<'t>(
proximity: u8,
cache: &mut Cache,
wdcache: &mut WordDerivationsCache,
) -> Result<Vec<(Query, Query, RoaringBitmap)>>
{
) -> Result<Vec<(Query, Query, RoaringBitmap)>> {
fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator<Item = (u8, u8)> {
(0..=mana.min(left_max)).map(move |m| (m, mana - m))
}
@ -257,7 +271,8 @@ fn resolve_candidates<'t>(
for (ll, lr, lcandidates) in lefts {
for (rl, rr, rcandidates) in rights {
let mut candidates = query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?;
let mut candidates =
query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?;
if lcandidates.len() < rcandidates.len() {
candidates.intersect_with(lcandidates);
candidates.intersect_with(rcandidates);
@ -282,22 +297,26 @@ fn resolve_candidates<'t>(
proximity: u8,
cache: &mut Cache,
wdcache: &mut WordDerivationsCache,
) -> Result<Vec<(Query, Query, RoaringBitmap)>>
{
) -> Result<Vec<(Query, Query, RoaringBitmap)>> {
// Extract the first two elements but gives the tail
// that is just after the first element.
let next = branches.split_first().map(|(h1, t)| {
(h1, t.split_first().map(|(h2, _)| (h2, t)))
});
let next =
branches.split_first().map(|(h1, t)| (h1, t.split_first().map(|(h2, _)| (h2, t))));
match next {
Some((head1, Some((head2, [_])))) => mdfs_pair(ctx, head1, head2, proximity, cache, wdcache),
Some((head1, Some((head2, [_])))) => {
mdfs_pair(ctx, head1, head2, proximity, cache, wdcache)
}
Some((head1, Some((head2, tail)))) => {
let mut output = Vec::new();
for p in 0..=proximity {
for (lhead, _, head_candidates) in mdfs_pair(ctx, head1, head2, p, cache, wdcache)? {
for (lhead, _, head_candidates) in
mdfs_pair(ctx, head1, head2, p, cache, wdcache)?
{
if !head_candidates.is_empty() {
for (_, rtail, mut candidates) in mdfs(ctx, tail, proximity - p, cache, wdcache)? {
for (_, rtail, mut candidates) in
mdfs(ctx, tail, proximity - p, cache, wdcache)?
{
candidates.intersect_with(&head_candidates);
if !candidates.is_empty() {
output.push((lhead.clone(), rtail, candidates));
@ -307,7 +326,7 @@ fn resolve_candidates<'t>(
}
}
Ok(output)
},
}
Some((head1, None)) => resolve_operation(ctx, head1, proximity, cache, wdcache),
None => Ok(Default::default()),
}
@ -325,47 +344,48 @@ fn resolve_plane_sweep_candidates(
query_tree: &Operation,
allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache,
) -> Result<BTreeMap<u8, RoaringBitmap>>
{
) -> Result<BTreeMap<u8, RoaringBitmap>> {
/// FIXME may be buggy with query like "new new york"
fn plane_sweep(
groups_positions: Vec<Vec<(Position, u8, Position)>>,
consecutive: bool,
) -> Result<Vec<(Position, u8, Position)>>
{
) -> Result<Vec<(Position, u8, Position)>> {
fn compute_groups_proximity(
groups: &[(usize, (Position, u8, Position))],
consecutive: bool,
) -> Option<(Position, u8, Position)>
{
) -> Option<(Position, u8, Position)> {
// take the inner proximity of the first group as initial
let (_, (_, mut proximity, _)) = groups.first()?;
let (_, (left_most_pos, _, _)) = groups.first()?;
let (_, (_, _, right_most_pos)) = groups.iter().max_by_key(|(_, (_, _, right_most_pos))| right_most_pos)?;
let (_, (_, _, right_most_pos)) =
groups.iter().max_by_key(|(_, (_, _, right_most_pos))| right_most_pos)?;
for pair in groups.windows(2) {
if let [(i1, (lpos1, _, rpos1)), (i2, (lpos2, prox2, rpos2))] = pair {
// if two positions are equal, meaning that they share at least a word, we return None
if rpos1 == rpos2 || lpos1 == lpos2 || rpos1 == lpos2 || lpos1 == rpos2 {
return None
return None;
}
let pair_proximity = {
// if intervals are disjoint [..].(..)
if lpos2 > rpos1 { lpos2 - rpos1 }
if lpos2 > rpos1 {
lpos2 - rpos1
}
// if the second interval is a subset of the first [.(..).]
else if rpos2 < rpos1 { (lpos2 - lpos1).min(rpos1 - rpos2) }
else if rpos2 < rpos1 {
(lpos2 - lpos1).min(rpos1 - rpos2)
}
// if intervals overlaps [.(..].)
else { (lpos2 - lpos1).min(rpos2 - rpos1) }
else {
(lpos2 - lpos1).min(rpos2 - rpos1)
}
};
// if groups are in the good order (query order) we remove 1 to the proximity
// the proximity is clamped to 7
let pair_proximity = if i1 < i2 {
(pair_proximity - 1).min(7)
} else {
pair_proximity.min(7)
};
let pair_proximity =
if i1 < i2 { (pair_proximity - 1).min(7) } else { pair_proximity.min(7) };
proximity += pair_proximity as u8 + prox2;
}
@ -381,7 +401,8 @@ fn resolve_plane_sweep_candidates(
let groups_len = groups_positions.len();
let mut groups_positions: Vec<_> = groups_positions.into_iter().map(|pos| pos.into_iter()).collect();
let mut groups_positions: Vec<_> =
groups_positions.into_iter().map(|pos| pos.into_iter()).collect();
// Pop top elements of each list.
let mut current = Vec::with_capacity(groups_len);
@ -452,9 +473,8 @@ fn resolve_plane_sweep_candidates(
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
words_positions: &HashMap<String, RoaringBitmap>,
wdcache: &mut WordDerivationsCache,
) -> Result<Vec<(Position, u8, Position)>>
{
use Operation::{And, Phrase, Or};
) -> Result<Vec<(Position, u8, Position)>> {
use Operation::{And, Or, Phrase};
if let Some(result) = rocache.get(query_tree) {
return Ok(result.clone());
@ -462,13 +482,20 @@ fn resolve_plane_sweep_candidates(
let result = match query_tree {
And(ops) => {
let mut groups_positions = Vec::with_capacity(ops.len());
let mut groups_positions = Vec::with_capacity(ops.len());
for operation in ops {
let positions = resolve_operation(ctx, operation, docid, rocache, words_positions, wdcache)?;
let positions = resolve_operation(
ctx,
operation,
docid,
rocache,
words_positions,
wdcache,
)?;
groups_positions.push(positions);
}
plane_sweep(groups_positions, false)?
},
}
Phrase(words) => {
let mut groups_positions = Vec::with_capacity(words.len());
for word in words {
@ -479,16 +506,23 @@ fn resolve_plane_sweep_candidates(
groups_positions.push(positions);
}
plane_sweep(groups_positions, true)?
},
}
Or(_, ops) => {
let mut result = Vec::new();
for op in ops {
result.extend(resolve_operation(ctx, op, docid, rocache, words_positions, wdcache)?)
result.extend(resolve_operation(
ctx,
op,
docid,
rocache,
words_positions,
wdcache,
)?)
}
result.sort_unstable();
result
},
}
Operation::Query(Query { prefix, kind }) => {
let mut result = Vec::new();
match kind {
@ -498,9 +532,9 @@ fn resolve_plane_sweep_candidates(
.flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
result.extend(iter);
} else if let Some(positions) = words_positions.get(word) {
result.extend(positions.iter().map(|p| (p, 0, p)));
result.extend(positions.iter().map(|p| (p, 0, p)));
}
},
}
QueryKind::Tolerant { typo, word } => {
let iter = word_derivations(word, *prefix, *typo, &words_positions)
.flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
@ -522,8 +556,7 @@ fn resolve_plane_sweep_candidates(
is_prefix: bool,
max_typo: u8,
words_positions: &'a HashMap<String, RoaringBitmap>,
) -> impl Iterator<Item = &'a RoaringBitmap>
{
) -> impl Iterator<Item = &'a RoaringBitmap> {
let dfa = build_dfa(word, max_typo, is_prefix);
words_positions.iter().filter_map(move |(document_word, positions)| {
use levenshtein_automata::Distance;
@ -539,7 +572,7 @@ fn resolve_plane_sweep_candidates(
for docid in allowed_candidates {
let words_positions = ctx.docid_words_positions(docid)?;
resolve_operation_cache.clear();
let positions = resolve_operation(
let positions = resolve_operation(
ctx,
query_tree,
docid,

View file

@ -1,20 +1,17 @@
use std::{borrow::Cow, collections::HashMap, mem::take};
use std::borrow::Cow;
use std::collections::HashMap;
use std::mem::take;
use log::debug;
use roaring::RoaringBitmap;
use super::{
query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters,
CriterionResult,
};
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
use crate::search::{word_derivations, WordDerivationsCache};
use crate::Result;
use super::{
Candidates,
Context,
Criterion,
CriterionParameters,
CriterionResult,
query_docids,
resolve_query_tree,
};
/// Maximum number of typo for a word of any length.
const MAX_TYPOS_PER_WORD: u8 = 2;
@ -54,7 +51,8 @@ impl<'t> Criterion for Typo<'t> {
}
loop {
debug!("Typo at iteration {} (max typos {:?}) ({:?})",
debug!(
"Typo at iteration {} (max typos {:?}) ({:?})",
self.typos,
self.state.as_ref().map(|(mt, _, _)| mt),
self.state.as_ref().map(|(_, _, cd)| cd),
@ -63,29 +61,42 @@ impl<'t> Criterion for Typo<'t> {
match self.state.as_mut() {
Some((max_typos, _, _)) if self.typos > *max_typos => {
self.state = None; // reset state
},
}
Some((_, _, Allowed(allowed_candidates))) if allowed_candidates.is_empty() => {
self.state = None; // reset state
},
}
Some((_, query_tree, candidates_authorization)) => {
let fst = self.ctx.words_fst();
let new_query_tree = match self.typos {
typos if typos < MAX_TYPOS_PER_WORD => {
alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?
},
typos if typos < MAX_TYPOS_PER_WORD => alterate_query_tree(
&fst,
query_tree.clone(),
self.typos,
params.wdcache,
)?,
MAX_TYPOS_PER_WORD => {
// When typos >= MAX_TYPOS_PER_WORD, no more alteration of the query tree is possible,
// we keep the altered query tree
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?;
*query_tree = alterate_query_tree(
&fst,
query_tree.clone(),
self.typos,
params.wdcache,
)?;
// we compute the allowed candidates
let query_tree_allowed_candidates = resolve_query_tree(self.ctx, query_tree, params.wdcache)?;
let query_tree_allowed_candidates =
resolve_query_tree(self.ctx, query_tree, params.wdcache)?;
// we assign the allowed candidates to the candidates authorization.
*candidates_authorization = match take(candidates_authorization) {
Allowed(allowed_candidates) => Allowed(query_tree_allowed_candidates & allowed_candidates),
Forbidden(forbidden_candidates) => Allowed(query_tree_allowed_candidates - forbidden_candidates),
Allowed(allowed_candidates) => {
Allowed(query_tree_allowed_candidates & allowed_candidates)
}
Forbidden(forbidden_candidates) => {
Allowed(query_tree_allowed_candidates - forbidden_candidates)
}
};
query_tree.clone()
},
}
_otherwise => query_tree.clone(),
};
@ -101,11 +112,11 @@ impl<'t> Criterion for Typo<'t> {
Allowed(allowed_candidates) => {
candidates &= &*allowed_candidates;
*allowed_candidates -= &candidates;
},
}
Forbidden(forbidden_candidates) => {
candidates -= &*forbidden_candidates;
*forbidden_candidates |= &candidates;
},
}
}
let bucket_candidates = match self.bucket_candidates.as_mut() {
@ -121,35 +132,45 @@ impl<'t> Criterion for Typo<'t> {
filtered_candidates: None,
bucket_candidates: Some(bucket_candidates),
}));
},
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
}
None => match self.parent.next(params)? {
Some(CriterionResult {
query_tree: Some(query_tree),
candidates,
filtered_candidates,
bucket_candidates,
}) => {
self.bucket_candidates =
match (self.bucket_candidates.take(), bucket_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc),
};
let candidates = match candidates.or(filtered_candidates) {
Some(candidates) => Candidates::Allowed(candidates - params.excluded_candidates),
None => Candidates::Forbidden(params.excluded_candidates.clone()),
};
let candidates = match candidates.or(filtered_candidates) {
Some(candidates) => {
Candidates::Allowed(candidates - params.excluded_candidates)
}
None => Candidates::Forbidden(params.excluded_candidates.clone()),
};
let maximum_typos = maximum_typo(&query_tree) as u8;
self.state = Some((maximum_typos, query_tree, candidates));
self.typos = 0;
},
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
},
None => return Ok(None),
let maximum_typos = maximum_typo(&query_tree) as u8;
self.state = Some((maximum_typos, query_tree, candidates));
self.typos = 0;
}
Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
}
None => return Ok(None),
},
}
}
@ -164,21 +185,19 @@ fn alterate_query_tree(
mut query_tree: Operation,
number_typos: u8,
wdcache: &mut WordDerivationsCache,
) -> Result<Operation>
{
) -> Result<Operation> {
fn recurse(
words_fst: &fst::Set<Cow<[u8]>>,
operation: &mut Operation,
number_typos: u8,
wdcache: &mut WordDerivationsCache,
) -> Result<()>
{
use Operation::{And, Phrase, Or};
) -> Result<()> {
use Operation::{And, Or, Phrase};
match operation {
And(ops) | Or(_, ops) => {
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
},
}
// Because Phrases don't allow typos, no alteration can be done.
Phrase(_words) => return Ok(()),
Operation::Query(q) => {
@ -193,19 +212,25 @@ fn alterate_query_tree(
} else {
let typo = *typo.min(&number_typos);
let words = word_derivations(word, q.prefix, typo, words_fst, wdcache)?;
let queries = words.iter().map(|(word, typo)| {
Operation::Query(Query {
prefix: false,
kind: QueryKind::Exact { original_typo: *typo, word: word.to_string() },
let queries = words
.iter()
.map(|(word, typo)| {
Operation::Query(Query {
prefix: false,
kind: QueryKind::Exact {
original_typo: *typo,
word: word.to_string(),
},
})
})
}).collect();
.collect();
*operation = Operation::or(false, queries);
}
}
Ok(())
},
}
}
}
@ -219,22 +244,18 @@ fn resolve_candidates<'t>(
number_typos: u8,
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap>
{
) -> Result<RoaringBitmap> {
fn resolve_operation<'t>(
ctx: &'t dyn Context,
query_tree: &Operation,
number_typos: u8,
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap>
{
use Operation::{And, Phrase, Or, Query};
) -> Result<RoaringBitmap> {
use Operation::{And, Or, Phrase, Query};
match query_tree {
And(ops) => {
mdfs(ctx, ops, number_typos, cache, wdcache)
},
And(ops) => mdfs(ctx, ops, number_typos, cache, wdcache),
Phrase(words) => {
let mut candidates = RoaringBitmap::new();
let mut first_loop = true;
@ -250,12 +271,12 @@ fn resolve_candidates<'t>(
} else {
candidates &= pair_docids;
}
},
None => return Ok(RoaringBitmap::new())
}
None => return Ok(RoaringBitmap::new()),
}
}
Ok(candidates)
},
}
Or(_, ops) => {
let mut candidates = RoaringBitmap::new();
for op in ops {
@ -263,12 +284,14 @@ fn resolve_candidates<'t>(
candidates.union_with(&docids);
}
Ok(candidates)
},
Query(q) => if q.kind.typo() == number_typos {
Ok(query_docids(ctx, q, wdcache)?)
} else {
Ok(RoaringBitmap::new())
},
}
Query(q) => {
if q.kind.typo() == number_typos {
Ok(query_docids(ctx, q, wdcache)?)
} else {
Ok(RoaringBitmap::new())
}
}
}
}
@ -278,8 +301,7 @@ fn resolve_candidates<'t>(
mana: u8,
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap>
{
) -> Result<RoaringBitmap> {
match branches.split_first() {
Some((head, [])) => {
let cache_key = (head.clone(), mana);
@ -290,7 +312,7 @@ fn resolve_candidates<'t>(
cache.insert(cache_key, candidates.clone());
Ok(candidates)
}
},
}
Some((head, tail)) => {
let mut candidates = RoaringBitmap::new();
@ -313,7 +335,7 @@ fn resolve_candidates<'t>(
}
Ok(candidates)
},
}
None => Ok(RoaringBitmap::new()),
}
}
@ -323,9 +345,9 @@ fn resolve_candidates<'t>(
#[cfg(test)]
mod test {
use super::*;
use super::super::initial::Initial;
use super::super::test::TestContext;
use super::*;
#[test]
fn initial_placeholder_no_facets() {
@ -348,13 +370,23 @@ mod test {
#[test]
fn initial_query_tree_no_facets() {
let context = TestContext::default();
let query_tree = Operation::Or(false, vec![
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(1, "world".to_string()) }),
])
]);
let query_tree = Operation::Or(
false,
vec![Operation::And(vec![
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("split".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("this".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::tolerant(1, "world".to_string()),
}),
])],
);
let facet_candidates = None;
@ -369,13 +401,23 @@ mod test {
& context.word_docids("this").unwrap().unwrap()
& context.word_docids("world").unwrap().unwrap();
let expected_1 = CriterionResult {
query_tree: Some(Operation::Or(false, vec![
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
]),
])),
query_tree: Some(Operation::Or(
false,
vec![Operation::And(vec![
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("split".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("this".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("world".to_string()),
}),
])],
)),
candidates: Some(candidates_1.clone()),
bucket_candidates: Some(candidates_1),
filtered_candidates: None,
@ -383,22 +425,37 @@ mod test {
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
let candidates_2 = (
context.word_docids("split").unwrap().unwrap()
& context.word_docids("this").unwrap().unwrap()
& context.word_docids("word").unwrap().unwrap()
) - context.word_docids("world").unwrap().unwrap();
let candidates_2 = (context.word_docids("split").unwrap().unwrap()
& context.word_docids("this").unwrap().unwrap()
& context.word_docids("word").unwrap().unwrap())
- context.word_docids("world").unwrap().unwrap();
let expected_2 = CriterionResult {
query_tree: Some(Operation::Or(false, vec![
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
Operation::Or(false, vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact_with_typo(1, "word".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
]),
]),
])),
query_tree: Some(Operation::Or(
false,
vec![Operation::And(vec![
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("split".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("this".to_string()),
}),
Operation::Or(
false,
vec![
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact_with_typo(1, "word".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("world".to_string()),
}),
],
),
])],
)),
candidates: Some(candidates_2.clone()),
bucket_candidates: Some(candidates_2),
filtered_candidates: None,
@ -437,17 +494,26 @@ mod test {
#[test]
fn initial_query_tree_with_facets() {
let context = TestContext::default();
let query_tree = Operation::Or(false, vec![
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(1, "world".to_string()) }),
])
]);
let query_tree = Operation::Or(
false,
vec![Operation::And(vec![
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("split".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("this".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::tolerant(1, "world".to_string()),
}),
])],
);
let facet_candidates = context.word_docids("earth").unwrap().unwrap();
let mut criterion_parameters = CriterionParameters {
wdcache: &mut WordDerivationsCache::new(),
excluded_candidates: &RoaringBitmap::new(),
@ -459,13 +525,23 @@ mod test {
& context.word_docids("this").unwrap().unwrap()
& context.word_docids("world").unwrap().unwrap();
let expected_1 = CriterionResult {
query_tree: Some(Operation::Or(false, vec![
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
]),
])),
query_tree: Some(Operation::Or(
false,
vec![Operation::And(vec![
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("split".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("this".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("world".to_string()),
}),
])],
)),
candidates: Some(&candidates_1 & &facet_candidates),
bucket_candidates: Some(&candidates_1 & &facet_candidates),
filtered_candidates: None,
@ -473,22 +549,37 @@ mod test {
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
let candidates_2 = (
context.word_docids("split").unwrap().unwrap()
& context.word_docids("this").unwrap().unwrap()
& context.word_docids("word").unwrap().unwrap()
) - context.word_docids("world").unwrap().unwrap();
let candidates_2 = (context.word_docids("split").unwrap().unwrap()
& context.word_docids("this").unwrap().unwrap()
& context.word_docids("word").unwrap().unwrap())
- context.word_docids("world").unwrap().unwrap();
let expected_2 = CriterionResult {
query_tree: Some(Operation::Or(false, vec![
Operation::And(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
Operation::Or(false, vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact_with_typo(1, "word".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
]),
]),
])),
query_tree: Some(Operation::Or(
false,
vec![Operation::And(vec![
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("split".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("this".to_string()),
}),
Operation::Or(
false,
vec![
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact_with_typo(1, "word".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("world".to_string()),
}),
],
),
])],
)),
candidates: Some(&candidates_2 & &facet_candidates),
bucket_candidates: Some(&candidates_2 & &facet_candidates),
filtered_candidates: None,

View file

@ -3,9 +3,9 @@ use std::mem::take;
use log::debug;
use roaring::RoaringBitmap;
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::query_tree::Operation;
use crate::Result;
use super::{Context, Criterion, CriterionParameters, CriterionResult, resolve_query_tree};
pub struct Words<'t> {
ctx: &'t dyn Context<'t>,
@ -44,11 +44,12 @@ impl<'t> Criterion for Words<'t> {
Some(query_tree) => {
let candidates = match self.candidates.as_mut() {
Some(allowed_candidates) => {
let mut candidates = resolve_query_tree(self.ctx, &query_tree, params.wdcache)?;
let mut candidates =
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?;
candidates &= &*allowed_candidates;
*allowed_candidates -= &candidates;
Some(candidates)
},
}
None => None,
};
@ -63,29 +64,38 @@ impl<'t> Criterion for Words<'t> {
filtered_candidates: self.filtered_candidates.clone(),
bucket_candidates,
}));
},
None => {
match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
self.query_trees = explode_query_tree(query_tree);
self.candidates = candidates;
self.filtered_candidates = filtered_candidates;
}
None => match self.parent.next(params)? {
Some(CriterionResult {
query_tree: Some(query_tree),
candidates,
filtered_candidates,
bucket_candidates,
}) => {
self.query_trees = explode_query_tree(query_tree);
self.candidates = candidates;
self.filtered_candidates = filtered_candidates;
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
self.bucket_candidates =
match (self.bucket_candidates.take(), bucket_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc),
};
},
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
},
None => return Ok(None),
}
Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
}
None => return Ok(None),
},
}
}

View file

@ -3,11 +3,11 @@ use std::mem::size_of;
use heed::types::ByteSlice;
use roaring::RoaringBitmap;
use super::{Distinct, DocIter};
use crate::error::InternalError;
use crate::heed_codec::facet::*;
use crate::index::db_name;
use crate::{DocumentId, FieldId, Index, Result};
use super::{Distinct, DocIter};
const FID_SIZE: usize = size_of::<FieldId>();
const DOCID_SIZE: usize = size_of::<DocumentId>();
@ -28,11 +28,7 @@ pub struct FacetDistinct<'a> {
impl<'a> FacetDistinct<'a> {
pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self {
Self {
distinct,
index,
txn,
}
Self { distinct, index, txn }
}
}
@ -47,16 +43,12 @@ pub struct FacetDistinctIter<'a> {
impl<'a> FacetDistinctIter<'a> {
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
self.index
.facet_id_string_docids
.get(self.txn, &(self.distinct, key))
self.index.facet_id_string_docids.get(self.txn, &(self.distinct, key))
}
fn facet_number_docids(&self, key: f64) -> heed::Result<Option<RoaringBitmap>> {
// get facet docids on level 0
self.index
.facet_id_f64_docids
.get(self.txn, &(self.distinct, 0, key, key))
self.index.facet_id_f64_docids.get(self.txn, &(self.distinct, 0, key, key))
}
fn distinct_string(&mut self, id: DocumentId) -> Result<()> {
@ -64,9 +56,8 @@ impl<'a> FacetDistinctIter<'a> {
for item in iter {
let ((_, _, value), _) = item?;
let facet_docids = self
.facet_string_docids(value)?
.ok_or(InternalError::DatabaseMissingEntry {
let facet_docids =
self.facet_string_docids(value)?.ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::FACET_ID_STRING_DOCIDS,
key: None,
})?;
@ -83,9 +74,8 @@ impl<'a> FacetDistinctIter<'a> {
for item in iter {
let ((_, _, value), _) = item?;
let facet_docids = self
.facet_number_docids(value)?
.ok_or(InternalError::DatabaseMissingEntry {
let facet_docids =
self.facet_number_docids(value)?.ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::FACET_ID_F64_DOCIDS,
key: None,
})?;

View file

@ -1,11 +1,11 @@
mod facet_distinct;
mod noop_distinct;
pub use facet_distinct::FacetDistinct;
pub use noop_distinct::NoopDistinct;
use roaring::RoaringBitmap;
use crate::{DocumentId, Result};
pub use facet_distinct::FacetDistinct;
pub use noop_distinct::NoopDistinct;
/// A trait implemented by document interators that are returned by calls to `Distinct::distinct`.
/// It provides a way to get back the ownership to the excluded set.
@ -29,13 +29,15 @@ mod test {
use std::collections::HashSet;
use once_cell::sync::Lazy;
use rand::{seq::SliceRandom, Rng};
use rand::seq::SliceRandom;
use rand::Rng;
use roaring::RoaringBitmap;
use serde_json::{json, Value};
use crate::index::{Index, tests::TempIndex};
use crate::index::tests::TempIndex;
use crate::index::Index;
use crate::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use crate::{BEU32, FieldId, DocumentId};
use crate::{DocumentId, FieldId, BEU32};
static JSON: Lazy<Value> = Lazy::new(generate_json);
@ -89,9 +91,7 @@ mod test {
addition.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
addition.update_format(UpdateFormat::Json);
addition
.execute(JSON.to_string().as_bytes(), |_, _| ())
.unwrap();
addition.execute(JSON.to_string().as_bytes(), |_, _| ()).unwrap();
let fields_map = index.fields_ids_map(&txn).unwrap();
let fid = fields_map.id(&distinct).unwrap();
@ -103,13 +103,12 @@ mod test {
(index, fid, map)
}
/// Checks that all the candidates are distinct, and returns the candidates number.
pub(crate) fn validate_distinct_candidates(
candidates: impl Iterator<Item = crate::Result<DocumentId>>,
distinct: FieldId,
index: &Index,
) -> usize {
) -> usize {
fn test(seen: &mut HashSet<String>, value: &Value) {
match value {
Value::Null | Value::Object(_) | Value::Bool(_) => (),
@ -117,7 +116,7 @@ mod test {
let s = value.to_string();
assert!(seen.insert(s));
}
Value::Array(values) => {values.into_iter().for_each(|value| test(seen, value))}
Value::Array(values) => values.into_iter().for_each(|value| test(seen, value)),
}
}

View file

@ -1,7 +1,8 @@
use roaring::{RoaringBitmap, bitmap::IntoIter};
use roaring::bitmap::IntoIter;
use roaring::RoaringBitmap;
use super::{Distinct, DocIter};
use crate::{DocumentId, Result};
use super::{DocIter, Distinct};
/// A distinct implementer that does not perform any distinct,
/// and simply returns an iterator to the candidates.
@ -30,10 +31,7 @@ impl Distinct for NoopDistinct {
type Iter = NoopDistinctIter;
fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
NoopDistinctIter {
candidates: candidates.into_iter(),
excluded,
}
NoopDistinctIter { candidates: candidates.into_iter(), excluded }
}
}

View file

@ -1,16 +1,16 @@
use std::collections::{HashSet, BTreeMap};
use std::collections::{BTreeMap, HashSet};
use std::ops::Bound::Unbounded;
use std::{cmp, fmt};
use heed::{Database, BytesDecode};
use heed::types::{ByteSlice, Unit};
use heed::{BytesDecode, Database};
use roaring::RoaringBitmap;
use crate::error::FieldIdMapMissingEntry;
use crate::facet::FacetType;
use crate::heed_codec::facet::FacetValueStringCodec;
use crate::search::facet::{FacetIter, FacetRange};
use crate::{Index, FieldId, DocumentId, Result};
use crate::{DocumentId, FieldId, Index, Result};
/// The default number of values by facets that will
/// be fetched from the key-value store.
@ -43,7 +43,7 @@ impl<'a> FacetDistribution<'a> {
}
}
pub fn facets<I: IntoIterator<Item=A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self {
pub fn facets<I: IntoIterator<Item = A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self {
self.facets = Some(names.into_iter().map(|s| s.as_ref().to_string()).collect());
self
}
@ -66,8 +66,7 @@ impl<'a> FacetDistribution<'a> {
facet_type: FacetType,
candidates: &RoaringBitmap,
distribution: &mut BTreeMap<String, u64>,
) -> heed::Result<()>
{
) -> heed::Result<()> {
fn fetch_facet_values<'t, KC, K: 't>(
rtxn: &'t heed::RoTxn,
db: Database<KC, Unit>,
@ -102,7 +101,7 @@ impl<'a> FacetDistribution<'a> {
FacetType::Number => {
let db = self.index.field_id_docid_facet_f64s;
fetch_facet_values(self.rtxn, db, field_id, candidates, distribution)
},
}
FacetType::String => {
let db = self.index.field_id_docid_facet_strings;
fetch_facet_values(self.rtxn, db, field_id, candidates, distribution)
@ -117,11 +116,9 @@ impl<'a> FacetDistribution<'a> {
field_id: FieldId,
candidates: &RoaringBitmap,
distribution: &mut BTreeMap<String, u64>,
) -> heed::Result<()>
{
let iter = FacetIter::new_non_reducing(
self.rtxn, self.index, field_id, candidates.clone(),
)?;
) -> heed::Result<()> {
let iter =
FacetIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
for result in iter {
let (value, mut docids) = result?;
@ -142,8 +139,7 @@ impl<'a> FacetDistribution<'a> {
fn facet_values_from_raw_facet_database(
&self,
field_id: FieldId,
) -> heed::Result<BTreeMap<String, u64>>
{
) -> heed::Result<BTreeMap<String, u64>> {
let mut distribution = BTreeMap::new();
let db = self.index.facet_id_f64_docids;
@ -157,7 +153,8 @@ impl<'a> FacetDistribution<'a> {
}
}
let iter = self.index
let iter = self
.index
.facet_id_string_docids
.remap_key_type::<ByteSlice>()
.prefix_iter(self.rtxn, &[field_id])?
@ -182,11 +179,30 @@ impl<'a> FacetDistribution<'a> {
// to those candidates. We also enter here for facet strings for performance reasons.
let mut distribution = BTreeMap::new();
if candidates.len() <= CANDIDATES_THRESHOLD {
self.facet_distribution_from_documents(field_id, Number, candidates, &mut distribution)?;
self.facet_distribution_from_documents(field_id, String, candidates, &mut distribution)?;
self.facet_distribution_from_documents(
field_id,
Number,
candidates,
&mut distribution,
)?;
self.facet_distribution_from_documents(
field_id,
String,
candidates,
&mut distribution,
)?;
} else {
self.facet_numbers_distribution_from_facet_levels(field_id, candidates, &mut distribution)?;
self.facet_distribution_from_documents(field_id, String, candidates, &mut distribution)?;
self.facet_numbers_distribution_from_facet_levels(
field_id,
candidates,
&mut distribution,
)?;
self.facet_distribution_from_documents(
field_id,
String,
candidates,
&mut distribution,
)?;
}
Ok(distribution)
@ -201,10 +217,11 @@ impl<'a> FacetDistribution<'a> {
let mut distribution = BTreeMap::new();
for name in filterable_fields {
let fid = fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
field_name: name.clone(),
process: "FacetDistribution::execute",
})?;
let fid =
fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
field_name: name.clone(),
process: "FacetDistribution::execute",
})?;
let values = self.facet_values(fid)?;
distribution.insert(name, values);
}
@ -215,13 +232,7 @@ impl<'a> FacetDistribution<'a> {
impl fmt::Debug for FacetDistribution<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let FacetDistribution {
facets,
candidates,
max_values_by_facet,
rtxn: _,
index: _,
} = self;
let FacetDistribution { facets, candidates, max_values_by_facet, rtxn: _, index: _ } = self;
f.debug_struct("FacetDistribution")
.field("facets", facets)

View file

@ -1,6 +1,6 @@
use std::collections::HashSet;
use std::fmt::Debug;
use std::ops::Bound::{self, Included, Excluded};
use std::ops::Bound::{self, Excluded, Included};
use std::result::Result as StdResult;
use std::str::FromStr;
@ -12,16 +12,13 @@ use pest::iterators::{Pair, Pairs};
use pest::Parser;
use roaring::RoaringBitmap;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec, Result};
use super::FacetRange;
use super::parser::Rule;
use super::parser::{PREC_CLIMBER, FilterParser};
use self::FilterCondition::*;
use self::Operator::*;
use super::parser::{FilterParser, Rule, PREC_CLIMBER};
use super::FacetRange;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetValueStringCodec};
use crate::{CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result};
#[derive(Debug, Clone, PartialEq)]
pub enum Operator {
@ -39,13 +36,13 @@ impl Operator {
/// an OR operation for the between case (i.e. `TO`).
fn negate(self) -> (Self, Option<Self>) {
match self {
GreaterThan(n) => (LowerThanOrEqual(n), None),
GreaterThan(n) => (LowerThanOrEqual(n), None),
GreaterThanOrEqual(n) => (LowerThan(n), None),
Equal(n, s) => (NotEqual(n, s), None),
NotEqual(n, s) => (Equal(n, s), None),
LowerThan(n) => (GreaterThanOrEqual(n), None),
LowerThanOrEqual(n) => (GreaterThan(n), None),
Between(n, m) => (LowerThan(n), Some(GreaterThan(m))),
Equal(n, s) => (NotEqual(n, s), None),
NotEqual(n, s) => (Equal(n, s), None),
LowerThan(n) => (GreaterThanOrEqual(n), None),
LowerThanOrEqual(n) => (GreaterThan(n), None),
Between(n, m) => (LowerThan(n), Some(GreaterThan(m))),
}
}
}
@ -63,10 +60,11 @@ impl FilterCondition {
index: &Index,
array: I,
) -> Result<Option<FilterCondition>>
where I: IntoIterator<Item=Either<J, B>>,
J: IntoIterator<Item=A>,
A: AsRef<str>,
B: AsRef<str>,
where
I: IntoIterator<Item = Either<J, B>>,
J: IntoIterator<Item = A>,
A: AsRef<str>,
B: AsRef<str>,
{
let mut ands = None;
@ -88,7 +86,7 @@ impl FilterCondition {
None => Some(rule),
};
}
},
}
Either::Right(rule) => {
let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?;
ands = match ands.take() {
@ -106,11 +104,11 @@ impl FilterCondition {
rtxn: &heed::RoTxn,
index: &Index,
expression: &str,
) -> Result<FilterCondition>
{
) -> Result<FilterCondition> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_fields = index.filterable_fields_ids(rtxn)?;
let lexed = FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?;
let lexed =
FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?;
FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed)
}
@ -118,8 +116,7 @@ impl FilterCondition {
fim: &FieldsIdsMap,
ff: &HashSet<FieldId>,
expression: Pairs<Rule>,
) -> Result<Self>
{
) -> Result<Self> {
PREC_CLIMBER.climb(
expression,
|pair: Pair<Rule>| match pair.as_rule() {
@ -135,12 +132,10 @@ impl FilterCondition {
Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
_ => unreachable!(),
},
|lhs: Result<Self>, op: Pair<Rule>, rhs: Result<Self>| {
match op.as_rule() {
Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))),
Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))),
_ => unreachable!(),
}
|lhs: Result<Self>, op: Pair<Rule>, rhs: Result<Self>| match op.as_rule() {
Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))),
Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))),
_ => unreachable!(),
},
)
}
@ -160,8 +155,7 @@ impl FilterCondition {
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>,
) -> Result<FilterCondition>
{
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
@ -179,8 +173,7 @@ impl FilterCondition {
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>,
) -> Result<FilterCondition>
{
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
@ -196,8 +189,7 @@ impl FilterCondition {
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>,
) -> Result<FilterCondition>
{
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
@ -213,8 +205,7 @@ impl FilterCondition {
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>,
) -> Result<FilterCondition>
{
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
@ -230,8 +221,7 @@ impl FilterCondition {
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>,
) -> Result<FilterCondition>
{
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
@ -247,8 +237,7 @@ impl FilterCondition {
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>,
) -> Result<FilterCondition>
{
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
@ -272,13 +261,14 @@ impl FilterCondition {
left: Bound<f64>,
right: Bound<f64>,
output: &mut RoaringBitmap,
) -> Result<()>
{
) -> Result<()> {
match (left, right) {
// If the request is an exact value we must go directly to the deepest level.
(Included(l), Included(r)) if l == r && level > 0 => {
return Self::explore_facet_number_levels(rtxn, db, field_id, 0, left, right, output);
},
return Self::explore_facet_number_levels(
rtxn, db, field_id, 0, left, right, output,
);
}
// lower TO upper when lower > upper must return no result
(Included(l), Included(r)) if l > r => return Ok(()),
(Included(l), Excluded(r)) if l >= r => return Ok(()),
@ -301,7 +291,9 @@ impl FilterCondition {
debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
output.union_with(&docids);
// We save the leftest and rightest bounds we actually found at this level.
if i == 0 { left_found = Some(l); }
if i == 0 {
left_found = Some(l);
}
right_found = Some(r);
}
@ -318,20 +310,50 @@ impl FilterCondition {
// If the bound is satisfied we avoid calling this function again.
if !matches!(left, Included(l) if l == left_found) {
let sub_right = Excluded(left_found);
debug!("calling left with {:?} to {:?} (level {})", left, sub_right, deeper_level);
Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, left, sub_right, output)?;
debug!(
"calling left with {:?} to {:?} (level {})",
left, sub_right, deeper_level
);
Self::explore_facet_number_levels(
rtxn,
db,
field_id,
deeper_level,
left,
sub_right,
output,
)?;
}
if !matches!(right, Included(r) if r == right_found) {
let sub_left = Excluded(right_found);
debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level);
Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, sub_left, right, output)?;
debug!(
"calling right with {:?} to {:?} (level {})",
sub_left, right, deeper_level
);
Self::explore_facet_number_levels(
rtxn,
db,
field_id,
deeper_level,
sub_left,
right,
output,
)?;
}
},
}
None => {
// If we found nothing at this level it means that we must find
// the same bounds but at a deeper, more precise level.
Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, left, right, output)?;
},
Self::explore_facet_number_levels(
rtxn,
db,
field_id,
deeper_level,
left,
right,
output,
)?;
}
}
Ok(())
@ -344,27 +366,34 @@ impl FilterCondition {
strings_db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
field_id: FieldId,
operator: &Operator,
) -> Result<RoaringBitmap>
{
) -> Result<RoaringBitmap> {
// Make sure we always bound the ranges with the field id and the level,
// as the facets values are all in the same database and prefixed by the
// field id and the level.
let (left, right) = match operator {
GreaterThan(val) => (Excluded(*val), Included(f64::MAX)),
GreaterThan(val) => (Excluded(*val), Included(f64::MAX)),
GreaterThanOrEqual(val) => (Included(*val), Included(f64::MAX)),
Equal(number, string) => {
Equal(number, string) => {
let string_docids = strings_db.get(rtxn, &(field_id, &string))?.unwrap_or_default();
let number_docids = match number {
Some(n) => {
let n = Included(*n);
let mut output = RoaringBitmap::new();
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, 0, n, n, &mut output)?;
Self::explore_facet_number_levels(
rtxn,
numbers_db,
field_id,
0,
n,
n,
&mut output,
)?;
output
},
}
None => RoaringBitmap::new(),
};
return Ok(string_docids | number_docids);
},
}
NotEqual(number, string) => {
let all_numbers_ids = if number.is_some() {
index.number_faceted_documents_ids(rtxn, field_id)?
@ -373,12 +402,14 @@ impl FilterCondition {
};
let all_strings_ids = index.string_faceted_documents_ids(rtxn, field_id)?;
let operator = Equal(*number, string.clone());
let docids = Self::evaluate_operator(rtxn, index, numbers_db, strings_db, field_id, &operator)?;
let docids = Self::evaluate_operator(
rtxn, index, numbers_db, strings_db, field_id, &operator,
)?;
return Ok((all_numbers_ids | all_strings_ids) - docids);
},
LowerThan(val) => (Included(f64::MIN), Excluded(*val)),
}
LowerThan(val) => (Included(f64::MIN), Excluded(*val)),
LowerThanOrEqual(val) => (Included(f64::MIN), Included(*val)),
Between(left, right) => (Included(*left), Included(*right)),
Between(left, right) => (Included(*left), Included(*right)),
};
// Ask for the biggest value that can exist for this specific field, if it exists
@ -391,36 +422,39 @@ impl FilterCondition {
match biggest_level {
Some(level) => {
let mut output = RoaringBitmap::new();
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, level, left, right, &mut output)?;
Self::explore_facet_number_levels(
rtxn,
numbers_db,
field_id,
level,
left,
right,
&mut output,
)?;
Ok(output)
},
}
None => Ok(RoaringBitmap::new()),
}
}
pub fn evaluate(
&self,
rtxn: &heed::RoTxn,
index: &Index,
) -> Result<RoaringBitmap>
{
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
let strings_db = index.facet_id_string_docids;
match self {
Operator(fid, op) => {
Self::evaluate_operator(rtxn, index, numbers_db, strings_db, *fid, op)
},
}
Or(lhs, rhs) => {
let lhs = lhs.evaluate(rtxn, index)?;
let rhs = rhs.evaluate(rtxn, index)?;
Ok(lhs | rhs)
},
}
And(lhs, rhs) => {
let lhs = lhs.evaluate(rtxn, index)?;
let rhs = rhs.evaluate(rtxn, index)?;
Ok(lhs & rhs)
},
}
}
}
}
@ -434,23 +468,24 @@ fn field_id(
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
items: &mut Pairs<Rule>,
) -> StdResult<FieldId, PestError<Rule>>
{
) -> StdResult<FieldId, PestError<Rule>> {
// lexing ensures that we at least have a key
let key = items.next().unwrap();
let field_id = match fields_ids_map.id(key.as_str()) {
Some(field_id) => field_id,
None => return Err(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"attribute `{}` not found, available attributes are: {}",
key.as_str(),
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", "),
),
},
key.as_span(),
)),
None => {
return Err(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"attribute `{}` not found, available attributes are: {}",
key.as_str(),
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", "),
),
},
key.as_span(),
))
}
};
if !filterable_fields.contains(&field_id) {
@ -459,9 +494,11 @@ fn field_id(
message: format!(
"attribute `{}` is not filterable, available filterable attributes are: {}",
key.as_str(),
filterable_fields.iter().flat_map(|id| {
fields_ids_map.name(*id)
}).collect::<Vec<_>>().join(", "),
filterable_fields
.iter()
.flat_map(|id| { fields_ids_map.name(*id) })
.collect::<Vec<_>>()
.join(", "),
),
},
key.as_span(),
@ -476,8 +513,9 @@ fn field_id(
///
/// Returns the parsing error associated with the span if the conversion fails.
fn pest_parse<T>(pair: Pair<Rule>) -> (StdResult<T, pest::error::Error<Rule>>, String)
where T: FromStr,
T::Err: ToString,
where
T: FromStr,
T::Err: ToString,
{
let result = match pair.as_str().parse::<T>() {
Ok(value) => Ok(value),
@ -492,11 +530,12 @@ where T: FromStr,
#[cfg(test)]
mod tests {
use super::*;
use crate::update::Settings;
use big_s::S;
use heed::EnvOpenOptions;
use maplit::hashset;
use big_s::S;
use super::*;
use crate::update::Settings;
#[test]
fn string() {
@ -508,7 +547,7 @@ mod tests {
// Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_filterable_fields(hashset!{ S("channel") });
builder.set_filterable_fields(hashset! { S("channel") });
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();
@ -537,7 +576,7 @@ mod tests {
// Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_filterable_fields(hashset!{ "timestamp".into() });
builder.set_filterable_fields(hashset! { "timestamp".into() });
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();
@ -548,10 +587,8 @@ mod tests {
assert_eq!(condition, expected);
let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
let expected = Or(
Box::new(Operator(0, LowerThan(22.0))),
Box::new(Operator(0, GreaterThan(44.0))),
);
let expected =
Or(Box::new(Operator(0, LowerThan(22.0))), Box::new(Operator(0, GreaterThan(44.0))));
assert_eq!(condition, expected);
}
@ -566,29 +603,33 @@ mod tests {
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order
builder.set_filterable_fields(hashset!{ S("channel"), S("timestamp") });
builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") });
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();
// Test that the facet condition is correctly generated.
let rtxn = index.read_txn().unwrap();
let condition = FilterCondition::from_str(
&rtxn, &index,
&rtxn,
&index,
"channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)",
).unwrap();
)
.unwrap();
let expected = Or(
Box::new(Operator(0, Operator::Equal(None, S("gotaga")))),
Box::new(And(
Box::new(Operator(1, Between(22.0, 44.0))),
Box::new(Operator(0, Operator::NotEqual(None, S("ponce")))),
))
)),
);
assert_eq!(condition, expected);
let condition = FilterCondition::from_str(
&rtxn, &index,
&rtxn,
&index,
"channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)",
).unwrap();
)
.unwrap();
let expected = Or(
Box::new(Operator(0, Operator::Equal(None, S("gotaga")))),
Box::new(Or(
@ -613,20 +654,28 @@ mod tests {
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order
builder.set_filterable_fields(hashset!{ S("channel"), S("timestamp") });
builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") });
builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap();
// Test that the facet condition is correctly generated.
let rtxn = index.read_txn().unwrap();
let condition = FilterCondition::from_array(
&rtxn, &index,
vec![Either::Right("channel = gotaga"), Either::Left(vec!["timestamp = 44", "channel != ponce"])],
).unwrap().unwrap();
&rtxn,
&index,
vec![
Either::Right("channel = gotaga"),
Either::Left(vec!["timestamp = 44", "channel != ponce"]),
],
)
.unwrap()
.unwrap();
let expected = FilterCondition::from_str(
&rtxn, &index,
&rtxn,
&index,
"channel = gotaga AND (timestamp = 44 OR channel != ponce)",
).unwrap();
)
.unwrap();
assert_eq!(condition, expected);
}
}

View file

@ -1,20 +1,19 @@
use std::ops::Bound::{self, Included, Excluded, Unbounded};
use std::ops::Bound::{self, Excluded, Included, Unbounded};
use either::Either::{self, Left, Right};
use heed::types::{DecodeIgnore, ByteSlice};
use heed::{Database, RoRange, RoRevRange, LazyDecode};
use heed::types::{ByteSlice, DecodeIgnore};
use heed::{Database, LazyDecode, RoRange, RoRevRange};
use roaring::RoaringBitmap;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::heed_codec::facet::FacetLevelValueF64Codec;
use crate::{Index, FieldId};
pub use self::facet_distribution::FacetDistribution;
pub use self::filter_condition::{FilterCondition, Operator};
pub(crate) use self::parser::Rule as ParserRule;
use crate::heed_codec::facet::FacetLevelValueF64Codec;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::{FieldId, Index};
mod filter_condition;
mod facet_distribution;
mod filter_condition;
mod parser;
pub struct FacetRange<'t> {
@ -30,8 +29,7 @@ impl<'t> FacetRange<'t> {
level: u8,
left: Bound<f64>,
right: Bound<f64>,
) -> heed::Result<FacetRange<'t>>
{
) -> heed::Result<FacetRange<'t>> {
let left_bound = match left {
Included(left) => Included((field_id, level, left, f64::MIN)),
Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
@ -62,7 +60,7 @@ impl<'t> Iterator for FacetRange<'t> {
} else {
None
}
},
}
Some(Err(e)) => Some(Err(e)),
None => None,
}
@ -82,8 +80,7 @@ impl<'t> FacetRevRange<'t> {
level: u8,
left: Bound<f64>,
right: Bound<f64>,
) -> heed::Result<FacetRevRange<'t>>
{
) -> heed::Result<FacetRevRange<'t>> {
let left_bound = match left {
Included(left) => Included((field_id, level, left, f64::MIN)),
Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
@ -114,7 +111,7 @@ impl<'t> Iterator for FacetRevRange<'t> {
}
}
continue;
},
}
Some(Err(e)) => return Some(Err(e)),
None => return None,
}
@ -139,11 +136,11 @@ impl<'t> FacetIter<'t> {
index: &'t Index,
field_id: FieldId,
documents_ids: RoaringBitmap,
) -> heed::Result<FacetIter<'t>>
{
) -> heed::Result<FacetIter<'t>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
let highest_iter =
FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
let level_iters = vec![(documents_ids, Left(highest_iter))];
Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true })
}
@ -156,11 +153,11 @@ impl<'t> FacetIter<'t> {
index: &'t Index,
field_id: FieldId,
documents_ids: RoaringBitmap,
) -> heed::Result<FacetIter<'t>>
{
) -> heed::Result<FacetIter<'t>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
let highest_iter =
FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
let level_iters = vec![(documents_ids, Right(highest_iter))];
Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true })
}
@ -174,11 +171,11 @@ impl<'t> FacetIter<'t> {
index: &'t Index,
field_id: FieldId,
documents_ids: RoaringBitmap,
) -> heed::Result<FacetIter<'t>>
{
) -> heed::Result<FacetIter<'t>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
let highest_iter =
FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
let level_iters = vec![(documents_ids, Left(highest_iter))];
Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: false })
}
@ -187,12 +184,13 @@ impl<'t> FacetIter<'t> {
rtxn: &'t heed::RoTxn,
db: Database<FacetLevelValueF64Codec, X>,
fid: FieldId,
) -> heed::Result<Option<u8>>
{
let level = db.remap_types::<ByteSlice, DecodeIgnore>()
) -> heed::Result<Option<u8>> {
let level = db
.remap_types::<ByteSlice, DecodeIgnore>()
.prefix_iter(rtxn, &[fid][..])?
.remap_key_type::<FacetLevelValueF64Codec>()
.last().transpose()?
.last()
.transpose()?
.map(|((_, level, _, _), _)| level);
Ok(level)
}
@ -215,7 +213,6 @@ impl<'t> Iterator for FacetIter<'t> {
match result {
Ok(((_fid, level, left, right), mut docids)) => {
docids.intersect_with(&documents_ids);
if !docids.is_empty() {
if self.must_reduce {
@ -242,11 +239,11 @@ impl<'t> Iterator for FacetIter<'t> {
Ok(iter) => {
self.level_iters.push((docids, iter));
continue 'outer;
},
}
Err(e) => return Some(Err(e)),
}
}
},
}
Err(e) => return Some(Err(e)),
}
}

View file

@ -1,5 +1,5 @@
use once_cell::sync::Lazy;
use pest::prec_climber::{Operator, Assoc, PrecClimber};
use pest::prec_climber::{Assoc, Operator, PrecClimber};
pub static PREC_CLIMBER: Lazy<PrecClimber<Rule>> = Lazy::new(|| {
use Assoc::*;

View file

@ -1,13 +1,11 @@
use std::collections::HashSet;
use std::cmp::{min, Reverse};
use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashSet};
use std::ops::{Index, IndexMut};
use levenshtein_automata::{DFA, Distance};
use crate::search::query_tree::{Operation, Query};
use levenshtein_automata::{Distance, DFA};
use super::build_dfa;
use crate::search::query_tree::{Operation, Query};
type IsPrefix = bool;
@ -28,7 +26,9 @@ impl MatchingWords {
.collect();
// Sort word by len in DESC order prioritizing the longuest word,
// in order to highlight the longuest part of the matched word.
dfas.sort_unstable_by_key(|(_dfa, query_word, _typo, _is_prefix)| Reverse(query_word.len()));
dfas.sort_unstable_by_key(|(_dfa, query_word, _typo, _is_prefix)| {
Reverse(query_word.len())
});
Self { dfas }
}
@ -37,12 +37,13 @@ impl MatchingWords {
self.dfas.iter().find_map(|(dfa, query_word, typo, is_prefix)| match dfa.eval(word) {
Distance::Exact(t) if t <= *typo => {
if *is_prefix {
let (_dist, len) = prefix_damerau_levenshtein(query_word.as_bytes(), word.as_bytes());
let (_dist, len) =
prefix_damerau_levenshtein(query_word.as_bytes(), word.as_bytes());
Some(len)
} else {
Some(word.len())
}
},
}
_otherwise => None,
})
}
@ -54,11 +55,11 @@ fn fetch_queries(tree: &Operation) -> HashSet<(&str, u8, IsPrefix)> {
match tree {
Operation::Or(_, ops) | Operation::And(ops) => {
ops.as_slice().iter().for_each(|op| resolve_ops(op, out));
},
}
Operation::Query(Query { prefix, kind }) => {
let typo = if kind.is_exact() { 0 } else { kind.typo() };
out.insert((kind.word(), typo, *prefix));
},
}
Operation::Phrase(words) => {
for word in words {
out.insert((word, 0, false));
@ -80,10 +81,7 @@ struct N2Array<T> {
impl<T: Clone> N2Array<T> {
fn new(x: usize, y: usize, value: T) -> N2Array<T> {
N2Array {
y_size: y,
buf: vec![value; x * y],
}
N2Array { y_size: y, buf: vec![value; x * y] }
}
}
@ -178,9 +176,8 @@ fn prefix_damerau_levenshtein(source: &[u8], target: &[u8]) -> (u32, usize) {
#[cfg(test)]
mod tests {
use super::*;
use crate::MatchingWords;
use crate::search::query_tree::{Operation, Query, QueryKind};
use crate::MatchingWords;
#[test]
fn matched_length() {
@ -194,13 +191,23 @@ mod tests {
#[test]
fn matching_words() {
let query_tree = Operation::Or(false, vec![
Operation::And(vec![
Operation::Query(Query { prefix: true, kind: QueryKind::exact("split".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
Operation::Query(Query { prefix: true, kind: QueryKind::tolerant(1, "world".to_string()) }),
]),
]);
let query_tree = Operation::Or(
false,
vec![Operation::And(vec![
Operation::Query(Query {
prefix: true,
kind: QueryKind::exact("split".to_string()),
}),
Operation::Query(Query {
prefix: false,
kind: QueryKind::exact("this".to_string()),
}),
Operation::Query(Query {
prefix: true,
kind: QueryKind::tolerant(1, "world".to_string()),
}),
])],
);
let matching_words = MatchingWords::from_query_tree(&query_tree);

View file

@ -6,6 +6,7 @@ use std::result::Result as StdResult;
use std::str::Utf8Error;
use std::time::Instant;
use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::debug;
@ -13,16 +14,13 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub(crate) use self::facet::ParserRule;
pub use self::facet::{FacetDistribution, FacetIter, FilterCondition, Operator};
pub use self::matching_words::MatchingWords;
use self::query_tree::QueryTreeBuilder;
use crate::error::FieldIdMapMissingEntry;
use crate::search::criteria::r#final::{Final, FinalResult};
use crate::{Index, DocumentId, Result};
pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator};
pub use self::matching_words::MatchingWords;
pub(crate) use self::facet::ParserRule;
use self::query_tree::QueryTreeBuilder;
use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
use crate::{DocumentId, Index, Result};
// Building these factories is not free.
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
@ -32,8 +30,8 @@ static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
mod criteria;
mod distinct;
mod facet;
mod query_tree;
mod matching_words;
mod query_tree;
pub struct Search<'a> {
query: Option<String>,
@ -117,7 +115,7 @@ impl<'a> Search<'a> {
let result = analyzer.analyze(query);
let tokens = result.tokens();
builder.build(tokens)?.map_or((None, None), |(qt, pq)| (Some(qt), Some(pq)))
},
}
None => (None, None),
};
@ -144,10 +142,11 @@ impl<'a> Search<'a> {
None => self.perform_sort(NoopDistinct, matching_words, criteria),
Some(name) => {
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
let id = field_ids_map.id(name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
field_name: name.to_string(),
process: "distinct attribute",
})?;
let id =
field_ids_map.id(name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
field_name: name.to_string(),
process: "distinct attribute",
})?;
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
self.perform_sort(distinct, matching_words, criteria)
}
@ -159,14 +158,15 @@ impl<'a> Search<'a> {
mut distinct: D,
matching_words: MatchingWords,
mut criteria: Final,
) -> Result<SearchResult>
{
) -> Result<SearchResult> {
let mut offset = self.offset;
let mut initial_candidates = RoaringBitmap::new();
let mut excluded_candidates = RoaringBitmap::new();
let mut documents_ids = Vec::with_capacity(self.limit);
while let Some(FinalResult { candidates, bucket_candidates, .. }) = criteria.next(&excluded_candidates)? {
while let Some(FinalResult { candidates, bucket_candidates, .. }) =
criteria.next(&excluded_candidates)?
{
debug!("Number of candidates found {}", candidates.len());
let excluded = take(&mut excluded_candidates);
@ -183,7 +183,9 @@ impl<'a> Search<'a> {
for candidate in candidates.by_ref().take(self.limit - documents_ids.len()) {
documents_ids.push(candidate?);
}
if documents_ids.len() == self.limit { break }
if documents_ids.len() == self.limit {
break;
}
excluded_candidates = candidates.into_excluded();
}
@ -247,7 +249,7 @@ pub fn word_derivations<'c>(
}
Ok(entry.insert(derived_words))
},
}
}
}

File diff suppressed because it is too large Load diff