mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Simplify word level position DB into a word position DB
This commit is contained in:
parent
75d341d928
commit
3296bb243c
18 changed files with 220 additions and 545 deletions
|
@ -10,7 +10,7 @@ use super::{resolve_query_tree, Context, Criterion, CriterionParameters, Criteri
|
|||
use crate::search::criteria::Query;
|
||||
use crate::search::query_tree::{Operation, QueryKind};
|
||||
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
|
||||
use crate::{Result, TreeLevel};
|
||||
use crate::Result;
|
||||
|
||||
/// To be able to divide integers by the number of words in the query
|
||||
/// we want to find a multiplier that allows us to divide by any number between 1 and 10.
|
||||
|
@ -176,20 +176,14 @@ impl<'t> Criterion for Attribute<'t> {
|
|||
}
|
||||
}
|
||||
|
||||
/// QueryLevelIterator is an pseudo-Iterator for a Query,
|
||||
/// It contains WordLevelIterators and is chainned with other QueryLevelIterator.
|
||||
struct QueryLevelIterator<'t> {
|
||||
inner: Vec<
|
||||
Peekable<
|
||||
Box<
|
||||
dyn Iterator<Item = heed::Result<((&'t str, TreeLevel, u32, u32), RoaringBitmap)>>
|
||||
+ 't,
|
||||
>,
|
||||
>,
|
||||
>,
|
||||
/// QueryPositionIterator is an Iterator over positions of a Query,
|
||||
/// It contains iterators over word positions.
|
||||
struct QueryPositionIterator<'t> {
|
||||
inner:
|
||||
Vec<Peekable<Box<dyn Iterator<Item = heed::Result<((&'t str, u32), RoaringBitmap)>> + 't>>>,
|
||||
}
|
||||
|
||||
impl<'t> QueryLevelIterator<'t> {
|
||||
impl<'t> QueryPositionIterator<'t> {
|
||||
fn new(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
queries: &[Query],
|
||||
|
@ -201,25 +195,14 @@ impl<'t> QueryLevelIterator<'t> {
|
|||
match &query.kind {
|
||||
QueryKind::Exact { word, .. } => {
|
||||
if !query.prefix || in_prefix_cache {
|
||||
let iter = ctx.word_position_iterator(
|
||||
query.kind.word(),
|
||||
TreeLevel::min_value(),
|
||||
in_prefix_cache,
|
||||
None,
|
||||
None,
|
||||
)?;
|
||||
let iter =
|
||||
ctx.word_position_iterator(query.kind.word(), in_prefix_cache)?;
|
||||
|
||||
inner.push(iter.peekable());
|
||||
} else {
|
||||
for (word, _) in word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?
|
||||
{
|
||||
let iter = ctx.word_position_iterator(
|
||||
&word,
|
||||
TreeLevel::min_value(),
|
||||
in_prefix_cache,
|
||||
None,
|
||||
None,
|
||||
)?;
|
||||
let iter = ctx.word_position_iterator(&word, in_prefix_cache)?;
|
||||
|
||||
inner.push(iter.peekable());
|
||||
}
|
||||
|
@ -229,13 +212,7 @@ impl<'t> QueryLevelIterator<'t> {
|
|||
for (word, _) in
|
||||
word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?
|
||||
{
|
||||
let iter = ctx.word_position_iterator(
|
||||
&word,
|
||||
TreeLevel::min_value(),
|
||||
in_prefix_cache,
|
||||
None,
|
||||
None,
|
||||
)?;
|
||||
let iter = ctx.word_position_iterator(&word, in_prefix_cache)?;
|
||||
|
||||
inner.push(iter.peekable());
|
||||
}
|
||||
|
@ -247,7 +224,7 @@ impl<'t> QueryLevelIterator<'t> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'t> Iterator for QueryLevelIterator<'t> {
|
||||
impl<'t> Iterator for QueryPositionIterator<'t> {
|
||||
type Item = heed::Result<(u32, RoaringBitmap)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
|
@ -256,14 +233,14 @@ impl<'t> Iterator for QueryLevelIterator<'t> {
|
|||
.inner
|
||||
.iter_mut()
|
||||
.filter_map(|wli| match wli.peek() {
|
||||
Some(Ok(((_, _, pos, _), _))) => Some(*pos),
|
||||
Some(Ok(((_, pos), _))) => Some(*pos),
|
||||
_ => None,
|
||||
})
|
||||
.min()?;
|
||||
|
||||
let mut candidates = None;
|
||||
for wli in self.inner.iter_mut() {
|
||||
if let Some(Ok(((_, _, pos, _), _))) = wli.peek() {
|
||||
if let Some(Ok(((_, pos), _))) = wli.peek() {
|
||||
if *pos > expected_pos {
|
||||
continue;
|
||||
}
|
||||
|
@ -286,9 +263,9 @@ impl<'t> Iterator for QueryLevelIterator<'t> {
|
|||
}
|
||||
|
||||
/// A Branch represents a possible alternative of the original query and is built with the Query Tree,
|
||||
/// This branch allows us to iterate over meta-interval of position and to dig in it if it contains interesting candidates.
|
||||
/// This branch allows us to iterate over meta-interval of positions.
|
||||
struct Branch<'t> {
|
||||
query_level_iterator: Vec<(u32, RoaringBitmap, Peekable<QueryLevelIterator<'t>>)>,
|
||||
query_level_iterator: Vec<(u32, RoaringBitmap, Peekable<QueryPositionIterator<'t>>)>,
|
||||
last_result: (u32, RoaringBitmap),
|
||||
branch_size: u32,
|
||||
}
|
||||
|
@ -302,7 +279,7 @@ impl<'t> Branch<'t> {
|
|||
) -> Result<Self> {
|
||||
let mut query_level_iterator = Vec::new();
|
||||
for queries in flatten_branch {
|
||||
let mut qli = QueryLevelIterator::new(ctx, queries, wdcache)?.peekable();
|
||||
let mut qli = QueryPositionIterator::new(ctx, queries, wdcache)?.peekable();
|
||||
let (pos, docids) = qli.next().transpose()?.unwrap_or((0, RoaringBitmap::new()));
|
||||
query_level_iterator.push((pos, docids & allowed_candidates, qli));
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@ use crate::search::criteria::{
|
|||
resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
|
||||
};
|
||||
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
|
||||
use crate::{Result, TreeLevel};
|
||||
use crate::Result;
|
||||
|
||||
pub struct Exactness<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
|
@ -293,7 +293,6 @@ fn attribute_start_with_docids(
|
|||
attribute_id: u32,
|
||||
query: &[ExactQueryPart],
|
||||
) -> heed::Result<Vec<RoaringBitmap>> {
|
||||
let lowest_level = TreeLevel::min_value();
|
||||
let mut attribute_candidates_array = Vec::new();
|
||||
// start from attribute first position
|
||||
let mut pos = attribute_id * 1000;
|
||||
|
@ -303,7 +302,7 @@ fn attribute_start_with_docids(
|
|||
Synonyms(synonyms) => {
|
||||
let mut synonyms_candidates = RoaringBitmap::new();
|
||||
for word in synonyms {
|
||||
let wc = ctx.word_level_position_docids(word, lowest_level, pos, pos)?;
|
||||
let wc = ctx.word_position_docids(word, pos)?;
|
||||
if let Some(word_candidates) = wc {
|
||||
synonyms_candidates |= word_candidates;
|
||||
}
|
||||
|
@ -313,7 +312,7 @@ fn attribute_start_with_docids(
|
|||
}
|
||||
Phrase(phrase) => {
|
||||
for word in phrase {
|
||||
let wc = ctx.word_level_position_docids(word, lowest_level, pos, pos)?;
|
||||
let wc = ctx.word_position_docids(word, pos)?;
|
||||
if let Some(word_candidates) = wc {
|
||||
attribute_candidates_array.push(word_candidates);
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ use self::words::Words;
|
|||
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
||||
use crate::search::criteria::geo::Geo;
|
||||
use crate::search::{word_derivations, WordDerivationsCache};
|
||||
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result, TreeLevel};
|
||||
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
|
||||
|
||||
mod asc_desc;
|
||||
mod attribute;
|
||||
|
@ -90,20 +90,8 @@ pub trait Context<'c> {
|
|||
fn word_position_iterator(
|
||||
&self,
|
||||
word: &str,
|
||||
level: TreeLevel,
|
||||
in_prefix_cache: bool,
|
||||
left: Option<u32>,
|
||||
right: Option<u32>,
|
||||
) -> heed::Result<
|
||||
Box<
|
||||
dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c,
|
||||
>,
|
||||
>;
|
||||
fn word_position_last_level(
|
||||
&self,
|
||||
word: &str,
|
||||
in_prefix_cache: bool,
|
||||
) -> heed::Result<Option<TreeLevel>>;
|
||||
) -> heed::Result<Box<dyn Iterator<Item = heed::Result<((&'c str, u32), RoaringBitmap)>> + 'c>>;
|
||||
fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>>;
|
||||
fn searchable_fields_ids(&self) -> Result<Vec<FieldId>>;
|
||||
fn field_id_word_count_docids(
|
||||
|
@ -111,13 +99,7 @@ pub trait Context<'c> {
|
|||
field_id: FieldId,
|
||||
word_count: u8,
|
||||
) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_level_position_docids(
|
||||
&self,
|
||||
word: &str,
|
||||
level: TreeLevel,
|
||||
left: u32,
|
||||
right: u32,
|
||||
) -> heed::Result<Option<RoaringBitmap>>;
|
||||
fn word_position_docids(&self, word: &str, pos: u32) -> heed::Result<Option<RoaringBitmap>>;
|
||||
}
|
||||
|
||||
pub struct CriteriaBuilder<'t> {
|
||||
|
@ -183,54 +165,24 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||
fn word_position_iterator(
|
||||
&self,
|
||||
word: &str,
|
||||
level: TreeLevel,
|
||||
in_prefix_cache: bool,
|
||||
left: Option<u32>,
|
||||
right: Option<u32>,
|
||||
) -> heed::Result<
|
||||
Box<
|
||||
dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c,
|
||||
>,
|
||||
> {
|
||||
) -> heed::Result<Box<dyn Iterator<Item = heed::Result<((&'c str, u32), RoaringBitmap)>> + 'c>>
|
||||
{
|
||||
let range = {
|
||||
let left = left.unwrap_or(u32::min_value());
|
||||
let right = right.unwrap_or(u32::max_value());
|
||||
let left = (word, level, left, left);
|
||||
let right = (word, level, right, right);
|
||||
let left = u32::min_value();
|
||||
let right = u32::max_value();
|
||||
let left = (word, left);
|
||||
let right = (word, right);
|
||||
left..=right
|
||||
};
|
||||
let db = match in_prefix_cache {
|
||||
true => self.index.word_prefix_level_position_docids,
|
||||
false => self.index.word_level_position_docids,
|
||||
true => self.index.word_prefix_position_docids,
|
||||
false => self.index.word_position_docids,
|
||||
};
|
||||
|
||||
Ok(Box::new(db.range(self.rtxn, &range)?))
|
||||
}
|
||||
|
||||
fn word_position_last_level(
|
||||
&self,
|
||||
word: &str,
|
||||
in_prefix_cache: bool,
|
||||
) -> heed::Result<Option<TreeLevel>> {
|
||||
let range = {
|
||||
let left = (word, TreeLevel::min_value(), u32::min_value(), u32::min_value());
|
||||
let right = (word, TreeLevel::max_value(), u32::max_value(), u32::max_value());
|
||||
left..=right
|
||||
};
|
||||
let db = match in_prefix_cache {
|
||||
true => self.index.word_prefix_level_position_docids,
|
||||
false => self.index.word_level_position_docids,
|
||||
};
|
||||
let last_level = db
|
||||
.remap_data_type::<heed::types::DecodeIgnore>()
|
||||
.range(self.rtxn, &range)?
|
||||
.last()
|
||||
.transpose()?
|
||||
.map(|((_, level, _, _), _)| level);
|
||||
|
||||
Ok(last_level)
|
||||
}
|
||||
|
||||
fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>> {
|
||||
self.index.words_synonyms(self.rtxn, &[word])
|
||||
}
|
||||
|
@ -251,15 +203,9 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||
self.index.field_id_word_count_docids.get(self.rtxn, &key)
|
||||
}
|
||||
|
||||
fn word_level_position_docids(
|
||||
&self,
|
||||
word: &str,
|
||||
level: TreeLevel,
|
||||
left: u32,
|
||||
right: u32,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
let key = (word, level, left, right);
|
||||
self.index.word_level_position_docids.get(self.rtxn, &key)
|
||||
fn word_position_docids(&self, word: &str, pos: u32) -> heed::Result<Option<RoaringBitmap>> {
|
||||
let key = (word, pos);
|
||||
self.index.word_position_docids.get(self.rtxn, &key)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -616,27 +562,13 @@ pub mod test {
|
|||
fn word_position_iterator(
|
||||
&self,
|
||||
_word: &str,
|
||||
_level: TreeLevel,
|
||||
_in_prefix_cache: bool,
|
||||
_left: Option<u32>,
|
||||
_right: Option<u32>,
|
||||
) -> heed::Result<
|
||||
Box<
|
||||
dyn Iterator<Item = heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>>
|
||||
+ 'c,
|
||||
>,
|
||||
Box<dyn Iterator<Item = heed::Result<((&'c str, u32), RoaringBitmap)>> + 'c>,
|
||||
> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn word_position_last_level(
|
||||
&self,
|
||||
_word: &str,
|
||||
_in_prefix_cache: bool,
|
||||
) -> heed::Result<Option<TreeLevel>> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn synonyms(&self, _word: &str) -> heed::Result<Option<Vec<Vec<String>>>> {
|
||||
todo!()
|
||||
}
|
||||
|
@ -645,12 +577,10 @@ pub mod test {
|
|||
todo!()
|
||||
}
|
||||
|
||||
fn word_level_position_docids(
|
||||
fn word_position_docids(
|
||||
&self,
|
||||
_word: &str,
|
||||
_level: TreeLevel,
|
||||
_left: u32,
|
||||
_right: u32,
|
||||
_pos: u32,
|
||||
) -> heed::Result<Option<RoaringBitmap>> {
|
||||
todo!()
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue