mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Introduce exactness criterion
This commit is contained in:
parent
25f75d4d03
commit
a3f8686fbf
@ -65,6 +65,16 @@ impl FieldsIdsMap {
|
|||||||
pub fn iter(&self) -> impl Iterator<Item=(FieldId, &str)> {
|
pub fn iter(&self) -> impl Iterator<Item=(FieldId, &str)> {
|
||||||
self.ids_names.iter().map(|(id, name)| (*id, name.as_str()))
|
self.ids_names.iter().map(|(id, name)| (*id, name.as_str()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Iterate over the ids in the ids order.
|
||||||
|
pub fn ids<'a>(&'a self) -> impl Iterator<Item=FieldId> + 'a {
|
||||||
|
self.ids_names.keys().copied()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterate over the names in the ids order.
|
||||||
|
pub fn names(&self) -> impl Iterator<Item=&str> {
|
||||||
|
self.ids_names.values().map(AsRef::as_ref)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for FieldsIdsMap {
|
impl Default for FieldsIdsMap {
|
||||||
|
335
milli/src/search/criteria/exactness.rs
Normal file
335
milli/src/search/criteria/exactness.rs
Normal file
@ -0,0 +1,335 @@
|
|||||||
|
use std::{collections::HashMap, mem};
|
||||||
|
|
||||||
|
use log::debug;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use itertools::Itertools;
|
||||||
|
use std::ops::BitOr;
|
||||||
|
|
||||||
|
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
|
||||||
|
use crate::search::criteria::{
|
||||||
|
Context,
|
||||||
|
Criterion,
|
||||||
|
CriterionParameters,
|
||||||
|
CriterionResult,
|
||||||
|
resolve_query_tree,
|
||||||
|
};
|
||||||
|
use crate::TreeLevel;
|
||||||
|
|
||||||
|
pub struct Exactness<'t> {
|
||||||
|
ctx: &'t dyn Context<'t>,
|
||||||
|
query_tree: Option<Operation>,
|
||||||
|
state: Option<State>,
|
||||||
|
bucket_candidates: RoaringBitmap,
|
||||||
|
parent: Box<dyn Criterion + 't>,
|
||||||
|
query: Vec<ExactQueryPart>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t> Exactness<'t> {
|
||||||
|
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>, primitive_query: &[PrimitiveQueryPart]) -> heed::Result<Self> {
|
||||||
|
let mut query: Vec<_> = Vec::with_capacity(primitive_query.len());
|
||||||
|
for part in primitive_query {
|
||||||
|
query.push(ExactQueryPart::from_primitive_query_part(ctx, part)?);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Exactness {
|
||||||
|
ctx,
|
||||||
|
query_tree: None,
|
||||||
|
state: None,
|
||||||
|
bucket_candidates: RoaringBitmap::new(),
|
||||||
|
parent,
|
||||||
|
query,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t> Criterion for Exactness<'t> {
|
||||||
|
#[logging_timer::time("Exactness::{}")]
|
||||||
|
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
||||||
|
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||||
|
if let Some(state) = self.state.as_mut() {
|
||||||
|
state.difference_with(params.excluded_candidates);
|
||||||
|
}
|
||||||
|
|
||||||
|
loop {
|
||||||
|
debug!("Exactness for query {:?} at state {:?}", self.query, self.state);
|
||||||
|
|
||||||
|
match self.state.as_mut() {
|
||||||
|
Some(state) if state.is_empty() => {
|
||||||
|
// reset state
|
||||||
|
self.state = None;
|
||||||
|
self.query_tree = None;
|
||||||
|
},
|
||||||
|
Some(state) => {
|
||||||
|
let (candidates, state) = resolve_state(self.ctx, mem::take(state), &self.query)?;
|
||||||
|
self.state = state;
|
||||||
|
|
||||||
|
return Ok(Some(CriterionResult {
|
||||||
|
query_tree: self.query_tree.clone(),
|
||||||
|
candidates: Some(candidates),
|
||||||
|
bucket_candidates: mem::take(&mut self.bucket_candidates),
|
||||||
|
}));
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
match self.parent.next(params)? {
|
||||||
|
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||||
|
let candidates = match candidates {
|
||||||
|
Some(candidates) => candidates,
|
||||||
|
None => resolve_query_tree(self.ctx, &query_tree, &mut HashMap::new(), params.wdcache)?,
|
||||||
|
};
|
||||||
|
self.state = Some(State::new(candidates));
|
||||||
|
self.query_tree = Some(query_tree);
|
||||||
|
self.bucket_candidates |= bucket_candidates;
|
||||||
|
},
|
||||||
|
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||||
|
return Ok(Some(CriterionResult {
|
||||||
|
query_tree,
|
||||||
|
candidates,
|
||||||
|
bucket_candidates,
|
||||||
|
}));
|
||||||
|
},
|
||||||
|
None => return Ok(None),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum State {
|
||||||
|
/// Extract the documents that have an attribute that contains exactly the query.
|
||||||
|
ExactAttribute(RoaringBitmap),
|
||||||
|
/// Extract the documents that have an attribute that starts with exactly the query.
|
||||||
|
AttributeStartsWith(RoaringBitmap),
|
||||||
|
/// Rank the remaining documents by the number of exact words contained.
|
||||||
|
ExactWords(RoaringBitmap),
|
||||||
|
Remainings(Vec<RoaringBitmap>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl State {
|
||||||
|
fn new(candidates: RoaringBitmap) -> Self {
|
||||||
|
Self::ExactAttribute(candidates)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn difference_with(&mut self, lhs: &RoaringBitmap) {
|
||||||
|
match self {
|
||||||
|
Self::ExactAttribute(candidates) |
|
||||||
|
Self::AttributeStartsWith(candidates) |
|
||||||
|
Self::ExactWords(candidates) => *candidates -= lhs,
|
||||||
|
Self::Remainings(candidates_array) => {
|
||||||
|
candidates_array.iter_mut().for_each(|candidates| *candidates -= lhs);
|
||||||
|
candidates_array.retain(|candidates| !candidates.is_empty());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_empty(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
Self::ExactAttribute(candidates) |
|
||||||
|
Self::AttributeStartsWith(candidates) |
|
||||||
|
Self::ExactWords(candidates) => candidates.is_empty(),
|
||||||
|
Self::Remainings(candidates_array) => {
|
||||||
|
candidates_array.iter().all(RoaringBitmap::is_empty)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for State {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::Remainings(vec![])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[logging_timer::time("Exactness::{}")]
|
||||||
|
fn resolve_state(
|
||||||
|
ctx: &dyn Context,
|
||||||
|
state: State,
|
||||||
|
query: &[ExactQueryPart],
|
||||||
|
) -> anyhow::Result<(RoaringBitmap, Option<State>)>
|
||||||
|
{
|
||||||
|
use State::*;
|
||||||
|
match state {
|
||||||
|
ExactAttribute(mut allowed_candidates) |
|
||||||
|
AttributeStartsWith(mut allowed_candidates) => {
|
||||||
|
let mut candidates = RoaringBitmap::new();
|
||||||
|
let attributes_ids = ctx.searchable_fields_ids()?;
|
||||||
|
for id in attributes_ids {
|
||||||
|
let attribute_candidates_array = attribute_start_with_docids(ctx, id as u32, query)?;
|
||||||
|
candidates |= intersection_of(attribute_candidates_array.iter().collect());
|
||||||
|
}
|
||||||
|
|
||||||
|
// only keep allowed candidates
|
||||||
|
candidates &= &allowed_candidates;
|
||||||
|
// remove current candidates from allowed candidates
|
||||||
|
allowed_candidates -= &candidates;
|
||||||
|
Ok((candidates, Some(ExactWords(allowed_candidates))))
|
||||||
|
},
|
||||||
|
ExactWords(mut allowed_candidates) => {
|
||||||
|
let number_of_part = query.len();
|
||||||
|
let mut parts_candidates_array = Vec::with_capacity(number_of_part);
|
||||||
|
|
||||||
|
for part in query {
|
||||||
|
let mut candidates = RoaringBitmap::new();
|
||||||
|
use ExactQueryPart::*;
|
||||||
|
match part {
|
||||||
|
Synonyms(synonyms) => {
|
||||||
|
for synonym in synonyms {
|
||||||
|
if let Some(synonym_candidates) = ctx.word_docids(synonym)? {
|
||||||
|
candidates |= synonym_candidates;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// compute intersection on pair of words with a proximity of 0.
|
||||||
|
Phrase(phrase) => {
|
||||||
|
let mut bitmaps = Vec::with_capacity(phrase.len().saturating_sub(1));
|
||||||
|
for words in phrase.windows(2) {
|
||||||
|
if let [left, right] = words {
|
||||||
|
match ctx.word_pair_proximity_docids(left, right, 0)? {
|
||||||
|
Some(docids) => bitmaps.push(docids),
|
||||||
|
None => {
|
||||||
|
bitmaps.clear();
|
||||||
|
break
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
candidates |= intersection_of(bitmaps.iter().collect());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
parts_candidates_array.push(candidates);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut candidates_array = Vec::new();
|
||||||
|
|
||||||
|
// compute documents that contain all exact words.
|
||||||
|
let mut all_exact_candidates = intersection_of(parts_candidates_array.iter().collect());
|
||||||
|
all_exact_candidates &= &allowed_candidates;
|
||||||
|
allowed_candidates -= &all_exact_candidates;
|
||||||
|
|
||||||
|
// push the result of combinations of exact words grouped by the number of exact words contained by documents.
|
||||||
|
for c_count in (1..number_of_part).rev() {
|
||||||
|
let mut combinations_candidates = parts_candidates_array
|
||||||
|
.iter()
|
||||||
|
// create all `c_count` combinations of exact words
|
||||||
|
.combinations(c_count)
|
||||||
|
// intersect each word candidates in combinations
|
||||||
|
.map(intersection_of)
|
||||||
|
// union combinations of `c_count` exact words
|
||||||
|
.fold(RoaringBitmap::new(), RoaringBitmap::bitor);
|
||||||
|
// only keep allowed candidates
|
||||||
|
combinations_candidates &= &allowed_candidates;
|
||||||
|
// remove current candidates from allowed candidates
|
||||||
|
allowed_candidates -= &combinations_candidates;
|
||||||
|
candidates_array.push(combinations_candidates);
|
||||||
|
}
|
||||||
|
|
||||||
|
// push remainings allowed candidates as the worst valid candidates
|
||||||
|
candidates_array.push(allowed_candidates);
|
||||||
|
// reverse the array to be able to pop candidates from the best to the worst.
|
||||||
|
candidates_array.reverse();
|
||||||
|
|
||||||
|
Ok((all_exact_candidates, Some(Remainings(candidates_array))))
|
||||||
|
},
|
||||||
|
// pop remainings candidates until the emptiness
|
||||||
|
Remainings(mut candidates_array) => {
|
||||||
|
let candidates = candidates_array.pop().unwrap_or_default();
|
||||||
|
if !candidates_array.is_empty() {
|
||||||
|
Ok((candidates, Some(Remainings(candidates_array))))
|
||||||
|
} else {
|
||||||
|
Ok((candidates, None))
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn attribute_start_with_docids(ctx: &dyn Context, attribute_id: u32, query: &[ExactQueryPart]) -> heed::Result<Vec<RoaringBitmap>> {
|
||||||
|
let lowest_level = TreeLevel::min_value();
|
||||||
|
let mut attribute_candidates_array = Vec::new();
|
||||||
|
// start from attribute first position
|
||||||
|
let mut pos = attribute_id * 1000;
|
||||||
|
for part in query {
|
||||||
|
use ExactQueryPart::*;
|
||||||
|
match part {
|
||||||
|
Synonyms(synonyms) => {
|
||||||
|
let mut synonyms_candidates = RoaringBitmap::new();
|
||||||
|
for word in synonyms {
|
||||||
|
let wc = ctx.word_level_position_docids(word, lowest_level, pos, pos)?;
|
||||||
|
if let Some(word_candidates) = wc {
|
||||||
|
synonyms_candidates |= word_candidates;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
attribute_candidates_array.push(synonyms_candidates);
|
||||||
|
pos += 1;
|
||||||
|
},
|
||||||
|
Phrase(phrase) => {
|
||||||
|
for word in phrase {
|
||||||
|
let wc = ctx.word_level_position_docids(word, lowest_level, pos, pos)?;
|
||||||
|
if let Some(word_candidates) = wc {
|
||||||
|
attribute_candidates_array.push(word_candidates);
|
||||||
|
}
|
||||||
|
pos += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(attribute_candidates_array)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn intersection_of(mut to_intersect: Vec<&RoaringBitmap>) -> RoaringBitmap {
|
||||||
|
match to_intersect.len() {
|
||||||
|
0 => RoaringBitmap::new(),
|
||||||
|
1 => to_intersect[0].clone(),
|
||||||
|
2 => to_intersect[0] & to_intersect[1],
|
||||||
|
_ => {
|
||||||
|
to_intersect.sort_unstable_by(|a, b| a.len().cmp(&b.len()).reverse());
|
||||||
|
|
||||||
|
match to_intersect.pop() {
|
||||||
|
None => RoaringBitmap::new(),
|
||||||
|
Some(candidates) => {
|
||||||
|
let mut candidates = candidates.clone();
|
||||||
|
while let Some(bitmap) = to_intersect.pop() {
|
||||||
|
if candidates.is_empty() { break; }
|
||||||
|
candidates &= bitmap;
|
||||||
|
}
|
||||||
|
|
||||||
|
candidates
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A part of the user query, in the form used by the exactness criterion.
#[derive(Debug, Clone)]
pub enum ExactQueryPart {
    /// The words of a phrase, in query order.
    Phrase(Vec<String>),
    /// A query word together with its one-word synonyms; any of them is
    /// accepted as a match for this part.
    Synonyms(Vec<String>),
}
|
||||||
|
|
||||||
|
impl ExactQueryPart {
|
||||||
|
fn from_primitive_query_part(ctx: &dyn Context, part: &PrimitiveQueryPart) -> heed::Result<Self> {
|
||||||
|
let part = match part {
|
||||||
|
PrimitiveQueryPart::Word(word, _) => {
|
||||||
|
match ctx.synonyms(word)? {
|
||||||
|
Some(synonyms) => {
|
||||||
|
let mut synonyms: Vec<_> = synonyms.into_iter().filter_map(|mut array| {
|
||||||
|
// keep 1 word synonyms only.
|
||||||
|
match array.pop() {
|
||||||
|
Some(word) if array.is_empty() => Some(word),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}).collect();
|
||||||
|
synonyms.push(word.clone());
|
||||||
|
ExactQueryPart::Synonyms(synonyms)
|
||||||
|
},
|
||||||
|
None => ExactQueryPart::Synonyms(vec![word.clone()]),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
PrimitiveQueryPart::Phrase(phrase) => ExactQueryPart::Phrase(phrase.clone()),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(part)
|
||||||
|
}
|
||||||
|
}
|
@ -7,9 +7,10 @@ use roaring::RoaringBitmap;
|
|||||||
use crate::{TreeLevel, search::{word_derivations, WordDerivationsCache}};
|
use crate::{TreeLevel, search::{word_derivations, WordDerivationsCache}};
|
||||||
use crate::{Index, DocumentId};
|
use crate::{Index, DocumentId};
|
||||||
|
|
||||||
use super::query_tree::{Operation, Query, QueryKind};
|
use super::query_tree::{Operation, PrimitiveQuery, PrimitiveQueryPart, Query, QueryKind};
|
||||||
use self::asc_desc::AscDesc;
|
use self::asc_desc::AscDesc;
|
||||||
use self::attribute::Attribute;
|
use self::attribute::Attribute;
|
||||||
|
use self::exactness::Exactness;
|
||||||
use self::r#final::Final;
|
use self::r#final::Final;
|
||||||
use self::initial::Initial;
|
use self::initial::Initial;
|
||||||
use self::proximity::Proximity;
|
use self::proximity::Proximity;
|
||||||
@ -18,6 +19,7 @@ use self::words::Words;
|
|||||||
|
|
||||||
mod asc_desc;
|
mod asc_desc;
|
||||||
mod attribute;
|
mod attribute;
|
||||||
|
mod exactness;
|
||||||
mod initial;
|
mod initial;
|
||||||
mod proximity;
|
mod proximity;
|
||||||
mod typo;
|
mod typo;
|
||||||
@ -81,6 +83,9 @@ pub trait Context<'c> {
|
|||||||
fn docid_words_positions(&self, docid: DocumentId) -> heed::Result<HashMap<String, RoaringBitmap>>;
|
fn docid_words_positions(&self, docid: DocumentId) -> heed::Result<HashMap<String, RoaringBitmap>>;
|
||||||
fn word_position_iterator(&self, word: &str, level: TreeLevel, in_prefix_cache: bool, left: Option<u32>, right: Option<u32>) -> heed::Result<Box<dyn Iterator<Item =heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>>;
|
fn word_position_iterator(&self, word: &str, level: TreeLevel, in_prefix_cache: bool, left: Option<u32>, right: Option<u32>) -> heed::Result<Box<dyn Iterator<Item =heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>>;
|
||||||
fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>>;
|
fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>>;
|
||||||
|
fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>>;
|
||||||
|
fn searchable_fields_ids(&self) -> heed::Result<Vec<crate::FieldId>>;
|
||||||
|
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error>;
|
||||||
}
|
}
|
||||||
pub struct CriteriaBuilder<'t> {
|
pub struct CriteriaBuilder<'t> {
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
@ -170,6 +175,23 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
|
|||||||
|
|
||||||
Ok(last_level)
|
Ok(last_level)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Looks up the synonyms registered in the index for the single word `word`.
fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>> {
    let words = [word];
    self.index.words_synonyms(self.rtxn, &words)
}
|
||||||
|
|
||||||
|
/// Returns the ids of the searchable fields stored in the index, falling
/// back to every id of the fields ids map when the index stores none.
fn searchable_fields_ids(&self) -> heed::Result<Vec<crate::FieldId>> {
    if let Some(searchable_fields_ids) = self.index.searchable_fields_ids(self.rtxn)? {
        return Ok(searchable_fields_ids);
    }

    let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
    Ok(fields_ids_map.ids().collect())
}
|
||||||
|
|
||||||
|
/// Fetches the docids stored under the exact key
/// `(word, level, left, right)` of the `word_level_position_docids` database.
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error> {
    self.index
        .word_level_position_docids
        .get(self.rtxn, &(word, level, left, right))
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t> CriteriaBuilder<'t> {
|
impl<'t> CriteriaBuilder<'t> {
|
||||||
@ -182,11 +204,14 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
pub fn build(
|
pub fn build(
|
||||||
&'t self,
|
&'t self,
|
||||||
query_tree: Option<Operation>,
|
query_tree: Option<Operation>,
|
||||||
|
primitive_query: Option<Vec<PrimitiveQueryPart>>,
|
||||||
facet_candidates: Option<RoaringBitmap>,
|
facet_candidates: Option<RoaringBitmap>,
|
||||||
) -> anyhow::Result<Final<'t>>
|
) -> anyhow::Result<Final<'t>>
|
||||||
{
|
{
|
||||||
use crate::criterion::Criterion as Name;
|
use crate::criterion::Criterion as Name;
|
||||||
|
|
||||||
|
let primitive_query = primitive_query.unwrap_or_default();
|
||||||
|
|
||||||
let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box<dyn Criterion>;
|
let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box<dyn Criterion>;
|
||||||
for name in self.index.criteria(&self.rtxn)? {
|
for name in self.index.criteria(&self.rtxn)? {
|
||||||
criterion = match name {
|
criterion = match name {
|
||||||
@ -194,6 +219,7 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
Name::Words => Box::new(Words::new(self, criterion)),
|
Name::Words => Box::new(Words::new(self, criterion)),
|
||||||
Name::Proximity => Box::new(Proximity::new(self, criterion)),
|
Name::Proximity => Box::new(Proximity::new(self, criterion)),
|
||||||
Name::Attribute => Box::new(Attribute::new(self, criterion)),
|
Name::Attribute => Box::new(Attribute::new(self, criterion)),
|
||||||
|
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
|
||||||
Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?),
|
Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?),
|
||||||
Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?),
|
Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?),
|
||||||
_otherwise => criterion,
|
_otherwise => criterion,
|
||||||
@ -455,6 +481,18 @@ pub mod test {
|
|||||||
fn word_position_last_level(&self, _word: &str, _in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>> {
|
fn word_position_last_level(&self, _word: &str, _in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>> {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Test stub: not implemented, panics through `todo!()` when called.
///
/// The parameter is underscore-prefixed, like in the neighbouring
/// `word_position_last_level` stub, so that it does not trigger an
/// unused-variable warning.
fn synonyms(&self, _word: &str) -> heed::Result<Option<Vec<Vec<String>>>> {
    todo!()
}
|
||||||
|
|
||||||
|
// Test stub: not implemented, panics through `todo!()` when called.
fn searchable_fields_ids(&self) -> heed::Result<Vec<crate::FieldId>> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Default for TestContext<'a> {
|
impl<'a> Default for TestContext<'a> {
|
||||||
|
@ -97,7 +97,7 @@ impl<'a> Search<'a> {
|
|||||||
pub fn execute(&self) -> anyhow::Result<SearchResult> {
|
pub fn execute(&self) -> anyhow::Result<SearchResult> {
|
||||||
// We create the query tree by spliting the query into tokens.
|
// We create the query tree by spliting the query into tokens.
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
let query_tree = match self.query.as_ref() {
|
let (query_tree, primitive_query) = match self.query.as_ref() {
|
||||||
Some(query) => {
|
Some(query) => {
|
||||||
let mut builder = QueryTreeBuilder::new(self.rtxn, self.index);
|
let mut builder = QueryTreeBuilder::new(self.rtxn, self.index);
|
||||||
builder.optional_words(self.optional_words);
|
builder.optional_words(self.optional_words);
|
||||||
@ -113,9 +113,9 @@ impl<'a> Search<'a> {
|
|||||||
let analyzer = Analyzer::new(config);
|
let analyzer = Analyzer::new(config);
|
||||||
let result = analyzer.analyze(query);
|
let result = analyzer.analyze(query);
|
||||||
let tokens = result.tokens();
|
let tokens = result.tokens();
|
||||||
builder.build(tokens)?
|
builder.build(tokens)?.map_or((None, None), |(qt, pq)| (Some(qt), Some(pq)))
|
||||||
},
|
},
|
||||||
None => None,
|
None => (None, None),
|
||||||
};
|
};
|
||||||
|
|
||||||
debug!("query tree: {:?} took {:.02?}", query_tree, before.elapsed());
|
debug!("query tree: {:?} took {:.02?}", query_tree, before.elapsed());
|
||||||
@ -135,7 +135,7 @@ impl<'a> Search<'a> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
|
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
|
||||||
let criteria = criteria_builder.build(query_tree, facet_candidates)?;
|
let criteria = criteria_builder.build(query_tree, primitive_query, facet_candidates)?;
|
||||||
|
|
||||||
match self.index.distinct_attribute(self.rtxn)? {
|
match self.index.distinct_attribute(self.rtxn)? {
|
||||||
None => self.perform_sort(NoopDistinct, matching_words, criteria),
|
None => self.perform_sort(NoopDistinct, matching_words, criteria),
|
||||||
|
@ -228,11 +228,12 @@ impl<'a> QueryTreeBuilder<'a> {
|
|||||||
/// - if `authorize_typos` is set to `false` the query tree will be generated
|
/// - if `authorize_typos` is set to `false` the query tree will be generated
|
||||||
/// forcing all query words to match documents without any typo
|
/// forcing all query words to match documents without any typo
|
||||||
/// (the criterion `typo` will be ignored)
|
/// (the criterion `typo` will be ignored)
|
||||||
pub fn build(&self, query: TokenStream) -> anyhow::Result<Option<Operation>> {
|
pub fn build(&self, query: TokenStream) -> anyhow::Result<Option<(Operation, PrimitiveQuery)>> {
|
||||||
let stop_words = self.index.stop_words(self.rtxn)?;
|
let stop_words = self.index.stop_words(self.rtxn)?;
|
||||||
let primitive_query = create_primitive_query(query, stop_words, self.words_limit);
|
let primitive_query = create_primitive_query(query, stop_words, self.words_limit);
|
||||||
if !primitive_query.is_empty() {
|
if !primitive_query.is_empty() {
|
||||||
create_query_tree(self, self.optional_words, self.authorize_typos, primitive_query).map(Some)
|
let qt = create_query_tree(self, self.optional_words, self.authorize_typos, &primitive_query)?;
|
||||||
|
Ok(Some((qt, primitive_query)))
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
@ -340,7 +341,7 @@ fn create_query_tree(
|
|||||||
ctx: &impl Context,
|
ctx: &impl Context,
|
||||||
optional_words: bool,
|
optional_words: bool,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
query: PrimitiveQuery,
|
query: &[PrimitiveQueryPart],
|
||||||
) -> anyhow::Result<Operation>
|
) -> anyhow::Result<Operation>
|
||||||
{
|
{
|
||||||
/// Matches on the `PrimitiveQueryPart` and create an operation from it.
|
/// Matches on the `PrimitiveQueryPart` and create an operation from it.
|
||||||
@ -458,16 +459,16 @@ fn create_query_tree(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if optional_words {
|
if optional_words {
|
||||||
optional_word(ctx, authorize_typos, query)
|
optional_word(ctx, authorize_typos, query.to_vec())
|
||||||
} else {
|
} else {
|
||||||
ngrams(ctx, authorize_typos, query.as_slice())
|
ngrams(ctx, authorize_typos, query)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type PrimitiveQuery = Vec<PrimitiveQueryPart>;
|
pub type PrimitiveQuery = Vec<PrimitiveQueryPart>;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
enum PrimitiveQueryPart {
|
pub enum PrimitiveQueryPart {
|
||||||
Phrase(Vec<String>),
|
Phrase(Vec<String>),
|
||||||
Word(String, IsPrefix),
|
Word(String, IsPrefix),
|
||||||
}
|
}
|
||||||
@ -579,11 +580,12 @@ mod test {
|
|||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
query: TokenStream,
|
query: TokenStream,
|
||||||
) -> anyhow::Result<Option<Operation>>
|
) -> anyhow::Result<Option<(Operation, PrimitiveQuery)>>
|
||||||
{
|
{
|
||||||
let primitive_query = create_primitive_query(query, None, words_limit);
|
let primitive_query = create_primitive_query(query, None, words_limit);
|
||||||
if !primitive_query.is_empty() {
|
if !primitive_query.is_empty() {
|
||||||
create_query_tree(self, optional_words, authorize_typos, primitive_query).map(Some)
|
let qt = create_query_tree(self, optional_words, authorize_typos, &primitive_query)?;
|
||||||
|
Ok(Some((qt, primitive_query)))
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
@ -674,7 +676,7 @@ mod test {
|
|||||||
Operation::Query(Query { prefix: true, kind: QueryKind::tolerant(2, "heyfriends".to_string()) }),
|
Operation::Query(Query { prefix: true, kind: QueryKind::tolerant(2, "heyfriends".to_string()) }),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -694,7 +696,7 @@ mod test {
|
|||||||
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "heyfriends".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "heyfriends".to_string()) }),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -725,7 +727,7 @@ mod test {
|
|||||||
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "helloworld".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "helloworld".to_string()) }),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -770,7 +772,7 @@ mod test {
|
|||||||
]),
|
]),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -790,7 +792,7 @@ mod test {
|
|||||||
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(1, "ngrams".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(1, "ngrams".to_string()) }),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -816,7 +818,7 @@ mod test {
|
|||||||
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "wordsplitfish".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "wordsplitfish".to_string()) }),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -836,7 +838,7 @@ mod test {
|
|||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -875,7 +877,7 @@ mod test {
|
|||||||
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "heymyfriend".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "heymyfriend".to_string()) }),
|
||||||
]),
|
]),
|
||||||
]);
|
]);
|
||||||
let query_tree = TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -891,7 +893,7 @@ mod test {
|
|||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("my".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("my".to_string()) }),
|
||||||
]);
|
]);
|
||||||
let query_tree = TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -925,7 +927,7 @@ mod test {
|
|||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("friend".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("friend".to_string()) }),
|
||||||
]),
|
]),
|
||||||
]);
|
]);
|
||||||
let query_tree = TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -944,7 +946,7 @@ mod test {
|
|||||||
]),
|
]),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("heyfriends".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("heyfriends".to_string()) }),
|
||||||
]);
|
]);
|
||||||
let query_tree = TestContext::default().build(false, false, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(false, false, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
@ -957,7 +959,7 @@ mod test {
|
|||||||
let tokens = result.tokens();
|
let tokens = result.tokens();
|
||||||
|
|
||||||
let context = TestContext::default();
|
let context = TestContext::default();
|
||||||
let query_tree = context.build(false, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = context.build(false, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
let expected = hashset!{
|
let expected = hashset!{
|
||||||
("word", 0, false),
|
("word", 0, false),
|
||||||
@ -997,7 +999,7 @@ mod test {
|
|||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("good".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("good".to_string()) }),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let query_tree = TestContext::default().build(false, false, Some(2), tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(false, false, Some(2), tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user