mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 21:50:07 +01:00
Merge #594
594: Fix(Search): Fix phrase search candidates computation r=Kerollmops a=ManyTheFish This is an old bug that was hidden by the proximity criterion: phrase searches always returned an empty candidates list when the proximity criterion was deactivated. Before the fix, we were trying to find any words[n] near words[n] instead of any words[n] near words[n+1]. For example, for the phrase search '"Hello world"', we first searched for "hello" near "hello" instead of "hello" near "world". Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
e4a52e6e45
@ -293,13 +293,13 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_query_tree<'t>(
|
pub fn resolve_query_tree(
|
||||||
ctx: &'t dyn Context,
|
ctx: &dyn Context,
|
||||||
query_tree: &Operation,
|
query_tree: &Operation,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
fn resolve_operation<'t>(
|
fn resolve_operation(
|
||||||
ctx: &'t dyn Context,
|
ctx: &dyn Context,
|
||||||
query_tree: &Operation,
|
query_tree: &Operation,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
@ -326,16 +326,32 @@ pub fn resolve_query_tree<'t>(
|
|||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
}
|
}
|
||||||
Phrase(words) => {
|
Phrase(words) => resolve_phrase(ctx, &words),
|
||||||
|
Or(_, ops) => {
|
||||||
|
let mut candidates = RoaringBitmap::new();
|
||||||
|
for op in ops {
|
||||||
|
let docids = resolve_operation(ctx, op, wdcache)?;
|
||||||
|
candidates |= docids;
|
||||||
|
}
|
||||||
|
Ok(candidates)
|
||||||
|
}
|
||||||
|
Query(q) => Ok(query_docids(ctx, q, wdcache)?),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resolve_operation(ctx, query_tree, wdcache)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn resolve_phrase(ctx: &dyn Context, phrase: &[String]) -> Result<RoaringBitmap> {
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
let mut first_iter = true;
|
let mut first_iter = true;
|
||||||
let winsize = words.len().min(7);
|
let winsize = phrase.len().min(7);
|
||||||
|
|
||||||
for win in words.windows(winsize) {
|
for win in phrase.windows(winsize) {
|
||||||
// Get all the documents with the matching distance for each word pairs.
|
// Get all the documents with the matching distance for each word pairs.
|
||||||
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
|
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
|
||||||
for (offset, s1) in win.iter().enumerate() {
|
for (offset, s1) in win.iter().enumerate() {
|
||||||
for (dist, s2) in win.iter().skip(offset).enumerate() {
|
for (dist, s2) in win.iter().skip(offset + 1).enumerate() {
|
||||||
match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
|
match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
|
||||||
Some(m) => bitmaps.push(m),
|
Some(m) => bitmaps.push(m),
|
||||||
// If there are no document for this distance, there will be no
|
// If there are no document for this distance, there will be no
|
||||||
@ -362,20 +378,6 @@ pub fn resolve_query_tree<'t>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
}
|
|
||||||
Or(_, ops) => {
|
|
||||||
let mut candidates = RoaringBitmap::new();
|
|
||||||
for op in ops {
|
|
||||||
let docids = resolve_operation(ctx, op, wdcache)?;
|
|
||||||
candidates |= docids;
|
|
||||||
}
|
|
||||||
Ok(candidates)
|
|
||||||
}
|
|
||||||
Query(q) => Ok(query_docids(ctx, q, wdcache)?),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resolve_operation(ctx, query_tree, wdcache)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
|
fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
|
||||||
|
@ -6,8 +6,8 @@ use log::debug;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
query_docids, query_pair_proximity_docids, resolve_query_tree, Context, Criterion,
|
query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context,
|
||||||
CriterionParameters, CriterionResult,
|
Criterion, CriterionParameters, CriterionResult,
|
||||||
};
|
};
|
||||||
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
|
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
|
||||||
use crate::search::{build_dfa, WordDerivationsCache};
|
use crate::search::{build_dfa, WordDerivationsCache};
|
||||||
@ -192,22 +192,9 @@ fn resolve_candidates<'t>(
|
|||||||
let most_right = words
|
let most_right = words
|
||||||
.last()
|
.last()
|
||||||
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||||
let mut candidates = None;
|
|
||||||
for slice in words.windows(2) {
|
match (most_left, most_right) {
|
||||||
let (left, right) = (&slice[0], &slice[1]);
|
(Some(l), Some(r)) => vec![(l, r, resolve_phrase(ctx, &words)?)],
|
||||||
match ctx.word_pair_proximity_docids(left, right, 1)? {
|
|
||||||
Some(pair_docids) => match candidates.as_mut() {
|
|
||||||
Some(candidates) => *candidates &= pair_docids,
|
|
||||||
None => candidates = Some(pair_docids),
|
|
||||||
},
|
|
||||||
None => {
|
|
||||||
candidates = None;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
match (most_left, most_right, candidates) {
|
|
||||||
(Some(l), Some(r), Some(c)) => vec![(l, r, c)],
|
|
||||||
_otherwise => Default::default(),
|
_otherwise => Default::default(),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user