mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 22:14:32 +01:00
Factorize phrase computation
This commit is contained in:
parent
cbb3b25459
commit
b389be48a0
@ -326,43 +326,7 @@ pub fn resolve_query_tree<'t>(
|
|||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
}
|
}
|
||||||
Phrase(words) => {
|
Phrase(words) => resolve_phrase(ctx, &words),
|
||||||
let mut candidates = RoaringBitmap::new();
|
|
||||||
let mut first_iter = true;
|
|
||||||
let winsize = words.len().min(7);
|
|
||||||
|
|
||||||
for win in words.windows(winsize) {
|
|
||||||
// Get all the documents with the matching distance for each word pairs.
|
|
||||||
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
|
|
||||||
for (offset, s1) in win.iter().enumerate() {
|
|
||||||
for (dist, s2) in win.iter().skip(offset + 1).enumerate() {
|
|
||||||
match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
|
|
||||||
Some(m) => bitmaps.push(m),
|
|
||||||
// If there are no document for this distance, there will be no
|
|
||||||
// results for the phrase query.
|
|
||||||
None => return Ok(RoaringBitmap::new()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// We sort the bitmaps so that we perform the small intersections first, which is faster.
|
|
||||||
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
|
|
||||||
|
|
||||||
for bitmap in bitmaps {
|
|
||||||
if first_iter {
|
|
||||||
candidates = bitmap;
|
|
||||||
first_iter = false;
|
|
||||||
} else {
|
|
||||||
candidates &= bitmap;
|
|
||||||
}
|
|
||||||
// There will be no match, return early
|
|
||||||
if candidates.is_empty() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(candidates)
|
|
||||||
}
|
|
||||||
Or(_, ops) => {
|
Or(_, ops) => {
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
for op in ops {
|
for op in ops {
|
||||||
@ -378,6 +342,44 @@ pub fn resolve_query_tree<'t>(
|
|||||||
resolve_operation(ctx, query_tree, wdcache)
|
resolve_operation(ctx, query_tree, wdcache)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn resolve_phrase<'t>(ctx: &'t dyn Context, phrase: &[String]) -> Result<RoaringBitmap> {
|
||||||
|
let mut candidates = RoaringBitmap::new();
|
||||||
|
let mut first_iter = true;
|
||||||
|
let winsize = phrase.len().min(7);
|
||||||
|
|
||||||
|
for win in phrase.windows(winsize) {
|
||||||
|
// Get all the documents with the matching distance for each word pairs.
|
||||||
|
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
|
||||||
|
for (offset, s1) in win.iter().enumerate() {
|
||||||
|
for (dist, s2) in win.iter().skip(offset + 1).enumerate() {
|
||||||
|
match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
|
||||||
|
Some(m) => bitmaps.push(m),
|
||||||
|
// If there are no document for this distance, there will be no
|
||||||
|
// results for the phrase query.
|
||||||
|
None => return Ok(RoaringBitmap::new()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We sort the bitmaps so that we perform the small intersections first, which is faster.
|
||||||
|
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
|
||||||
|
|
||||||
|
for bitmap in bitmaps {
|
||||||
|
if first_iter {
|
||||||
|
candidates = bitmap;
|
||||||
|
first_iter = false;
|
||||||
|
} else {
|
||||||
|
candidates &= bitmap;
|
||||||
|
}
|
||||||
|
// There will be no match, return early
|
||||||
|
if candidates.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(candidates)
|
||||||
|
}
|
||||||
|
|
||||||
fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
|
fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
|
||||||
ctx: &dyn Context,
|
ctx: &dyn Context,
|
||||||
left_words: &[(T, u8)],
|
left_words: &[(T, u8)],
|
||||||
|
@ -6,8 +6,8 @@ use log::debug;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
query_docids, query_pair_proximity_docids, resolve_query_tree, Context, Criterion,
|
query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context,
|
||||||
CriterionParameters, CriterionResult,
|
Criterion, CriterionParameters, CriterionResult,
|
||||||
};
|
};
|
||||||
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
|
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
|
||||||
use crate::search::{build_dfa, WordDerivationsCache};
|
use crate::search::{build_dfa, WordDerivationsCache};
|
||||||
@ -192,42 +192,9 @@ fn resolve_candidates<'t>(
|
|||||||
let most_right = words
|
let most_right = words
|
||||||
.last()
|
.last()
|
||||||
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||||
let mut candidates = RoaringBitmap::new();
|
|
||||||
let mut first_iter = true;
|
|
||||||
let winsize = words.len().min(7);
|
|
||||||
|
|
||||||
for win in words.windows(winsize) {
|
|
||||||
// Get all the documents with the matching distance for each word pairs.
|
|
||||||
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
|
|
||||||
for (offset, s1) in win.iter().enumerate() {
|
|
||||||
for (dist, s2) in win.iter().skip(offset + 1).enumerate() {
|
|
||||||
match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
|
|
||||||
Some(m) => bitmaps.push(m),
|
|
||||||
// If there are no document for this distance, there will be no
|
|
||||||
// results for the phrase query.
|
|
||||||
None => return Ok(Default::default()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// We sort the bitmaps so that we perform the small intersections first, which is faster.
|
|
||||||
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
|
|
||||||
|
|
||||||
for bitmap in bitmaps {
|
|
||||||
if first_iter {
|
|
||||||
candidates = bitmap;
|
|
||||||
first_iter = false;
|
|
||||||
} else {
|
|
||||||
candidates &= bitmap;
|
|
||||||
}
|
|
||||||
// There will be no match, return early
|
|
||||||
if candidates.is_empty() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
match (most_left, most_right) {
|
match (most_left, most_right) {
|
||||||
(Some(l), Some(r)) => vec![(l, r, candidates)],
|
(Some(l), Some(r)) => vec![(l, r, resolve_phrase(ctx, &words)?)],
|
||||||
_otherwise => Default::default(),
|
_otherwise => Default::default(),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user