diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 05305d724..4613acb4f 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -335,7 +335,7 @@ pub fn resolve_query_tree<'t>( // Get all the documents with the matching distance for each word pairs. let mut bitmaps = Vec::with_capacity(winsize.pow(2)); for (offset, s1) in win.iter().enumerate() { - for (dist, s2) in win.iter().skip(offset).enumerate() { + for (dist, s2) in win.iter().skip(offset + 1).enumerate() { match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? { Some(m) => bitmaps.push(m), // If there are no document for this distance, there will be no diff --git a/milli/src/search/criteria/proximity.rs b/milli/src/search/criteria/proximity.rs index 2bfa61e85..30919585b 100644 --- a/milli/src/search/criteria/proximity.rs +++ b/milli/src/search/criteria/proximity.rs @@ -192,22 +192,42 @@ fn resolve_candidates<'t>( let most_right = words .last() .map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) }); - let mut candidates = None; - for slice in words.windows(2) { - let (left, right) = (&slice[0], &slice[1]); - match ctx.word_pair_proximity_docids(left, right, 1)? { - Some(pair_docids) => match candidates.as_mut() { - Some(candidates) => *candidates &= pair_docids, - None => candidates = Some(pair_docids), - }, - None => { - candidates = None; + let mut candidates = RoaringBitmap::new(); + let mut first_iter = true; + let winsize = words.len().min(7); + + for win in words.windows(winsize) { + // Get all the documents with the matching distance for each word pairs. + let mut bitmaps = Vec::with_capacity(winsize.pow(2)); + for (offset, s1) in win.iter().enumerate() { + for (dist, s2) in win.iter().skip(offset + 1).enumerate() { + match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? { + Some(m) => bitmaps.push(m), + // If there are no document for this distance, there will be no + // results for the phrase query. + None => return Ok(Default::default()), + } + } + } + + // We sort the bitmaps so that we perform the small intersections first, which is faster. + bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len())); + + for bitmap in bitmaps { + if first_iter { + candidates = bitmap; + first_iter = false; + } else { + candidates &= bitmap; + } + // There will be no match, return early + if candidates.is_empty() { break; } } } - match (most_left, most_right, candidates) { - (Some(l), Some(r), Some(c)) => vec![(l, r, c)], + match (most_left, most_right) { + (Some(l), Some(r)) => vec![(l, r, candidates)], _otherwise => Default::default(), } } else {