From 361193099fdeedb8b4b6fb5bf450bc9baa07f5cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 29 Mar 2021 16:25:14 +0200 Subject: [PATCH] Reduce the amount of branches when query tree flattened --- milli/src/search/criteria/attribute.rs | 83 +++++++++++++++----------- 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/milli/src/search/criteria/attribute.rs b/milli/src/search/criteria/attribute.rs index 160807847..31c11e7bb 100644 --- a/milli/src/search/criteria/attribute.rs +++ b/milli/src/search/criteria/attribute.rs @@ -1,3 +1,4 @@ +use std::cmp; use std::collections::{BTreeMap, HashMap, btree_map}; use std::mem::take; @@ -15,7 +16,7 @@ pub struct Attribute<'t> { candidates: Option, bucket_candidates: RoaringBitmap, parent: Box, - flattened_query_tree: Option>>, + flattened_query_tree: Option>>>, current_buckets: Option>, } @@ -115,33 +116,43 @@ impl<'t> Criterion for Attribute<'t> { fn linear_compute_candidates( ctx: &dyn Context, - branches: &Vec>, + branches: &Vec>>, allowed_candidates: &RoaringBitmap, ) -> anyhow::Result> { - fn compute_candidate_rank(branches: &Vec>, words_positions: HashMap) -> u64 { + fn compute_candidate_rank(branches: &Vec>>, words_positions: HashMap) -> u64 { let mut min_rank = u64::max_value(); for branch in branches { + let branch_len = branch.len(); let mut branch_rank = Vec::with_capacity(branch_len); - for Query { prefix, kind } in branch { - // find the best position of the current word in the document. - let position = match kind { - QueryKind::Exact { word, .. } => { - if *prefix { - word_derivations(word, true, 0, &words_positions) - .flat_map(|positions| positions.iter().next()).min() - } else { - words_positions.get(word) - .map(|positions| positions.iter().next()) - .flatten() - } - }, - QueryKind::Tolerant { typo, word } => { - word_derivations(word, *prefix, *typo, &words_positions) - .flat_map(|positions| positions.iter().next()).min() - }, - }; + for derivates in branch { + let mut position = None; + for Query { prefix, kind } in derivates { + // find the best position of the current word in the document. + let current_position = match kind { + QueryKind::Exact { word, .. } => { + if *prefix { + word_derivations(word, true, 0, &words_positions) + .flat_map(|positions| positions.iter().next()).min() + } else { + words_positions.get(word) + .map(|positions| positions.iter().next()) + .flatten() + } + }, + QueryKind::Tolerant { typo, word } => { + word_derivations(word, *prefix, *typo, &words_positions) + .flat_map(|positions| positions.iter().next()).min() + }, + }; + + match (position, current_position) { + (Some(p), Some(cp)) => position = Some(cmp::min(p, cp)), + (None, Some(cp)) => position = Some(cp), + _ => (), + } + } // if a position is found, we add it to the branch score, // otherwise the branch is considered as unfindable in this document and we break. @@ -194,10 +205,10 @@ fn linear_compute_candidates( } // TODO can we keep refs of Query -fn flatten_query_tree(query_tree: &Operation) -> Vec> { +fn flatten_query_tree(query_tree: &Operation) -> Vec>> { use crate::search::criteria::Operation::{And, Or, Consecutive}; - fn and_recurse(head: &Operation, tail: &[Operation]) -> Vec> { + fn and_recurse(head: &Operation, tail: &[Operation]) -> Vec>> { match tail.split_first() { Some((thead, tail)) => { let tail = and_recurse(thead, tail); @@ -215,13 +226,17 @@ fn flatten_query_tree(query_tree: &Operation) -> Vec> { } } - fn recurse(op: &Operation) -> Vec> { + fn recurse(op: &Operation) -> Vec>> { match op { And(ops) | Consecutive(ops) => { ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t)) }, - Or(_, ops) => ops.into_iter().map(recurse).flatten().collect(), - Operation::Query(query) => vec![vec![query.clone()]], + Or(_, ops) => if ops.iter().all(|op| op.query().is_some()) { + vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]] + } else { + ops.into_iter().map(recurse).flatten().collect() + }, + Operation::Query(query) => vec![vec![vec![query.clone()]]], } } @@ -256,19 +271,19 @@ mod tests { ]); let expected = vec![ - vec![Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }], + vec![vec![Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }]], vec![ - Query { prefix: false, kind: QueryKind::exact(S("manythe")) }, - Query { prefix: false, kind: QueryKind::exact(S("fish")) }, + vec![Query { prefix: false, kind: QueryKind::exact(S("manythe")) }], + vec![Query { prefix: false, kind: QueryKind::exact(S("fish")) }], ], vec![ - Query { prefix: false, kind: QueryKind::exact(S("many")) }, - Query { prefix: false, kind: QueryKind::exact(S("thefish")) }, + vec![Query { prefix: false, kind: QueryKind::exact(S("many")) }], + vec![Query { prefix: false, kind: QueryKind::exact(S("thefish")) }], ], vec![ - Query { prefix: false, kind: QueryKind::exact(S("many")) }, - Query { prefix: false, kind: QueryKind::exact(S("the")) }, - Query { prefix: false, kind: QueryKind::exact(S("fish")) }, + vec![Query { prefix: false, kind: QueryKind::exact(S("many")) }], + vec![Query { prefix: false, kind: QueryKind::exact(S("the")) }], + vec![Query { prefix: false, kind: QueryKind::exact(S("fish")) }], ], ];