From 75464a1baadc86a8549a48a2ee3fff3f79588d30 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Wed, 14 Apr 2021 12:18:13 +0200 Subject: [PATCH] review fixes --- milli/src/index.rs | 1 - milli/src/search/criteria/asc_desc.rs | 9 ++- milli/src/search/criteria/fetcher.rs | 10 +--- milli/src/search/criteria/mod.rs | 7 +-- milli/src/search/criteria/proximity.rs | 15 +++-- milli/src/search/criteria/typo.rs | 66 ++++++---------------- milli/src/search/criteria/words.rs | 9 ++- milli/src/search/distinct/map_distinct.rs | 27 ++++----- milli/src/search/distinct/noop_distinct.rs | 2 +- milli/src/search/mod.rs | 5 +- 10 files changed, 50 insertions(+), 101 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index a2b6cc440..643a9ffb9 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -475,7 +475,6 @@ impl Index { pub(crate) fn set_updated_at(&self, wtxn: &mut RwTxn, time: &DateTime) -> heed::Result<()> { self.main.put::<_, Str, SerdeJson>>(wtxn, UPDATED_AT_KEY, &time) } - } #[cfg(test)] diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index ddd25009d..78ae540e4 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -17,7 +17,7 @@ use crate::search::facet::FacetIter; use crate::search::query_tree::Operation; use crate::search::WordDerivationsCache; use crate::{FieldsIdsMap, FieldId, Index}; -use super::{Criterion, CriterionResult, CriterionContext}; +use super::{Criterion, CriterionResult}; /// Threshold on the number of candidates that will make /// the system to choose between one algorithm or another. @@ -151,7 +151,7 @@ impl<'t> AscDesc<'t> { impl<'t> Criterion for AscDesc<'t> { #[logging_timer::time("AscDesc::{}")] - fn next(&mut self, context: CriterionContext) -> anyhow::Result> { + fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result> { loop { debug!("Facet {}({}) iteration", if self.ascending { "Asc" } else { "Desc" }, self.field_name @@ -163,8 +163,7 @@ impl<'t> Criterion for AscDesc<'t> { let bucket_candidates = take(&mut self.bucket_candidates); match self.parent.as_mut() { Some(parent) => { - let CriterionContext { word_cache, exclude } = context; - match parent.next(CriterionContext { exclude, word_cache })? { + match parent.next(wdcache)? { Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { self.query_tree = query_tree; let candidates = match (&self.query_tree, candidates) { @@ -174,7 +173,7 @@ impl<'t> Criterion for AscDesc<'t> { }, (Some(qt), None) => { let context = CriteriaBuilder::new(&self.rtxn, &self.index)?; - let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), word_cache)?; + let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), wdcache)?; candidates.intersect_with(&self.faceted_candidates); candidates }, diff --git a/milli/src/search/criteria/fetcher.rs b/milli/src/search/criteria/fetcher.rs index dcd40e43d..fa204bdf2 100644 --- a/milli/src/search/criteria/fetcher.rs +++ b/milli/src/search/criteria/fetcher.rs @@ -6,7 +6,7 @@ use roaring::RoaringBitmap; use crate::search::query_tree::Operation; use crate::search::WordDerivationsCache; -use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context, CriterionContext}; +use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context}; /// The result of a call to the fetcher. #[derive(Debug, Clone, PartialEq)] @@ -61,7 +61,7 @@ impl<'t> Fetcher<'t> { } #[logging_timer::time("Fetcher::{}")] - pub fn next(&mut self, exclude: &RoaringBitmap) -> anyhow::Result> { + pub fn next(&mut self) -> anyhow::Result> { use Candidates::{Allowed, Forbidden}; loop { debug!("Fetcher iteration (should_get_documents_ids: {}) ({:?})", @@ -90,11 +90,7 @@ impl<'t> Fetcher<'t> { Forbidden(_) => { match self.parent.as_mut() { Some(parent) => { - let context = CriterionContext { - word_cache: &mut self.wdcache, - exclude - }; - match parent.next(context)? { + match parent.next(&mut self.wdcache)? { Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { let candidates = match (&query_tree, candidates) { (_, Some(candidates)) => candidates, diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 5e25001a2..22f081871 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -20,13 +20,8 @@ mod asc_desc; mod proximity; pub mod fetcher; -pub struct CriterionContext<'a, 'b> { - exclude: &'a RoaringBitmap, - word_cache: &'b mut WordDerivationsCache, -} - pub trait Criterion { - fn next(&mut self, wdcache: CriterionContext) -> anyhow::Result>; + fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result>; } /// The result of a call to the parent criterion. diff --git a/milli/src/search/criteria/proximity.rs b/milli/src/search/criteria/proximity.rs index 45cffb93d..b62eb8cfd 100644 --- a/milli/src/search/criteria/proximity.rs +++ b/milli/src/search/criteria/proximity.rs @@ -8,7 +8,7 @@ use log::debug; use crate::{DocumentId, Position, search::{query_tree::QueryKind}}; use crate::search::query_tree::{maximum_proximity, Operation, Query}; use crate::search::{build_dfa, WordDerivationsCache}; -use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree, CriterionContext}; +use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, resolve_query_tree}; pub struct Proximity<'t> { ctx: &'t dyn Context, @@ -56,9 +56,8 @@ impl<'t> Proximity<'t> { impl<'t> Criterion for Proximity<'t> { #[logging_timer::time("Proximity::{}")] - fn next(&mut self, context: CriterionContext) -> anyhow::Result> { + fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result> { use Candidates::{Allowed, Forbidden}; - let CriterionContext { word_cache, exclude } = context; loop { debug!("Proximity at iteration {} (max {:?}) ({:?})", self.proximity, @@ -99,7 +98,7 @@ impl<'t> Criterion for Proximity<'t> { self.ctx, query_tree, candidates, - word_cache, + wdcache, )?; self.plane_sweep_cache = Some(cache.into_iter()); @@ -111,7 +110,7 @@ impl<'t> Criterion for Proximity<'t> { &query_tree, self.proximity, &mut self.candidates_cache, - word_cache, + wdcache, )? }; @@ -141,7 +140,7 @@ impl<'t> Criterion for Proximity<'t> { &query_tree, self.proximity, &mut self.candidates_cache, - word_cache, + wdcache, )?; new_candidates.difference_with(&candidates); @@ -171,11 +170,11 @@ impl<'t> Criterion for Proximity<'t> { (None, Forbidden(_)) => { match self.parent.as_mut() { Some(parent) => { - match parent.next(CriterionContext { exclude, word_cache })? { + match parent.next(wdcache)? { Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { let candidates = match (&query_tree, candidates) { (_, Some(candidates)) => candidates, - (Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), word_cache)?, + (Some(qt), None) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), wdcache)?, (None, None) => RoaringBitmap::new(), }; diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index 1c3942495..3877f53ed 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -6,7 +6,7 @@ use roaring::RoaringBitmap; use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind}; use crate::search::{word_derivations, WordDerivationsCache}; -use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids, CriterionContext}; +use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids}; pub struct Typo<'t> { ctx: &'t dyn Context, @@ -51,9 +51,8 @@ impl<'t> Typo<'t> { impl<'t> Criterion for Typo<'t> { #[logging_timer::time("Typo::{}")] - fn next(&mut self, context: CriterionContext) -> anyhow::Result> { + fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result> { use Candidates::{Allowed, Forbidden}; - let CriterionContext { word_cache, exclude } = context; loop { debug!("Typo at iteration {} ({:?})", self.number_typos, self.candidates); @@ -72,9 +71,9 @@ impl<'t> Criterion for Typo<'t> { } else { let fst = self.ctx.words_fst(); let new_query_tree = if self.number_typos < 2 { - alterate_query_tree(&fst, query_tree.clone(), self.number_typos, word_cache)? + alterate_query_tree(&fst, query_tree.clone(), self.number_typos, wdcache)? } else if self.number_typos == 2 { - *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, word_cache)?; + *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, wdcache)?; query_tree.clone() } else { query_tree.clone() @@ -85,7 +84,7 @@ impl<'t> Criterion for Typo<'t> { &new_query_tree, self.number_typos, &mut self.candidates_cache, - word_cache, + wdcache, )?; new_candidates.intersect_with(&candidates); candidates.difference_with(&new_candidates); @@ -110,9 +109,9 @@ impl<'t> Criterion for Typo<'t> { } else { let fst = self.ctx.words_fst(); let new_query_tree = if self.number_typos < 2 { - alterate_query_tree(&fst, query_tree.clone(), self.number_typos, word_cache)? + alterate_query_tree(&fst, query_tree.clone(), self.number_typos, wdcache)? } else if self.number_typos == 2 { - *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, word_cache)?; + *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, wdcache)?; query_tree.clone() } else { query_tree.clone() @@ -123,7 +122,7 @@ impl<'t> Criterion for Typo<'t> { &new_query_tree, self.number_typos, &mut self.candidates_cache, - word_cache, + wdcache, )?; new_candidates.difference_with(&candidates); candidates.union_with(&new_candidates); @@ -148,7 +147,7 @@ impl<'t> Criterion for Typo<'t> { (None, Forbidden(_)) => { match self.parent.as_mut() { Some(parent) => { - match parent.next(CriterionContext { exclude, word_cache })? { + match parent.next(wdcache)? { Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { self.query_tree = query_tree.map(|op| (maximum_typo(&op), op)); self.number_typos = 0; @@ -347,12 +346,8 @@ mod test { let mut wdcache = WordDerivationsCache::new(); let mut criteria = Typo::initial(&context, query_tree, facet_candidates); - let sort_context = CriterionContext { - word_cache: &mut wdcache, - exclude: &RoaringBitmap::new(), - }; - assert!(criteria.next(sort_context).unwrap().is_none()); + assert!(criteria.next(&mut wdcache).unwrap().is_none()); } #[test] @@ -386,12 +381,7 @@ mod test { bucket_candidates: candidates_1, }; - let sort_context = CriterionContext { - word_cache: &mut wdcache, - exclude: &RoaringBitmap::new(), - }; - - assert_eq!(criteria.next(sort_context).unwrap(), Some(expected_1)); + assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_1)); let candidates_2 = ( context.word_docids("split").unwrap().unwrap() @@ -413,12 +403,7 @@ mod test { bucket_candidates: candidates_2, }; - let sort_context = CriterionContext { - word_cache: &mut wdcache, - exclude: &RoaringBitmap::new(), - }; - - assert_eq!(criteria.next(sort_context).unwrap(), Some(expected_2)); + assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_2)); } #[test] @@ -436,19 +421,11 @@ mod test { bucket_candidates: facet_candidates, }; - let sort_context = CriterionContext { - word_cache: &mut wdcache, - exclude: &RoaringBitmap::new(), - }; // first iteration, returns the facet candidates - assert_eq!(criteria.next(sort_context).unwrap(), Some(expected)); + assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected)); - let sort_context = CriterionContext { - word_cache: &mut wdcache, - exclude: &RoaringBitmap::new(), - }; // second iteration, returns None because there is no more things to do - assert!(criteria.next(sort_context ).unwrap().is_none()); + assert!(criteria.next(&mut wdcache).unwrap().is_none()); } #[test] @@ -482,12 +459,7 @@ mod test { bucket_candidates: candidates_1 & &facet_candidates, }; - let sort_context = CriterionContext { - word_cache: &mut wdcache, - exclude: &RoaringBitmap::new(), - }; - - assert_eq!(criteria.next(sort_context).unwrap(), Some(expected_1)); + assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_1)); let candidates_2 = ( context.word_docids("split").unwrap().unwrap() @@ -509,12 +481,6 @@ mod test { bucket_candidates: candidates_2 & &facet_candidates, }; - let sort_context = CriterionContext { - word_cache: &mut wdcache, - exclude: &RoaringBitmap::new(), - }; - - assert_eq!(criteria.next(sort_context).unwrap(), Some(expected_2)); + assert_eq!(criteria.next(&mut wdcache).unwrap(), Some(expected_2)); } - } diff --git a/milli/src/search/criteria/words.rs b/milli/src/search/criteria/words.rs index b401f99fa..0aa3b483a 100644 --- a/milli/src/search/criteria/words.rs +++ b/milli/src/search/criteria/words.rs @@ -5,7 +5,7 @@ use log::debug; use roaring::RoaringBitmap; use crate::search::query_tree::Operation; -use super::{resolve_query_tree, Criterion, CriterionResult, Context, CriterionContext}; +use super::{resolve_query_tree, Criterion, CriterionResult, Context, WordDerivationsCache}; pub struct Words<'t> { ctx: &'t dyn Context, @@ -47,8 +47,7 @@ impl<'t> Words<'t> { impl<'t> Criterion for Words<'t> { #[logging_timer::time("Words::{}")] - fn next(&mut self, context: CriterionContext) -> anyhow::Result> { - let CriterionContext { word_cache, exclude } = context; + fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result> { loop { debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates); @@ -62,7 +61,7 @@ impl<'t> Criterion for Words<'t> { })); }, (Some(qt), Some(candidates)) => { - let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache, word_cache)?; + let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache, wdcache)?; found_candidates.intersect_with(&candidates); candidates.difference_with(&found_candidates); @@ -100,7 +99,7 @@ impl<'t> Criterion for Words<'t> { (None, None) => { match self.parent.as_mut() { Some(parent) => { - match parent.next(CriterionContext { word_cache, exclude })? { + match parent.next(wdcache)? { Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default(); self.candidates = candidates; diff --git a/milli/src/search/distinct/map_distinct.rs b/milli/src/search/distinct/map_distinct.rs index 37e52aa6f..f2e31bce4 100644 --- a/milli/src/search/distinct/map_distinct.rs +++ b/milli/src/search/distinct/map_distinct.rs @@ -18,10 +18,9 @@ pub struct MapDistinct<'a> { impl<'a> MapDistinct<'a> { pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self { - let map = HashMap::new(); Self { distinct, - map, + map: HashMap::new(), index, txn, } @@ -38,6 +37,9 @@ pub struct MapDistinctIter<'a, 'b> { } impl<'a, 'b> MapDistinctIter<'a, 'b> { + /// Performs the next iteration of the mafacetp distinct. This is a convenience method that is + /// called by the Iterator::next implementation that tranposes the result. It makes error + /// handling easier. fn next_inner(&mut self) -> anyhow::Result> { let map = &mut self.map; let mut filter = |value: Value| { @@ -54,22 +56,15 @@ impl<'a, 'b> MapDistinctIter<'a, 'b> { .transpose()?; let accept = match value { - Some(value) => { - match value { - // Since we can't distinct these values, we always accept them - Value::Null | Value::Object(_) => true, - Value::Array(values) => { - let mut accept = true; - for value in values { - accept &= filter(value); - } - accept - } - value => filter(value), + Some(Value::Array(values)) => { + let mut accept = true; + for value in values { + accept &= filter(value); } + accept } - // Accept values by default. - _ => true, + Some(Value::Null) | Some(Value::Object(_)) | None => true, + Some(value) => filter(value), }; if accept { diff --git a/milli/src/search/distinct/noop_distinct.rs b/milli/src/search/distinct/noop_distinct.rs index 9fdf17187..8f7bc7d17 100644 --- a/milli/src/search/distinct/noop_distinct.rs +++ b/milli/src/search/distinct/noop_distinct.rs @@ -16,7 +16,7 @@ impl Iterator for NoopDistinctIter { type Item = anyhow::Result; fn next(&mut self) -> Option { - self.candidates.next().map(Result::Ok) + self.candidates.next().map(Ok) } } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 2c55330a7..7324ea72a 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use std::collections::hash_map::{HashMap, Entry}; use std::fmt; +use std::mem::take; use std::str::Utf8Error; use std::time::Instant; @@ -159,11 +160,11 @@ impl<'a> Search<'a> { let mut excluded_documents = RoaringBitmap::new(); let mut documents_ids = Vec::with_capacity(self.limit); - while let Some(FetcherResult { candidates, bucket_candidates, .. }) = criteria.next(&excluded_documents)? { + while let Some(FetcherResult { candidates, bucket_candidates, .. }) = criteria.next()? { debug!("Number of candidates found {}", candidates.len()); - let excluded = std::mem::take(&mut excluded_documents); + let excluded = take(&mut excluded_documents); let mut candidates = distinct.distinct(candidates, excluded);