From e2a9dbc40421987782e27b60a0c4e182a3e7e6a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 29 Dec 2018 20:16:29 +0100 Subject: [PATCH 1/3] feat: Introduce filtering methods for Distinct/QueryBuilder --- src/database/database_view.rs | 4 +-- src/rank/mod.rs | 2 +- src/rank/query_builder.rs | 59 +++++++++++++++++++++++++---------- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/src/database/database_view.rs b/src/database/database_view.rs index 6e8d32a78..b37d84042 100644 --- a/src/database/database_view.rs +++ b/src/database/database_view.rs @@ -12,7 +12,7 @@ use crate::database::{retrieve_data_schema, retrieve_data_index}; use crate::database::blob::positive::PositiveBlob; use crate::database::deserializer::Deserializer; use crate::database::schema::Schema; -use crate::rank::QueryBuilder; +use crate::rank::{QueryBuilder, FilterFunc}; use crate::DocumentId; pub struct DatabaseView @@ -71,7 +71,7 @@ where D: Deref Ok(()) } - pub fn query_builder(&self) -> Result, Box> { + pub fn query_builder(&self) -> Result>, Box> { QueryBuilder::new(self) } diff --git a/src/rank/mod.rs b/src/rank/mod.rs index 81b9463e2..4d1b6b1ea 100644 --- a/src/rank/mod.rs +++ b/src/rank/mod.rs @@ -4,7 +4,7 @@ mod distinct_map; use crate::{Match, DocumentId}; -pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder}; +pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder}; #[inline] fn match_query_index(a: &Match, b: &Match) -> bool { diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index fe0904160..586eb7c88 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -34,14 +34,17 @@ fn split_whitespace_automatons(query: &str) -> Vec { automatons } -pub struct QueryBuilder<'a, D> +pub type FilterFunc = fn(DocumentId, &DatabaseView) -> bool; + +pub struct QueryBuilder<'a, D, FI> where D: Deref { view: &'a DatabaseView, criteria: Criteria, + filter: Option, } -impl<'a, D> QueryBuilder<'a, D> +impl<'a, D> QueryBuilder<'a, D, FilterFunc> where D: Deref { pub fn new(view: &'a DatabaseView) -> Result> { @@ -49,19 +52,27 @@ where D: Deref } } -impl<'a, D> QueryBuilder<'a, D> -where D: Deref +impl<'a, D, FI> QueryBuilder<'a, D, FI> +where D: Deref, { pub fn with_criteria(view: &'a DatabaseView, criteria: Criteria) -> Result> { - Ok(QueryBuilder { view, criteria }) + Ok(QueryBuilder { view, criteria, filter: None }) } - pub fn criteria(&mut self, criteria: Criteria) -> &mut Self { - self.criteria = criteria; - self + pub fn with_filter(self, function: F) -> QueryBuilder<'a, D, F> + where F: Fn(DocumentId, &DatabaseView) -> bool, + { + QueryBuilder { + view: self.view, + criteria: self.criteria, + filter: Some(function) + } } - pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, F> { + pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, FI, F> + where F: Fn(DocumentId, &DatabaseView) -> Option, + K: Hash + Eq, + { DistinctQueryBuilder { inner: self, function: function, @@ -109,8 +120,9 @@ where D: Deref } } -impl<'a, D> QueryBuilder<'a, D> +impl<'a, D, FI> QueryBuilder<'a, D, FI> where D: Deref, + FI: Fn(DocumentId, &DatabaseView) -> bool, { pub fn query(&self, query: &str, range: Range) -> Vec { let mut documents = self.query_all(query); @@ -152,20 +164,35 @@ where D: Deref, } } -pub struct DistinctQueryBuilder<'a, D, F> +pub struct DistinctQueryBuilder<'a, D, FI, FD> where D: Deref { - inner: QueryBuilder<'a, D>, - function: F, + inner: QueryBuilder<'a, D, FI>, + function: FD, size: usize, } -impl<'a, D, F, K> DistinctQueryBuilder<'a, D, F> +impl<'a, D, FI, FD> DistinctQueryBuilder<'a, D, FI, FD> where D: Deref, - F: Fn(DocumentId, &DatabaseView) -> Option, +{ + pub fn with_filter(self, function: F) -> DistinctQueryBuilder<'a, D, F, FD> + where F: Fn(DocumentId, &DatabaseView) -> bool, + { + DistinctQueryBuilder { + inner: self.inner.with_filter(function), + function: self.function, + size: self.size + } + } +} + +impl<'a, D, FI, FD, K> DistinctQueryBuilder<'a, D, FI, FD> +where D: Deref, + FI: Fn(DocumentId, &DatabaseView) -> bool, + FD: Fn(DocumentId, &DatabaseView) -> Option, K: Hash + Eq, { - pub fn query(&self, query: &str, range: Range) -> Vec { + pub fn query(self, query: &str, range: Range) -> Vec { let mut documents = self.inner.query_all(query); let mut groups = vec![documents.as_mut_slice()]; let mut key_cache = HashMap::new(); From aa3d0593636a95df77db44c8eeaa3a6f2b173f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 29 Dec 2018 20:16:48 +0100 Subject: [PATCH 2/3] feat: Allow filtering on DistinctQueryBuilder --- src/rank/query_builder.rs | 45 +++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index 586eb7c88..103a9a30d 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -198,6 +198,7 @@ where D: Deref, let mut key_cache = HashMap::new(); let view = &self.inner.view; + let mut filter_map = HashMap::new(); // these two variables informs on the current distinct map and // on the raw offset of the start of the group where the // range.start bound is located according to the distinct function @@ -223,14 +224,24 @@ where D: Deref, for group in GroupByMut::new(group, |a, b| criterion.eq(a, b, view)) { // we must compute the real distinguished len of this sub-group for document in group.iter() { - let entry = key_cache.entry(document.id); - let key = entry.or_insert_with(|| (self.function)(document.id, view).map(Rc::new)); - - match key.clone() { - Some(key) => buf_distinct.register(key), - None => buf_distinct.register_without_key(), + let filter_accepted = match &self.inner.filter { + None => true, + Some(filter) => { + let entry = filter_map.entry(document.id); + *entry.or_insert_with(|| (filter)(document.id, view)) + }, }; + if filter_accepted { + let entry = key_cache.entry(document.id); + let key = entry.or_insert_with(|| (self.function)(document.id, view).map(Rc::new)); + + match key.clone() { + Some(key) => buf_distinct.register(key), + None => buf_distinct.register_without_key(), + }; + } + // the requested range end is reached: stop computing distinct if buf_distinct.len() >= range.end { break } } @@ -256,16 +267,22 @@ where D: Deref, let mut seen = BufferedDistinctMap::new(&mut distinct_map); for document in documents.into_iter().skip(distinct_raw_offset) { - let key = key_cache.remove(&document.id).expect("BUG: cached key not found"); - - let accepted = match key { - Some(key) => seen.register(key), - None => seen.register_without_key(), + let filter_accepted = match &self.inner.filter { + Some(_) => filter_map.remove(&document.id).expect("BUG: filtered not found"), + None => true, }; - if accepted && seen.len() > range.start { - out_documents.push(document); - if out_documents.len() == range.len() { break } + if filter_accepted { + let key = key_cache.remove(&document.id).expect("BUG: cached key not found"); + let distinct_accepted = match key { + Some(key) => seen.register(key), + None => seen.register_without_key(), + }; + + if distinct_accepted && seen.len() > range.start { + out_documents.push(document); + if out_documents.len() == range.len() { break } + } } } From 7132c3be899adad444c5583f543d3ad3efa2978f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 29 Dec 2018 20:16:59 +0100 Subject: [PATCH 3/3] feat: Allow filtering on QueryBuilder --- src/rank/query_builder.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index 103a9a30d..39419024c 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -124,7 +124,14 @@ impl<'a, D, FI> QueryBuilder<'a, D, FI> where D: Deref, FI: Fn(DocumentId, &DatabaseView) -> bool, { - pub fn query(&self, query: &str, range: Range) -> Vec { + pub fn query(self, query: &str, range: Range) -> Vec { + // We give the filtering work to the query distinct builder, + // specifying a distinct rule that has no effect. + if self.filter.is_some() { + let builder = self.with_distinct(|_, _| None as Option<()>, 1); + return builder.query(query, range); + } + let mut documents = self.query_all(query); let mut groups = vec![documents.as_mut_slice()]; let view = &self.view;