From 0f4c0beffd0a25437c2e58cfb2f7686ab17da87f Mon Sep 17 00:00:00 2001
From: Kerollmops
Date: Thu, 11 Mar 2021 11:48:55 +0100
Subject: [PATCH] Introduce the Attribute criterion

---
Reviewer note: `Attribute::next` is still a `todo!("Attribute")` stub in this
patch; the only executable logic added is `explode_query_tree`, which flattens
a query tree into every alternative sequence of `Query` leaves (And/Consecutive
concatenate their children, Or contributes one alternative per child) and is
covered by the `simple_explode_query_tree` unit test.

 Cargo.lock                             |   7 ++
 milli/Cargo.toml                       |   1 +
 milli/src/search/criteria/attribute.rs | 133 +++++++++++++++++++++++++
 milli/src/search/criteria/mod.rs       |   4 +
 4 files changed, 145 insertions(+)
 create mode 100644 milli/src/search/criteria/attribute.rs

diff --git a/Cargo.lock b/Cargo.lock
index bbe86a2a7..065be362f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -122,6 +122,12 @@ version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
 
+[[package]]
+name = "big_s"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "199edb7b90631283b10c2422e6a0bc8b7d987bf732995ba1de53b576c97e51a8"
+
 [[package]]
 name = "bincode"
 version = "1.3.1"
@@ -1251,6 +1257,7 @@ name = "milli"
 version = "0.1.1"
 dependencies = [
  "anyhow",
+ "big_s",
  "bstr",
  "byteorder",
  "chrono",
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index b198131c1..eefdfa7d5 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -52,6 +52,7 @@ logging_timer = "1.0.0"
 tinytemplate = "=1.1.0"
 
 [dev-dependencies]
+big_s = "1.0.2"
 criterion = "0.3.4"
 maplit = "1.0.2"
 rand = "0.8.3"
diff --git a/milli/src/search/criteria/attribute.rs b/milli/src/search/criteria/attribute.rs
new file mode 100644
index 000000000..9c31740b1
--- /dev/null
+++ b/milli/src/search/criteria/attribute.rs
@@ -0,0 +1,133 @@
+use log::debug;
+use roaring::RoaringBitmap;
+
+use crate::search::criteria::Query;
+use crate::search::query_tree::Operation;
+use crate::search::WordDerivationsCache;
+use super::{Criterion, CriterionResult, Context};
+
+pub struct Attribute<'t> {
+    ctx: &'t dyn Context,
+    query_tree: Option<Operation>,
+    candidates: Option<RoaringBitmap>,
+    bucket_candidates: RoaringBitmap,
+    parent: Option<Box<dyn Criterion + 't>>,
+}
+
+impl<'t> Attribute<'t> {
+    pub fn initial(
+        ctx: &'t dyn Context,
+        query_tree: Option<Operation>,
+        candidates: Option<RoaringBitmap>,
+    ) -> Self
+    {
+        Attribute {
+            ctx,
+            query_tree,
+            candidates,
+            bucket_candidates: RoaringBitmap::new(),
+            parent: None,
+        }
+    }
+
+    pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
+        Attribute {
+            ctx,
+            query_tree: None,
+            candidates: None,
+            bucket_candidates: RoaringBitmap::new(),
+            parent: Some(parent),
+        }
+    }
+}
+
+impl<'t> Criterion for Attribute<'t> {
+    #[logging_timer::time("Attribute::{}")]
+    fn next(&mut self, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<CriterionResult>> {
+        todo!("Attribute")
+    }
+}
+
+// TODO can we keep refs of Query
+fn explode_query_tree(query_tree: &Operation) -> Vec<Vec<Query>> {
+    use crate::search::criteria::Operation::{And, Or, Consecutive};
+
+    fn and_recurse(head: &Operation, tail: &[Operation]) -> Vec<Vec<Query>> {
+        match tail.split_first() {
+            Some((thead, tail)) => {
+                let tail = and_recurse(thead, tail);
+                let mut out = Vec::new();
+                for array in recurse(head) {
+                    for tail_array in &tail {
+                        let mut array = array.clone();
+                        array.extend(tail_array.iter().cloned());
+                        out.push(array);
+                    }
+                }
+                out
+            },
+            None => recurse(head),
+        }
+    }
+
+    fn recurse(op: &Operation) -> Vec<Vec<Query>> {
+        match op {
+            And(ops) | Consecutive(ops) => {
+                ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t))
+            },
+            Or(_, ops) => ops.into_iter().map(recurse).flatten().collect(),
+            Operation::Query(query) => vec![vec![query.clone()]],
+        }
+    }
+
+    recurse(query_tree)
+}
+
+#[cfg(test)]
+mod tests {
+    use big_s::S;
+
+    use crate::search::criteria::QueryKind;
+    use super::*;
+
+    #[test]
+    fn simple_explode_query_tree() {
+        let query_tree = Operation::Or(false, vec![
+            Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }),
+            Operation::And(vec![
+                Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythe")) }),
+                Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
+            ]),
+            Operation::And(vec![
+                Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("many")) }),
+                Operation::Or(false, vec![
+                    Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("thefish")) }),
+                    Operation::And(vec![
+                        Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("the")) }),
+                        Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
+                    ]),
+                ]),
+            ]),
+        ]);
+
+        let expected = vec![
+            vec![Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }],
+            vec![
+                Query { prefix: false, kind: QueryKind::exact(S("manythe")) },
+                Query { prefix: false, kind: QueryKind::exact(S("fish")) },
+            ],
+            vec![
+                Query { prefix: false, kind: QueryKind::exact(S("many")) },
+                Query { prefix: false, kind: QueryKind::exact(S("thefish")) },
+            ],
+            vec![
+                Query { prefix: false, kind: QueryKind::exact(S("many")) },
+                Query { prefix: false, kind: QueryKind::exact(S("the")) },
+                Query { prefix: false, kind: QueryKind::exact(S("fish")) },
+            ],
+        ];
+
+        let result = explode_query_tree(&query_tree);
+        assert_eq!(expected, result);
+    }
+}
diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs
index 22f081871..8d9c21f6e 100644
--- a/milli/src/search/criteria/mod.rs
+++ b/milli/src/search/criteria/mod.rs
@@ -12,12 +12,14 @@ use self::typo::Typo;
 use self::words::Words;
 use self::asc_desc::AscDesc;
 use self::proximity::Proximity;
+use self::attribute::Attribute;
 use self::fetcher::Fetcher;
 
 mod typo;
 mod words;
 mod asc_desc;
 mod proximity;
+mod attribute;
 pub mod fetcher;
 
 pub trait Criterion {
@@ -139,6 +141,7 @@ impl<'t> CriteriaBuilder<'t> {
                     Name::Typo => Box::new(Typo::new(self, father)),
                     Name::Words => Box::new(Words::new(self, father)),
                     Name::Proximity => Box::new(Proximity::new(self, father)),
+                    Name::Attribute => Box::new(Attribute::new(self, father)),
                     Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, father, field)?),
                     Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, father, field)?),
                     _otherwise => father,
@@ -147,6 +150,7 @@ impl<'t> CriteriaBuilder<'t> {
                     Name::Typo => Box::new(Typo::initial(self, query_tree.take(), facet_candidates.take())),
                     Name::Words => Box::new(Words::initial(self, query_tree.take(), facet_candidates.take())),
                     Name::Proximity => Box::new(Proximity::initial(self, query_tree.take(), facet_candidates.take())),
+                    Name::Attribute => Box::new(Attribute::initial(self, query_tree.take(), facet_candidates.take())),
                     Name::Asc(field) => {
                         Box::new(AscDesc::initial_asc(&self.index, &self.rtxn, query_tree.take(), facet_candidates.take(), field)?)
                     },