From 91d6e90d5d0dfeb0633195887b09a97c4459fc37 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 16 Feb 2021 18:21:16 +0100 Subject: [PATCH] enable faceted searches --- Cargo.lock | 47 +-------------- Cargo.toml | 3 +- src/data/search.rs | 139 ++++++++++++++++++++++++++++++++----------- src/routes/search.rs | 1 - 4 files changed, 106 insertions(+), 84 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0b8128118..7e4054d87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,7 +634,6 @@ dependencies = [ "atty", "bitflags", "strsim", - "term_size", "textwrap", "unicode-width", "vec_map", @@ -1634,6 +1633,7 @@ dependencies = [ "chrono", "crossbeam-channel", "dashmap", + "either", "env_logger 0.8.2", "flate2", "fst", @@ -1720,7 +1720,6 @@ version = "0.1.0" dependencies = [ "anyhow", "bstr", - "byte-unit", "byteorder", "crossbeam-channel", "csv", @@ -1732,13 +1731,11 @@ dependencies = [ "heed", "human_format", "itertools 0.9.0", - "jemallocator", "levenshtein_automata", "linked-hash-map", "log", "meilisearch-tokenizer", "memmap", - "near-proximity", "num-traits", "obkv", "once_cell", @@ -1747,15 +1744,11 @@ dependencies = [ "pest_derive", "rayon", "regex", - "ringtail", "roaring", "serde", "serde_json", - "slice-group-by", "smallstr", "smallvec", - "stderrlog", - "structopt", "tempfile", "uuid", ] @@ -1850,14 +1843,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "near-proximity" -version = "0.1.0" -source = "git+https://github.com/Kerollmops/plane-sweep-proximity?rev=6608205#66082058537f6fe7709adc4690048d62f3c0e9b7" -dependencies = [ - "tinyvec", -] - [[package]] name = "net2" version = "0.2.37" @@ -2435,12 +2420,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "ringtail" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21215c1b9d8f7832b433255bd9eea3e2779aa55b21b2f8e13aad62c74749b237" - [[package]] name = "roaring" version = "0.6.4" @@ -2737,19 +2716,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "stderrlog" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b02f316286ae558d83acc93dd81eaba096e746987a7961d4a9ae026842bae67f" -dependencies = [ - "atty", - "chrono", - "log", - "termcolor", - "thread_local", -] - [[package]] name = "stdweb" version = "0.4.20" @@ -2897,16 +2863,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "term_size" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e4129646ca0ed8f45d09b929036bafad5377103edd06e50bf574b353d2b08d9" -dependencies = [ - "libc", - "winapi 0.3.9", -] - [[package]] name = "termcolor" version = "1.1.2" @@ -2922,7 +2878,6 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" dependencies = [ - "term_size", "unicode-width", ] diff --git a/Cargo.toml b/Cargo.toml index 4668a6897..993c80946 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,7 @@ main_error = "0.1.0" meilisearch-error = { path = "../MeiliSearch/meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" } memmap = "0.7.0" -milli = { path = "../milli" } +milli = { path = "../milli/milli" } mime = "0.3.16" once_cell = "1.5.2" rand = "0.7.3" @@ -57,6 +57,7 @@ tokio = { version = "0.2", features = ["full"] } dashmap = "4.0.2" uuid = "0.8.2" itertools = "0.10.0" +either = "1.6.1" [dependencies.sentry] default-features = false diff --git a/src/data/search.rs b/src/data/search.rs index 68e8aa193..391cc1960 100644 --- a/src/data/search.rs +++ b/src/data/search.rs @@ -3,17 +3,21 @@ use std::mem; use std::time::Instant; use anyhow::{bail, Context}; +use either::Either; +use heed::RoTxn; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; -use milli::{Index, obkv_to_json, FacetCondition}; +use milli::{obkv_to_json, FacetCondition, Index}; use serde::{Deserialize, Serialize}; -use serde_json::{Value, Map}; +use serde_json::{Map, Value}; -use crate::index_controller::IndexController; use super::Data; +use crate::index_controller::IndexController; pub const DEFAULT_SEARCH_LIMIT: usize = 20; -const fn default_search_limit() -> usize { DEFAULT_SEARCH_LIMIT } +const fn default_search_limit() -> usize { + DEFAULT_SEARCH_LIMIT +} #[derive(Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] @@ -31,11 +35,10 @@ pub struct SearchQuery { pub matches: Option, pub facet_filters: Option, pub facets_distribution: Option>, - pub facet_condition: Option, } impl SearchQuery { - pub fn perform(&self, index: impl AsRef) -> anyhow::Result{ + pub fn perform(&self, index: impl AsRef) -> anyhow::Result { let index = index.as_ref(); let before_search = Instant::now(); let rtxn = index.read_txn()?; @@ -49,14 +52,17 @@ impl SearchQuery { search.limit(self.limit); search.offset(self.offset.unwrap_or_default()); - if let Some(ref condition) = self.facet_condition { - if !condition.trim().is_empty() { - let condition = FacetCondition::from_str(&rtxn, &index, &condition)?; - search.facet_condition(condition); + if let Some(ref facets) = self.facet_filters { + if let Some(facets) = parse_facets(facets, index, &rtxn)? { + search.facet_condition(facets); } } - let milli::SearchResult { documents_ids, found_words, candidates } = search.execute()?; + let milli::SearchResult { + documents_ids, + found_words, + candidates, + } = search.execute()?; let mut documents = Vec::new(); let fields_ids_map = index.fields_ids_map(&rtxn)?; @@ -66,14 +72,14 @@ impl SearchQuery { let attributes_to_retrieve_ids = match self.attributes_to_retrieve { Some(ref attrs) if attrs.iter().any(|f| f == "*") => None, Some(ref attrs) => attrs - .iter() - .filter_map(|f| fields_ids_map.id(f)) - .collect::>() - .into(), + .iter() + .filter_map(|f| fields_ids_map.id(f)) + .collect::>() + .into(), None => None, }; - let displayed_fields_ids = match (displayed_fields_ids, attributes_to_retrieve_ids) { + let displayed_fields_ids = match (displayed_fields_ids, attributes_to_retrieve_ids) { (_, Some(ids)) => ids, (Some(ids), None) => ids, (None, None) => fields_ids_map.iter().map(|(id, _)| id).collect(), @@ -133,25 +139,31 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { for (word, token) in analyzed.reconstruct() { if token.is_word() { let to_highlight = words_to_highlight.contains(token.text()); - if to_highlight { string.push_str("") } + if to_highlight { + string.push_str("") + } string.push_str(word); - if to_highlight { string.push_str("") } + if to_highlight { + string.push_str("") + } } else { string.push_str(word); } } Value::String(string) - }, - Value::Array(values) => { - Value::Array(values.into_iter() + } + Value::Array(values) => Value::Array( + values + .into_iter() .map(|v| self.highlight_value(v, words_to_highlight)) - .collect()) - }, - Value::Object(object) => { - Value::Object(object.into_iter() + .collect(), + ), + Value::Object(object) => Value::Object( + object + .into_iter() .map(|(k, v)| (k, self.highlight_value(v, words_to_highlight))) - .collect()) - }, + .collect(), + ), } } @@ -172,7 +184,11 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { } impl Data { - pub fn search>(&self, index: S, search_query: SearchQuery) -> anyhow::Result { + pub fn search>( + &self, + index: S, + search_query: SearchQuery, + ) -> anyhow::Result { match self.index_controller.index(&index)? { Some(index) => Ok(search_query.perform(index)?), None => bail!("index {:?} doesn't exists", index.as_ref()), @@ -187,7 +203,7 @@ impl Data { attributes_to_retrieve: Option>, ) -> anyhow::Result>> where - S: AsRef + Send + Sync + 'static + S: AsRef + Send + Sync + 'static, { let index_controller = self.index_controller.clone(); let documents: anyhow::Result<_> = tokio::task::spawn_blocking(move || { @@ -207,9 +223,7 @@ impl Data { None => fields_ids_map.iter().map(|(id, _)| id).collect(), }; - let iter = index.documents.range(&txn, &(..))? - .skip(offset) - .take(limit); + let iter = index.documents.range(&txn, &(..))?.skip(offset).take(limit); let mut documents = Vec::new(); @@ -220,7 +234,8 @@ impl Data { } Ok(documents) - }).await?; + }) + .await?; documents } @@ -255,16 +270,68 @@ impl Data { .get(document_id.as_ref().as_bytes()) .with_context(|| format!("Document with id {} not found", document_id.as_ref()))?; - let document = index.documents(&txn, std::iter::once(internal_id))? + let document = index + .documents(&txn, std::iter::once(internal_id))? .into_iter() .next() .map(|(_, d)| d); match document { - Some(document) => Ok(obkv_to_json(&attributes_to_retrieve_ids, &fields_ids_map, document)?), + Some(document) => Ok(obkv_to_json( + &attributes_to_retrieve_ids, + &fields_ids_map, + document, + )?), None => bail!("Document with id {} not found", document_id.as_ref()), } - }).await?; + }) + .await?; document } } + +fn parse_facets_array( + txn: &RoTxn, + index: &Index, + arr: &Vec, +) -> anyhow::Result> { + let mut ands = Vec::new(); + for value in arr { + match value { + Value::String(s) => ands.push(Either::Right(s.clone())), + Value::Array(arr) => { + let mut ors = Vec::new(); + for value in arr { + match value { + Value::String(s) => ors.push(s.clone()), + v => bail!("Invalid facet expression, expected String, found: {:?}", v), + } + } + ands.push(Either::Left(ors)); + } + v => bail!( + "Invalid facet expression, expected String or [String], found: {:?}", + v + ), + } + } + + FacetCondition::from_array(txn, index, ands) +} + +fn parse_facets( + facets: &Value, + index: &Index, + txn: &RoTxn, +) -> anyhow::Result> { + match facets { + // Disabled for now + //Value::String(expr) => Ok(Some(FacetCondition::from_str(txn, index, expr)?)), + Value::Array(arr) => parse_facets_array(txn, index, arr), + v => bail!( + "Invalid facet expression, expected Array, found: {:?}", + v + ), + } +} + diff --git a/src/routes/search.rs b/src/routes/search.rs index a9e87d1a6..794c8ac74 100644 --- a/src/routes/search.rs +++ b/src/routes/search.rs @@ -67,7 +67,6 @@ impl TryFrom for SearchQuery { matches: other.matches, facet_filters, facets_distribution, - facet_condition: None, }) } }