From bbe3a1010720a19aa0013a361de5e8a8d4bb3b43 Mon Sep 17 00:00:00 2001 From: mpostma Date: Thu, 28 May 2020 19:35:34 +0200 Subject: [PATCH] implement placeholder search --- errors.yaml | 51 ++++ meilisearch-core/examples/from_file.rs | 2 +- meilisearch-core/src/bucket_sort.rs | 55 +++- meilisearch-core/src/database.rs | 4 +- meilisearch-core/src/query_builder.rs | 267 ++++++++++++------ meilisearch-core/src/store/main.rs | 11 +- .../src/update/documents_addition.rs | 18 +- .../src/update/documents_deletion.rs | 29 +- meilisearch-core/src/update/mod.rs | 13 +- meilisearch-http/Cargo.toml | 2 +- meilisearch-http/src/helpers/meilisearch.rs | 10 +- meilisearch-http/src/routes/search.rs | 5 +- 12 files changed, 361 insertions(+), 106 deletions(-) create mode 100644 errors.yaml diff --git a/errors.yaml b/errors.yaml new file mode 100644 index 000000000..718796d57 --- /dev/null +++ b/errors.yaml @@ -0,0 +1,51 @@ +--- +errors: + - code: create_index + description: "Error relative to index creation, check out our guide on [index creation](link.com)" + - code: existing_index + description: "An index with this name already exists, check out our guide on [index creation](link.com)" + - code: invalid_index_uid + description: "The provided index uid is incorrectly formatted, check out our guide on [index creation](link.com)" + - code: open_index + description: "An error occurred while trying to open the requested index, ..." + - code: invalid_state + description: "" + - code: missing_primary_key + description: "" + - code: primary_key_already_present + description: "" + - code: max_field_limit_exceeded + description: "" + - code: missing_document_id + description: "" + - code: invalid_facet + description: "" + - code: invalid_filter + description: "" + - code: bad_parameter + description: "" + - code: bad_request + description: "" + - code: document_not_found + description: "" + - code: internal + description: "" + - code: invalid_token + description: "" + - code: maintenance + description: "" + - code: missing_authorization_header + description: "" + - code: missing_header + description: "" + - code: not_found + description: "" + - code: payload_too_large + description: "" + - code: retrieve_document + description: "" + - code: search_error + description: "" + - code: unsupported_media_type + description: "" +...
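The heart of this patch is `placeholder_document_sort` (added to bucket_sort.rs below), which orders documents by the custom Asc/Desc ranking rules alone when no query string is provided. As a standalone illustration of that multi-criteria comparator (a minimal runnable sketch with stand-in types, not the patch's actual `RankedMap`/`DocumentId`):

```rust
use std::cmp::Ordering;

enum SortOrder { Asc, Desc }

// Walk the (field index, order) pairs in priority order and return the first
// non-equal comparison; ties fall through to the next rule.
fn compare(a: &[i64], b: &[i64], rules: &[(usize, SortOrder)]) -> Ordering {
    for (field, order) in rules {
        let (x, y) = match order {
            SortOrder::Asc => (a[*field], b[*field]),
            SortOrder::Desc => (b[*field], a[*field]),
        };
        match x.cmp(&y) {
            Ordering::Equal => continue,
            ordering => return ordering,
        }
    }
    Ordering::Equal
}

fn main() {
    // Sort by field 0 ascending, then field 1 descending.
    let rules = [(0, SortOrder::Asc), (1, SortOrder::Desc)];
    let mut docs = vec![vec![3, 5], vec![1, 20], vec![3, 10]];
    docs.sort_unstable_by(|a, b| compare(a, b, &rules));
    assert_eq!(docs, vec![vec![1, 20], vec![3, 10], vec![3, 5]]);
}
```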
diff --git a/meilisearch-core/examples/from_file.rs b/meilisearch-core/examples/from_file.rs index a64e2b626..36add12a6 100644 --- a/meilisearch-core/examples/from_file.rs +++ b/meilisearch-core/examples/from_file.rs @@ -368,7 +368,7 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box< }); } - let result = builder.query(ref_reader, &query, 0..command.number_results)?; + let result = builder.query(ref_reader, Some(&query), 0..command.number_results)?; let mut retrieve_duration = Duration::default(); diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index 87f518868..01200008b 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -10,16 +10,16 @@ use std::fmt; use compact_arena::{SmallArena, Idx32, mk_arena}; use log::debug; -use meilisearch_types::DocIndex; use sdset::{Set, SetBuf, exponential_search, SetOperation, Counter, duo::OpBuilder}; use slice_group_by::{GroupBy, GroupByMut}; -use crate::error::Error; +use meilisearch_types::DocIndex; + use crate::criterion::{Criteria, Context, ContextMut}; use crate::distinct_map::{BufferedDistinctMap, DistinctMap}; use crate::raw_document::RawDocument; use crate::{database::MainT, reordered_attrs::ReorderedAttrs}; -use crate::{Document, DocumentId, MResult, Index}; +use crate::{store, Document, DocumentId, MResult, Index, RankedMap, MainReader, Error}; use crate::query_tree::{create_query_tree, traverse_query_tree}; use crate::query_tree::{Operation, QueryResult, QueryKind, QueryId, PostingsKey}; use crate::query_tree::Context as QTContext; @@ -588,8 +588,55 @@ impl Deref for PostingsListView<'_> { } } +/// sorts document ids according to user-defined ranking rules. +pub fn placeholder_document_sort( + document_ids: &mut [DocumentId], + index: &store::Index, + reader: &MainReader, + ranked_map: &RankedMap +) -> MResult<()> { + use crate::settings::RankingRule; + use std::cmp::Ordering; + + enum SortOrder { + Asc, + Desc, + } + + if let Some(ranking_rules) = index.main.ranking_rules(reader)? { + let schema = index.main.schema(reader)? + .ok_or(Error::SchemaMissing)?; + + // Select the custom rules from the ranking rules, and map each to its + // field_id and sort order + let ranking_rules = ranking_rules.iter().filter_map(|r| + match r { + RankingRule::Asc(name) => schema.id(name).map(|f| (f, SortOrder::Asc)), + RankingRule::Desc(name) => schema.id(name).map(|f| (f, SortOrder::Desc)), + _ => None, + }).collect::<Vec<_>>(); + + document_ids.sort_unstable_by(|a, b| { + for (field_id, order) in &ranking_rules { + let a_value = ranked_map.get(*a, *field_id); + let b_value = ranked_map.get(*b, *field_id); + let (a, b) = match order { + SortOrder::Asc => (a_value, b_value), + SortOrder::Desc => (b_value, a_value), + }; + match a.cmp(&b) { + Ordering::Equal => continue, + ordering => return ordering, + } + } + Ordering::Equal + }); + } + Ok(()) +} + /// For each entry in facet_docids, calculates the number of documents in the intersection with candidate_docids. -fn facet_count( +pub fn facet_count( facet_docids: HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>, candidate_docids: &Set<DocumentId>, ) -> HashMap<String, HashMap<String, usize>> { diff --git a/meilisearch-core/src/database.rs b/meilisearch-core/src/database.rs index 71273f803..b9e952616 100644 --- a/meilisearch-core/src/database.rs +++ b/meilisearch-core/src/database.rs @@ -814,7 +814,7 @@ mod tests { // even try to search for a document let reader = db.main_read_txn().unwrap(); - let SortResult {documents, .. } = index.query_builder().query(&reader, "21 ", 0..20).unwrap(); + let SortResult {documents, ..
} = index.query_builder().query(&reader, Some("21 "), 0..20).unwrap(); assert_matches!(documents.len(), 1); reader.abort().unwrap(); @@ -1212,7 +1212,7 @@ mod tests { let builder = index.query_builder_with_criteria(criteria); - let SortResult {documents, .. } = builder.query(&reader, "Kevin", 0..20).unwrap(); + let SortResult {documents, .. } = builder.query(&reader, Some("Kevin"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!( diff --git a/meilisearch-core/src/query_builder.rs b/meilisearch-core/src/query_builder.rs index cb1c85da7..effaa3f2a 100644 --- a/meilisearch-core/src/query_builder.rs +++ b/meilisearch-core/src/query_builder.rs @@ -1,18 +1,20 @@ use std::borrow::Cow; use std::collections::HashMap; -use std::ops::{Range, Deref}; +use std::ops::{Deref, Range}; use std::time::Duration; use either::Either; -use sdset::SetOperation; +use sdset::{SetOperation, SetBuf, Set}; use meilisearch_schema::FieldId; +use crate::bucket_sort::{bucket_sort, bucket_sort_with_distinct, SortResult, placeholder_document_sort, facet_count}; use crate::database::MainT; -use crate::bucket_sort::{bucket_sort, bucket_sort_with_distinct, SortResult}; -use crate::{criterion::Criteria, DocumentId}; -use crate::{reordered_attrs::ReorderedAttrs, store, MResult}; use crate::facets::FacetFilter; +use crate::distinct_map::{DistinctMap, BufferedDistinctMap}; +use crate::Document; +use crate::{criterion::Criteria, DocumentId}; +use crate::{reordered_attrs::ReorderedAttrs, store, MResult, MainReader}; pub struct QueryBuilder<'c, 'f, 'd, 'i> { criteria: Criteria<'c>, @@ -27,10 +29,7 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> { pub fn new(index: &'i store::Index) -> Self { - QueryBuilder::with_criteria( - index, - Criteria::default(), - ) + QueryBuilder::with_criteria(index, Criteria::default()) } /// sets facet attributes to filter on @@ -43,10 +42,7 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> { self.facets = facets; } - pub fn with_criteria( - index: &'i store::Index, - criteria: Criteria<'c>, - ) -> Self { + pub fn with_criteria(index: &'i store::Index, criteria: Criteria<'c>) -> Self { QueryBuilder { criteria, searchable_attrs: None, @@ -82,14 +78,11 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> { reorders.insert_attribute(attribute); } - pub fn query( - self, - reader: &heed::RoTxn<MainT>, - query: &str, - range: Range<usize>, - ) -> MResult<SortResult> { - let facets_docids = match self.facet_filter { - Some(facets) => { + /// returns the document ids associated with a facet filter by computing the union and + /// intersection of the document sets + fn facets_docids(&self, reader: &MainReader) -> MResult<Option<SetBuf<DocumentId>>> { + let facet_docids = match self.facet_filter { + Some(ref facets) => { let mut ands = Vec::with_capacity(facets.len()); let mut ors = Vec::new(); for f in facets.deref() { match f { Either::Left(keys) => { ors.reserve(keys.len()); for key in keys { - let docids = self.index.facets.facet_document_ids(reader, &key)?.unwrap_or_default(); + let docids = self + .index + .facets + .facet_document_ids(reader, &key)?
+ .unwrap_or_default(); ors.push(docids); } let sets: Vec<_> = ors.iter().map(Cow::deref).collect(); - let or_result = sdset::multi::OpBuilder::from_vec(sets).union().into_set_buf(); + let or_result = sdset::multi::OpBuilder::from_vec(sets) + .union() + .into_set_buf(); ands.push(Cow::Owned(or_result)); ors.clear(); } - Either::Right(key) =>{ + Either::Right(key) => { match self.index.facets.facet_document_ids(reader, &key)? { Some(docids) => ands.push(docids), // no candidates for search, early return. - None => return Ok(SortResult::default()), + None => return Ok(Some(SetBuf::default())), } } }; } let ands: Vec<_> = ands.iter().map(Cow::deref).collect(); - Some(sdset::multi::OpBuilder::from_vec(ands).intersection().into_set_buf()) - } - None => None - }; - - // for each field to retrieve the count for, create an HashMap associating the attribute - // value to a set of matching documents. The HashMaps are them collected in another - // HashMap, associating each HashMap to it's field. - let facet_count_docids = match self.facets { - Some(field_ids) => { - let mut facet_count_map = HashMap::new(); - for (field_id, field_name) in field_ids { - let mut key_map = HashMap::new(); - for pair in self.index.facets.field_document_ids(reader, field_id)? { - let (facet_key, document_ids) = pair?; - let value = facet_key.value(); - key_map.insert(value.to_string(), document_ids); - } - facet_count_map.insert(field_name, key_map); - } - Some(facet_count_map) + Some( + sdset::multi::OpBuilder::from_vec(ands) + .intersection() + .into_set_buf(), + ) } None => None, }; + Ok(facet_docids) + } + + fn standard_query(self, reader: &MainReader, query: &str, range: Range<usize>) -> MResult<SortResult> { + let facets_docids = match self.facets_docids(reader)? { + Some(ids) if ids.is_empty() => return Ok(SortResult::default()), + other => other + }; + // for each field to retrieve the count for, create a HashMap associating the attribute + // value to a set of matching documents. The HashMaps are then collected in another + // HashMap, associating each HashMap to its field. + let facet_count_docids = self.facet_count_docids(reader)?; match self.distinct { Some((distinct, distinct_size)) => bucket_sort_with_distinct( @@ -167,6 +162,117 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> { ), } } + + fn placeholder_query(self, reader: &heed::RoTxn<MainT>, range: Range<usize>) -> MResult<SortResult> { + match self.facets_docids(reader)? { + Some(docids) => { + // We sort the docids from facets according to the criteria set by the user + let mut sorted_docids = docids.clone().into_vec(); + let mut sort_result = match self.index.main.ranked_map(reader)? { + Some(ranked_map) => { + placeholder_document_sort(&mut sorted_docids, self.index, reader, &ranked_map)?; + self.sort_result_from_docids(&sorted_docids, range) + }, + // if we can't perform a sort, we return documents unordered + None => self.sort_result_from_docids(&docids, range), + }; + + if let Some(f) = self.facet_count_docids(reader)? { + sort_result.exhaustive_facets_count = Some(true); + sort_result.facets = Some(facet_count(f, &docids)); + } + + Ok(sort_result) + }, + None => { + match self.index.main.sorted_document_ids_cache(reader)? { + // build result from cached document ids + Some(docids) => { + let mut sort_result = self.sort_result_from_docids(&docids, range); + + if let Some(f) = self.facet_count_docids(reader)?
{ + sort_result.exhaustive_facets_count = Some(true); + // document ids are not sorted in natural order, so we need to construct a new set + let document_set = SetBuf::from_dirty(Vec::from(docids)); + sort_result.facets = Some(facet_count(f, &document_set)); + } + + Ok(sort_result) + }, + // no document id cached, return empty result + None => Ok(SortResult::default()), + } + } + } + } + + fn facet_count_docids<'a>(&self, reader: &'a MainReader) -> MResult<Option<HashMap<String, HashMap<String, Cow<'a, Set<DocumentId>>>>>> { + match self.facets { + Some(ref field_ids) => { + let mut facet_count_map = HashMap::new(); + for (field_id, field_name) in field_ids { + let mut key_map = HashMap::new(); + for pair in self.index.facets.field_document_ids(reader, *field_id)? { + let (facet_key, document_ids) = pair?; + let value = facet_key.value(); + key_map.insert(value.to_string(), document_ids); + } + facet_count_map.insert(field_name.clone(), key_map); + } + Ok(Some(facet_count_map)) + } + None => Ok(None), + } + } + + fn sort_result_from_docids(&self, docids: &[DocumentId], range: Range<usize>) -> SortResult { + let mut sort_result = SortResult::default(); + let mut result = match self.filter { + Some(ref filter) => docids + .iter() + .filter(|item| (filter)(**item)) + .skip(range.start) + .take(range.end - range.start) + .map(|&id| Document::from_highlights(id, &[])) + .collect::<Vec<Document>>(), + None => docids + .iter() + .skip(range.start) + .take(range.end - range.start) + .map(|&id| Document::from_highlights(id, &[])) + .collect::<Vec<Document>>(), + }; + + // distinct is set, remove duplicates with the distinct function + if let Some((distinct, distinct_size)) = &self.distinct { + let mut distinct_map = DistinctMap::new(*distinct_size); + let mut distinct_map = BufferedDistinctMap::new(&mut distinct_map); + result.retain(|doc| { + let id = doc.id; + let key = (distinct)(id); + match key { + Some(key) => distinct_map.register(key), + None => distinct_map.register_without_key(), + } + }); + } + + sort_result.documents = result; + sort_result.nb_hits = docids.len(); + sort_result + } + + pub fn query( + self, + reader: &heed::RoTxn<MainT>, + query: Option<&str>, + range: Range<usize>, + ) -> MResult<SortResult> { + match query { + Some(query) => self.standard_query(reader, query, range), + None => self.placeholder_query(reader, range), + } + } } #[cfg(test)] @@ -181,12 +287,12 @@ mod tests { use sdset::SetBuf; use tempfile::TempDir; - use crate::DocIndex; - use crate::Document; use crate::automaton::normalize_str; use crate::bucket_sort::SimpleMatch; - use crate::database::{Database,DatabaseOptions}; + use crate::database::{Database, DatabaseOptions}; use crate::store::Index; + use crate::DocIndex; + use crate::Document; use meilisearch_schema::Schema; fn set_from_stream<'f, I, S>(stream: I) -> fst::Set<Vec<u8>> @@ -366,7 +472,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult { documents, .. } = builder.query(&reader, "iphone from apple", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("iphone from apple"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -389,7 +495,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult { documents, .. } = builder.query(&reader, "hello", 0..20).unwrap(); + let SortResult { documents, ..
} = builder.query(&reader, Some("hello"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -400,7 +506,7 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult { documents, .. } = builder.query(&reader, "bonjour", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("bonjour"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -504,7 +610,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult { documents, .. } = builder.query(&reader, "hello", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("hello"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -525,7 +631,7 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult { documents, .. } = builder.query(&reader, "bonjour", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("bonjour"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -546,7 +652,7 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult { documents, .. } = builder.query(&reader, "salut", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("salut"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -592,7 +698,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult { documents, .. } = builder.query(&reader, "NY subway", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("NY subway"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { @@ -614,7 +720,7 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult { documents, .. } = builder.query(&reader, "NYC subway", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("NYC subway"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { @@ -656,7 +762,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "NY", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("NY"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => { @@ -680,7 +786,7 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "new york", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("new york"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. 
}) => { @@ -714,7 +820,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "NY subway", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("NY subway"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -731,7 +837,8 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "new york subway", 0..20).unwrap(); + let SortResult { documents, .. } = + builder.query(&reader, Some("new york subway"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { @@ -779,7 +886,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "NY subway", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("NY subway"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { @@ -801,7 +908,7 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "NYC subway", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("NYC subway"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { @@ -854,7 +961,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "NY subway broken", 0..20).unwrap(); + let SortResult {documents, .. } = builder.query(&reader, Some("NY subway broken"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -870,7 +977,7 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "NYC subway", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("NYC subway"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { @@ -926,8 +1033,8 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder - .query(&reader, "new york underground train broken", 0..20) + let SortResult { documents, .. } = builder + .query(&reader, Some("new york underground train broken"), 0..20) .unwrap(); let mut iter = documents.into_iter(); @@ -956,8 +1063,8 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult {documents, .. } = builder - .query(&reader, "new york city underground train broken", 0..20) + let SortResult { documents, .. } = builder + .query(&reader, Some("new york city underground train broken"), 0..20) .unwrap(); let mut iter = documents.into_iter(); @@ -1000,7 +1107,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "new york big ", 0..20).unwrap(); + let SortResult { documents, .. 
} = builder.query(&reader, Some("new york big "), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -1034,7 +1141,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "NY subway ", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("NY subway "), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -1084,8 +1191,8 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder - .query(&reader, "new york city long subway cool ", 0..20) + let SortResult { documents, .. } = builder + .query(&reader, Some("new york city long subway cool "), 0..20) .unwrap(); let mut iter = documents.into_iter(); @@ -1117,7 +1224,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "telephone", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("telephone"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -1134,7 +1241,7 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "téléphone", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("téléphone"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -1151,7 +1258,7 @@ mod tests { assert_matches!(iter.next(), None); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "télephone", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("télephone"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { @@ -1178,7 +1285,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "i phone case", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("i phone case"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -1207,7 +1314,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "searchengine", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("searchengine"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { @@ -1247,7 +1354,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "searchengine", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("searchengine"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. 
}) => { @@ -1279,7 +1386,7 @@ mod tests { let reader = db.main_read_txn().unwrap(); let builder = store.query_builder(); - let SortResult {documents, .. } = builder.query(&reader, "searchengine", 0..20).unwrap(); + let SortResult { documents, .. } = builder.query(&reader, Some("searchengine"), 0..20).unwrap(); let mut iter = documents.into_iter(); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { diff --git a/meilisearch-core/src/store/main.rs b/meilisearch-core/src/store/main.rs index db3c61dcf..cf74e0698 100644 --- a/meilisearch-core/src/store/main.rs +++ b/meilisearch-core/src/store/main.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; use std::collections::HashMap; use chrono::{DateTime, Utc}; -use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str}; +use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str, CowSlice}; use meilisearch_schema::{FieldId, Schema}; use meilisearch_types::DocumentId; use sdset::Set; @@ -25,6 +25,7 @@ const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents"; const RANKED_MAP_KEY: &str = "ranked-map"; const RANKING_RULES_KEY: &str = "ranking-rules"; const SCHEMA_KEY: &str = "schema"; +const SORTED_DOCUMENT_IDS_CACHE_KEY: &str = "sorted-document-ids-cache"; const STOP_WORDS_KEY: &str = "stop-words"; const SYNONYMS_KEY: &str = "synonyms"; const UPDATED_AT_KEY: &str = "updated-at"; @@ -165,6 +166,14 @@ impl Main { Ok(self.main.put::<_, Str, ByteSlice>(writer, WORDS_KEY, fst.as_fst().as_bytes())?) } + pub fn put_sorted_document_ids_cache(self, writer: &mut heed::RwTxn<MainT>, documents_ids: &[DocumentId]) -> MResult<()> { + Ok(self.main.put::<_, Str, CowSlice<DocumentId>>(writer, SORTED_DOCUMENT_IDS_CACHE_KEY, documents_ids)?) + } + + pub fn sorted_document_ids_cache(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<Cow<[DocumentId]>>> { + Ok(self.main.get::<_, Str, CowSlice<DocumentId>>(reader, SORTED_DOCUMENT_IDS_CACHE_KEY)?) + } + pub fn put_schema(self, writer: &mut heed::RwTxn<MainT>, schema: &Schema) -> MResult<()> { Ok(self.main.put::<_, Str, SerdeBincode<Schema>>(writer, SCHEMA_KEY, schema)?) } diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index 103b5c923..a4b41d2a8 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -217,7 +217,7 @@ pub fn apply_addition<'a, 'b>( let mut indexer = RawIndexer::new(stop_words); // For each document in this update - for (document_id, document) in documents_additions { + for (document_id, document) in &documents_additions { // For each key-value pair in the document. for (attribute, value) in document { let field_id = schema.insert_and_index(&attribute)?; @@ -229,7 +229,7 @@ pub fn apply_addition<'a, 'b>( &mut indexer, &schema, field_id, - document_id, + *document_id, &value, )?; } @@ -257,6 +257,10 @@ pub fn apply_addition<'a, 'b>( index.facets.add(writer, facet_map)?; } + // update is finished; update sorted document id cache with new state + let mut document_ids = index.main.internal_docids(writer)?.to_vec(); + super::cache_document_ids_sorted(writer, &ranked_map, index, &mut document_ids)?; + Ok(()) } @@ -313,8 +317,8 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn, index: &store::Ind index.facets.add(writer, facet_map)?; } // ^-- https://github.com/meilisearch/MeiliSearch/pull/631#issuecomment-626624470 --v - for document_id in documents_ids_to_reindex { - for result in index.documents_fields.document_fields(writer, document_id)?
{ + for document_id in &documents_ids_to_reindex { + for result in index.documents_fields.document_fields(writer, *document_id)? { let (field_id, bytes) = result?; let value: Value = serde_json::from_slice(bytes)?; ram_store.insert((document_id, field_id), value); } @@ -330,7 +334,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn, index: &store::Ind &mut indexer, &schema, field_id, - document_id, + *document_id, &value, )?; } @@ -354,6 +358,10 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn, index: &store::Ind index.facets.add(writer, facet_map)?; } + // update is finished; update sorted document id cache with new state + let mut document_ids = index.main.internal_docids(writer)?.to_vec(); + super::cache_document_ids_sorted(writer, &ranked_map, index, &mut document_ids)?; + Ok(()) } diff --git a/meilisearch-core/src/update/documents_deletion.rs b/meilisearch-core/src/update/documents_deletion.rs index 8b31170ae..def6251c8 100644 --- a/meilisearch-core/src/update/documents_deletion.rs +++ b/meilisearch-core/src/update/documents_deletion.rs @@ -8,7 +8,7 @@ use crate::database::{UpdateEvent, UpdateEventsEmitter}; use crate::facets; use crate::store; use crate::update::{next_update_id, compute_short_prefixes, Update}; -use crate::{DocumentId, Error, MResult, RankedMap}; +use crate::{DocumentId, Error, MResult, RankedMap, MainWriter, Index}; pub struct DocumentsDeletion { updates_store: store::Updates, @@ -153,8 +153,8 @@ pub fn apply_documents_deletion( } let deleted_documents_len = deleted_documents.len() as u64; - for id in deleted_documents { - index.docs_words.del_doc_words(writer, id)?; + for id in &deleted_documents { + index.docs_words.del_doc_words(writer, *id)?; } let removed_words = fst::Set::from_iter(removed_words).unwrap(); @@ -180,5 +180,28 @@ pub fn apply_documents_deletion( compute_short_prefixes(writer, &words, index)?; + // update is finished; update sorted document id cache with new state + document_cache_remove_deleted(writer, index, &ranked_map, &deleted_documents)?; + Ok(()) +} + +/// rebuilds the document id cache by either removing deleted documents from the existing cache, +/// or generating a new one from the documents in the store +fn document_cache_remove_deleted(writer: &mut MainWriter, index: &Index, ranked_map: &RankedMap, documents_to_delete: &HashSet<DocumentId>) -> MResult<()> { + let new_cache = match index.main.sorted_document_ids_cache(writer)? { + // only keep documents that are not in the list of deleted documents.
Order is preserved, + // no need to re-sort + Some(old_cache) => { + old_cache.iter().filter(|docid| !documents_to_delete.contains(docid)).cloned().collect::<Vec<_>>() + } + // couldn't find cached documents, try building a new cache from documents in store + None => { + let mut document_ids = index.main.internal_docids(writer)?.to_vec(); + super::cache_document_ids_sorted(writer, ranked_map, index, &mut document_ids)?; + document_ids + } + }; + index.main.put_sorted_document_ids_cache(writer, &new_cache)?; Ok(()) } diff --git a/meilisearch-core/src/update/mod.rs b/meilisearch-core/src/update/mod.rs index 2443790c1..2f3d7bd8c 100644 --- a/meilisearch-core/src/update/mod.rs +++ b/meilisearch-core/src/update/mod.rs @@ -25,8 +25,9 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use meilisearch_error::ErrorCode; +use meilisearch_types::DocumentId; -use crate::{store, MResult}; +use crate::{store, MResult, RankedMap}; use crate::database::{MainT, UpdateT}; use crate::settings::SettingsUpdate; @@ -371,3 +372,13 @@ where A: AsRef<[u8]>, Ok(()) } + +fn cache_document_ids_sorted( + writer: &mut heed::RwTxn<MainT>, + ranked_map: &RankedMap, + index: &store::Index, + document_ids: &mut [DocumentId], +) -> MResult<()> { + crate::bucket_sort::placeholder_document_sort(document_ids, index, writer, ranked_map)?; + index.main.put_sorted_document_ids_cache(writer, &document_ids) } diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 6673598ca..16de4ea38 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -70,9 +70,9 @@ features = [ optional = true [dev-dependencies] +serde_url_params = "0.2.0" tempdir = "0.3.7" tokio = { version = "0.2.18", features = ["macros", "time"] } -serde_url_params = "0.2.0" [dev-dependencies.assert-json-diff] git = "https://github.com/qdequele/assert-json-diff" diff --git a/meilisearch-http/src/helpers/meilisearch.rs b/meilisearch-http/src/helpers/meilisearch.rs index f9183d013..171098c8f 100644 --- a/meilisearch-http/src/helpers/meilisearch.rs +++ b/meilisearch-http/src/helpers/meilisearch.rs @@ -20,11 +20,11 @@ use slice_group_by::GroupBy; use crate::error::{Error, ResponseError}; pub trait IndexSearchExt { - fn new_search(&self, query: String) -> SearchBuilder; + fn new_search(&self, query: Option<String>) -> SearchBuilder; } impl IndexSearchExt for Index { - fn new_search(&self, query: String) -> SearchBuilder { + fn new_search(&self, query: Option<String>) -> SearchBuilder { SearchBuilder { index: self, query, @@ -43,7 +43,7 @@ impl IndexSearchExt for Index { pub struct SearchBuilder<'a> { index: &'a Index, - query: String, + query: Option<String>, offset: usize, limit: usize, attributes_to_crop: Option<HashMap<String, usize>>, @@ -156,7 +156,7 @@ impl<'a> SearchBuilder<'a> { query_builder.set_facets(self.facets); let start = Instant::now(); - let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit)); + let result = query_builder.query(reader, self.query.as_deref(), self.offset..(self.offset + self.limit)); let search_result = result.map_err(Error::search_documents)?; let time_ms = start.elapsed().as_millis() as usize; @@ -245,7 +245,7 @@ impl<'a> SearchBuilder<'a> { nb_hits: search_result.nb_hits, exhaustive_nb_hits: search_result.exhaustive_nb_hit, processing_time_ms: time_ms, - query: self.query.to_string(), + query: self.query.unwrap_or_default(), facets_distribution: search_result.facets, exhaustive_facets_count: search_result.exhaustive_facets_count, }; diff --git a/meilisearch-http/src/routes/search.rs
b/meilisearch-http/src/routes/search.rs index beffd8fd6..66471c01e 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -24,7 +24,7 @@ pub fn services(cfg: &mut web::ServiceConfig) { #[derive(Serialize, Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchQuery { - q: String, + q: Option<String>, offset: Option<usize>, limit: Option<usize>, attributes_to_retrieve: Option<String>, @@ -50,7 +50,7 @@ async fn search_with_url_query( #[derive(Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchQueryPost { - q: String, + q: Option<String>, offset: Option<usize>, limit: Option<usize>, attributes_to_retrieve: Option<Vec<String>>, @@ -177,7 +177,6 @@ impl SearchQuery { None => (), } } - search_builder.attributes_to_crop(final_attributes); }
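With `q` optional all the way from the HTTP routes down to `QueryBuilder::query`, a request without a query string now reaches the placeholder path instead of being rejected at deserialization. A hypothetical call against the core API, mirroring the tests above (database setup elided; `store` and `reader` are assumed to exist as in those tests):

```rust
// Regular search: Some(query) goes through the usual relevance criteria.
let builder = store.query_builder();
let SortResult { documents, .. } = builder.query(&reader, Some("subway"), 0..20).unwrap();

// Placeholder search: None skips the query tree entirely; candidates come from
// the facet-filtered set or the sorted-document-ids cache, ordered only by the
// custom Asc/Desc ranking rules.
let builder = store.query_builder();
let SortResult { documents, nb_hits, .. } = builder.query(&reader, None, 0..20).unwrap();
```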