diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index 2234c6529..5a819962e 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -33,6 +33,7 @@ pub fn bucket_sort<'c, FI>( range: Range<usize>, filter: Option<FI>, criteria: Criteria<'c>, + searchable_attrs: Option<ReorderedAttrs>, main_store: store::Main, postings_lists_store: store::PostingsLists, documents_fields_counts_store: store::DocumentsFieldsCounts, @@ -54,6 +55,7 @@ where distinct, distinct_size, criteria, + searchable_attrs, main_store, postings_lists_store, documents_fields_counts_store, @@ -84,7 +86,7 @@ where let mut raw_documents = Vec::new(); for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) { prefiltered_documents += 1; - if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena) { + if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena, searchable_attrs.as_ref()) { raw_documents.push(raw_document); } } @@ -140,7 +142,7 @@ where } let iter = raw_documents.into_iter().skip(range.start).take(range.len()); - let iter = iter.map(|rd| Document::from_raw(rd, &automatons, &arena)); + let iter = iter.map(|rd| Document::from_raw(rd, &automatons, &arena, searchable_attrs.as_ref())); Ok(iter.collect()) } @@ -153,6 +155,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>( distinct: FD, distinct_size: usize, criteria: Criteria<'c>, + searchable_attrs: Option<ReorderedAttrs>, main_store: store::Main, postings_lists_store: store::PostingsLists, documents_fields_counts_store: store::DocumentsFieldsCounts, @@ -182,7 +185,7 @@ where let mut raw_documents = Vec::new(); for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) { prefiltered_documents += 1; - if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena) { + if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena, searchable_attrs.as_ref()) {
raw_documents.push(raw_document); } } @@ -303,7 +306,7 @@ where }; if distinct_accepted && seen.len() > range.start { - documents.push(Document::from_raw(raw_document, &automatons, &arena)); + documents.push(Document::from_raw(raw_document, &automatons, &arena, searchable_attrs.as_ref())); if documents.len() == range.len() { break; } diff --git a/meilisearch-core/src/lib.rs b/meilisearch-core/src/lib.rs index 478870504..ea36abd42 100644 --- a/meilisearch-core/src/lib.rs +++ b/meilisearch-core/src/lib.rs @@ -31,6 +31,7 @@ pub use meilisearch_types::{DocIndex, DocumentId, Highlight}; use compact_arena::SmallArena; use crate::bucket_sort::{QueryWordAutomaton, PostingsListView}; use crate::levenshtein::prefix_damerau_levenshtein; +use crate::reordered_attrs::ReorderedAttrs; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct Document { @@ -41,42 +42,91 @@ pub struct Document { pub matches: Vec<crate::bucket_sort::SimpleMatch>, } +fn highlights_from_raw_document<'a, 'tag, 'txn>( + raw_document: &RawDocument<'a, 'tag>, + automatons: &[QueryWordAutomaton], + arena: &SmallArena<'tag, PostingsListView<'txn>>, + searchable_attrs: Option<&ReorderedAttrs>, +) -> Vec<Highlight> +{ + let mut highlights = Vec::new(); + + for bm in raw_document.bare_matches.iter() { + let postings_list = &arena[bm.postings_list]; + let input = postings_list.input(); + let query = &automatons[bm.query_index as usize].query; + + for di in postings_list.iter() { + let covered_area = if query.len() > input.len() { + input.len() + } else { + prefix_damerau_levenshtein(query.as_bytes(), input).1 + }; + + let attribute = searchable_attrs + .and_then(|sa| sa.reverse(di.attribute)) + .unwrap_or(di.attribute); + + let highlight = Highlight { + attribute: attribute, + char_index: di.char_index, + char_length: covered_area as u16, + }; + + highlights.push(highlight); + } + } + + highlights +} + impl Document { + #[cfg(not(test))] pub fn from_raw<'a, 'tag, 'txn>( raw_document: RawDocument<'a, 'tag>, automatons: &[QueryWordAutomaton],
arena: &SmallArena<'tag, PostingsListView<'txn>>, + searchable_attrs: Option<&ReorderedAttrs>, ) -> Document { - let highlights = raw_document.bare_matches.iter().flat_map(|sm| { - let postings_list = &arena[sm.postings_list]; - let input = postings_list.input(); - let query = &automatons[sm.query_index as usize].query; - postings_list.iter().map(move |m| { - let covered_area = if query.len() > input.len() { - input.len() - } else { - prefix_damerau_levenshtein(query.as_bytes(), input).1 - }; + let highlights = highlights_from_raw_document( + &raw_document, + automatons, + arena, + searchable_attrs, + ); - Highlight { - attribute: m.attribute, - char_index: m.char_index, - char_length: covered_area as u16, - } - }) - }).collect(); + Document { id: raw_document.id, highlights } + } - #[cfg(not(test))] - { - Document { id: raw_document.id, highlights } + #[cfg(test)] + pub fn from_raw<'a, 'tag, 'txn>( + raw_document: RawDocument<'a, 'tag>, + automatons: &[QueryWordAutomaton], + arena: &SmallArena<'tag, PostingsListView<'txn>>, + searchable_attrs: Option<&ReorderedAttrs>, + ) -> Document + { + use crate::bucket_sort::SimpleMatch; + + let highlights = highlights_from_raw_document( + &raw_document, + automatons, + arena, + searchable_attrs, + ); + + let mut matches = Vec::new(); + for sm in raw_document.processed_matches { + let attribute = searchable_attrs + .and_then(|sa| sa.reverse(sm.attribute)) + .unwrap_or(sm.attribute); + + matches.push(SimpleMatch { attribute, ..sm }); } + matches.sort_unstable(); - #[cfg(test)] - { - let matches = raw_document.processed_matches; - Document { id: raw_document.id, highlights, matches } - } + Document { id: raw_document.id, highlights, matches } } } diff --git a/meilisearch-core/src/query_builder.rs b/meilisearch-core/src/query_builder.rs index 20e9ba917..e46858241 100644 --- a/meilisearch-core/src/query_builder.rs +++ b/meilisearch-core/src/query_builder.rs @@ -73,9 +73,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> { } pub fn 
add_searchable_attribute(&mut self, attribute: u16) { - let reorders = self - .searchable_attrs - .get_or_insert_with(ReorderedAttrs::new); + let reorders = self.searchable_attrs.get_or_insert_with(ReorderedAttrs::new); reorders.insert_attribute(attribute); } @@ -94,6 +92,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> { distinct, distinct_size, self.criteria, + self.searchable_attrs, self.main_store, self.postings_lists_store, self.documents_fields_counts_store, @@ -105,6 +104,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> { range, self.filter, self.criteria, + self.searchable_attrs, self.main_store, self.postings_lists_store, self.documents_fields_counts_store, @@ -181,6 +181,16 @@ mod tests { } } + const fn doc_attr_index(document_id: u64, attribute: u16, word_index: u16) -> DocIndex { + DocIndex { + document_id: DocumentId(document_id), + attribute, + word_index, + char_index: 0, + char_length: 0, + } + } + pub struct TempDatabase { database: Database, index: Index, @@ -1261,4 +1271,73 @@ mod tests { }); assert_matches!(iter.next(), None); } + + #[test] + fn searchable_attributes() { + let store = TempDatabase::from_iter(vec![ + ("search", &[doc_attr_index(0, 0, 0)][..]), + ("engine", &[doc_attr_index(0, 0, 1)][..]), + + ("search", &[doc_attr_index(1, 1, 0)][..]), + ("engine", &[doc_attr_index(1, 1, 1)][..]), + ]); + + let db = &store.database; + let reader = db.main_read_txn().unwrap(); + + let builder = store.query_builder(); + let results = builder.query(&reader, "search engine", 0..20).unwrap(); + let mut iter = results.into_iter(); + + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { + let mut iter = matches.into_iter(); + assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, word_index: 0, distance: 0, .. })); // search + assert_matches!(iter.next(), Some(SimpleMatch { query_index: 1, word_index: 1, distance: 0, .. 
})); // engine + assert_matches!(iter.next(), None); + }); + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { + let mut iter = matches.into_iter(); + assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, word_index: 0, distance: 0, .. })); // search + assert_matches!(iter.next(), Some(SimpleMatch { query_index: 1, word_index: 1, distance: 0, .. })); // engine + assert_matches!(iter.next(), None); + }); + assert_matches!(iter.next(), None); + + // reorder the searchable attributes + let mut builder = store.query_builder(); + builder.add_searchable_attribute(1); + builder.add_searchable_attribute(0); + + let results = builder.query(&reader, "search engine", 0..20).unwrap(); + let mut iter = results.into_iter(); + + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => { + let mut iter = matches.into_iter(); + assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, word_index: 0, distance: 0, .. })); // search + assert_matches!(iter.next(), Some(SimpleMatch { query_index: 1, word_index: 1, distance: 0, .. })); // engine + assert_matches!(iter.next(), None); + }); + assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { + let mut iter = matches.into_iter(); + assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, word_index: 0, distance: 0, .. })); // search + assert_matches!(iter.next(), Some(SimpleMatch { query_index: 1, word_index: 1, distance: 0, .. })); // engine + assert_matches!(iter.next(), None); + }); + assert_matches!(iter.next(), None); + + // remove a searchable attribute + let mut builder = store.query_builder(); + builder.add_searchable_attribute(1); + + let results = builder.query(&reader, "search engine", 0..20).unwrap(); + let mut iter = results.into_iter(); + + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, ..
}) => { + let mut iter = matches.into_iter(); + assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, word_index: 0, distance: 0, .. })); // search + assert_matches!(iter.next(), Some(SimpleMatch { query_index: 1, word_index: 1, distance: 0, .. })); // engine + assert_matches!(iter.next(), None); + }); + assert_matches!(iter.next(), None); + } } diff --git a/meilisearch-core/src/raw_document.rs b/meilisearch-core/src/raw_document.rs index de486d906..f047de8e8 100644 --- a/meilisearch-core/src/raw_document.rs +++ b/meilisearch-core/src/raw_document.rs @@ -1,7 +1,9 @@ use compact_arena::SmallArena; use itertools::EitherOrBoth; use sdset::SetBuf; +use crate::DocIndex; use crate::bucket_sort::{SimpleMatch, BareMatch, QueryWordAutomaton, PostingsListView}; +use crate::reordered_attrs::ReorderedAttrs; pub struct RawDocument<'a, 'tag> { pub id: crate::DocumentId, @@ -19,8 +21,25 @@ impl<'a, 'tag> RawDocument<'a, 'tag> { bare_matches: &'a mut [BareMatch<'tag>], automatons: &[QueryWordAutomaton], postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>, + searchable_attrs: Option<&ReorderedAttrs>, ) -> Option<RawDocument<'a, 'tag>> { + if let Some(reordered_attrs) = searchable_attrs { + for bm in bare_matches.iter() { + let postings_list = &postings_lists[bm.postings_list]; + + let mut rewritten = Vec::new(); + for di in postings_list.iter() { + if let Some(attribute) = reordered_attrs.get(di.attribute) { + rewritten.push(DocIndex { attribute, ..*di }); + } + } + + let new_postings = SetBuf::from_dirty(rewritten); + postings_lists[bm.postings_list].rewrite_with(new_postings); + } + } + bare_matches.sort_unstable_by_key(|m| m.query_index); let mut previous_word = None; diff --git a/meilisearch-core/src/reordered_attrs.rs b/meilisearch-core/src/reordered_attrs.rs index b2f9f1d6c..590cac7b2 100644 --- a/meilisearch-core/src/reordered_attrs.rs +++ b/meilisearch-core/src/reordered_attrs.rs @@ -1,27 +1,31 @@ +use std::cmp; + #[derive(Default, Clone)] pub struct ReorderedAttrs { - count:
usize, reorders: Vec<Option<u16>>, + reverse: Vec<u16>, } impl ReorderedAttrs { pub fn new() -> ReorderedAttrs { - ReorderedAttrs { - count: 0, - reorders: Vec::new(), - } + ReorderedAttrs { reorders: Vec::new(), reverse: Vec::new() } } pub fn insert_attribute(&mut self, attribute: u16) { - self.reorders.resize(attribute as usize + 1, None); - self.reorders[attribute as usize] = Some(self.count as u16); - self.count += 1; + let new_len = cmp::max(attribute as usize + 1, self.reorders.len()); + self.reorders.resize(new_len, None); + self.reorders[attribute as usize] = Some(self.reverse.len() as u16); + self.reverse.push(attribute); } pub fn get(&self, attribute: u16) -> Option<u16> { - match self.reorders.get(attribute as usize) { - Some(Some(attribute)) => Some(*attribute), - _ => None, + match self.reorders.get(attribute as usize)? { + Some(attribute) => Some(*attribute), + None => None, } } + + pub fn reverse(&self, attribute: u16) -> Option<u16> { + self.reverse.get(attribute as usize).copied() + } }