diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index bf68aefdd..7cc4561da 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -96,18 +96,28 @@ where let mut bare_matches = Vec::new(); mk_arena!(arena); + for ((query, input, distance), matches) in queries { let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches)); - // TODO optimize the filter by skipping docids that have already been seen let mut offset = 0; - for matches in postings_list_view.linear_group_by_key(|m| m.document_id) { - let document_id = matches[0].document_id; - if docids.contains(&document_id) { - let range = postings_list_view.range(offset, matches.len()); + for id in docids.as_slice() { + let di = DocIndex { document_id: *id, ..DocIndex::default() }; + let pos = postings_list_view[offset..].binary_search(&di).unwrap_or_else(|x| x); + + let group = postings_list_view[offset + pos..] + .linear_group_by_key(|m| m.document_id) + .next() + .filter(|matches| matches[0].document_id == *id); + + offset += pos; + + if let Some(matches) = group { + let range = postings_list_view.range(offset, matches.len()); /* `offset` was advanced by `pos` just above, so it is the absolute start of this group in the view; `pos` alone is relative to the sub-slice that was searched */ let posting_list_index = arena.add(range); + let bare_match = BareMatch { - document_id, + document_id: *id, query_index: query.id, distance: distance, is_exact: true, // TODO where can I find this info? 
@@ -116,8 +126,6 @@ where bare_matches.push(bare_match); } - - offset += matches.len(); } } diff --git a/meilisearch-core/src/criterion/mod.rs b/meilisearch-core/src/criterion/mod.rs index 948d8f796..989d173e3 100644 --- a/meilisearch-core/src/criterion/mod.rs +++ b/meilisearch-core/src/criterion/mod.rs @@ -245,8 +245,7 @@ fn multiword_rewrite_matches( if !found { // if we find a corresponding padding for the // first time we must push preceding paddings - for (i, query_index) in replacement.clone().enumerate().take(i) - { + for (i, query_index) in replacement.clone().enumerate().take(i) { let word_index = match_.word_index + padding as u16 + (i + 1) as u16; let match_ = SimpleMatch { query_index, word_index, ..*match_ }; padded_matches.push(match_); diff --git a/meilisearch-types/src/lib.rs b/meilisearch-types/src/lib.rs index ae714ccd8..d37618eb9 100644 --- a/meilisearch-types/src/lib.rs +++ b/meilisearch-types/src/lib.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; /// /// It is used to inform the database the document you want to deserialize. /// Helpful for custom ranking. -#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[repr(C)] @@ -19,7 +19,7 @@ pub struct DocumentId(pub u64); /// /// This is stored in the map, generated at index time, /// extracted and interpreted at search time. -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] #[repr(C)] pub struct DocIndex {