mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-25 22:34:28 +01:00
Change the way we filter the documents
This commit is contained in:
parent
681711fced
commit
40dab80dfa
@ -96,18 +96,28 @@ where
|
|||||||
|
|
||||||
let mut bare_matches = Vec::new();
|
let mut bare_matches = Vec::new();
|
||||||
mk_arena!(arena);
|
mk_arena!(arena);
|
||||||
|
|
||||||
for ((query, input, distance), matches) in queries {
|
for ((query, input, distance), matches) in queries {
|
||||||
|
|
||||||
let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches));
|
let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches));
|
||||||
// TODO optimize the filter by skipping docids that have already been seen
|
|
||||||
let mut offset = 0;
|
let mut offset = 0;
|
||||||
for matches in postings_list_view.linear_group_by_key(|m| m.document_id) {
|
for id in docids.as_slice() {
|
||||||
let document_id = matches[0].document_id;
|
let di = DocIndex { document_id: *id, ..DocIndex::default() };
|
||||||
if docids.contains(&document_id) {
|
let pos = postings_list_view[offset..].binary_search(&di).unwrap_or_else(|x| x);
|
||||||
let range = postings_list_view.range(offset, matches.len());
|
|
||||||
|
let group = postings_list_view[offset + pos..]
|
||||||
|
.linear_group_by_key(|m| m.document_id)
|
||||||
|
.next()
|
||||||
|
.filter(|matches| matches[0].document_id == *id);
|
||||||
|
|
||||||
|
offset += pos;
|
||||||
|
|
||||||
|
if let Some(matches) = group {
|
||||||
|
let range = postings_list_view.range(pos, matches.len());
|
||||||
let posting_list_index = arena.add(range);
|
let posting_list_index = arena.add(range);
|
||||||
|
|
||||||
let bare_match = BareMatch {
|
let bare_match = BareMatch {
|
||||||
document_id,
|
document_id: *id,
|
||||||
query_index: query.id,
|
query_index: query.id,
|
||||||
distance: distance,
|
distance: distance,
|
||||||
is_exact: true, // TODO where can I find this info?
|
is_exact: true, // TODO where can I find this info?
|
||||||
@ -116,8 +126,6 @@ where
|
|||||||
|
|
||||||
bare_matches.push(bare_match);
|
bare_matches.push(bare_match);
|
||||||
}
|
}
|
||||||
|
|
||||||
offset += matches.len();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -245,8 +245,7 @@ fn multiword_rewrite_matches(
|
|||||||
if !found {
|
if !found {
|
||||||
// if we find a corresponding padding for the
|
// if we find a corresponding padding for the
|
||||||
// first time we must push preceding paddings
|
// first time we must push preceding paddings
|
||||||
for (i, query_index) in replacement.clone().enumerate().take(i)
|
for (i, query_index) in replacement.clone().enumerate().take(i) {
|
||||||
{
|
|
||||||
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
|
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
|
||||||
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
|
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
|
||||||
padded_matches.push(match_);
|
padded_matches.push(match_);
|
||||||
|
@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
///
|
///
|
||||||
/// It is used to inform the database the document you want to deserialize.
|
/// It is used to inform the database the document you want to deserialize.
|
||||||
/// Helpful for custom ranking.
|
/// Helpful for custom ranking.
|
||||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
||||||
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
|
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
@ -19,7 +19,7 @@ pub struct DocumentId(pub u64);
|
|||||||
///
|
///
|
||||||
/// This is stored in the map, generated at index time,
|
/// This is stored in the map, generated at index time,
|
||||||
/// extracted and interpreted at search time.
|
/// extracted and interpreted at search time.
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
|
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
pub struct DocIndex {
|
pub struct DocIndex {
|
||||||
|
Loading…
Reference in New Issue
Block a user