mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 03:47:02 +02:00
Merge #849
849: Update nbHits count with filtered documents r=MarinPostma a=balajisivaraman Closes #764 close #1039 After discussing with @MarinPostma on Slack, this is my first attempt at implementing this for the basic flow that will go through `bucket_sort_with_distinct`. A few thoughts here: - For getting the count of filtered documents alone, I originally thought of using `filter_map.values().filter(|&&v| !v).count()`. In a few cases, this was the same as what I have now implemented. But I realised I couldn't do something similar for `distinct`. So for being consistent, I have implemented both in a similar fashion. - I also needed the `contains_key` check to ensure we're not counting the same document ID twice. @MarinPostma also mentioned that this will be an approximation since the sort is lazy. In the test example that I've updated, the actual filtered count will be just 19 (for `male` records), but due to the `limit` in play, it returns 32 (filtering out 11 records overall). Please let me know if this is the kind of fix we are looking for, and I can implement it in the placeholder search also. Co-authored-by: Balaji Sivaraman <balaji@balajisivaraman.com>
This commit is contained in:
commit
f564a9ce51
4 changed files with 129 additions and 7 deletions
|
@ -212,6 +212,7 @@ where
|
|||
FD: Fn(DocumentId) -> Option<u64>,
|
||||
{
|
||||
let mut result = SortResult::default();
|
||||
let mut filtered_count = 0;
|
||||
|
||||
let words_set = index.main.words_fst(reader)?;
|
||||
let stop_words = index.main.stop_words_fst(reader)?;
|
||||
|
@ -322,19 +323,36 @@ where
|
|||
let filter_accepted = match &filter {
|
||||
Some(filter) => {
|
||||
let entry = filter_map.entry(document.id);
|
||||
*entry.or_insert_with(|| (filter)(document.id))
|
||||
*entry.or_insert_with(|| {
|
||||
let accepted = (filter)(document.id);
|
||||
// we only want to count it out the first time we see it
|
||||
if !accepted {
|
||||
filtered_count += 1;
|
||||
}
|
||||
accepted
|
||||
})
|
||||
}
|
||||
None => true,
|
||||
};
|
||||
|
||||
if filter_accepted {
|
||||
let entry = key_cache.entry(document.id);
|
||||
let key = entry.or_insert_with(|| (distinct)(document.id).map(Rc::new));
|
||||
let mut seen = true;
|
||||
let key = entry.or_insert_with(|| {
|
||||
seen = false;
|
||||
(distinct)(document.id).map(Rc::new)
|
||||
});
|
||||
|
||||
match key.clone() {
|
||||
let distinct = match key.clone() {
|
||||
Some(key) => buf_distinct.register(key),
|
||||
None => buf_distinct.register_without_key(),
|
||||
};
|
||||
|
||||
// we only want to count the document if it is the first time we see it and
|
||||
// if it wasn't accepted by distinct
|
||||
if !seen && !distinct {
|
||||
filtered_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// the requested range end is reached: stop computing distinct
|
||||
|
@ -396,7 +414,7 @@ where
|
|||
}
|
||||
}
|
||||
result.documents = documents;
|
||||
result.nb_hits = docids.len();
|
||||
result.nb_hits = docids.len() - filtered_count;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
|
|
@ -225,10 +225,17 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> {
|
|||
|
||||
fn sort_result_from_docids(&self, docids: &[DocumentId], range: Range<usize>) -> SortResult {
|
||||
let mut sort_result = SortResult::default();
|
||||
let mut filtered_count = 0;
|
||||
let mut result = match self.filter {
|
||||
Some(ref filter) => docids
|
||||
.iter()
|
||||
.filter(|item| (filter)(**item))
|
||||
.filter(|item| {
|
||||
let accepted = (filter)(**item);
|
||||
if !accepted {
|
||||
filtered_count += 1;
|
||||
}
|
||||
accepted
|
||||
})
|
||||
.skip(range.start)
|
||||
.take(range.end - range.start)
|
||||
.map(|&id| Document::from_highlights(id, &[]))
|
||||
|
@ -248,15 +255,19 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> {
|
|||
result.retain(|doc| {
|
||||
let id = doc.id;
|
||||
let key = (distinct)(id);
|
||||
match key {
|
||||
let distinct_accepted = match key {
|
||||
Some(key) => distinct_map.register(key),
|
||||
None => distinct_map.register_without_key(),
|
||||
};
|
||||
if !distinct_accepted {
|
||||
filtered_count += 1;
|
||||
}
|
||||
distinct_accepted
|
||||
});
|
||||
}
|
||||
|
||||
sort_result.documents = result;
|
||||
sort_result.nb_hits = docids.len();
|
||||
sort_result.nb_hits = docids.len() - filtered_count;
|
||||
sort_result
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue