mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Make the search to always return the facets ordered by count
This commit is contained in:
parent
bd3c026406
commit
f42bef2f66
@ -240,42 +240,49 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> {
|
fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> {
|
||||||
use FacetType::{Number, String};
|
// use FacetType::{Number, String};
|
||||||
|
|
||||||
match self.candidates {
|
let candidates = match self.candidates.as_ref() {
|
||||||
Some(ref candidates) => {
|
Some(candidates) => candidates.clone(),
|
||||||
// Classic search, candidates were specified, we must return facet values only related
|
None => todo!("fetch candidates"),
|
||||||
// to those candidates. We also enter here for facet strings for performance reasons.
|
};
|
||||||
let mut distribution = BTreeMap::new();
|
|
||||||
if candidates.len() <= CANDIDATES_THRESHOLD {
|
let mut distribution = BTreeMap::new();
|
||||||
self.facet_distribution_from_documents(
|
|
||||||
field_id,
|
let number_distribution = facet_distribution_iter::count_iterate_over_facet_distribution(
|
||||||
Number,
|
self.rtxn,
|
||||||
candidates,
|
self.index
|
||||||
&mut distribution,
|
.facet_id_f64_docids
|
||||||
)?;
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
self.facet_distribution_from_documents(
|
field_id,
|
||||||
field_id,
|
&candidates,
|
||||||
String,
|
)?;
|
||||||
candidates,
|
|
||||||
&mut distribution,
|
for (count, facet_key, _) in number_distribution {
|
||||||
)?;
|
let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap();
|
||||||
} else {
|
distribution.insert(facet_key.to_string(), count);
|
||||||
self.facet_numbers_distribution_from_facet_levels(
|
|
||||||
field_id,
|
|
||||||
candidates,
|
|
||||||
&mut distribution,
|
|
||||||
)?;
|
|
||||||
self.facet_strings_distribution_from_facet_levels(
|
|
||||||
field_id,
|
|
||||||
candidates,
|
|
||||||
&mut distribution,
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
Ok(distribution)
|
|
||||||
}
|
|
||||||
None => self.facet_values_from_raw_facet_database(field_id),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let string_distribution = facet_distribution_iter::count_iterate_over_facet_distribution(
|
||||||
|
self.rtxn,
|
||||||
|
self.index
|
||||||
|
.facet_id_string_docids
|
||||||
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
|
field_id,
|
||||||
|
&candidates,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
for (count, facet_key, any_docid) in string_distribution {
|
||||||
|
let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();
|
||||||
|
|
||||||
|
let key: (FieldId, _, &str) = (field_id, any_docid, facet_key);
|
||||||
|
let original_string =
|
||||||
|
self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?.unwrap().to_owned();
|
||||||
|
|
||||||
|
distribution.insert(original_string, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(distribution)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
|
pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use std::cmp::Reverse;
|
use std::cmp::Reverse;
|
||||||
use std::collections::{BTreeMap, BinaryHeap};
|
use std::collections::BinaryHeap;
|
||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
use heed::Result;
|
use heed::Result;
|
||||||
@ -46,15 +46,12 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn count_iterate_over_facet_distribution<'t, CB>(
|
pub fn count_iterate_over_facet_distribution<'t>(
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
) -> Result<Vec<(u64, &'t [u8])>>
|
) -> Result<Vec<(u64, &'t [u8], u32)>> {
|
||||||
where
|
|
||||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
|
||||||
{
|
|
||||||
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq)]
|
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq)]
|
||||||
struct LevelEntry<'t> {
|
struct LevelEntry<'t> {
|
||||||
/// The number of candidates in this entry.
|
/// The number of candidates in this entry.
|
||||||
@ -65,6 +62,8 @@ where
|
|||||||
left_bound: &'t [u8],
|
left_bound: &'t [u8],
|
||||||
/// The number of keys we must look for after `left_bound`.
|
/// The number of keys we must look for after `left_bound`.
|
||||||
group_size: u8,
|
group_size: u8,
|
||||||
|
/// Any docid in the set of matching documents. Used to find the original facet string.
|
||||||
|
any_docid: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Represents the list of keys that we must explore.
|
// Represents the list of keys that we must explore.
|
||||||
@ -88,20 +87,23 @@ where
|
|||||||
if key.field_id != field_id {
|
if key.field_id != field_id {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let count = value.bitmap.intersection_len(&candidates);
|
let intersection = value.bitmap & candidates;
|
||||||
|
let count = intersection.len();
|
||||||
if count != 0 {
|
if count != 0 {
|
||||||
heap.push(LevelEntry {
|
heap.push(LevelEntry {
|
||||||
count,
|
count,
|
||||||
level: Reverse(key.level),
|
level: Reverse(key.level),
|
||||||
left_bound: key.left_bound,
|
left_bound: key.left_bound,
|
||||||
group_size: value.size,
|
group_size: value.size,
|
||||||
|
any_docid: intersection.min().unwrap(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while let Some(LevelEntry { count, level, left_bound, group_size }) = heap.pop() {
|
while let Some(LevelEntry { count, level, left_bound, group_size, any_docid }) = heap.pop()
|
||||||
|
{
|
||||||
if let Reverse(0) = level {
|
if let Reverse(0) = level {
|
||||||
results.push((count, left_bound));
|
results.push((count, left_bound, any_docid));
|
||||||
// TODO better just call the user callback and ask for a ControlFlow
|
// TODO better just call the user callback and ask for a ControlFlow
|
||||||
if results.len() == 20 {
|
if results.len() == 20 {
|
||||||
break;
|
break;
|
||||||
@ -116,13 +118,15 @@ where
|
|||||||
if key.field_id != field_id {
|
if key.field_id != field_id {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let count = value.bitmap.intersection_len(&candidates);
|
let intersection = value.bitmap & candidates;
|
||||||
|
let count = intersection.len();
|
||||||
if count != 0 {
|
if count != 0 {
|
||||||
heap.push(LevelEntry {
|
heap.push(LevelEntry {
|
||||||
count,
|
count,
|
||||||
level: Reverse(key.level),
|
level: Reverse(key.level),
|
||||||
left_bound: key.left_bound,
|
left_bound: key.left_bound,
|
||||||
group_size: value.size,
|
group_size: value.size,
|
||||||
|
any_docid: intersection.min().unwrap(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user