mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Use both number and string facet databases in the distinct system
This commit is contained in:
parent
837c1041c7
commit
597144b0b9
@ -8,7 +8,6 @@ use ordered_float::OrderedFloat;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
|
|
||||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||||
use crate::search::facet::FacetIter;
|
use crate::search::facet::FacetIter;
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
@ -39,8 +38,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
field_name: String,
|
field_name: String,
|
||||||
) -> anyhow::Result<Self>
|
) -> anyhow::Result<Self> {
|
||||||
{
|
|
||||||
Self::new(index, rtxn, parent, field_name, true)
|
Self::new(index, rtxn, parent, field_name, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -49,8 +47,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
field_name: String,
|
field_name: String,
|
||||||
) -> anyhow::Result<Self>
|
) -> anyhow::Result<Self> {
|
||||||
{
|
|
||||||
Self::new(index, rtxn, parent, field_name, false)
|
Self::new(index, rtxn, parent, field_name, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -60,11 +57,11 @@ impl<'t> AscDesc<'t> {
|
|||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
field_name: String,
|
field_name: String,
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
) -> anyhow::Result<Self>
|
) -> anyhow::Result<Self> {
|
||||||
{
|
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
let faceted_fields = index.faceted_fields(rtxn)?;
|
let faceted_fields = index.faceted_fields(rtxn)?;
|
||||||
let (field_id, facet_type) = field_id_facet_type(&fields_ids_map, &faceted_fields, &field_name)?;
|
let (field_id, facet_type) =
|
||||||
|
field_id_facet_type(&fields_ids_map, &faceted_fields, &field_name)?;
|
||||||
|
|
||||||
Ok(AscDesc {
|
Ok(AscDesc {
|
||||||
index,
|
index,
|
||||||
@ -86,8 +83,10 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
#[logging_timer::time("AscDesc::{}")]
|
#[logging_timer::time("AscDesc::{}")]
|
||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
||||||
loop {
|
loop {
|
||||||
debug!("Facet {}({}) iteration",
|
debug!(
|
||||||
if self.ascending { "Asc" } else { "Desc" }, self.field_name
|
"Facet {}({}) iteration",
|
||||||
|
if self.ascending { "Asc" } else { "Desc" },
|
||||||
|
self.field_name
|
||||||
);
|
);
|
||||||
|
|
||||||
match self.candidates.next().transpose()? {
|
match self.candidates.next().transpose()? {
|
||||||
@ -138,7 +137,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||||
}));
|
}));
|
||||||
},
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -148,14 +147,13 @@ fn field_id_facet_type(
|
|||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
faceted_fields: &HashMap<String, FacetType>,
|
faceted_fields: &HashMap<String, FacetType>,
|
||||||
field: &str,
|
field: &str,
|
||||||
) -> anyhow::Result<(FieldId, FacetType)>
|
) -> anyhow::Result<(FieldId, FacetType)> {
|
||||||
{
|
let id = fields_ids_map
|
||||||
let id = fields_ids_map.id(field).with_context(|| {
|
.id(field)
|
||||||
format!("field {:?} isn't registered", field)
|
.with_context(|| format!("field {:?} isn't registered", field))?;
|
||||||
})?;
|
let facet_type = faceted_fields
|
||||||
let facet_type = faceted_fields.get(field).with_context(|| {
|
.get(field)
|
||||||
format!("field {:?} isn't faceted", field)
|
.with_context(|| format!("field {:?} isn't faceted", field))?;
|
||||||
})?;
|
|
||||||
Ok((id, *facet_type))
|
Ok((id, *facet_type))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -170,14 +168,12 @@ fn facet_ordered<'t>(
|
|||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>>
|
) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
||||||
{
|
|
||||||
match facet_type {
|
match facet_type {
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
if candidates.len() <= CANDIDATES_THRESHOLD {
|
if candidates.len() <= CANDIDATES_THRESHOLD {
|
||||||
let iter = iterative_facet_ordered_iter(
|
let iter =
|
||||||
index, rtxn, field_id, ascending, candidates,
|
iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
|
||||||
)?;
|
|
||||||
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
|
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||||
} else {
|
} else {
|
||||||
let facet_fn = if ascending {
|
let facet_fn = if ascending {
|
||||||
@ -188,7 +184,7 @@ fn facet_ordered<'t>(
|
|||||||
let iter = facet_fn(rtxn, index, field_id, candidates)?;
|
let iter = facet_fn(rtxn, index, field_id, candidates)?;
|
||||||
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
|
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
FacetType::String => bail!("criteria facet type must be a number"),
|
FacetType::String => bail!("criteria facet type must be a number"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -202,14 +198,14 @@ fn iterative_facet_ordered_iter<'t>(
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> anyhow::Result<impl Iterator<Item = RoaringBitmap> + 't>
|
) -> anyhow::Result<impl Iterator<Item = RoaringBitmap> + 't> {
|
||||||
{
|
|
||||||
let db = index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetF64Codec>();
|
|
||||||
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
||||||
for docid in candidates.iter() {
|
for docid in candidates.iter() {
|
||||||
let left = (field_id, docid, f64::MIN);
|
let left = (field_id, docid, f64::MIN);
|
||||||
let right = (field_id, docid, f64::MAX);
|
let right = (field_id, docid, f64::MAX);
|
||||||
let mut iter = db.range(rtxn, &(left..=right))?;
|
let mut iter = index
|
||||||
|
.field_id_docid_facet_f64s
|
||||||
|
.range(rtxn, &(left..=right))?;
|
||||||
let entry = if ascending { iter.next() } else { iter.last() };
|
let entry = if ascending { iter.next() } else { iter.last() };
|
||||||
if let Some(((_, _, value), ())) = entry.transpose()? {
|
if let Some(((_, _, value), ())) = entry.transpose()? {
|
||||||
docids_values.push((docid, OrderedFloat(value)));
|
docids_values.push((docid, OrderedFloat(value)));
|
||||||
@ -226,7 +222,8 @@ fn iterative_facet_ordered_iter<'t>(
|
|||||||
// The itertools GroupBy iterator doesn't provide an owned version, we are therefore
|
// The itertools GroupBy iterator doesn't provide an owned version, we are therefore
|
||||||
// required to collect the result into an owned collection (a Vec).
|
// required to collect the result into an owned collection (a Vec).
|
||||||
// https://github.com/rust-itertools/itertools/issues/499
|
// https://github.com/rust-itertools/itertools/issues/499
|
||||||
let vec: Vec<_> = iter.group_by(|(_, v)| *v)
|
let vec: Vec<_> = iter
|
||||||
|
.group_by(|(_, v)| v.clone())
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(_, ids)| ids.map(|(id, _)| id).collect())
|
.map(|(_, ids)| ids.map(|(id, _)| id).collect())
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -1,10 +1,14 @@
|
|||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
|
|
||||||
|
use heed::types::ByteSlice;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use super::{Distinct, DocIter};
|
||||||
use crate::heed_codec::facet::*;
|
use crate::heed_codec::facet::*;
|
||||||
use crate::{facet::FacetType, DocumentId, FieldId, Index};
|
use crate::{facet::FacetType, DocumentId, FieldId, Index};
|
||||||
use super::{Distinct, DocIter};
|
|
||||||
|
const FID_SIZE: usize = size_of::<FieldId>();
|
||||||
|
const DOCID_SIZE: usize = size_of::<DocumentId>();
|
||||||
|
|
||||||
/// A distinct implementer that is backed by facets.
|
/// A distinct implementer that is backed by facets.
|
||||||
///
|
///
|
||||||
@ -48,31 +52,27 @@ pub struct FacetDistinctIter<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> FacetDistinctIter<'a> {
|
impl<'a> FacetDistinctIter<'a> {
|
||||||
fn get_facet_docids<'c, KC>(&self, key: &'c KC::EItem) -> anyhow::Result<RoaringBitmap>
|
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
where
|
self.index
|
||||||
KC: heed::BytesEncode<'c>,
|
.facet_id_string_docids
|
||||||
{
|
.get(self.txn, &(self.distinct, key))
|
||||||
let facet_docids = self
|
}
|
||||||
.index
|
|
||||||
.facet_field_id_value_docids
|
fn facet_number_docids(&self, key: f64) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
.remap_key_type::<KC>()
|
// get facet docids on level 0
|
||||||
.get(self.txn, key)?
|
self.index
|
||||||
.expect("Corrupted data: Facet values must exist");
|
.facet_id_f64_docids
|
||||||
Ok(facet_docids)
|
.get(self.txn, &(self.distinct, 0, key, key))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn distinct_string(&mut self, id: DocumentId) -> anyhow::Result<()> {
|
fn distinct_string(&mut self, id: DocumentId) -> anyhow::Result<()> {
|
||||||
let iter = get_facet_values::<FieldDocIdFacetStringCodec>(
|
let iter = facet_string_values(id, self.distinct, self.index, self.txn)?;
|
||||||
id,
|
|
||||||
self.distinct,
|
|
||||||
self.index,
|
|
||||||
self.txn,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
for item in iter {
|
for item in iter {
|
||||||
let ((_, _, value), _) = item?;
|
let ((_, _, value), _) = item?;
|
||||||
let key = (self.distinct, value);
|
let facet_docids = self
|
||||||
let facet_docids = self.get_facet_docids::<FacetValueStringCodec>(&key)?;
|
.facet_string_docids(value)?
|
||||||
|
.expect("Corrupted data: Facet values must exist");
|
||||||
self.excluded.union_with(&facet_docids);
|
self.excluded.union_with(&facet_docids);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -82,17 +82,13 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn distinct_number(&mut self, id: DocumentId) -> anyhow::Result<()> {
|
fn distinct_number(&mut self, id: DocumentId) -> anyhow::Result<()> {
|
||||||
let iter = get_facet_values::<FieldDocIdFacetF64Codec>(id,
|
let iter = facet_number_values(id, self.distinct, self.index, self.txn)?;
|
||||||
self.distinct,
|
|
||||||
self.index,
|
|
||||||
self.txn,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
for item in iter {
|
for item in iter {
|
||||||
let ((_, _, value), _) = item?;
|
let ((_, _, value), _) = item?;
|
||||||
// get facet docids on level 0
|
let facet_docids = self
|
||||||
let key = (self.distinct, 0, value, value);
|
.facet_number_docids(value)?
|
||||||
let facet_docids = self.get_facet_docids::<FacetLevelValueF64Codec>(&key)?;
|
.expect("Corrupted data: Facet values must exist");
|
||||||
self.excluded.union_with(&facet_docids);
|
self.excluded.union_with(&facet_docids);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -129,26 +125,44 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_facet_values<'a, KC>(
|
fn facet_values_prefix_key(distinct: FieldId, id: DocumentId) -> [u8; FID_SIZE + DOCID_SIZE] {
|
||||||
|
let mut key = [0; FID_SIZE + DOCID_SIZE];
|
||||||
|
key[0..FID_SIZE].copy_from_slice(&distinct.to_be_bytes());
|
||||||
|
key[FID_SIZE..].copy_from_slice(&id.to_be_bytes());
|
||||||
|
key
|
||||||
|
}
|
||||||
|
|
||||||
|
fn facet_number_values<'a>(
|
||||||
id: DocumentId,
|
id: DocumentId,
|
||||||
distinct: FieldId,
|
distinct: FieldId,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'a heed::RoTxn,
|
txn: &'a heed::RoTxn,
|
||||||
) -> anyhow::Result<heed::RoPrefix<'a, KC, heed::types::Unit>>
|
) -> anyhow::Result<heed::RoPrefix<'a, FieldDocIdFacetF64Codec, heed::types::Unit>> {
|
||||||
where
|
let key = facet_values_prefix_key(distinct, id);
|
||||||
KC: heed::BytesDecode<'a>,
|
|
||||||
{
|
|
||||||
const FID_SIZE: usize = size_of::<FieldId>();
|
|
||||||
const DOCID_SIZE: usize = size_of::<DocumentId>();
|
|
||||||
|
|
||||||
let mut key = [0; FID_SIZE + DOCID_SIZE];
|
|
||||||
key[0..FID_SIZE].copy_from_slice(&distinct.to_be_bytes());
|
|
||||||
key[FID_SIZE..].copy_from_slice(&id.to_be_bytes());
|
|
||||||
|
|
||||||
let iter = index
|
let iter = index
|
||||||
.field_id_docid_facet_values
|
.field_id_docid_facet_f64s
|
||||||
|
.remap_key_type::<ByteSlice>()
|
||||||
.prefix_iter(txn, &key)?
|
.prefix_iter(txn, &key)?
|
||||||
.remap_key_type::<KC>();
|
.remap_key_type::<FieldDocIdFacetF64Codec>();
|
||||||
|
|
||||||
|
Ok(iter)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn facet_string_values<'a>(
|
||||||
|
id: DocumentId,
|
||||||
|
distinct: FieldId,
|
||||||
|
index: &Index,
|
||||||
|
txn: &'a heed::RoTxn,
|
||||||
|
) -> anyhow::Result<heed::RoPrefix<'a, FieldDocIdFacetStringCodec, heed::types::Unit>> {
|
||||||
|
let key = facet_values_prefix_key(distinct, id);
|
||||||
|
|
||||||
|
let iter = index
|
||||||
|
.field_id_docid_facet_strings
|
||||||
|
.remap_key_type::<ByteSlice>()
|
||||||
|
.prefix_iter(txn, &key)?
|
||||||
|
.remap_key_type::<FieldDocIdFacetStringCodec>();
|
||||||
|
|
||||||
Ok(iter)
|
Ok(iter)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -186,8 +200,8 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> {
|
|||||||
mod test {
|
mod test {
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use super::*;
|
|
||||||
use super::super::test::{generate_index, validate_distinct_candidates};
|
use super::super::test::{generate_index, validate_distinct_candidates};
|
||||||
|
use super::*;
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
|
|
||||||
macro_rules! test_facet_distinct {
|
macro_rules! test_facet_distinct {
|
||||||
@ -196,7 +210,8 @@ mod test {
|
|||||||
fn $name() {
|
fn $name() {
|
||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
|
|
||||||
let facets = HashMap::from_iter(Some(($distinct.to_string(), $facet_type.to_string())));
|
let facets =
|
||||||
|
HashMap::from_iter(Some(($distinct.to_string(), $facet_type.to_string())));
|
||||||
let (index, fid, candidates) = generate_index($distinct, facets);
|
let (index, fid, candidates) = generate_index($distinct, facets);
|
||||||
let txn = index.read_txn().unwrap();
|
let txn = index.read_txn().unwrap();
|
||||||
let mut map_distinct = FacetDistinct::new(fid, &index, &txn, $facet_type);
|
let mut map_distinct = FacetDistinct::new(fid, &index, &txn, $facet_type);
|
||||||
|
Loading…
Reference in New Issue
Block a user