Split the update side to use the number and string facet databases

This commit is contained in:
Clément Renault 2021-04-28 17:58:16 +02:00 committed by Kerollmops
parent 038e03a4e4
commit bd7b285bae
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
11 changed files with 406 additions and 285 deletions

View file

@ -1,7 +1,7 @@
use std::collections::HashMap;
use std::mem::take;
use anyhow::{bail, Context as _};
use anyhow::Context;
use itertools::Itertools;
use log::debug;
use ordered_float::OrderedFloat;
@ -23,7 +23,6 @@ pub struct AscDesc<'t> {
rtxn: &'t heed::RoTxn<'t>,
field_name: String,
field_id: FieldId,
facet_type: FacetType,
ascending: bool,
query_tree: Option<Operation>,
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
@ -51,6 +50,7 @@ impl<'t> AscDesc<'t> {
Self::new(index, rtxn, parent, field_name, false)
}
fn new(
index: &'t Index,
rtxn: &'t heed::RoTxn,
@ -60,19 +60,19 @@ impl<'t> AscDesc<'t> {
) -> anyhow::Result<Self> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let faceted_fields = index.faceted_fields(rtxn)?;
let (field_id, facet_type) =
field_id_facet_type(&fields_ids_map, &faceted_fields, &field_name)?;
let field_id = fields_ids_map
.id(&field_name)
.with_context(|| format!("field {:?} isn't registered", field_name))?;
Ok(AscDesc {
index,
rtxn,
field_name,
field_id,
facet_type,
ascending,
query_tree: None,
candidates: Box::new(std::iter::empty()),
faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
faceted_candidates: index.number_faceted_documents_ids(rtxn, field_id)?,
bucket_candidates: RoaringBitmap::new(),
parent,
})
@ -165,27 +165,20 @@ fn facet_ordered<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
field_id: FieldId,
facet_type: FacetType,
ascending: bool,
candidates: RoaringBitmap,
) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
match facet_type {
FacetType::Number => {
if candidates.len() <= CANDIDATES_THRESHOLD {
let iter =
iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
} else {
let facet_fn = if ascending {
FacetIter::new_reducing
} else {
FacetIter::new_reverse_reducing
};
let iter = facet_fn(rtxn, index, field_id, candidates)?;
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
}
}
FacetType::String => bail!("criteria facet type must be a number"),
if candidates.len() <= CANDIDATES_THRESHOLD {
let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
} else {
let facet_fn = if ascending {
FacetIter::new_reducing
} else {
FacetIter::new_reverse_reducing
};
let iter = facet_fn(rtxn, index, field_id, candidates)?;
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
}
}

View file

@ -5,7 +5,7 @@ use roaring::RoaringBitmap;
use super::{Distinct, DocIter};
use crate::heed_codec::facet::*;
use crate::{facet::FacetType, DocumentId, FieldId, Index};
use crate::{DocumentId, FieldId, Index};
const FID_SIZE: usize = size_of::<FieldId>();
const DOCID_SIZE: usize = size_of::<DocumentId>();
@ -22,7 +22,6 @@ pub struct FacetDistinct<'a> {
distinct: FieldId,
index: &'a Index,
txn: &'a heed::RoTxn<'a>,
facet_type: FacetType,
}
impl<'a> FacetDistinct<'a> {
@ -30,14 +29,9 @@ impl<'a> FacetDistinct<'a> {
distinct: FieldId,
index: &'a Index,
txn: &'a heed::RoTxn<'a>,
facet_type: FacetType,
) -> Self {
Self {
distinct,
index,
txn,
facet_type,
}
) -> Self
{
Self { distinct, index, txn }
}
}
@ -45,7 +39,6 @@ pub struct FacetDistinctIter<'a> {
candidates: RoaringBitmap,
distinct: FieldId,
excluded: RoaringBitmap,
facet_type: FacetType,
index: &'a Index,
iter_offset: usize,
txn: &'a heed::RoTxn<'a>,
@ -117,6 +110,7 @@ impl<'a> FacetDistinctIter<'a> {
// increasing the offset we make sure to get the first valid value for the next
// distinct document to keep.
self.iter_offset += 1;
Ok(Some(id))
}
// no more candidate at this offset, return.
@ -188,7 +182,6 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> {
candidates,
distinct: self.distinct,
excluded,
facet_type: self.facet_type,
index: self.index,
iter_offset: 0,
txn: self.txn,

View file

@ -145,7 +145,7 @@ impl<'a> Search<'a> {
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
match faceted_fields.get(name) {
Some(facet_type) => {
let distinct = FacetDistinct::new(id, self.index, self.rtxn, *facet_type);
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
self.perform_sort(distinct, matching_words, criteria)
}
None => {