2022-09-08 08:47:40 +02:00
|
|
|
pub use facet_sort_ascending::ascending_facet_sort;
|
|
|
|
pub use facet_sort_descending::descending_facet_sort;
|
2023-11-27 11:52:22 +01:00
|
|
|
use heed::types::{Bytes, DecodeIgnore};
|
2022-09-08 08:47:40 +02:00
|
|
|
use heed::{BytesDecode, RoTxn};
|
2023-03-23 09:35:53 +01:00
|
|
|
use roaring::RoaringBitmap;
|
2020-11-27 16:10:40 +01:00
|
|
|
|
2023-05-29 15:32:09 +02:00
|
|
|
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
|
2023-02-14 17:03:44 +01:00
|
|
|
pub use self::filter::{BadGeoError, Filter};
|
2023-03-23 09:35:53 +01:00
|
|
|
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec};
|
2023-11-27 11:52:22 +01:00
|
|
|
use crate::heed_codec::BytesRefCodec;
|
2023-03-23 09:35:53 +01:00
|
|
|
use crate::{Index, Result};
|
2020-12-28 19:08:53 +01:00
|
|
|
mod facet_distribution;
|
2022-08-30 14:17:40 +02:00
|
|
|
mod facet_distribution_iter;
|
2022-08-30 15:22:39 +02:00
|
|
|
mod facet_range_search;
|
2022-09-08 08:47:40 +02:00
|
|
|
mod facet_sort_ascending;
|
|
|
|
mod facet_sort_descending;
|
2021-11-06 16:37:55 +01:00
|
|
|
mod filter;
|
2022-08-30 14:17:40 +02:00
|
|
|
|
2023-03-23 09:35:53 +01:00
|
|
|
fn facet_extreme_value<'t>(
|
|
|
|
mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
|
|
|
|
) -> Result<Option<f64>> {
|
|
|
|
let extreme_value =
|
|
|
|
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
|
|
|
|
let (_, extreme_value) = extreme_value?;
|
2023-11-22 18:21:19 +01:00
|
|
|
OrderedF64Codec::bytes_decode(extreme_value)
|
|
|
|
.map(Some)
|
|
|
|
.map_err(heed::Error::Decoding)
|
|
|
|
.map_err(Into::into)
|
2023-03-23 09:35:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn facet_min_value<'t>(
|
|
|
|
index: &'t Index,
|
|
|
|
rtxn: &'t heed::RoTxn,
|
|
|
|
field_id: u16,
|
|
|
|
candidates: RoaringBitmap,
|
|
|
|
) -> Result<Option<f64>> {
|
2023-11-27 11:52:22 +01:00
|
|
|
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
2023-03-23 09:35:53 +01:00
|
|
|
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
|
|
|
|
facet_extreme_value(it)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn facet_max_value<'t>(
|
|
|
|
index: &'t Index,
|
|
|
|
rtxn: &'t heed::RoTxn,
|
|
|
|
field_id: u16,
|
|
|
|
candidates: RoaringBitmap,
|
|
|
|
) -> Result<Option<f64>> {
|
2023-11-27 11:52:22 +01:00
|
|
|
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
2023-03-23 09:35:53 +01:00
|
|
|
let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
|
|
|
|
facet_extreme_value(it)
|
|
|
|
}
|
|
|
|
|
2022-09-08 08:47:40 +02:00
|
|
|
/// Get the first facet value in the facet database
|
2022-08-30 15:22:39 +02:00
|
|
|
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
2022-08-30 14:17:40 +02:00
|
|
|
txn: &'t RoTxn,
|
2023-11-27 11:52:22 +01:00
|
|
|
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
2022-08-30 14:17:40 +02:00
|
|
|
field_id: u16,
|
2022-08-31 07:50:18 +02:00
|
|
|
) -> heed::Result<Option<BoundCodec::DItem>>
|
2022-08-30 14:17:40 +02:00
|
|
|
where
|
|
|
|
BoundCodec: BytesDecode<'t>,
|
|
|
|
{
|
|
|
|
let mut level0prefix = vec![];
|
|
|
|
level0prefix.extend_from_slice(&field_id.to_be_bytes());
|
|
|
|
level0prefix.push(0);
|
2023-11-22 18:21:19 +01:00
|
|
|
let mut level0_iter_forward =
|
2023-11-27 11:52:22 +01:00
|
|
|
db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, level0prefix.as_slice())?;
|
2022-08-30 14:17:40 +02:00
|
|
|
if let Some(first) = level0_iter_forward.next() {
|
2022-08-30 15:22:39 +02:00
|
|
|
let (first_key, _) = first?;
|
2022-09-05 17:31:26 +02:00
|
|
|
let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key)
|
2023-11-22 18:21:19 +01:00
|
|
|
.map_err(heed::Error::Decoding)?;
|
2022-08-30 15:22:39 +02:00
|
|
|
Ok(Some(first_key.left_bound))
|
2022-08-30 14:17:40 +02:00
|
|
|
} else {
|
2022-08-30 15:22:39 +02:00
|
|
|
Ok(None)
|
2022-08-30 14:17:40 +02:00
|
|
|
}
|
|
|
|
}
|
2022-09-08 08:47:40 +02:00
|
|
|
|
|
|
|
/// Get the last facet value in the facet database
|
2022-08-30 15:22:39 +02:00
|
|
|
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
2022-08-30 14:17:40 +02:00
|
|
|
txn: &'t RoTxn,
|
2023-11-27 11:52:22 +01:00
|
|
|
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
2022-08-30 14:17:40 +02:00
|
|
|
field_id: u16,
|
2022-08-31 07:50:18 +02:00
|
|
|
) -> heed::Result<Option<BoundCodec::DItem>>
|
2022-08-30 14:17:40 +02:00
|
|
|
where
|
|
|
|
BoundCodec: BytesDecode<'t>,
|
|
|
|
{
|
|
|
|
let mut level0prefix = vec![];
|
|
|
|
level0prefix.extend_from_slice(&field_id.to_be_bytes());
|
|
|
|
level0prefix.push(0);
|
2023-11-27 11:52:22 +01:00
|
|
|
let mut level0_iter_backward =
|
|
|
|
db.remap_types::<Bytes, DecodeIgnore>().rev_prefix_iter(txn, level0prefix.as_slice())?;
|
2022-08-30 14:17:40 +02:00
|
|
|
if let Some(last) = level0_iter_backward.next() {
|
2022-08-30 15:22:39 +02:00
|
|
|
let (last_key, _) = last?;
|
2022-09-05 17:31:26 +02:00
|
|
|
let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key)
|
2023-11-22 18:21:19 +01:00
|
|
|
.map_err(heed::Error::Decoding)?;
|
2022-08-30 15:22:39 +02:00
|
|
|
Ok(Some(last_key.left_bound))
|
2022-08-30 14:17:40 +02:00
|
|
|
} else {
|
2022-08-30 15:22:39 +02:00
|
|
|
Ok(None)
|
2022-08-30 14:17:40 +02:00
|
|
|
}
|
|
|
|
}
|
2022-09-08 08:47:40 +02:00
|
|
|
|
|
|
|
/// Get the height of the highest level in the facet database
|
2022-08-30 15:22:39 +02:00
|
|
|
pub(crate) fn get_highest_level<'t>(
|
2022-08-30 14:17:40 +02:00
|
|
|
txn: &'t RoTxn<'t>,
|
2023-11-27 11:52:22 +01:00
|
|
|
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
2022-08-30 14:17:40 +02:00
|
|
|
field_id: u16,
|
2022-08-31 07:50:18 +02:00
|
|
|
) -> heed::Result<u8> {
|
2022-08-30 14:17:40 +02:00
|
|
|
let field_id_prefix = &field_id.to_be_bytes();
|
2022-08-30 15:22:39 +02:00
|
|
|
Ok(db
|
2023-11-27 11:52:22 +01:00
|
|
|
.remap_types::<Bytes, DecodeIgnore>()
|
2023-11-22 18:21:19 +01:00
|
|
|
.rev_prefix_iter(txn, field_id_prefix)?
|
2022-08-30 14:17:40 +02:00
|
|
|
.next()
|
|
|
|
.map(|el| {
|
|
|
|
let (key, _) = el.unwrap();
|
2023-11-27 11:52:22 +01:00
|
|
|
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
|
2022-08-30 14:17:40 +02:00
|
|
|
key.level
|
|
|
|
})
|
2022-08-30 15:22:39 +02:00
|
|
|
.unwrap_or(0))
|
|
|
|
}
|
2022-09-07 17:56:38 +02:00
|
|
|
|
|
|
|
/// Shared fixtures that build small facet indexes for the facet search tests.
#[cfg(test)]
pub(crate) mod tests {
    use rand::{Rng, SeedableRng};
    use roaring::RoaringBitmap;

    use crate::heed_codec::facet::OrderedF64Codec;
    use crate::heed_codec::StrRefCodec;
    use crate::update::facet::test_helpers::FacetIndex;

    /// Builds an index where field id `0` holds the keys `0.0..=255.0`, each
    /// associated with a bitmap containing the single doc id equal to the key.
    pub fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();
        for i in 0..256u16 {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i as u32);
            index.insert(&mut txn, 0, &(i as f64), &bitmap);
        }
        txn.commit().unwrap();
        index
    }

    /// Builds an index where field id `0` receives 128 keys drawn from
    /// `0..256` by a generator with a fixed all-zero seed. Each key is
    /// inserted with the doc ids `key` and `key + 100`.
    pub fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();
        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);

        for key in std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128) {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(key);
            bitmap.insert(key + 100);
            index.insert(&mut txn, 0, &(key as f64), &bitmap);
        }
        txn.commit().unwrap();
        index
    }

    /// Same content as [`get_simple_index`], inserted under both field id `0`
    /// and field id `1`.
    pub fn get_simple_index_with_multiple_field_ids() -> FacetIndex<OrderedF64Codec> {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();
        for fid in 0..2 {
            for i in 0..256u16 {
                let mut bitmap = RoaringBitmap::new();
                bitmap.insert(i as u32);
                index.insert(&mut txn, fid, &(i as f64), &bitmap);
            }
        }
        txn.commit().unwrap();
        index
    }

    /// Draws 128 keys from `0..256` with a fixed all-zero seed, then inserts
    /// the same key sequence under field id `0` and field id `1`. Each key is
    /// inserted with the doc ids `key` and `key + 100`.
    pub fn get_random_looking_index_with_multiple_field_ids() -> FacetIndex<OrderedF64Codec> {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();

        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        let keys =
            std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
        for fid in 0..2 {
            for &key in &keys {
                let mut bitmap = RoaringBitmap::new();
                bitmap.insert(key);
                bitmap.insert(key + 100);
                index.insert(&mut txn, fid, &(key as f64), &bitmap);
            }
        }
        txn.commit().unwrap();
        index
    }

    /// Builds a string index for field ids `0` and `1`. For each `i` in
    /// `0..256`, an even `i` is inserted under its decimal string and an odd
    /// `i` under the empty string, with a bitmap containing the doc id `i`.
    pub fn get_simple_string_index_with_multiple_field_ids() -> FacetIndex<StrRefCodec> {
        let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();
        for fid in 0..2 {
            for i in 0..256u16 {
                let mut bitmap = RoaringBitmap::new();
                bitmap.insert(i as u32);
                if i % 2 == 0 {
                    index.insert(&mut txn, fid, &format!("{i}").as_str(), &bitmap);
                } else {
                    index.insert(&mut txn, fid, &"", &bitmap);
                }
            }
        }
        txn.commit().unwrap();
        index
    }

    /// Builds a string index for field ids `0` and `1` from 128 keys drawn
    /// from `0..256` with a fixed all-zero seed. An even key is inserted under
    /// its decimal string and an odd key under the empty string, with the doc
    /// ids `key` and `key + 100`.
    pub fn get_random_looking_string_index_with_multiple_field_ids() -> FacetIndex<StrRefCodec> {
        let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();

        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        let keys =
            std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
        for fid in 0..2 {
            for &key in &keys {
                let mut bitmap = RoaringBitmap::new();
                bitmap.insert(key);
                bitmap.insert(key + 100);
                if key % 2 == 0 {
                    index.insert(&mut txn, fid, &format!("{key}").as_str(), &bitmap);
                } else {
                    index.insert(&mut txn, fid, &"", &bitmap);
                }
            }
        }
        txn.commit().unwrap();
        index
    }
}
|