mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Give same interface to bulk and incremental facet indexing types
+ cargo fmt, oops, sorry for the bad history :(
This commit is contained in:
parent
330c9eb1b2
commit
9026867d17
@ -3,17 +3,19 @@ mod field_doc_id_facet_string_codec;
|
|||||||
mod ordered_f64_codec;
|
mod ordered_f64_codec;
|
||||||
mod str_ref;
|
mod str_ref;
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::convert::TryFrom;
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
|
use heed::types::OwnedType;
|
||||||
|
use heed::{BytesDecode, BytesEncode};
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
||||||
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
||||||
pub use self::ordered_f64_codec::OrderedF64Codec;
|
pub use self::ordered_f64_codec::OrderedF64Codec;
|
||||||
pub use self::str_ref::StrRefCodec;
|
pub use self::str_ref::StrRefCodec;
|
||||||
use crate::{CboRoaringBitmapCodec, BEU16};
|
use crate::{CboRoaringBitmapCodec, BEU16};
|
||||||
use heed::types::OwnedType;
|
|
||||||
use heed::{BytesDecode, BytesEncode};
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::convert::TryFrom;
|
|
||||||
use std::marker::PhantomData;
|
|
||||||
|
|
||||||
pub type FieldIdCodec = OwnedType<BEU16>;
|
pub type FieldIdCodec = OwnedType<BEU16>;
|
||||||
|
|
||||||
|
@ -14,10 +14,10 @@ use time::OffsetDateTime;
|
|||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
use crate::heed_codec::facet::OrderedF64Codec;
|
use crate::heed_codec::facet::{
|
||||||
use crate::heed_codec::facet::StrRefCodec;
|
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||||
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec};
|
FieldIdCodec, OrderedF64Codec, StrRefCodec,
|
||||||
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec};
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||||
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
||||||
|
@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef};
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
||||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||||
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
||||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||||
|
@ -6,8 +6,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{Distinct, DocIter};
|
use super::{Distinct, DocIter};
|
||||||
use crate::error::InternalError;
|
use crate::error::InternalError;
|
||||||
use crate::heed_codec::facet::FacetGroupKey;
|
use crate::heed_codec::facet::{FacetGroupKey, *};
|
||||||
use crate::heed_codec::facet::*;
|
|
||||||
use crate::index::db_name;
|
use crate::index::db_name;
|
||||||
use crate::{DocumentId, FieldId, Index, Result};
|
use crate::{DocumentId, FieldId, Index, Result};
|
||||||
|
|
||||||
|
@ -8,10 +8,10 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::OrderedF64Codec;
|
use crate::heed_codec::facet::{
|
||||||
use crate::heed_codec::facet::StrRefCodec;
|
ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec,
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
FieldDocIdFacetStringCodec, OrderedF64Codec, StrRefCodec,
|
||||||
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
|
};
|
||||||
use crate::search::facet::facet_distribution_iter;
|
use crate::search::facet::facet_distribution_iter;
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
|
|
||||||
|
@ -112,17 +112,19 @@ where
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
|
use heed::BytesDecode;
|
||||||
|
use rand::{Rng, SeedableRng};
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::iterate_over_facet_distribution;
|
use super::iterate_over_facet_distribution;
|
||||||
use crate::heed_codec::facet::OrderedF64Codec;
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
use heed::BytesDecode;
|
|
||||||
use rand::{Rng, SeedableRng};
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use std::ops::ControlFlow;
|
|
||||||
|
|
||||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
for i in 0..256u16 {
|
for i in 0..256u16 {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
@ -133,7 +135,7 @@ mod tests {
|
|||||||
index
|
index
|
||||||
}
|
}
|
||||||
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
|
|
||||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||||
|
@ -261,14 +261,13 @@ mod tests {
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::find_docids_of_facet_within_bounds;
|
use super::find_docids_of_facet_within_bounds;
|
||||||
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||||
use crate::heed_codec::facet::OrderedF64Codec;
|
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
use crate::snapshot_tests::display_bitmap;
|
use crate::snapshot_tests::display_bitmap;
|
||||||
|
|
||||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
for i in 0..256u16 {
|
for i in 0..256u16 {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
@ -279,7 +278,7 @@ mod tests {
|
|||||||
index
|
index
|
||||||
}
|
}
|
||||||
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
|
|
||||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||||
|
@ -93,7 +93,7 @@ mod tests {
|
|||||||
use crate::snapshot_tests::display_bitmap;
|
use crate::snapshot_tests::display_bitmap;
|
||||||
|
|
||||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
for i in 0..256u16 {
|
for i in 0..256u16 {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
@ -104,7 +104,7 @@ mod tests {
|
|||||||
index
|
index
|
||||||
}
|
}
|
||||||
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
|
|
||||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||||
|
@ -119,15 +119,14 @@ mod tests {
|
|||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::OrderedF64Codec;
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, OrderedF64Codec};
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
use crate::snapshot_tests::display_bitmap;
|
use crate::snapshot_tests::display_bitmap;
|
||||||
|
|
||||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
for i in 0..256u16 {
|
for i in 0..256u16 {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
@ -138,7 +137,7 @@ mod tests {
|
|||||||
index
|
index
|
||||||
}
|
}
|
||||||
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
|
|
||||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||||
|
@ -9,8 +9,9 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::facet_range_search;
|
use super::facet_range_search;
|
||||||
use crate::error::{Error, UserError};
|
use crate::error::{Error, UserError};
|
||||||
use crate::heed_codec::facet::OrderedF64Codec;
|
use crate::heed_codec::facet::{
|
||||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
|
||||||
|
};
|
||||||
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
|
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
|
||||||
|
|
||||||
/// The maximum number of filters the filter AST can process.
|
/// The maximum number of filters the filter AST can process.
|
||||||
|
@ -3,7 +3,7 @@ use heed::{BytesDecode, RoTxn};
|
|||||||
|
|
||||||
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
||||||
pub use self::filter::Filter;
|
pub use self::filter::Filter;
|
||||||
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
|
|
||||||
mod facet_distribution;
|
mod facet_distribution;
|
||||||
mod facet_distribution_iter;
|
mod facet_distribution_iter;
|
||||||
@ -27,8 +27,8 @@ where
|
|||||||
db.as_polymorph().prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())?;
|
db.as_polymorph().prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())?;
|
||||||
if let Some(first) = level0_iter_forward.next() {
|
if let Some(first) = level0_iter_forward.next() {
|
||||||
let (first_key, _) = first?;
|
let (first_key, _) = first?;
|
||||||
let first_key =
|
let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key)
|
||||||
FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?;
|
.ok_or(heed::Error::Encoding)?;
|
||||||
Ok(Some(first_key.left_bound))
|
Ok(Some(first_key.left_bound))
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
@ -50,8 +50,8 @@ where
|
|||||||
.rev_prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())?;
|
.rev_prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())?;
|
||||||
if let Some(last) = level0_iter_backward.next() {
|
if let Some(last) = level0_iter_backward.next() {
|
||||||
let (last_key, _) = last?;
|
let (last_key, _) = last?;
|
||||||
let last_key =
|
let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key)
|
||||||
FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?;
|
.ok_or(heed::Error::Encoding)?;
|
||||||
Ok(Some(last_key.left_bound))
|
Ok(Some(last_key.left_bound))
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
@ -85,11 +85,12 @@ pub mod test {
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
use crate::snapshot_tests::display_bitmap;
|
use crate::snapshot_tests::display_bitmap;
|
||||||
use crate::update::FacetsUpdateIncremental;
|
use crate::update::FacetsUpdateIncrementalInner;
|
||||||
|
|
||||||
|
// A dummy index that only contains the facet database, used for testing
|
||||||
pub struct FacetIndex<BoundCodec>
|
pub struct FacetIndex<BoundCodec>
|
||||||
where
|
where
|
||||||
for<'a> BoundCodec:
|
for<'a> BoundCodec:
|
||||||
@ -100,10 +101,12 @@ pub mod test {
|
|||||||
_phantom: PhantomData<BoundCodec>,
|
_phantom: PhantomData<BoundCodec>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The facet database and its settings
|
||||||
pub struct Database {
|
pub struct Database {
|
||||||
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
pub group_size: usize,
|
pub group_size: u8,
|
||||||
pub max_group_size: usize,
|
pub min_level_size: u8,
|
||||||
|
pub max_group_size: u8,
|
||||||
_tempdir: Rc<tempfile::TempDir>,
|
_tempdir: Rc<tempfile::TempDir>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,9 +120,12 @@ pub mod test {
|
|||||||
tempdir: Rc<tempfile::TempDir>,
|
tempdir: Rc<tempfile::TempDir>,
|
||||||
group_size: u8,
|
group_size: u8,
|
||||||
max_group_size: u8,
|
max_group_size: u8,
|
||||||
|
min_level_size: u8,
|
||||||
) -> FacetIndex<BoundCodec> {
|
) -> FacetIndex<BoundCodec> {
|
||||||
let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)) as usize;
|
let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)); // 2 <= x <= 127
|
||||||
let max_group_size = std::cmp::max(group_size * 2, max_group_size as usize);
|
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
|
||||||
|
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
|
||||||
|
|
||||||
let mut options = heed::EnvOpenOptions::new();
|
let mut options = heed::EnvOpenOptions::new();
|
||||||
let options = options.map_size(4096 * 4 * 10 * 100);
|
let options = options.map_size(4096 * 4 * 10 * 100);
|
||||||
unsafe {
|
unsafe {
|
||||||
@ -129,14 +135,25 @@ pub mod test {
|
|||||||
let content = env.open_database(None).unwrap().unwrap();
|
let content = env.open_database(None).unwrap().unwrap();
|
||||||
|
|
||||||
FacetIndex {
|
FacetIndex {
|
||||||
db: Database { content, group_size, max_group_size, _tempdir: tempdir },
|
db: Database {
|
||||||
|
content,
|
||||||
|
group_size,
|
||||||
|
max_group_size,
|
||||||
|
min_level_size,
|
||||||
|
_tempdir: tempdir,
|
||||||
|
},
|
||||||
env,
|
env,
|
||||||
_phantom: PhantomData,
|
_phantom: PhantomData,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn new(group_size: u8, max_group_size: u8) -> FacetIndex<BoundCodec> {
|
pub fn new(
|
||||||
let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)) as usize;
|
group_size: u8,
|
||||||
let max_group_size = std::cmp::max(group_size * 2, max_group_size as usize);
|
max_group_size: u8,
|
||||||
|
min_level_size: u8,
|
||||||
|
) -> FacetIndex<BoundCodec> {
|
||||||
|
let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)); // 2 <= x <= 127
|
||||||
|
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
|
||||||
|
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
|
||||||
let mut options = heed::EnvOpenOptions::new();
|
let mut options = heed::EnvOpenOptions::new();
|
||||||
let options = options.map_size(4096 * 4 * 100);
|
let options = options.map_size(4096 * 4 * 100);
|
||||||
let tempdir = tempfile::TempDir::new().unwrap();
|
let tempdir = tempfile::TempDir::new().unwrap();
|
||||||
@ -144,7 +161,13 @@ pub mod test {
|
|||||||
let content = env.create_database(None).unwrap();
|
let content = env.create_database(None).unwrap();
|
||||||
|
|
||||||
FacetIndex {
|
FacetIndex {
|
||||||
db: Database { content, group_size, max_group_size, _tempdir: Rc::new(tempdir) },
|
db: Database {
|
||||||
|
content,
|
||||||
|
group_size,
|
||||||
|
max_group_size,
|
||||||
|
min_level_size,
|
||||||
|
_tempdir: Rc::new(tempdir),
|
||||||
|
},
|
||||||
env,
|
env,
|
||||||
_phantom: PhantomData,
|
_phantom: PhantomData,
|
||||||
}
|
}
|
||||||
@ -156,7 +179,7 @@ pub mod test {
|
|||||||
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
|
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
|
||||||
docids: &RoaringBitmap,
|
docids: &RoaringBitmap,
|
||||||
) {
|
) {
|
||||||
let update = FacetsUpdateIncremental::new(self.db.content);
|
let update = FacetsUpdateIncrementalInner::new(self.db.content);
|
||||||
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
||||||
update.insert(rwtxn, field_id, &key_bytes, docids).unwrap();
|
update.insert(rwtxn, field_id, &key_bytes, docids).unwrap();
|
||||||
}
|
}
|
||||||
@ -167,7 +190,7 @@ pub mod test {
|
|||||||
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
|
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
|
||||||
value: u32,
|
value: u32,
|
||||||
) {
|
) {
|
||||||
let update = FacetsUpdateIncremental::new(self.db.content);
|
let update = FacetsUpdateIncrementalInner::new(self.db.content);
|
||||||
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
||||||
update.delete(rwtxn, field_id, &key_bytes, value).unwrap();
|
update.delete(rwtxn, field_id, &key_bytes, value).unwrap();
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,7 @@ use std::path::Path;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{FacetGroupValue, FacetGroupKey};
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||||
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
|
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
|
||||||
|
|
||||||
#[track_caller]
|
#[track_caller]
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use crate::{facet::FacetType, ExternalDocumentsIds, FieldDistribution, Index, Result};
|
use crate::facet::FacetType;
|
||||||
|
use crate::{ExternalDocumentsIds, FieldDistribution, Index, Result};
|
||||||
|
|
||||||
pub struct ClearDocuments<'t, 'u, 'i> {
|
pub struct ClearDocuments<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
|
@ -11,7 +11,7 @@ use time::OffsetDateTime;
|
|||||||
use super::{ClearDocuments, FacetsUpdateBulk};
|
use super::{ClearDocuments, FacetsUpdateBulk};
|
||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::index::{db_name, main_key};
|
use crate::index::{db_name, main_key};
|
||||||
use crate::{
|
use crate::{
|
||||||
|
@ -1,18 +1,20 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
use std::cmp;
|
||||||
|
use std::fs::File;
|
||||||
|
|
||||||
|
use grenad::CompressionType;
|
||||||
|
use heed::types::ByteSlice;
|
||||||
|
use heed::{BytesEncode, Error, RoTxn, RwTxn};
|
||||||
|
use log::debug;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
use crate::update::index_documents::{create_writer, writer_into_reader};
|
use crate::update::index_documents::{create_writer, writer_into_reader};
|
||||||
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||||
use grenad::CompressionType;
|
|
||||||
use heed::types::ByteSlice;
|
|
||||||
use heed::{BytesEncode, Error, RoTxn, RwTxn};
|
|
||||||
use log::debug;
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::cmp;
|
|
||||||
use std::fs::File;
|
|
||||||
use time::OffsetDateTime;
|
|
||||||
|
|
||||||
pub struct FacetsUpdateBulk<'i> {
|
pub struct FacetsUpdateBulk<'i> {
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
@ -367,9 +369,7 @@ mod tests {
|
|||||||
documents.push(serde_json::json!({ "facet2": i }).as_object().unwrap().clone());
|
documents.push(serde_json::json!({ "facet2": i }).as_object().unwrap().clone());
|
||||||
}
|
}
|
||||||
let documents = documents_batch_reader_from_objects(documents);
|
let documents = documents_batch_reader_from_objects(documents);
|
||||||
dbg!();
|
|
||||||
index.add_documents(documents).unwrap();
|
index.add_documents(documents).unwrap();
|
||||||
dbg!();
|
|
||||||
db_snap!(index, facet_id_f64_docids, name);
|
db_snap!(index, facet_id_f64_docids, name);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -421,4 +421,100 @@ mod tests {
|
|||||||
test("default", None, None);
|
test("default", None, None);
|
||||||
test("tiny_groups_tiny_levels", NonZeroUsize::new(1), NonZeroUsize::new(1));
|
test("tiny_groups_tiny_levels", NonZeroUsize::new(1), NonZeroUsize::new(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_facets_number_incremental_update() {
|
||||||
|
let test =
|
||||||
|
|name: &str, group_size: Option<NonZeroUsize>, min_level_size: Option<NonZeroUsize>| {
|
||||||
|
let mut index = TempIndex::new_with_map_size(4096 * 1000 * 10); // 40MB
|
||||||
|
index.index_documents_config.autogenerate_docids = true;
|
||||||
|
index.index_documents_config.facet_level_group_size = group_size;
|
||||||
|
index.index_documents_config.facet_min_level_size = min_level_size;
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|settings| {
|
||||||
|
settings.set_filterable_fields(
|
||||||
|
IntoIterator::into_iter(["facet".to_owned(), "facet2".to_owned()])
|
||||||
|
.collect(),
|
||||||
|
);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let mut documents = vec![];
|
||||||
|
for i in 0..1000 {
|
||||||
|
documents.push(serde_json::json!({ "facet": i }).as_object().unwrap().clone());
|
||||||
|
}
|
||||||
|
for i in 0..100 {
|
||||||
|
documents.push(serde_json::json!({ "facet2": i }).as_object().unwrap().clone());
|
||||||
|
}
|
||||||
|
let documents_batch = documents_batch_reader_from_objects(documents.clone());
|
||||||
|
|
||||||
|
index.add_documents(documents_batch).unwrap();
|
||||||
|
|
||||||
|
let mut documents = vec![];
|
||||||
|
for i in 1000..1010 {
|
||||||
|
documents.push(serde_json::json!({ "facet": i }).as_object().unwrap().clone());
|
||||||
|
}
|
||||||
|
for i in 100..110 {
|
||||||
|
documents.push(serde_json::json!({ "facet2": i }).as_object().unwrap().clone());
|
||||||
|
}
|
||||||
|
let documents_batch = documents_batch_reader_from_objects(documents.clone());
|
||||||
|
|
||||||
|
index.add_documents(documents_batch).unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, facet_id_f64_docids, name);
|
||||||
|
};
|
||||||
|
|
||||||
|
test("default", None, None);
|
||||||
|
test("tiny_groups_tiny_levels", NonZeroUsize::new(1), NonZeroUsize::new(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_facets_number_delete_facet_id_then_bulk_update() {
|
||||||
|
let test =
|
||||||
|
|name: &str, group_size: Option<NonZeroUsize>, min_level_size: Option<NonZeroUsize>| {
|
||||||
|
let mut index = TempIndex::new_with_map_size(4096 * 1000 * 10); // 40MB
|
||||||
|
index.index_documents_config.autogenerate_docids = true;
|
||||||
|
index.index_documents_config.facet_level_group_size = group_size;
|
||||||
|
index.index_documents_config.facet_min_level_size = min_level_size;
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|settings| {
|
||||||
|
settings.set_filterable_fields(
|
||||||
|
IntoIterator::into_iter(["facet".to_owned(), "facet2".to_owned()])
|
||||||
|
.collect(),
|
||||||
|
);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let mut documents = vec![];
|
||||||
|
for i in 0..1000 {
|
||||||
|
documents.push(serde_json::json!({ "facet": i }).as_object().unwrap().clone());
|
||||||
|
}
|
||||||
|
for i in 0..100 {
|
||||||
|
documents.push(serde_json::json!({ "facet2": i }).as_object().unwrap().clone());
|
||||||
|
}
|
||||||
|
let documents_batch = documents_batch_reader_from_objects(documents.clone());
|
||||||
|
|
||||||
|
index.add_documents(documents_batch).unwrap();
|
||||||
|
|
||||||
|
// 1100 facets -> how long is the DB?
|
||||||
|
|
||||||
|
let mut documents = vec![];
|
||||||
|
for i in 1000..1010 {
|
||||||
|
documents.push(serde_json::json!({ "facet": i }).as_object().unwrap().clone());
|
||||||
|
}
|
||||||
|
for i in 100..110 {
|
||||||
|
documents.push(serde_json::json!({ "facet2": i }).as_object().unwrap().clone());
|
||||||
|
}
|
||||||
|
let documents_batch = documents_batch_reader_from_objects(documents.clone());
|
||||||
|
|
||||||
|
index.add_documents(documents_batch).unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, facet_id_f64_docids, name);
|
||||||
|
};
|
||||||
|
|
||||||
|
test("default", None, None);
|
||||||
|
test("tiny_groups_tiny_levels", NonZeroUsize::new(1), NonZeroUsize::new(1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,12 +1,16 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use std::fs::File;
|
||||||
|
|
||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
use crate::search::facet::get_highest_level;
|
use crate::search::facet::get_highest_level;
|
||||||
use crate::Result;
|
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||||
|
|
||||||
enum InsertionResult {
|
enum InsertionResult {
|
||||||
InPlace,
|
InPlace,
|
||||||
@ -18,30 +22,79 @@ enum DeletionResult {
|
|||||||
Remove { prev: Option<Vec<u8>>, next: Option<Vec<u8>> },
|
Remove { prev: Option<Vec<u8>>, next: Option<Vec<u8>> },
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct FacetsUpdateIncremental {
|
pub struct FacetsUpdateIncremental<'i> {
|
||||||
|
index: &'i Index,
|
||||||
|
inner: FacetsUpdateIncrementalInner,
|
||||||
|
facet_type: FacetType,
|
||||||
|
new_data: grenad::Reader<File>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'i> FacetsUpdateIncremental<'i> {
|
||||||
|
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
||||||
|
FacetsUpdateIncremental {
|
||||||
|
index,
|
||||||
|
inner: FacetsUpdateIncrementalInner {
|
||||||
|
db: match facet_type {
|
||||||
|
FacetType::String => index
|
||||||
|
.facet_id_string_docids
|
||||||
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||||
|
FacetType::Number => index
|
||||||
|
.facet_id_f64_docids
|
||||||
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||||
|
},
|
||||||
|
group_size: 4,
|
||||||
|
max_group_size: 8,
|
||||||
|
min_level_size: 5,
|
||||||
|
},
|
||||||
|
facet_type,
|
||||||
|
new_data,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn group_size(mut self, size: u8) -> Self {
|
||||||
|
self.inner.group_size = size;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
pub fn min_level_size(mut self, size: u8) -> Self {
|
||||||
|
self.inner.min_level_size = size;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
pub fn max_group_size(mut self, size: u8) -> Self {
|
||||||
|
self.inner.max_group_size = size;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> {
|
||||||
|
let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
|
||||||
|
|
||||||
|
let mut cursor = self.new_data.into_cursor()?;
|
||||||
|
while let Some((key, value)) = cursor.move_on_next()? {
|
||||||
|
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
|
||||||
|
.ok_or(heed::Error::Encoding)?;
|
||||||
|
let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
|
||||||
|
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
|
||||||
|
*new_faceted_docids.entry(key.field_id).or_default() |= docids;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (field_id, new_docids) in new_faceted_docids {
|
||||||
|
let mut docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?;
|
||||||
|
docids |= new_docids;
|
||||||
|
self.index.put_faceted_documents_ids(wtxn, field_id, self.facet_type, &docids)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FacetsUpdateIncrementalInner {
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
group_size: u8,
|
group_size: u8,
|
||||||
min_level_size: u8,
|
min_level_size: u8,
|
||||||
max_group_size: u8,
|
max_group_size: u8,
|
||||||
}
|
}
|
||||||
impl FacetsUpdateIncremental {
|
impl FacetsUpdateIncrementalInner {
|
||||||
pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self {
|
pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self {
|
||||||
Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 }
|
Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 }
|
||||||
}
|
}
|
||||||
pub fn group_size(mut self, size: u8) -> Self {
|
|
||||||
self.group_size = size;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
pub fn min_level_size(mut self, size: u8) -> Self {
|
|
||||||
self.min_level_size = size;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
pub fn max_group_size(mut self, size: u8) -> Self {
|
|
||||||
self.max_group_size = size;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
impl FacetsUpdateIncremental {
|
impl FacetsUpdateIncrementalInner {
|
||||||
fn find_insertion_key_value(
|
fn find_insertion_key_value(
|
||||||
&self,
|
&self,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
@ -481,9 +534,9 @@ mod tests {
|
|||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::OrderedF64Codec;
|
use crate::heed_codec::facet::{
|
||||||
use crate::heed_codec::facet::StrRefCodec;
|
ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec, StrRefCodec,
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
};
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::get_highest_level;
|
use crate::search::facet::get_highest_level;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
@ -534,7 +587,7 @@ mod tests {
|
|||||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
|
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
|
||||||
};
|
};
|
||||||
|
|
||||||
assert!(value.size > 0 && (value.size as usize) < db.max_group_size);
|
assert!(value.size > 0 && value.size < db.max_group_size);
|
||||||
|
|
||||||
let mut actual_size = 0;
|
let mut actual_size = 0;
|
||||||
let mut values_below = RoaringBitmap::new();
|
let mut values_below = RoaringBitmap::new();
|
||||||
@ -553,7 +606,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn append() {
|
fn append() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
for i in 0..256u16 {
|
for i in 0..256u16 {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
bitmap.insert(i as u32);
|
bitmap.insert(i as u32);
|
||||||
@ -566,7 +619,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn many_field_ids_append() {
|
fn many_field_ids_append() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
for i in 0..256u16 {
|
for i in 0..256u16 {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
bitmap.insert(i as u32);
|
bitmap.insert(i as u32);
|
||||||
@ -595,7 +648,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn many_field_ids_prepend() {
|
fn many_field_ids_prepend() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
for i in (0..256).into_iter().rev() {
|
for i in (0..256).into_iter().rev() {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
bitmap.insert(i as u32);
|
bitmap.insert(i as u32);
|
||||||
@ -625,7 +678,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn prepend() {
|
fn prepend() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
|
|
||||||
for i in (0..256).into_iter().rev() {
|
for i in (0..256).into_iter().rev() {
|
||||||
@ -640,7 +693,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn shuffled() {
|
fn shuffled() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
|
|
||||||
let mut keys = (0..256).into_iter().collect::<Vec<_>>();
|
let mut keys = (0..256).into_iter().collect::<Vec<_>>();
|
||||||
@ -659,7 +712,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn merge_values() {
|
fn merge_values() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
|
|
||||||
let mut keys = (0..256).into_iter().collect::<Vec<_>>();
|
let mut keys = (0..256).into_iter().collect::<Vec<_>>();
|
||||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||||
@ -680,7 +733,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn delete_from_end() {
|
fn delete_from_end() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
for i in 0..256 {
|
for i in 0..256 {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
bitmap.insert(i);
|
bitmap.insert(i);
|
||||||
@ -745,7 +798,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn delete_from_start() {
|
fn delete_from_start() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
|
|
||||||
for i in 0..256 {
|
for i in 0..256 {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
@ -783,7 +836,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn delete_shuffled() {
|
fn delete_shuffled() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
|
|
||||||
for i in 0..256 {
|
for i in 0..256 {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
@ -829,7 +882,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn in_place_level0_insert() {
|
fn in_place_level0_insert() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
let mut keys = (0..16).into_iter().collect::<Vec<_>>();
|
let mut keys = (0..16).into_iter().collect::<Vec<_>>();
|
||||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||||
keys.shuffle(&mut rng);
|
keys.shuffle(&mut rng);
|
||||||
@ -849,7 +902,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn in_place_level0_delete() {
|
fn in_place_level0_delete() {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||||
|
|
||||||
let mut keys = (0..64).into_iter().collect::<Vec<_>>();
|
let mut keys = (0..64).into_iter().collect::<Vec<_>>();
|
||||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||||
@ -879,7 +932,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn shuffle_merge_string_and_delete() {
|
fn shuffle_merge_string_and_delete() {
|
||||||
let index = FacetIndex::<StrRefCodec>::new(4, 8);
|
let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
|
||||||
|
|
||||||
let mut keys = (1000..1064).into_iter().collect::<Vec<_>>();
|
let mut keys = (1000..1064).into_iter().collect::<Vec<_>>();
|
||||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||||
|
@ -1,12 +1,9 @@
|
|||||||
use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
|
use self::incremental::FacetsUpdateIncremental;
|
||||||
use crate::{
|
use super::FacetsUpdateBulk;
|
||||||
facet::FacetType,
|
use crate::facet::FacetType;
|
||||||
heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec},
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
CboRoaringBitmapCodec, FieldId, Index, Result,
|
use crate::{Index, Result};
|
||||||
};
|
use std::fs::File;
|
||||||
use heed::BytesDecode;
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use std::{collections::HashMap, fs::File};
|
|
||||||
|
|
||||||
pub mod bulk;
|
pub mod bulk;
|
||||||
pub mod incremental;
|
pub mod incremental;
|
||||||
@ -14,11 +11,13 @@ pub mod incremental;
|
|||||||
pub struct FacetsUpdate<'i> {
|
pub struct FacetsUpdate<'i> {
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
|
facet_type: FacetType,
|
||||||
|
new_data: grenad::Reader<File>,
|
||||||
|
// Options:
|
||||||
|
// there's no way to change these for now
|
||||||
level_group_size: u8,
|
level_group_size: u8,
|
||||||
max_level_group_size: u8,
|
max_level_group_size: u8,
|
||||||
min_level_size: u8,
|
min_level_size: u8,
|
||||||
facet_type: FacetType,
|
|
||||||
new_data: grenad::Reader<File>,
|
|
||||||
}
|
}
|
||||||
impl<'i> FacetsUpdate<'i> {
|
impl<'i> FacetsUpdate<'i> {
|
||||||
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
||||||
@ -42,36 +41,37 @@ impl<'i> FacetsUpdate<'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
|
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
|
||||||
|
if self.new_data.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
// here, come up with a better condition!
|
// here, come up with a better condition!
|
||||||
if self.database.is_empty(wtxn)? {
|
// ideally we'd choose which method to use for each field id individually
|
||||||
|
// but I don't think it's worth the effort yet
|
||||||
|
// As a first requirement, we ask that the length of the new data is less
|
||||||
|
// than 1/50th of the length of the database in order to use the incremental
|
||||||
|
// method.
|
||||||
|
if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
|
||||||
let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data)
|
let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data)
|
||||||
.level_group_size(self.level_group_size)
|
.level_group_size(self.level_group_size)
|
||||||
.min_level_size(self.min_level_size);
|
.min_level_size(self.min_level_size);
|
||||||
bulk_update.execute(wtxn)?;
|
bulk_update.execute(wtxn)?;
|
||||||
} else {
|
} else {
|
||||||
let indexer = FacetsUpdateIncremental::new(self.database)
|
let incremental_update =
|
||||||
|
FacetsUpdateIncremental::new(self.index, self.facet_type, self.new_data)
|
||||||
|
.group_size(self.level_group_size)
|
||||||
.max_group_size(self.max_level_group_size)
|
.max_group_size(self.max_level_group_size)
|
||||||
.min_level_size(self.min_level_size);
|
.min_level_size(self.min_level_size);
|
||||||
|
incremental_update.execute(wtxn)?;
|
||||||
let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
|
|
||||||
|
|
||||||
let mut cursor = self.new_data.into_cursor()?;
|
|
||||||
while let Some((key, value)) = cursor.move_on_next()? {
|
|
||||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
|
|
||||||
.ok_or(heed::Error::Encoding)?;
|
|
||||||
let docids =
|
|
||||||
CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
|
|
||||||
indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;
|
|
||||||
*new_faceted_docids.entry(key.field_id).or_default() |= docids;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (field_id, new_docids) in new_faceted_docids {
|
|
||||||
let mut docids =
|
|
||||||
self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?;
|
|
||||||
docids |= new_docids;
|
|
||||||
self.index.put_faceted_documents_ids(wtxn, field_id, self.facet_type, &docids)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
// here I want to create a benchmark
|
||||||
|
// to find out at which point it is faster to do it incrementally
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn update() {}
|
||||||
|
}
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
source: milli/src/update/facet/bulk.rs
|
||||||
|
---
|
||||||
|
9e9175e0a56db39f0dc04fb8f15c28fe
|
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
source: milli/src/update/facet/bulk.rs
|
||||||
|
---
|
||||||
|
9e9175e0a56db39f0dc04fb8f15c28fe
|
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
source: milli/src/update/facet/bulk.rs
|
||||||
|
---
|
||||||
|
b494fb6565707ce401f6d6ac03f46b93
|
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
source: milli/src/update/facet/bulk.rs
|
||||||
|
---
|
||||||
|
b494fb6565707ce401f6d6ac03f46b93
|
@ -6,9 +6,9 @@ use heed::{BytesDecode, BytesEncode};
|
|||||||
use super::helpers::{
|
use super::helpers::{
|
||||||
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
||||||
};
|
};
|
||||||
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
|
use crate::heed_codec::facet::{
|
||||||
use crate::heed_codec::facet::OrderedF64Codec;
|
FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec,
|
||||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
|
};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// Extracts the facet number and the documents ids where this facet number appears.
|
/// Extracts the facet number and the documents ids where this facet number appears.
|
||||||
|
@ -4,8 +4,7 @@ use std::io;
|
|||||||
use heed::BytesEncode;
|
use heed::BytesEncode;
|
||||||
|
|
||||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
||||||
use crate::heed_codec::facet::StrRefCodec;
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, StrRefCodec};
|
||||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
|
|
||||||
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
||||||
use crate::{FieldId, Result};
|
use crate::{FieldId, Result};
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ use std::fs::File;
|
|||||||
use std::io::{self, Seek, SeekFrom};
|
use std::io::{self, Seek, SeekFrom};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use grenad::{CompressionType, Reader, Sorter};
|
use grenad::{CompressionType, Sorter};
|
||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
|
||||||
@ -208,36 +208,6 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
|
|||||||
Ok(std::iter::from_fn(move || transposer().transpose()))
|
Ok(std::iter::from_fn(move || transposer().transpose()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write_into_lmdb_database(
|
|
||||||
wtxn: &mut heed::RwTxn,
|
|
||||||
database: heed::PolyDatabase,
|
|
||||||
reader: Reader<File>,
|
|
||||||
merge: MergeFn,
|
|
||||||
) -> Result<()> {
|
|
||||||
debug!("Writing MTBL stores...");
|
|
||||||
let before = Instant::now();
|
|
||||||
|
|
||||||
let mut cursor = reader.into_cursor()?;
|
|
||||||
while let Some((k, v)) = cursor.move_on_next()? {
|
|
||||||
let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, k)?;
|
|
||||||
match iter.next().transpose()? {
|
|
||||||
Some((key, old_val)) if key == k => {
|
|
||||||
let vals = &[Cow::Borrowed(old_val), Cow::Borrowed(v)][..];
|
|
||||||
let val = merge(k, vals)?;
|
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
|
||||||
unsafe { iter.put_current(k, &val)? };
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
drop(iter);
|
|
||||||
database.put::<_, ByteSlice, ByteSlice>(wtxn, k, v)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
debug!("MTBL stores merged in {:.02?}!", before.elapsed());
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sorter_into_lmdb_database(
|
pub fn sorter_into_lmdb_database(
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
database: heed::PolyDatabase,
|
database: heed::PolyDatabase,
|
||||||
|
@ -9,8 +9,8 @@ pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
|
|||||||
use fst::{IntoStreamer, Streamer};
|
use fst::{IntoStreamer, Streamer};
|
||||||
pub use grenad_helpers::{
|
pub use grenad_helpers::{
|
||||||
as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks,
|
as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks,
|
||||||
merge_ignore_values, sorter_into_lmdb_database, sorter_into_reader, write_into_lmdb_database,
|
merge_ignore_values, sorter_into_lmdb_database, sorter_into_reader, writer_into_reader,
|
||||||
writer_into_reader, GrenadParameters, MergeableReader,
|
GrenadParameters, MergeableReader,
|
||||||
};
|
};
|
||||||
pub use merge_functions::{
|
pub use merge_functions::{
|
||||||
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,
|
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,
|
||||||
|
@ -27,8 +27,7 @@ pub use self::enrich::{
|
|||||||
pub use self::helpers::{
|
pub use self::helpers::{
|
||||||
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
|
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
|
||||||
fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
||||||
sorter_into_lmdb_database, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
|
sorter_into_lmdb_database, valid_lmdb_key, writer_into_reader, ClonableMmap, MergeFn,
|
||||||
ClonableMmap, MergeFn,
|
|
||||||
};
|
};
|
||||||
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
||||||
pub use self::transform::{Transform, TransformOutput};
|
pub use self::transform::{Transform, TransformOutput};
|
||||||
|
@ -2,7 +2,7 @@ pub use self::available_documents_ids::AvailableDocumentsIds;
|
|||||||
pub use self::clear_documents::ClearDocuments;
|
pub use self::clear_documents::ClearDocuments;
|
||||||
pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult};
|
pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult};
|
||||||
pub use self::facet::bulk::FacetsUpdateBulk;
|
pub use self::facet::bulk::FacetsUpdateBulk;
|
||||||
pub use self::facet::incremental::FacetsUpdateIncremental;
|
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
|
||||||
pub use self::index_documents::{
|
pub use self::index_documents::{
|
||||||
DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
|
DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user