diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 0eaeec399..2e2e448c2 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -325,10 +325,9 @@ mod tests { use big_s::S; use maplit::hashset; - use crate::{ - documents::documents_batch_reader_from_objects, index::tests::TempIndex, milli_snap, - FacetDistribution, - }; + use crate::documents::documents_batch_reader_from_objects; + use crate::index::tests::TempIndex; + use crate::{milli_snap, FacetDistribution}; #[test] fn few_candidates_few_facet_values() { diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index ad330b8db..01266187a 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -4,10 +4,10 @@ use heed::Result; use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level}; -use crate::{ - heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}, - DocumentId, +use crate::heed_codec::facet::{ + ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, }; +use crate::DocumentId; pub fn iterate_over_facet_distribution<'t, CB>( rtxn: &'t heed::RoTxn<'t>, @@ -114,13 +114,15 @@ where #[cfg(test)] mod tests { - use super::iterate_over_facet_distribution; - use crate::milli_snap; - use crate::search::facet::tests::get_random_looking_index; - use crate::{heed_codec::facet::OrderedF64Codec, search::facet::tests::get_simple_index}; + use std::ops::ControlFlow; + use heed::BytesDecode; use roaring::RoaringBitmap; - use std::ops::ControlFlow; + + use super::iterate_over_facet_distribution; + use crate::heed_codec::facet::OrderedF64Codec; + use crate::milli_snap; + use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; #[test] fn filter_distribution_all() { diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs index c99ac8e92..8934873b7 100644 --- a/milli/src/search/facet/facet_range_search.rs +++ b/milli/src/search/facet/facet_range_search.rs @@ -255,13 +255,15 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { #[cfg(test)] mod tests { + use std::ops::Bound; + + use roaring::RoaringBitmap; + use super::find_docids_of_facet_within_bounds; use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; use crate::milli_snap; use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; use crate::snapshot_tests::display_bitmap; - use roaring::RoaringBitmap; - use std::ops::Bound; #[test] fn random_looking_index_snap() { diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index 33ca7d1ce..6567fe95e 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -83,11 +83,12 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { #[cfg(test)] mod tests { + use roaring::RoaringBitmap; + use crate::milli_snap; use crate::search::facet::facet_sort_ascending::ascending_facet_sort; use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; use crate::snapshot_tests::display_bitmap; - use roaring::RoaringBitmap; #[test] fn filter_sort() { diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index 69f286886..2eab9fca1 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -116,12 +116,13 @@ impl<'t> Iterator for DescendingFacetSort<'t> { #[cfg(test)] mod tests { + use roaring::RoaringBitmap; + use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec}; use crate::milli_snap; use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; use crate::snapshot_tests::display_bitmap; - use roaring::RoaringBitmap; #[test] fn filter_sort_descending() { diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 415c2b51a..18c3a652b 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -80,7 +80,8 @@ pub(crate) mod tests { use rand::{Rng, SeedableRng}; use roaring::RoaringBitmap; - use crate::{heed_codec::facet::OrderedF64Codec, update::facet::tests::FacetIndex}; + use crate::heed_codec::facet::OrderedF64Codec; + use crate::update::facet::tests::FacetIndex; pub fn get_simple_index() -> FacetIndex { let index = FacetIndex::::new(4, 8, 5); diff --git a/milli/src/update/facet/bulk.rs b/milli/src/update/facet/bulk.rs index 3a371995e..e82af5d66 100644 --- a/milli/src/update/facet/bulk.rs +++ b/milli/src/update/facet/bulk.rs @@ -1,19 +1,20 @@ +use std::borrow::Cow; +use std::fs::File; + +use grenad::CompressionType; +use heed::types::ByteSlice; +use heed::{BytesEncode, Error, RoTxn, RwTxn}; +use log::debug; +use roaring::RoaringBitmap; +use time::OffsetDateTime; + +use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; use crate::facet::FacetType; use crate::heed_codec::facet::{ ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; use crate::update::index_documents::{create_writer, writer_into_reader}; use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; -use grenad::CompressionType; -use heed::types::ByteSlice; -use heed::{BytesEncode, Error, RoTxn, RwTxn}; -use log::debug; -use roaring::RoaringBitmap; -use std::borrow::Cow; -use std::fs::File; -use time::OffsetDateTime; - -use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; /// Algorithm to insert elememts into the `facet_id_(string/f64)_docids` databases /// by rebuilding the database "from scratch". @@ -342,11 +343,13 @@ impl FacetsUpdateBulkInner { #[cfg(test)] mod tests { + use std::iter::once; + + use roaring::RoaringBitmap; + use crate::heed_codec::facet::OrderedF64Codec; use crate::milli_snap; use crate::update::facet::tests::FacetIndex; - use roaring::RoaringBitmap; - use std::iter::once; #[test] fn insert() { diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index 6be2dbf03..a06c8e1c2 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -1,14 +1,16 @@ +use std::collections::HashMap; +use std::fs::File; + +use heed::types::{ByteSlice, DecodeIgnore}; +use heed::{BytesDecode, Error, RoTxn, RwTxn}; +use roaring::RoaringBitmap; + use crate::facet::FacetType; use crate::heed_codec::facet::{ ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; use crate::search::facet::get_highest_level; use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; -use heed::types::{ByteSlice, DecodeIgnore}; -use heed::{BytesDecode, Error, RoTxn, RwTxn}; -use roaring::RoaringBitmap; -use std::collections::HashMap; -use std::fs::File; enum InsertionResult { InPlace, @@ -613,13 +615,14 @@ impl<'a> FacetGroupKey> { #[cfg(test)] mod tests { - use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec}; - use crate::milli_snap; - use crate::update::facet::tests::FacetIndex; use rand::seq::SliceRandom; use rand::{Rng, SeedableRng}; use roaring::RoaringBitmap; + use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec}; + use crate::milli_snap; + use crate::update::facet::tests::FacetIndex; + #[test] fn append() { let index = FacetIndex::::new(4, 8, 5); diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index ea6468538..9263d3a6a 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -53,8 +53,8 @@ FacetGroupValue: ``` When the database is first created using the "bulk" method, each node has a fixed number of children -(except for possibly the last one) given by the `group_size` parameter (default to `FACET_GROUP_SIZE`). -The tree is also built such that the highest level has more than `min_level_size` +(except for possibly the last one) given by the `group_size` parameter (default to `FACET_GROUP_SIZE`). +The tree is also built such that the highest level has more than `min_level_size` (default to `FACET_MIN_LEVEL_SIZE`) elements in it. When the database is incrementally updated, the number of children of a node can vary between @@ -66,7 +66,7 @@ When adding documents to the databases, it is important to determine which metho minimise indexing time. The incremental method is faster when adding few new facet values, but the bulk method is faster when a large part of the database is modified. Empirically, it seems that it takes 50x more time to incrementally add N facet values to an existing database than it is to -construct a database of N facet values. This is the heuristic that is used to choose between the +construct a database of N facet values. This is the heuristic that is used to choose between the two methods. */ @@ -74,12 +74,13 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8; pub const FACET_GROUP_SIZE: u8 = 4; pub const FACET_MIN_LEVEL_SIZE: u8 = 5; +use std::fs::File; + use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; use crate::facet::FacetType; use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::{Index, Result}; -use std::fs::File; pub mod bulk; pub mod incremental; @@ -119,11 +120,23 @@ impl<'i> FacetsUpdate<'i> { return Ok(()); } if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) { - let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data, self.group_size, self.min_level_size); + let bulk_update = FacetsUpdateBulk::new( + self.index, + self.facet_type, + self.new_data, + self.group_size, + self.min_level_size, + ); bulk_update.execute(wtxn)?; } else { - let incremental_update = - FacetsUpdateIncremental::new(self.index, self.facet_type, self.new_data, self.group_size, self.min_level_size, self.max_group_size); + let incremental_update = FacetsUpdateIncremental::new( + self.index, + self.facet_type, + self.new_data, + self.group_size, + self.min_level_size, + self.max_group_size, + ); incremental_update.execute(wtxn)?; } Ok(()) @@ -132,6 +145,14 @@ impl<'i> FacetsUpdate<'i> { #[cfg(test)] pub(crate) mod tests { + use std::fmt::Display; + use std::marker::PhantomData; + use std::rc::Rc; + + use heed::types::ByteSlice; + use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn}; + use roaring::RoaringBitmap; + use super::bulk::FacetsUpdateBulkInner; use crate::heed_codec::facet::{ ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, @@ -140,12 +161,6 @@ pub(crate) mod tests { use crate::snapshot_tests::display_bitmap; use crate::update::FacetsUpdateIncrementalInner; use crate::CboRoaringBitmapCodec; - use heed::types::ByteSlice; - use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn}; - use roaring::RoaringBitmap; - use std::fmt::Display; - use std::marker::PhantomData; - use std::rc::Rc; // A dummy index that only contains the facet database, used for testing pub struct FacetIndex @@ -381,9 +396,8 @@ mod comparison_bench { use rand::Rng; use roaring::RoaringBitmap; - use crate::heed_codec::facet::OrderedF64Codec; - use super::tests::FacetIndex; + use crate::heed_codec::facet::OrderedF64Codec; // This is a simple test to get an intuition on the relative speed // of the incremental vs. bulk indexer.