cargo fmt

This commit is contained in:
Loïc Lecrenier 2022-09-07 18:04:07 +02:00 committed by Loïc Lecrenier
parent b1ab09196c
commit 985a94adfc
9 changed files with 78 additions and 52 deletions

View File

@ -325,10 +325,9 @@ mod tests {
use big_s::S; use big_s::S;
use maplit::hashset; use maplit::hashset;
use crate::{ use crate::documents::documents_batch_reader_from_objects;
documents::documents_batch_reader_from_objects, index::tests::TempIndex, milli_snap, use crate::index::tests::TempIndex;
FacetDistribution, use crate::{milli_snap, FacetDistribution};
};
#[test] #[test]
fn few_candidates_few_facet_values() { fn few_candidates_few_facet_values() {

View File

@ -4,10 +4,10 @@ use heed::Result;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level}; use super::{get_first_facet_value, get_highest_level};
use crate::{ use crate::heed_codec::facet::{
heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}, ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
DocumentId,
}; };
use crate::DocumentId;
pub fn iterate_over_facet_distribution<'t, CB>( pub fn iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
@ -114,13 +114,15 @@ where
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::iterate_over_facet_distribution; use std::ops::ControlFlow;
use crate::milli_snap;
use crate::search::facet::tests::get_random_looking_index;
use crate::{heed_codec::facet::OrderedF64Codec, search::facet::tests::get_simple_index};
use heed::BytesDecode; use heed::BytesDecode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use std::ops::ControlFlow;
use super::iterate_over_facet_distribution;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::milli_snap;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
#[test] #[test]
fn filter_distribution_all() { fn filter_distribution_all() {

View File

@ -255,13 +255,15 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::ops::Bound;
use roaring::RoaringBitmap;
use super::find_docids_of_facet_within_bounds; use super::find_docids_of_facet_within_bounds;
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
use roaring::RoaringBitmap;
use std::ops::Bound;
#[test] #[test]
fn random_looking_index_snap() { fn random_looking_index_snap() {

View File

@ -83,11 +83,12 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use roaring::RoaringBitmap;
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::facet_sort_ascending::ascending_facet_sort; use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
use roaring::RoaringBitmap;
#[test] #[test]
fn filter_sort() { fn filter_sort() {

View File

@ -116,12 +116,13 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use roaring::RoaringBitmap;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec}; use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
use roaring::RoaringBitmap;
#[test] #[test]
fn filter_sort_descending() { fn filter_sort_descending() {

View File

@ -80,7 +80,8 @@ pub(crate) mod tests {
use rand::{Rng, SeedableRng}; use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::{heed_codec::facet::OrderedF64Codec, update::facet::tests::FacetIndex}; use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::facet::tests::FacetIndex;
pub fn get_simple_index() -> FacetIndex<OrderedF64Codec> { pub fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5); let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);

View File

@ -1,19 +1,20 @@
use std::borrow::Cow;
use std::fs::File;
use grenad::CompressionType;
use heed::types::ByteSlice;
use heed::{BytesEncode, Error, RoTxn, RwTxn};
use log::debug;
use roaring::RoaringBitmap;
use time::OffsetDateTime;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
}; };
use crate::update::index_documents::{create_writer, writer_into_reader}; use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
use grenad::CompressionType;
use heed::types::ByteSlice;
use heed::{BytesEncode, Error, RoTxn, RwTxn};
use log::debug;
use roaring::RoaringBitmap;
use std::borrow::Cow;
use std::fs::File;
use time::OffsetDateTime;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
/// Algorithm to insert elements into the `facet_id_(string/f64)_docids` databases /// Algorithm to insert elements into the `facet_id_(string/f64)_docids` databases
/// by rebuilding the database "from scratch". /// by rebuilding the database "from scratch".
@ -342,11 +343,13 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::iter::once;
use roaring::RoaringBitmap;
use crate::heed_codec::facet::OrderedF64Codec; use crate::heed_codec::facet::OrderedF64Codec;
use crate::milli_snap; use crate::milli_snap;
use crate::update::facet::tests::FacetIndex; use crate::update::facet::tests::FacetIndex;
use roaring::RoaringBitmap;
use std::iter::once;
#[test] #[test]
fn insert() { fn insert() {

View File

@ -1,14 +1,16 @@
use std::collections::HashMap;
use std::fs::File;
use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, Error, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
}; };
use crate::search::facet::get_highest_level; use crate::search::facet::get_highest_level;
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, Error, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use std::collections::HashMap;
use std::fs::File;
enum InsertionResult { enum InsertionResult {
InPlace, InPlace,
@ -613,13 +615,14 @@ impl<'a> FacetGroupKey<Vec<u8>> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec};
use crate::milli_snap;
use crate::update::facet::tests::FacetIndex;
use rand::seq::SliceRandom; use rand::seq::SliceRandom;
use rand::{Rng, SeedableRng}; use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec};
use crate::milli_snap;
use crate::update::facet::tests::FacetIndex;
#[test] #[test]
fn append() { fn append() {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5); let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);

View File

@ -53,8 +53,8 @@ FacetGroupValue:
``` ```
When the database is first created using the "bulk" method, each node has a fixed number of children When the database is first created using the "bulk" method, each node has a fixed number of children
(except for possibly the last one) given by the `group_size` parameter (defaults to `FACET_GROUP_SIZE`). (except for possibly the last one) given by the `group_size` parameter (defaults to `FACET_GROUP_SIZE`).
The tree is also built such that the highest level has more than `min_level_size` The tree is also built such that the highest level has more than `min_level_size`
(defaults to `FACET_MIN_LEVEL_SIZE`) elements in it. (defaults to `FACET_MIN_LEVEL_SIZE`) elements in it.
When the database is incrementally updated, the number of children of a node can vary between When the database is incrementally updated, the number of children of a node can vary between
@ -66,7 +66,7 @@ When adding documents to the databases, it is important to determine which metho
minimise indexing time. The incremental method is faster when adding few new facet values, but the minimise indexing time. The incremental method is faster when adding few new facet values, but the
bulk method is faster when a large part of the database is modified. Empirically, it seems that bulk method is faster when a large part of the database is modified. Empirically, it seems that
it takes 50x more time to incrementally add N facet values to an existing database than it does to it takes 50x more time to incrementally add N facet values to an existing database than it does to
construct a database of N facet values. This is the heuristic that is used to choose between the construct a database of N facet values. This is the heuristic that is used to choose between the
two methods. two methods.
*/ */
@ -74,12 +74,13 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8;
pub const FACET_GROUP_SIZE: u8 = 4; pub const FACET_GROUP_SIZE: u8 = 4;
pub const FACET_MIN_LEVEL_SIZE: u8 = 5; pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::fs::File;
use self::incremental::FacetsUpdateIncremental; use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk; use super::FacetsUpdateBulk;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::{Index, Result}; use crate::{Index, Result};
use std::fs::File;
pub mod bulk; pub mod bulk;
pub mod incremental; pub mod incremental;
@ -119,11 +120,23 @@ impl<'i> FacetsUpdate<'i> {
return Ok(()); return Ok(());
} }
if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) { if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data, self.group_size, self.min_level_size); let bulk_update = FacetsUpdateBulk::new(
self.index,
self.facet_type,
self.new_data,
self.group_size,
self.min_level_size,
);
bulk_update.execute(wtxn)?; bulk_update.execute(wtxn)?;
} else { } else {
let incremental_update = let incremental_update = FacetsUpdateIncremental::new(
FacetsUpdateIncremental::new(self.index, self.facet_type, self.new_data, self.group_size, self.min_level_size, self.max_group_size); self.index,
self.facet_type,
self.new_data,
self.group_size,
self.min_level_size,
self.max_group_size,
);
incremental_update.execute(wtxn)?; incremental_update.execute(wtxn)?;
} }
Ok(()) Ok(())
@ -132,6 +145,14 @@ impl<'i> FacetsUpdate<'i> {
#[cfg(test)] #[cfg(test)]
pub(crate) mod tests { pub(crate) mod tests {
use std::fmt::Display;
use std::marker::PhantomData;
use std::rc::Rc;
use heed::types::ByteSlice;
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use super::bulk::FacetsUpdateBulkInner; use super::bulk::FacetsUpdateBulkInner;
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
@ -140,12 +161,6 @@ pub(crate) mod tests {
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
use crate::update::FacetsUpdateIncrementalInner; use crate::update::FacetsUpdateIncrementalInner;
use crate::CboRoaringBitmapCodec; use crate::CboRoaringBitmapCodec;
use heed::types::ByteSlice;
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use std::fmt::Display;
use std::marker::PhantomData;
use std::rc::Rc;
// A dummy index that only contains the facet database, used for testing // A dummy index that only contains the facet database, used for testing
pub struct FacetIndex<BoundCodec> pub struct FacetIndex<BoundCodec>
@ -381,9 +396,8 @@ mod comparison_bench {
use rand::Rng; use rand::Rng;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::facet::OrderedF64Codec;
use super::tests::FacetIndex; use super::tests::FacetIndex;
use crate::heed_codec::facet::OrderedF64Codec;
// This is a simple test to get an intuition on the relative speed // This is a simple test to get an intuition on the relative speed
// of the incremental vs. bulk indexer. // of the incremental vs. bulk indexer.