mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
cargo fmt
This commit is contained in:
parent
b1ab09196c
commit
985a94adfc
@ -325,10 +325,9 @@ mod tests {
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
|
||||
use crate::{
|
||||
documents::documents_batch_reader_from_objects, index::tests::TempIndex, milli_snap,
|
||||
FacetDistribution,
|
||||
};
|
||||
use crate::documents::documents_batch_reader_from_objects;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::{milli_snap, FacetDistribution};
|
||||
|
||||
#[test]
|
||||
fn few_candidates_few_facet_values() {
|
||||
|
@ -4,10 +4,10 @@ use heed::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level};
|
||||
use crate::{
|
||||
heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
|
||||
DocumentId,
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::DocumentId;
|
||||
|
||||
pub fn iterate_over_facet_distribution<'t, CB>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
@ -114,13 +114,15 @@ where
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::iterate_over_facet_distribution;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::tests::get_random_looking_index;
|
||||
use crate::{heed_codec::facet::OrderedF64Codec, search::facet::tests::get_simple_index};
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use roaring::RoaringBitmap;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use super::iterate_over_facet_distribution;
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
|
||||
|
||||
#[test]
|
||||
fn filter_distribution_all() {
|
||||
|
@ -255,13 +255,15 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ops::Bound;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::find_docids_of_facet_within_bounds;
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use roaring::RoaringBitmap;
|
||||
use std::ops::Bound;
|
||||
|
||||
#[test]
|
||||
fn random_looking_index_snap() {
|
||||
|
@ -83,11 +83,12 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
||||
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
#[test]
|
||||
fn filter_sort() {
|
||||
|
@ -116,12 +116,13 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
#[test]
|
||||
fn filter_sort_descending() {
|
||||
|
@ -80,7 +80,8 @@ pub(crate) mod tests {
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{heed_codec::facet::OrderedF64Codec, update::facet::tests::FacetIndex};
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
|
||||
pub fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
|
@ -1,19 +1,20 @@
|
||||
use std::borrow::Cow;
|
||||
use std::fs::File;
|
||||
|
||||
use grenad::CompressionType;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::{BytesEncode, Error, RoTxn, RwTxn};
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::update::index_documents::{create_writer, writer_into_reader};
|
||||
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||
use grenad::CompressionType;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::{BytesEncode, Error, RoTxn, RwTxn};
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
use std::borrow::Cow;
|
||||
use std::fs::File;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||
|
||||
/// Algorithm to insert elememts into the `facet_id_(string/f64)_docids` databases
|
||||
/// by rebuilding the database "from scratch".
|
||||
@ -342,11 +343,13 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::iter::once;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::milli_snap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
use roaring::RoaringBitmap;
|
||||
use std::iter::once;
|
||||
|
||||
#[test]
|
||||
fn insert() {
|
||||
|
@ -1,14 +1,16 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
|
||||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::search::facet::get_highest_level;
|
||||
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
|
||||
enum InsertionResult {
|
||||
InPlace,
|
||||
@ -613,13 +615,14 @@ impl<'a> FacetGroupKey<Vec<u8>> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec};
|
||||
use crate::milli_snap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec};
|
||||
use crate::milli_snap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
|
||||
#[test]
|
||||
fn append() {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
|
@ -53,8 +53,8 @@ FacetGroupValue:
|
||||
```
|
||||
|
||||
When the database is first created using the "bulk" method, each node has a fixed number of children
|
||||
(except for possibly the last one) given by the `group_size` parameter (default to `FACET_GROUP_SIZE`).
|
||||
The tree is also built such that the highest level has more than `min_level_size`
|
||||
(except for possibly the last one) given by the `group_size` parameter (default to `FACET_GROUP_SIZE`).
|
||||
The tree is also built such that the highest level has more than `min_level_size`
|
||||
(default to `FACET_MIN_LEVEL_SIZE`) elements in it.
|
||||
|
||||
When the database is incrementally updated, the number of children of a node can vary between
|
||||
@ -66,7 +66,7 @@ When adding documents to the databases, it is important to determine which metho
|
||||
minimise indexing time. The incremental method is faster when adding few new facet values, but the
|
||||
bulk method is faster when a large part of the database is modified. Empirically, it seems that
|
||||
it takes 50x more time to incrementally add N facet values to an existing database than it is to
|
||||
construct a database of N facet values. This is the heuristic that is used to choose between the
|
||||
construct a database of N facet values. This is the heuristic that is used to choose between the
|
||||
two methods.
|
||||
*/
|
||||
|
||||
@ -74,12 +74,13 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8;
|
||||
pub const FACET_GROUP_SIZE: u8 = 4;
|
||||
pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
||||
|
||||
use std::fs::File;
|
||||
|
||||
use self::incremental::FacetsUpdateIncremental;
|
||||
use super::FacetsUpdateBulk;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::{Index, Result};
|
||||
use std::fs::File;
|
||||
|
||||
pub mod bulk;
|
||||
pub mod incremental;
|
||||
@ -119,11 +120,23 @@ impl<'i> FacetsUpdate<'i> {
|
||||
return Ok(());
|
||||
}
|
||||
if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
|
||||
let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data, self.group_size, self.min_level_size);
|
||||
let bulk_update = FacetsUpdateBulk::new(
|
||||
self.index,
|
||||
self.facet_type,
|
||||
self.new_data,
|
||||
self.group_size,
|
||||
self.min_level_size,
|
||||
);
|
||||
bulk_update.execute(wtxn)?;
|
||||
} else {
|
||||
let incremental_update =
|
||||
FacetsUpdateIncremental::new(self.index, self.facet_type, self.new_data, self.group_size, self.min_level_size, self.max_group_size);
|
||||
let incremental_update = FacetsUpdateIncremental::new(
|
||||
self.index,
|
||||
self.facet_type,
|
||||
self.new_data,
|
||||
self.group_size,
|
||||
self.min_level_size,
|
||||
self.max_group_size,
|
||||
);
|
||||
incremental_update.execute(wtxn)?;
|
||||
}
|
||||
Ok(())
|
||||
@ -132,6 +145,14 @@ impl<'i> FacetsUpdate<'i> {
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use std::fmt::Display;
|
||||
use std::marker::PhantomData;
|
||||
use std::rc::Rc;
|
||||
|
||||
use heed::types::ByteSlice;
|
||||
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::bulk::FacetsUpdateBulkInner;
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
@ -140,12 +161,6 @@ pub(crate) mod tests {
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::update::FacetsUpdateIncrementalInner;
|
||||
use crate::CboRoaringBitmapCodec;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
use std::fmt::Display;
|
||||
use std::marker::PhantomData;
|
||||
use std::rc::Rc;
|
||||
|
||||
// A dummy index that only contains the facet database, used for testing
|
||||
pub struct FacetIndex<BoundCodec>
|
||||
@ -381,9 +396,8 @@ mod comparison_bench {
|
||||
use rand::Rng;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
|
||||
use super::tests::FacetIndex;
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
|
||||
// This is a simple test to get an intuition on the relative speed
|
||||
// of the incremental vs. bulk indexer.
|
||||
|
Loading…
Reference in New Issue
Block a user