mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 21:20:24 +01:00
Measure the SmallVec efficacy
This commit is contained in:
parent
4ce5d3d66d
commit
7f148c127c
@ -15,6 +15,8 @@ pub struct CboCachedSorter<MF> {
|
||||
sorter: Sorter<MF>,
|
||||
deladd_buffer: Vec<u8>,
|
||||
cbo_buffer: Vec<u8>,
|
||||
total_insertions: usize,
|
||||
fitted_in_key: usize,
|
||||
}
|
||||
|
||||
impl<MF> CboCachedSorter<MF> {
|
||||
@ -24,6 +26,8 @@ impl<MF> CboCachedSorter<MF> {
|
||||
sorter,
|
||||
deladd_buffer: Vec::new(),
|
||||
cbo_buffer: Vec::new(),
|
||||
total_insertions: 0,
|
||||
fitted_in_key: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -35,6 +39,8 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
||||
del.get_or_insert_with(PushOptimizedBitmap::default).insert(n);
|
||||
}
|
||||
None => {
|
||||
self.total_insertions += 1;
|
||||
self.fitted_in_key += (key.len() <= 20) as usize;
|
||||
let value = DelAddRoaringBitmap::new_del_u32(n);
|
||||
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
||||
self.write_entry(key, deladd)?;
|
||||
@ -55,6 +61,8 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
||||
del.get_or_insert_with(PushOptimizedBitmap::default).union_with_bitmap(bitmap);
|
||||
}
|
||||
None => {
|
||||
self.total_insertions += 1;
|
||||
self.fitted_in_key += (key.len() <= 20) as usize;
|
||||
let value = DelAddRoaringBitmap::new_del(bitmap);
|
||||
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
||||
self.write_entry(key, deladd)?;
|
||||
@ -71,6 +79,8 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
||||
add.get_or_insert_with(PushOptimizedBitmap::default).insert(n);
|
||||
}
|
||||
None => {
|
||||
self.total_insertions += 1;
|
||||
self.fitted_in_key += (key.len() <= 20) as usize;
|
||||
let value = DelAddRoaringBitmap::new_add_u32(n);
|
||||
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
||||
self.write_entry(key, deladd)?;
|
||||
@ -91,6 +101,8 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
||||
add.get_or_insert_with(PushOptimizedBitmap::default).union_with_bitmap(bitmap);
|
||||
}
|
||||
None => {
|
||||
self.total_insertions += 1;
|
||||
self.fitted_in_key += (key.len() <= 20) as usize;
|
||||
let value = DelAddRoaringBitmap::new_add(bitmap);
|
||||
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
||||
self.write_entry(key, deladd)?;
|
||||
@ -108,6 +120,8 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
||||
add.get_or_insert_with(PushOptimizedBitmap::default).insert(n);
|
||||
}
|
||||
None => {
|
||||
self.total_insertions += 1;
|
||||
self.fitted_in_key += (key.len() <= 20) as usize;
|
||||
let value = DelAddRoaringBitmap::new_del_add_u32(n);
|
||||
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
|
||||
self.write_entry(key, deladd)?;
|
||||
@ -161,14 +175,22 @@ impl<MF: MergeFunction> CboCachedSorter<MF> {
|
||||
for (key, deladd) in mem::replace(&mut self.cache, default_arc) {
|
||||
self.write_entry(key, deladd)?;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"LruCache stats: {} <= 20 bytes ({}%) on a total of {} insertions",
|
||||
self.fitted_in_key,
|
||||
(self.fitted_in_key as f32 / self.total_insertions as f32) * 100.0,
|
||||
self.total_insertions,
|
||||
);
|
||||
|
||||
Ok(self.sorter)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DelAddRoaringBitmap {
|
||||
pub del: Option<PushOptimizedBitmap>,
|
||||
pub add: Option<PushOptimizedBitmap>,
|
||||
pub(crate) del: Option<PushOptimizedBitmap>,
|
||||
pub(crate) add: Option<PushOptimizedBitmap>,
|
||||
}
|
||||
|
||||
impl DelAddRoaringBitmap {
|
||||
|
@ -1,26 +1,21 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::{borrow::Cow, fs::File, num::NonZero};
|
||||
use std::fs::File;
|
||||
use std::num::NonZero;
|
||||
|
||||
use grenad::Merger;
|
||||
use grenad::MergerBuilder;
|
||||
use grenad::{Merger, MergerBuilder};
|
||||
use heed::RoTxn;
|
||||
use rayon::iter::IntoParallelIterator;
|
||||
use rayon::iter::ParallelIterator;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
|
||||
use super::{
|
||||
tokenize_document::{tokenizer_builder, DocumentTokenizer},
|
||||
SearchableExtractor,
|
||||
};
|
||||
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
||||
use super::SearchableExtractor;
|
||||
use crate::update::new::extract::cache::CboCachedSorter;
|
||||
use crate::update::new::extract::perm_json_p::contained_in;
|
||||
use crate::DocumentId;
|
||||
use crate::update::new::{DocumentChange, ItemsPool};
|
||||
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||
use crate::{
|
||||
bucketed_position,
|
||||
update::{
|
||||
create_sorter,
|
||||
new::{extract::cache::CboCachedSorter, DocumentChange, ItemsPool},
|
||||
GrenadParameters, MergeDeladdCboRoaringBitmaps,
|
||||
},
|
||||
FieldId, GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE,
|
||||
bucketed_position, DocumentId, FieldId, GlobalFieldsIdsMap, Index, Result,
|
||||
MAX_POSITION_PER_ATTRIBUTE,
|
||||
};
|
||||
|
||||
const MAX_COUNTED_WORDS: usize = 30;
|
||||
@ -565,7 +560,7 @@ impl WordDocidsExtractors {
|
||||
cached_sorter: &mut WordDocidsCachedSorters,
|
||||
document_change: DocumentChange,
|
||||
) -> Result<()> {
|
||||
let exact_attributes = index.exact_attributes(&rtxn)?;
|
||||
let exact_attributes = index.exact_attributes(rtxn)?;
|
||||
let is_exact_attribute =
|
||||
|fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr));
|
||||
let mut buffer = Vec::new();
|
||||
|
@ -59,7 +59,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
||||
DocumentChange::Update(inner) => {
|
||||
let document = inner.current(rtxn, index)?.unwrap();
|
||||
process_document_tokens(
|
||||
&document,
|
||||
document,
|
||||
document_tokenizer,
|
||||
fields_ids_map,
|
||||
&mut word_positions,
|
||||
|
@ -92,24 +92,24 @@ impl<'a> DocumentTokenizer<'a> {
|
||||
};
|
||||
|
||||
// if the current field is searchable or contains a searchable attribute
|
||||
if select_field(&field_name, self.attribute_to_extract, self.attribute_to_skip) {
|
||||
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip) {
|
||||
// parse json.
|
||||
match serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)? {
|
||||
Value::Object(object) => seek_leaf_values_in_object(
|
||||
&object,
|
||||
self.attribute_to_extract,
|
||||
self.attribute_to_skip,
|
||||
&field_name,
|
||||
field_name,
|
||||
&mut tokenize_field,
|
||||
)?,
|
||||
Value::Array(array) => seek_leaf_values_in_array(
|
||||
&array,
|
||||
self.attribute_to_extract,
|
||||
self.attribute_to_skip,
|
||||
&field_name,
|
||||
field_name,
|
||||
&mut tokenize_field,
|
||||
)?,
|
||||
value => tokenize_field(&field_name, &value)?,
|
||||
value => tokenize_field(field_name, &value)?,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user