Mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-10-30)
Keep the caches in the AppendOnlyVec

parent 0a8cb471df
commit dead7a56a3
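The change below replaces the per-thread ItemsPool context pools with caches that each rayon worker creates in its init closure and pushes into a shared AppendOnlyVec, so the caches outlive the parallel extraction phase and can be drained during merger building. What follows is a minimal, self-contained sketch of that pattern, not the MeiliSearch code: AppendOnly is a deliberately simplified Mutex-based stand-in for the lock-free AppendOnlyVec touched by this commit, and rayon's plain for_each_init stands in for the internal try_arc_for_each_try_init helper.

use std::sync::Mutex;

use rayon::prelude::*;

/// Simplified stand-in for the commit's lock-free AppendOnlyVec: elements can
/// only be appended, `push` hands back a mutable handle to the element it just
/// inserted, and the collection is drained once the parallel phase is over.
struct AppendOnly<T>(Mutex<Vec<Box<T>>>);

impl<T> AppendOnly<T> {
    fn new() -> Self {
        Self(Mutex::new(Vec::new()))
    }

    /// Append `value` and return a mutable reference to it. Each element sits
    /// in its own heap allocation (stable address) and nothing is ever removed
    /// while the collection is shared, so handing out the `&mut` is sound.
    fn push(&self, value: T) -> &mut T {
        let mut guard = self.0.lock().unwrap();
        guard.push(Box::new(value));
        let ptr: *mut T = &mut **guard.last_mut().unwrap();
        unsafe { &mut *ptr }
    }

    /// Consume the collection and yield every element that was pushed.
    fn into_iter(self) -> impl Iterator<Item = T> {
        self.0.into_inner().unwrap().into_iter().map(|boxed| *boxed)
    }
}

fn main() {
    let caches = AppendOnly::new();

    // One cache per worker: the init closure builds it once and keeps only a
    // mutable handle, while ownership stays in `caches` so the cache outlives
    // the parallel loop (rayon may call init more than once per thread, which
    // simply yields a few extra caches to merge).
    (0..10_000u32).into_par_iter().for_each_init(
        || caches.push(Vec::<u32>::new()),
        |cache, n| cache.push(n),
    );

    // After extraction, drain every per-worker cache and merge it.
    let total: usize = caches.into_iter().map(|cache| cache.len()).sum();
    assert_eq!(total, 10_000);
}

The property mirrored here is the one the commit relies on: push keeps ownership of each cache inside the shared vector while handing the worker a mutable handle, so nothing has to be recovered from a pool once the loop ends.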
@@ -99,12 +99,6 @@ fn test_indices() {
    }
}

impl<T> Default for AppendOnlyVec<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T> AppendOnlyVec<T> {
    const EMPTY: UnsafeCell<*mut T> = UnsafeCell::new(ptr::null_mut());

@@ -220,6 +214,12 @@ impl<T> AppendOnlyVec<T> {
    }
}

impl<T> Default for AppendOnlyVec<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T> Debug for AppendOnlyVec<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("AppendOnlyVec").field("len", &self.len()).finish()

@@ -12,6 +12,7 @@ use super::super::cache::CboCachedSorter;
use super::facet_document::extract_document_facets;
use super::FacetKind;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::new::append_only_vec::AppendOnlyVec;
use crate::update::new::extract::DocidsExtractor;
use crate::update::new::items_pool::ParallelIteratorExt;
use crate::update::new::{DocumentChange, ItemsPool};
@@ -209,45 +210,40 @@ impl DocidsExtractor for FacetedDocidsExtractor {
        let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
        let attributes_to_extract: Vec<_> =
            attributes_to_extract.iter().map(|s| s.as_ref()).collect();

        let context_pool = ItemsPool::new(|| {
            Ok((
                fields_ids_map.clone(),
                Vec::new(),
                CboCachedSorter::new(
                    // TODO use a better value
                    100.try_into().unwrap(),
                    create_sorter(
                        grenad::SortAlgorithm::Stable,
                        MergeDeladdCboRoaringBitmaps,
                        indexer.chunk_compression_type,
                        indexer.chunk_compression_level,
                        indexer.max_nb_chunks,
                        max_memory,
                    ),
                ),
            ))
        });
        let caches = AppendOnlyVec::new();

        {
            let span =
                tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
            let _entered = span.enter();
            document_changes.into_par_iter().try_arc_for_each_try_init(
                || index.read_txn().map_err(Error::from),
                |rtxn, document_change| {
                    context_pool.with(|(fields_ids_map, buffer, cached_sorter)| {
                        Self::extract_document_change(
                            rtxn,
                            index,
                            buffer,
                            fields_ids_map,
                            &attributes_to_extract,
                            cached_sorter,
                            document_change?,
                        )
                        .map_err(Arc::new)
                    })
                || {
                    let rtxn = index.read_txn().map_err(Error::from)?;
                    let cache = caches.push(CboCachedSorter::new(
                        // TODO use a better value
                        100.try_into().unwrap(),
                        create_sorter(
                            grenad::SortAlgorithm::Stable,
                            MergeDeladdCboRoaringBitmaps,
                            indexer.chunk_compression_type,
                            indexer.chunk_compression_level,
                            indexer.max_nb_chunks,
                            max_memory,
                        ),
                    ));
                    Ok((rtxn, fields_ids_map.clone(), Vec::new(), cache))
                },
                |(rtxn, fields_ids_map, buffer, cached_sorter), document_change| {
                    Self::extract_document_change(
                        rtxn,
                        index,
                        buffer,
                        fields_ids_map,
                        &attributes_to_extract,
                        cached_sorter,
                        document_change?,
                    )
                    .map_err(Arc::new)
                },
            )?;
        }
@@ -257,14 +253,15 @@ impl DocidsExtractor for FacetedDocidsExtractor {
                tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
            let _entered = span.enter();

            let readers: Vec<_> = context_pool
                .into_items()
            let readers: Vec<_> = caches
                .into_iter()
                .par_bridge()
                .map(|(_tokenizer, _fields_ids_map, cached_sorter)| {
                .map(|cached_sorter| {
                    let sorter = cached_sorter.into_sorter()?;
                    sorter.into_reader_cursors()
                })
                .collect();

            for reader in readers {
                builder.extend(reader?);
            }
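The merger-building hunks (this one and the later ones) all follow the same shape: drain the AppendOnlyVec with a plain into_iter(), fan the finalization work back out over the rayon pool with par_bridge(), then fold the resulting readers into the builder. Below is a rough, self-contained stand-in for that shape, with plain Vec<u32> caches instead of CboCachedSorter and a merged Vec instead of grenad reader cursors:

use rayon::iter::{ParallelBridge, ParallelIterator};

// Stand-in for the merger-building phase: drain the caches that were pushed
// into the append-only vec, finalize each one in parallel via `par_bridge`,
// then fold the results sequentially, mirroring the `readers` /
// `builder.extend(reader?)` shape in the diff.
fn merge_caches(caches: Vec<Vec<u32>>) -> Vec<u32> {
    let readers: Vec<Vec<u32>> = caches
        .into_iter()
        .par_bridge()
        .map(|mut cache| {
            // Stands in for `cached_sorter.into_sorter()?.into_reader_cursors()`.
            cache.sort_unstable();
            cache
        })
        .collect();

    let mut merged = Vec::new();
    for reader in readers {
        // Stands in for `builder.extend(reader?)`.
        merged.extend(reader);
    }
    merged.sort_unstable();
    merged
}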
@@ -8,6 +8,8 @@ use heed::RoTxn;
use rayon::iter::IntoParallelIterator;

use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::SearchableExtractor;
use crate::update::new::append_only_vec::AppendOnlyVec;
use crate::update::new::extract::cache::CboCachedSorter;
use crate::update::new::extract::perm_json_p::contained_in;
use crate::update::new::items_pool::ParallelIteratorExt;
@@ -339,37 +341,33 @@ impl WordDocidsExtractors {
            max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
        };

        let context_pool = ItemsPool::new(|| {
            Ok((
                &document_tokenizer,
                fields_ids_map.clone(),
                WordDocidsCachedSorters::new(
                    indexer,
                    max_memory,
                    // TODO use a better value
                    200_000.try_into().unwrap(),
                ),
            ))
        });
        let caches = AppendOnlyVec::new();

        {
            let span =
                tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
            let _entered = span.enter();
            document_changes.into_par_iter().try_arc_for_each_try_init(
                || index.read_txn().map_err(Error::from),
                |rtxn, document_change| {
                    context_pool.with(|(document_tokenizer, fields_ids_map, cached_sorter)| {
                        Self::extract_document_change(
                            rtxn,
                            index,
                            document_tokenizer,
                            fields_ids_map,
                            cached_sorter,
                            document_change?,
                        )
                        .map_err(Arc::new)
                    })
                || {
                    let rtxn = index.read_txn().map_err(Error::from)?;
                    let cache = caches.push(WordDocidsCachedSorters::new(
                        indexer,
                        max_memory,
                        // TODO use a better value
                        200_000.try_into().unwrap(),
                    ));
                    Ok((rtxn, &document_tokenizer, fields_ids_map.clone(), cache))
                },
                |(rtxn, document_tokenizer, fields_ids_map, cached_sorter), document_change| {
                    Self::extract_document_change(
                        rtxn,
                        index,
                        document_tokenizer,
                        fields_ids_map,
                        cached_sorter,
                        document_change?,
                    )
                    .map_err(Arc::new)
                },
            )?;
        }
@@ -379,7 +377,7 @@ impl WordDocidsExtractors {
                tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
            let _entered = span.enter();
            let mut builder = WordDocidsMergerBuilders::new();
            for (_tokenizer, _fields_ids_map, cache) in context_pool.into_items() {
            for cache in caches.into_iter() {
                builder.add_sorters(cache)?;
            }
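Every extraction loop in this commit goes through try_arc_for_each_try_init, a helper from crate::update::new::items_pool::ParallelIteratorExt that accepts a fallible per-worker init closure; that closure is where the RoTxn is opened and the new cache is pushed into the AppendOnlyVec. Its implementation is not part of this diff; the sketch below only illustrates, with a hypothetical name and signature, how such a fallible init can be layered on top of rayon's try_for_each_init:

use std::sync::Arc;

use rayon::iter::ParallelIterator;

// Hypothetical illustration in the spirit of the `try_arc_for_each_try_init`
// call sites above; this is not the real helper's code.
fn try_for_each_with_fallible_init<I, T, E, INIT, OP>(
    iter: I,
    init: INIT,
    op: OP,
) -> Result<(), Arc<E>>
where
    I: ParallelIterator,
    T: Send,
    E: Send + Sync,
    INIT: Fn() -> Result<T, E> + Send + Sync,
    OP: Fn(&mut T, I::Item) -> Result<(), Arc<E>> + Send + Sync,
{
    iter.try_for_each_init(
        // rayon's `try_for_each_init` wants an infallible init, so the
        // per-worker state is the `Result` of building that state.
        || init().map_err(Arc::new),
        // A failed init is reported for every item handed to that worker;
        // otherwise the worker's state is passed on to `op`.
        |state, item| match state {
            Ok(state) => op(state, item),
            Err(error) => Err(Arc::clone(error)),
        },
    )
}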
@@ -14,6 +14,7 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};

use super::cache::CboCachedSorter;
use super::DocidsExtractor;
use crate::update::new::append_only_vec::AppendOnlyVec;
use crate::update::new::items_pool::ParallelIteratorExt;
use crate::update::new::{DocumentChange, ItemsPool};
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
@@ -57,44 +58,39 @@ pub trait SearchableExtractor {
            localized_attributes_rules: &localized_attributes_rules,
            max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
        };

        let context_pool = ItemsPool::new(|| {
            Ok((
                &document_tokenizer,
                fields_ids_map.clone(),
                CboCachedSorter::new(
                    // TODO use a better value
                    1_000_000.try_into().unwrap(),
                    create_sorter(
                        grenad::SortAlgorithm::Stable,
                        MergeDeladdCboRoaringBitmaps,
                        indexer.chunk_compression_type,
                        indexer.chunk_compression_level,
                        indexer.max_nb_chunks,
                        max_memory,
                    ),
                ),
            ))
        });
        let caches = AppendOnlyVec::new();

        {
            let span =
                tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
            let _entered = span.enter();
            document_changes.into_par_iter().try_arc_for_each_try_init(
                || index.read_txn().map_err(Error::from),
                |rtxn, document_change| {
                    context_pool.with(|(document_tokenizer, fields_ids_map, cached_sorter)| {
                        Self::extract_document_change(
                            rtxn,
                            index,
                            document_tokenizer,
                            fields_ids_map,
                            cached_sorter,
                            document_change?,
                        )
                        .map_err(Arc::new)
                    })
                || {
                    let rtxn = index.read_txn().map_err(Error::from)?;
                    let cache = caches.push(CboCachedSorter::new(
                        // TODO use a better value
                        1_000_000.try_into().unwrap(),
                        create_sorter(
                            grenad::SortAlgorithm::Stable,
                            MergeDeladdCboRoaringBitmaps,
                            indexer.chunk_compression_type,
                            indexer.chunk_compression_level,
                            indexer.max_nb_chunks,
                            max_memory,
                        ),
                    ));
                    Ok((rtxn, &document_tokenizer, fields_ids_map.clone(), cache))
                },
                |(rtxn, document_tokenizer, fields_ids_map, cached_sorter), document_change| {
                    Self::extract_document_change(
                        rtxn,
                        index,
                        document_tokenizer,
                        fields_ids_map,
                        cached_sorter,
                        document_change?,
                    )
                    .map_err(Arc::new)
                },
            )?;
        }
@@ -104,14 +100,15 @@ pub trait SearchableExtractor {
                tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
            let _entered = span.enter();

            let readers: Vec<_> = context_pool
                .into_items()
            let readers: Vec<_> = caches
                .into_iter()
                .par_bridge()
                .map(|(_tokenizer, _fields_ids_map, cached_sorter)| {
                .map(|cached_sorter| {
                    let sorter = cached_sorter.into_sorter()?;
                    sorter.into_reader_cursors()
                })
                .collect();

            for reader in readers {
                builder.extend(reader?);
            }