mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-10-30 01:38:49 +01:00
WIP
This commit is contained in:
parent
82f6e3f3b9
commit
31680f3014
@ -1,8 +1,6 @@
|
|||||||
use std::fs::File;
|
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
|
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
|
||||||
use grenad::Merger;
|
|
||||||
use heed::types::Bytes;
|
use heed::types::Bytes;
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -10,8 +8,8 @@ use roaring::RoaringBitmap;
|
|||||||
use super::extract::FacetKind;
|
use super::extract::FacetKind;
|
||||||
use super::StdResult;
|
use super::StdResult;
|
||||||
use crate::index::main_key::{DOCUMENTS_IDS_KEY, WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
|
use crate::index::main_key::{DOCUMENTS_IDS_KEY, WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
|
||||||
|
use crate::update::new::extract::CboCachedSorter;
|
||||||
use crate::update::new::KvReaderFieldId;
|
use crate::update::new::KvReaderFieldId;
|
||||||
use crate::update::MergeDeladdCboRoaringBitmaps;
|
|
||||||
use crate::{DocumentId, Index};
|
use crate::{DocumentId, Index};
|
||||||
|
|
||||||
/// The capacity of the channel is currently in number of messages.
|
/// The capacity of the channel is currently in number of messages.
|
||||||
@ -29,7 +27,9 @@ pub fn merger_writer_channel(cap: usize) -> (MergerSender, WriterReceiver) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// The capacity of the channel is currently in number of messages.
|
/// The capacity of the channel is currently in number of messages.
|
||||||
pub fn extractors_merger_channels(cap: usize) -> (ExtractorSender, MergerReceiver) {
|
pub fn extractors_merger_channels<'extractor>(
|
||||||
|
cap: usize,
|
||||||
|
) -> (ExtractorSender<'extractor>, MergerReceiver<'extractor>) {
|
||||||
let (sender, receiver) = crossbeam_channel::bounded(cap);
|
let (sender, receiver) = crossbeam_channel::bounded(cap);
|
||||||
(ExtractorSender(sender), MergerReceiver(receiver))
|
(ExtractorSender(sender), MergerReceiver(receiver))
|
||||||
}
|
}
|
||||||
@ -313,7 +313,9 @@ pub trait DatabaseType {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub trait MergerOperationType {
|
pub trait MergerOperationType {
|
||||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation;
|
fn new_merger_operation<'extractor>(
|
||||||
|
caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
|
) -> MergerOperation<'extractor>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DatabaseType for ExactWordDocids {
|
impl DatabaseType for ExactWordDocids {
|
||||||
@ -321,8 +323,10 @@ impl DatabaseType for ExactWordDocids {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MergerOperationType for ExactWordDocids {
|
impl MergerOperationType for ExactWordDocids {
|
||||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
fn new_merger_operation<'extractor>(
|
||||||
MergerOperation::ExactWordDocidsMerger(merger)
|
caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
|
) -> MergerOperation<'extractor> {
|
||||||
|
MergerOperation::ExactWordDocidsMerger(caches)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -331,8 +335,10 @@ impl DatabaseType for FidWordCountDocids {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MergerOperationType for FidWordCountDocids {
|
impl MergerOperationType for FidWordCountDocids {
|
||||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
fn new_merger_operation<'extractor>(
|
||||||
MergerOperation::FidWordCountDocidsMerger(merger)
|
caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
|
) -> MergerOperation<'extractor> {
|
||||||
|
MergerOperation::FidWordCountDocidsMerger(caches)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -341,8 +347,10 @@ impl DatabaseType for WordDocids {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MergerOperationType for WordDocids {
|
impl MergerOperationType for WordDocids {
|
||||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
fn new_merger_operation<'extractor>(
|
||||||
MergerOperation::WordDocidsMerger(merger)
|
caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
|
) -> MergerOperation<'extractor> {
|
||||||
|
MergerOperation::WordDocidsMerger(caches)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -351,8 +359,10 @@ impl DatabaseType for WordFidDocids {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MergerOperationType for WordFidDocids {
|
impl MergerOperationType for WordFidDocids {
|
||||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
fn new_merger_operation<'extractor>(
|
||||||
MergerOperation::WordFidDocidsMerger(merger)
|
caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
|
) -> MergerOperation<'extractor> {
|
||||||
|
MergerOperation::WordFidDocidsMerger(caches)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -361,8 +371,10 @@ impl DatabaseType for WordPairProximityDocids {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MergerOperationType for WordPairProximityDocids {
|
impl MergerOperationType for WordPairProximityDocids {
|
||||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
fn new_merger_operation<'extractor>(
|
||||||
MergerOperation::WordPairProximityDocidsMerger(merger)
|
caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
|
) -> MergerOperation<'extractor> {
|
||||||
|
MergerOperation::WordPairProximityDocidsMerger(caches)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -371,14 +383,18 @@ impl DatabaseType for WordPositionDocids {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MergerOperationType for WordPositionDocids {
|
impl MergerOperationType for WordPositionDocids {
|
||||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
fn new_merger_operation<'extractor>(
|
||||||
MergerOperation::WordPositionDocidsMerger(merger)
|
caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
|
) -> MergerOperation<'extractor> {
|
||||||
|
MergerOperation::WordPositionDocidsMerger(caches)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MergerOperationType for FacetDocids {
|
impl MergerOperationType for FacetDocids {
|
||||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
fn new_merger_operation<'extractor>(
|
||||||
MergerOperation::FacetDocidsMerger(merger)
|
caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
|
) -> MergerOperation<'extractor> {
|
||||||
|
MergerOperation::FacetDocidsMerger(caches)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -489,23 +505,23 @@ impl DocumentsSender<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum MergerOperation {
|
pub enum MergerOperation<'extractor> {
|
||||||
ExactWordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
ExactWordDocidsMerger(Vec<Vec<CboCachedSorter<'extractor>>>),
|
||||||
FidWordCountDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
FidWordCountDocidsMerger(Vec<Vec<CboCachedSorter<'extractor>>>),
|
||||||
WordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
WordDocidsMerger(Vec<Vec<CboCachedSorter<'extractor>>>),
|
||||||
WordFidDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
WordFidDocidsMerger(Vec<Vec<CboCachedSorter<'extractor>>>),
|
||||||
WordPairProximityDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
WordPairProximityDocidsMerger(Vec<Vec<CboCachedSorter<'extractor>>>),
|
||||||
WordPositionDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
WordPositionDocidsMerger(Vec<Vec<CboCachedSorter<'extractor>>>),
|
||||||
FacetDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
FacetDocidsMerger(Vec<Vec<CboCachedSorter<'extractor>>>),
|
||||||
DeleteDocument { docid: DocumentId, external_id: String },
|
DeleteDocument { docid: DocumentId, external_id: String },
|
||||||
InsertDocument { docid: DocumentId, external_id: String, document: Box<KvReaderFieldId> },
|
InsertDocument { docid: DocumentId, external_id: String, document: Box<KvReaderFieldId> },
|
||||||
FinishedDocument,
|
FinishedDocument,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct MergerReceiver(Receiver<MergerOperation>);
|
pub struct MergerReceiver<'extractor>(Receiver<MergerOperation<'extractor>>);
|
||||||
|
|
||||||
impl IntoIterator for MergerReceiver {
|
impl<'extractor> IntoIterator for MergerReceiver<'extractor> {
|
||||||
type Item = MergerOperation;
|
type Item = MergerOperation<'extractor>;
|
||||||
type IntoIter = IntoIter<Self::Item>;
|
type IntoIter = IntoIter<Self::Item>;
|
||||||
|
|
||||||
fn into_iter(self) -> Self::IntoIter {
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
@ -513,27 +529,27 @@ impl IntoIterator for MergerReceiver {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ExtractorSender(Sender<MergerOperation>);
|
pub struct ExtractorSender<'extractor>(Sender<MergerOperation<'extractor>>);
|
||||||
|
|
||||||
impl ExtractorSender {
|
impl<'extractor> ExtractorSender<'extractor> {
|
||||||
pub fn document_sender(&self) -> DocumentSender<'_> {
|
pub fn document_sender(&self) -> DocumentSender<'_, 'extractor> {
|
||||||
DocumentSender(Some(&self.0))
|
DocumentSender(Some(&self.0))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn send_searchable<D: MergerOperationType>(
|
pub fn send_searchable<D: MergerOperationType>(
|
||||||
&self,
|
&self,
|
||||||
merger: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
) -> StdResult<(), SendError<()>> {
|
) -> StdResult<(), SendError<()>> {
|
||||||
match self.0.send(D::new_merger_operation(merger)) {
|
match self.0.send(D::new_merger_operation(caches)) {
|
||||||
Ok(()) => Ok(()),
|
Ok(()) => Ok(()),
|
||||||
Err(SendError(_)) => Err(SendError(())),
|
Err(SendError(_)) => Err(SendError(())),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DocumentSender<'a>(Option<&'a Sender<MergerOperation>>);
|
pub struct DocumentSender<'a, 'extractor>(Option<&'a Sender<MergerOperation<'extractor>>>);
|
||||||
|
|
||||||
impl DocumentSender<'_> {
|
impl DocumentSender<'_, '_> {
|
||||||
pub fn insert(
|
pub fn insert(
|
||||||
&self,
|
&self,
|
||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
@ -564,7 +580,7 @@ impl DocumentSender<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Drop for DocumentSender<'_> {
|
impl Drop for DocumentSender<'_, '_> {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
if let Some(sender) = self.0.take() {
|
if let Some(sender) = self.0.take() {
|
||||||
let _ = sender.send(MergerOperation::FinishedDocument);
|
let _ = sender.send(MergerOperation::FinishedDocument);
|
||||||
|
@ -392,7 +392,7 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>(
|
|||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// - If the bucket IDs in these frozen caches are not exactly the same.
|
/// - If the bucket IDs in these frozen caches are not exactly the same.
|
||||||
pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut iter: F) -> Result<()>
|
pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
|
||||||
where
|
where
|
||||||
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
|
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
|
||||||
{
|
{
|
||||||
@ -455,7 +455,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We send the merged entry outside.
|
// We send the merged entry outside.
|
||||||
(iter)(first_key, output)?;
|
(f)(first_key, output)?;
|
||||||
|
|
||||||
// Don't forget to put the first entry back into the heap.
|
// Don't forget to put the first entry back into the heap.
|
||||||
if first_entry.cursor.move_on_next()?.is_some() {
|
if first_entry.cursor.move_on_next()?.is_some() {
|
||||||
@ -478,7 +478,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We send the merged entry outside.
|
// We send the merged entry outside.
|
||||||
(iter)(key, output)?;
|
(f)(key, output)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,12 +1,9 @@
|
|||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fs::File;
|
|
||||||
use std::ops::DerefMut as _;
|
use std::ops::DerefMut as _;
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use grenad::Merger;
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use raw_collections::alloc::RefBump;
|
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use super::super::cache::CboCachedSorter;
|
use super::super::cache::CboCachedSorter;
|
||||||
@ -19,16 +16,16 @@ use crate::update::new::indexer::document_changes::{
|
|||||||
IndexingContext, RefCellExt, ThreadLocal,
|
IndexingContext, RefCellExt, ThreadLocal,
|
||||||
};
|
};
|
||||||
use crate::update::new::DocumentChange;
|
use crate::update::new::DocumentChange;
|
||||||
use crate::update::{GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
use crate::update::GrenadParameters;
|
||||||
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
|
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
|
||||||
|
|
||||||
pub struct FacetedExtractorData<'extractor> {
|
pub struct FacetedExtractorData<'a> {
|
||||||
attributes_to_extract: &'extractor [&'extractor str],
|
attributes_to_extract: &'a [&'a str],
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
max_memory: Option<usize>,
|
max_memory: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'extractor> {
|
impl<'a, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a> {
|
||||||
type Data = RefCell<CboCachedSorter<'extractor>>;
|
type Data = RefCell<CboCachedSorter<'extractor>>;
|
||||||
|
|
||||||
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
|
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
|
||||||
@ -217,12 +214,12 @@ fn truncate_str(s: &str) -> &str {
|
|||||||
|
|
||||||
impl DocidsExtractor for FacetedDocidsExtractor {
|
impl DocidsExtractor for FacetedDocidsExtractor {
|
||||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
|
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
|
||||||
fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>>(
|
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>>(
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index>,
|
||||||
extractor_allocs: &mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
) -> Result<Vec<CboCachedSorter<'extractor>>> {
|
||||||
let max_memory = grenad_parameters.max_memory_by_thread();
|
let max_memory = grenad_parameters.max_memory_by_thread();
|
||||||
|
|
||||||
let index = indexing_context.index;
|
let index = indexing_context.index;
|
||||||
@ -251,26 +248,7 @@ impl DocidsExtractor for FacetedDocidsExtractor {
|
|||||||
&datastore,
|
&datastore,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
{
|
|
||||||
let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
|
|
||||||
let span =
|
|
||||||
tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
|
|
||||||
let _entered = span.enter();
|
|
||||||
|
|
||||||
let readers: Vec<_> = datastore
|
Ok(datastore.into_iter().map(RefCell::into_inner).collect())
|
||||||
.into_iter()
|
|
||||||
// .par_bridge() // T is !Send
|
|
||||||
.map(|cached_sorter| {
|
|
||||||
let cached_sorter = cached_sorter.into_inner();
|
|
||||||
let sorter = cached_sorter.into_sorter()?;
|
|
||||||
sorter.into_reader_cursors()
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
for reader in readers {
|
|
||||||
builder.extend(reader?);
|
|
||||||
}
|
|
||||||
Ok(builder.build())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,25 +2,22 @@ mod cache;
|
|||||||
mod faceted;
|
mod faceted;
|
||||||
mod searchable;
|
mod searchable;
|
||||||
|
|
||||||
use std::cell::RefCell;
|
|
||||||
use std::fs::File;
|
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
|
pub use cache::{merge_caches, transpose_and_freeze_caches, CboCachedSorter, DelAddRoaringBitmap};
|
||||||
pub use faceted::*;
|
pub use faceted::*;
|
||||||
use grenad::Merger;
|
|
||||||
pub use searchable::*;
|
pub use searchable::*;
|
||||||
|
|
||||||
use super::indexer::document_changes::{DocumentChanges, FullySend, IndexingContext, ThreadLocal};
|
use super::indexer::document_changes::{DocumentChanges, FullySend, IndexingContext, ThreadLocal};
|
||||||
use crate::update::{GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
use crate::update::GrenadParameters;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
pub trait DocidsExtractor {
|
pub trait DocidsExtractor {
|
||||||
fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>>(
|
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>>(
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index>,
|
||||||
extractor_allocs: &mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>>;
|
) -> Result<Vec<CboCachedSorter<'extractor>>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TODO move in permissive json pointer
|
/// TODO move in permissive json pointer
|
||||||
|
@ -7,7 +7,6 @@ use std::ops::DerefMut as _;
|
|||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use grenad::{Merger, MergerBuilder};
|
use grenad::{Merger, MergerBuilder};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use raw_collections::alloc::RefBump;
|
|
||||||
|
|
||||||
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
||||||
use crate::update::new::extract::cache::CboCachedSorter;
|
use crate::update::new::extract::cache::CboCachedSorter;
|
||||||
@ -157,15 +156,15 @@ struct WordDocidsMergerBuilders {
|
|||||||
fid_word_count_docids: MergerBuilder<File, MergeDeladdCboRoaringBitmaps>,
|
fid_word_count_docids: MergerBuilder<File, MergeDeladdCboRoaringBitmaps>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct WordDocidsMergers {
|
pub struct WordDocidsMergers<'extractor> {
|
||||||
pub word_fid_docids: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
pub word_fid_docids: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
pub word_docids: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
pub word_docids: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
pub exact_word_docids: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
pub exact_word_docids: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
pub word_position_docids: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
pub word_position_docids: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
pub fid_word_count_docids: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
pub fid_word_count_docids: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WordDocidsMergerBuilders {
|
impl<'extractor> WordDocidsMergerBuilders<'extractor> {
|
||||||
fn new() -> Self {
|
fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
word_fid_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps),
|
word_fid_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps),
|
||||||
@ -202,7 +201,7 @@ impl WordDocidsMergerBuilders {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build(self) -> WordDocidsMergers {
|
fn build(self) -> WordDocidsMergers<'extractor> {
|
||||||
WordDocidsMergers {
|
WordDocidsMergers {
|
||||||
word_fid_docids: self.word_fid_docids.build(),
|
word_fid_docids: self.word_fid_docids.build(),
|
||||||
word_docids: self.word_docids.build(),
|
word_docids: self.word_docids.build(),
|
||||||
|
@ -9,9 +9,7 @@ use std::marker::PhantomData;
|
|||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers};
|
pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers};
|
||||||
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
|
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
|
||||||
use grenad::Merger;
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use raw_collections::alloc::RefBump;
|
|
||||||
use tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
use tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
||||||
|
|
||||||
use super::cache::CboCachedSorter;
|
use super::cache::CboCachedSorter;
|
||||||
@ -21,18 +19,18 @@ use crate::update::new::indexer::document_changes::{
|
|||||||
IndexingContext, ThreadLocal,
|
IndexingContext, ThreadLocal,
|
||||||
};
|
};
|
||||||
use crate::update::new::DocumentChange;
|
use crate::update::new::DocumentChange;
|
||||||
use crate::update::{GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
use crate::update::GrenadParameters;
|
||||||
use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE};
|
use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE};
|
||||||
|
|
||||||
pub struct SearchableExtractorData<'extractor, EX: SearchableExtractor> {
|
pub struct SearchableExtractorData<'a, EX: SearchableExtractor> {
|
||||||
tokenizer: &'extractor DocumentTokenizer<'extractor>,
|
tokenizer: &'a DocumentTokenizer<'a>,
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
max_memory: Option<usize>,
|
max_memory: Option<usize>,
|
||||||
_ex: PhantomData<EX>,
|
_ex: PhantomData<EX>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
|
impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
|
||||||
for SearchableExtractorData<'extractor, EX>
|
for SearchableExtractorData<'a, EX>
|
||||||
{
|
{
|
||||||
type Data = RefCell<CboCachedSorter<'extractor>>;
|
type Data = RefCell<CboCachedSorter<'extractor>>;
|
||||||
|
|
||||||
@ -50,12 +48,12 @@ impl<'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub trait SearchableExtractor: Sized + Sync {
|
pub trait SearchableExtractor: Sized + Sync {
|
||||||
fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>>(
|
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>>(
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index>,
|
||||||
extractor_allocs: &mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
) -> Result<Vec<CboCachedSorter<'extractor>>> {
|
||||||
let max_memory = grenad_parameters.max_memory_by_thread();
|
let max_memory = grenad_parameters.max_memory_by_thread();
|
||||||
|
|
||||||
let rtxn = indexing_context.index.read_txn()?;
|
let rtxn = indexing_context.index.read_txn()?;
|
||||||
@ -107,28 +105,8 @@ pub trait SearchableExtractor: Sized + Sync {
|
|||||||
&datastore,
|
&datastore,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
{
|
|
||||||
let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
|
|
||||||
let span =
|
|
||||||
tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
|
|
||||||
let _entered = span.enter();
|
|
||||||
|
|
||||||
let readers: Vec<_> = datastore
|
Ok(datastore.into_iter().map(RefCell::into_inner).collect())
|
||||||
.into_iter()
|
|
||||||
// .par_bridge() // T is !Send
|
|
||||||
.map(|cache_sorter| {
|
|
||||||
let cached_sorter = cache_sorter.into_inner();
|
|
||||||
let sorter = cached_sorter.into_sorter()?;
|
|
||||||
sorter.into_reader_cursors()
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
for reader in readers {
|
|
||||||
builder.extend(reader?);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(builder.build())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_document_change(
|
fn extract_document_change(
|
||||||
@ -144,12 +122,12 @@ pub trait SearchableExtractor: Sized + Sync {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T: SearchableExtractor> DocidsExtractor for T {
|
impl<T: SearchableExtractor> DocidsExtractor for T {
|
||||||
fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>>(
|
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>>(
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index>,
|
||||||
extractor_allocs: &mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
) -> Result<Vec<CboCachedSorter<'extractor>>> {
|
||||||
Self::run_extraction(
|
Self::run_extraction(
|
||||||
grenad_parameters,
|
grenad_parameters,
|
||||||
document_changes,
|
document_changes,
|
||||||
|
@ -20,7 +20,7 @@ use super::channel::*;
|
|||||||
use super::document::write_to_obkv;
|
use super::document::write_to_obkv;
|
||||||
use super::document_change::DocumentChange;
|
use super::document_change::DocumentChange;
|
||||||
use super::extract::*;
|
use super::extract::*;
|
||||||
use super::merger::{merge_grenad_entries, FacetFieldIdsDelta};
|
use super::merger::{merge_caches_entries, FacetFieldIdsDelta};
|
||||||
use super::word_fst_builder::PrefixDelta;
|
use super::word_fst_builder::PrefixDelta;
|
||||||
use super::words_prefix_docids::{
|
use super::words_prefix_docids::{
|
||||||
compute_word_prefix_docids, compute_word_prefix_fid_docids, compute_word_prefix_position_docids,
|
compute_word_prefix_docids, compute_word_prefix_fid_docids, compute_word_prefix_position_docids,
|
||||||
@ -33,7 +33,7 @@ use crate::update::new::channel::ExtractorSender;
|
|||||||
use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids;
|
use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids;
|
||||||
use crate::update::settings::InnerIndexSettings;
|
use crate::update::settings::InnerIndexSettings;
|
||||||
use crate::update::{FacetsUpdateBulk, GrenadParameters};
|
use crate::update::{FacetsUpdateBulk, GrenadParameters};
|
||||||
use crate::{Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError};
|
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError};
|
||||||
|
|
||||||
pub(crate) mod de;
|
pub(crate) mod de;
|
||||||
pub mod document_changes;
|
pub mod document_changes;
|
||||||
@ -42,11 +42,11 @@ mod document_operation;
|
|||||||
mod partial_dump;
|
mod partial_dump;
|
||||||
mod update_by_function;
|
mod update_by_function;
|
||||||
|
|
||||||
struct DocumentExtractor<'a> {
|
struct DocumentExtractor<'a, 'extractor> {
|
||||||
document_sender: &'a DocumentSender<'a>,
|
document_sender: &'a DocumentSender<'a, 'extractor>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 'extractor> Extractor<'extractor> for DocumentExtractor<'a> {
|
impl<'a, 'extractor> Extractor<'extractor> for DocumentExtractor<'a, 'extractor> {
|
||||||
type Data = FullySend<()>;
|
type Data = FullySend<()>;
|
||||||
|
|
||||||
fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
|
fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
|
||||||
@ -179,6 +179,7 @@ where
|
|||||||
word_position_docids,
|
word_position_docids,
|
||||||
fid_word_count_docids,
|
fid_word_count_docids,
|
||||||
} = WordDocidsExtractors::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs)?;
|
} = WordDocidsExtractors::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs)?;
|
||||||
|
|
||||||
extractor_sender.send_searchable::<WordDocids>(word_docids).unwrap();
|
extractor_sender.send_searchable::<WordDocids>(word_docids).unwrap();
|
||||||
extractor_sender.send_searchable::<WordFidDocids>(word_fid_docids).unwrap();
|
extractor_sender.send_searchable::<WordFidDocids>(word_fid_docids).unwrap();
|
||||||
extractor_sender.send_searchable::<ExactWordDocids>(exact_word_docids).unwrap();
|
extractor_sender.send_searchable::<ExactWordDocids>(exact_word_docids).unwrap();
|
||||||
@ -201,7 +202,7 @@ where
|
|||||||
grenad_parameters,
|
grenad_parameters,
|
||||||
document_changes,
|
document_changes,
|
||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&extractor_sender,
|
&extractor_sender,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
@ -239,7 +240,7 @@ where
|
|||||||
tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "merge");
|
tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "merge");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
merge_grenad_entries(
|
merge_caches_entries(
|
||||||
merger_receiver,
|
merger_receiver,
|
||||||
merger_sender,
|
merger_sender,
|
||||||
&rtxn,
|
&rtxn,
|
||||||
@ -352,6 +353,7 @@ fn extract_and_send_docids<
|
|||||||
'fid,
|
'fid,
|
||||||
'indexer,
|
'indexer,
|
||||||
'index,
|
'index,
|
||||||
|
'extractor,
|
||||||
DC: DocumentChanges<'pl>,
|
DC: DocumentChanges<'pl>,
|
||||||
E: DocidsExtractor,
|
E: DocidsExtractor,
|
||||||
D: MergerOperationType,
|
D: MergerOperationType,
|
||||||
@ -359,12 +361,12 @@ fn extract_and_send_docids<
|
|||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index>,
|
||||||
extractor_allocs: &mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
sender: &ExtractorSender,
|
sender: &ExtractorSender<'extractor>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let merger =
|
let caches =
|
||||||
E::run_extraction(grenad_parameters, document_changes, indexing_context, extractor_allocs)?;
|
E::run_extraction(grenad_parameters, document_changes, indexing_context, extractor_allocs)?;
|
||||||
sender.send_searchable::<D>(merger).unwrap();
|
sender.send_searchable::<D>(caches).unwrap();
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,13 +6,16 @@ use grenad::Merger;
|
|||||||
use hashbrown::HashSet;
|
use hashbrown::HashSet;
|
||||||
use heed::types::Bytes;
|
use heed::types::Bytes;
|
||||||
use heed::{Database, RoTxn};
|
use heed::{Database, RoTxn};
|
||||||
|
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::channel::*;
|
use super::channel::*;
|
||||||
use super::extract::FacetKind;
|
use super::extract::{
|
||||||
|
merge_caches, transpose_and_freeze_caches, CboCachedSorter, DelAddRoaringBitmap, FacetKind,
|
||||||
|
};
|
||||||
use super::word_fst_builder::{PrefixData, PrefixDelta};
|
use super::word_fst_builder::{PrefixData, PrefixDelta};
|
||||||
use super::{Deletion, DocumentChange, KvReaderDelAdd, KvReaderFieldId};
|
use super::{Deletion, DocumentChange, KvReaderDelAdd, KvReaderFieldId};
|
||||||
use crate::update::del_add::DelAdd;
|
use crate::update::del_add::{DelAdd, DelAddOperation};
|
||||||
use crate::update::new::channel::MergerOperation;
|
use crate::update::new::channel::MergerOperation;
|
||||||
use crate::update::new::word_fst_builder::WordFstBuilder;
|
use crate::update::new::word_fst_builder::WordFstBuilder;
|
||||||
use crate::update::MergeDeladdCboRoaringBitmaps;
|
use crate::update::MergeDeladdCboRoaringBitmaps;
|
||||||
@ -20,7 +23,7 @@ use crate::{CboRoaringBitmapCodec, Error, FieldId, GeoPoint, GlobalFieldsIdsMap,
|
|||||||
|
|
||||||
/// TODO We must return some infos/stats
|
/// TODO We must return some infos/stats
|
||||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")]
|
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")]
|
||||||
pub fn merge_grenad_entries(
|
pub fn merge_caches_entries(
|
||||||
receiver: MergerReceiver,
|
receiver: MergerReceiver,
|
||||||
sender: MergerSender,
|
sender: MergerSender,
|
||||||
rtxn: &RoTxn,
|
rtxn: &RoTxn,
|
||||||
@ -34,12 +37,12 @@ pub fn merge_grenad_entries(
|
|||||||
|
|
||||||
for merger_operation in receiver {
|
for merger_operation in receiver {
|
||||||
match merger_operation {
|
match merger_operation {
|
||||||
MergerOperation::ExactWordDocidsMerger(merger) => {
|
MergerOperation::ExactWordDocidsMerger(caches) => {
|
||||||
let span =
|
let span =
|
||||||
tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
|
tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
merge_and_send_docids(
|
merge_and_send_docids(
|
||||||
merger,
|
caches,
|
||||||
/// TODO do a MergerOperation::database(&Index) -> Database<Bytes, Bytes>.
|
/// TODO do a MergerOperation::database(&Index) -> Database<Bytes, Bytes>.
|
||||||
index.exact_word_docids.remap_types(),
|
index.exact_word_docids.remap_types(),
|
||||||
rtxn,
|
rtxn,
|
||||||
@ -192,8 +195,6 @@ pub fn merge_grenad_entries(
|
|||||||
sender.send_documents_ids(documents_ids).unwrap();
|
sender.send_documents_ids(documents_ids).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
// ...
|
|
||||||
|
|
||||||
Ok(merger_result)
|
Ok(merger_result)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -254,69 +255,61 @@ impl GeoExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
|
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
|
||||||
fn merge_and_send_docids(
|
fn merge_and_send_docids<'extractor>(
|
||||||
merger: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
mut caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
database: Database<Bytes, Bytes>,
|
database: Database<Bytes, Bytes>,
|
||||||
rtxn: &RoTxn<'_>,
|
rtxn: &RoTxn<'_>,
|
||||||
buffer: &mut Vec<u8>,
|
buffer: &mut Vec<u8>,
|
||||||
docids_sender: impl DocidsSender,
|
docids_sender: impl DocidsSender,
|
||||||
mut register_key: impl FnMut(DelAdd, &[u8]) -> Result<()>,
|
mut register_key: impl FnMut(DelAdd, &[u8]) -> Result<()>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut merger_iter = merger.into_stream_merger_iter().unwrap();
|
transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| {
|
||||||
while let Some((key, deladd)) = merger_iter.next().unwrap() {
|
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
|
||||||
let current = database.get(rtxn, key)?;
|
let current = database.get(rtxn, key)?;
|
||||||
let deladd: &KvReaderDelAdd = deladd.into();
|
match merge_cbo_bitmaps(current, del, add)? {
|
||||||
let del = deladd.get(DelAdd::Deletion);
|
Operation::Write(bitmap) => {
|
||||||
let add = deladd.get(DelAdd::Addition);
|
let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer);
|
||||||
|
docids_sender.write(key, value).unwrap();
|
||||||
match merge_cbo_bitmaps(current, del, add)? {
|
register_key(DelAdd::Addition, key)
|
||||||
Operation::Write(bitmap) => {
|
}
|
||||||
let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer);
|
Operation::Delete => {
|
||||||
docids_sender.write(key, value).unwrap();
|
docids_sender.delete(key).unwrap();
|
||||||
register_key(DelAdd::Addition, key)?;
|
register_key(DelAdd::Deletion, key)
|
||||||
|
}
|
||||||
|
Operation::Ignore => Ok(()),
|
||||||
}
|
}
|
||||||
Operation::Delete => {
|
})
|
||||||
docids_sender.delete(key).unwrap();
|
})
|
||||||
register_key(DelAdd::Deletion, key)?;
|
|
||||||
}
|
|
||||||
Operation::Ignore => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
|
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
|
||||||
fn merge_and_send_facet_docids(
|
fn merge_and_send_facet_docids<'extractor>(
|
||||||
merger: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
mut caches: Vec<Vec<CboCachedSorter<'extractor>>>,
|
||||||
database: FacetDatabases,
|
database: FacetDatabases,
|
||||||
rtxn: &RoTxn<'_>,
|
rtxn: &RoTxn<'_>,
|
||||||
buffer: &mut Vec<u8>,
|
buffer: &mut Vec<u8>,
|
||||||
docids_sender: impl DocidsSender,
|
docids_sender: impl DocidsSender,
|
||||||
facet_field_ids_delta: &mut FacetFieldIdsDelta,
|
facet_field_ids_delta: &mut FacetFieldIdsDelta,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut merger_iter = merger.into_stream_merger_iter().unwrap();
|
transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| {
|
||||||
while let Some((key, deladd)) = merger_iter.next().unwrap() {
|
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
|
||||||
let current = database.get_cbo_roaring_bytes_value(rtxn, key)?;
|
let current = database.get_cbo_roaring_bytes_value(rtxn, key)?;
|
||||||
let deladd: &KvReaderDelAdd = deladd.into();
|
match merge_cbo_bitmaps(current, del, add)? {
|
||||||
let del = deladd.get(DelAdd::Deletion);
|
Operation::Write(bitmap) => {
|
||||||
let add = deladd.get(DelAdd::Addition);
|
facet_field_ids_delta.register_from_key(key);
|
||||||
|
let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer);
|
||||||
match merge_cbo_bitmaps(current, del, add)? {
|
docids_sender.write(key, value).unwrap();
|
||||||
Operation::Write(bitmap) => {
|
Ok(())
|
||||||
facet_field_ids_delta.register_from_key(key);
|
}
|
||||||
let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer);
|
Operation::Delete => {
|
||||||
docids_sender.write(key, value).unwrap();
|
facet_field_ids_delta.register_from_key(key);
|
||||||
|
docids_sender.delete(key).unwrap();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
Operation::Ignore => Ok(()),
|
||||||
}
|
}
|
||||||
Operation::Delete => {
|
})
|
||||||
facet_field_ids_delta.register_from_key(key);
|
})
|
||||||
docids_sender.delete(key).unwrap();
|
|
||||||
}
|
|
||||||
Operation::Ignore => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct FacetDatabases<'a> {
|
struct FacetDatabases<'a> {
|
||||||
@ -409,13 +402,10 @@ enum Operation {
|
|||||||
/// A function that merges the DelAdd CboRoaringBitmaps with the current bitmap.
|
/// A function that merges the DelAdd CboRoaringBitmaps with the current bitmap.
|
||||||
fn merge_cbo_bitmaps(
|
fn merge_cbo_bitmaps(
|
||||||
current: Option<&[u8]>,
|
current: Option<&[u8]>,
|
||||||
del: Option<&[u8]>,
|
del: Option<RoaringBitmap>,
|
||||||
add: Option<&[u8]>,
|
add: Option<RoaringBitmap>,
|
||||||
) -> Result<Operation> {
|
) -> Result<Operation> {
|
||||||
let current = current.map(CboRoaringBitmapCodec::deserialize_from).transpose()?;
|
let current = current.map(CboRoaringBitmapCodec::deserialize_from).transpose()?;
|
||||||
let del = del.map(CboRoaringBitmapCodec::deserialize_from).transpose()?;
|
|
||||||
let add = add.map(CboRoaringBitmapCodec::deserialize_from).transpose()?;
|
|
||||||
|
|
||||||
match (current, del, add) {
|
match (current, del, add) {
|
||||||
(None, None, None) => Ok(Operation::Ignore), // but it's strange
|
(None, None, None) => Ok(Operation::Ignore), // but it's strange
|
||||||
(None, None, Some(add)) => Ok(Operation::Write(add)),
|
(None, None, Some(add)) => Ok(Operation::Write(add)),
|
||||||
|
Loading…
Reference in New Issue
Block a user