650 lines
21 KiB
Rust
Raw Normal View History

use std::fs::File;
use std::marker::PhantomData;
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
2024-09-03 11:02:39 +02:00
use grenad::Merger;
2024-10-29 17:43:36 +01:00
use hashbrown::HashMap;
2024-08-29 15:07:59 +02:00
use heed::types::Bytes;
2024-09-04 14:30:09 +02:00
use memmap2::Mmap;
2024-10-09 14:39:27 +02:00
use roaring::RoaringBitmap;
2024-08-29 15:07:59 +02:00
2024-09-16 09:34:10 +02:00
use super::extract::FacetKind;
2024-08-29 15:07:59 +02:00
use super::StdResult;
use crate::index::main_key::{DOCUMENTS_IDS_KEY, WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
use crate::update::new::KvReaderFieldId;
2024-09-03 11:02:39 +02:00
use crate::update::MergeDeladdCboRoaringBitmaps;
2024-10-28 14:23:02 +01:00
use crate::vector::Embedding;
2024-09-04 14:30:09 +02:00
use crate::{DocumentId, Index};
2024-08-29 15:07:59 +02:00
/// The capacity of the channel is currently in number of messages.
2024-09-02 15:10:21 +02:00
pub fn merger_writer_channel(cap: usize) -> (MergerSender, WriterReceiver) {
2024-08-29 15:07:59 +02:00
let (sender, receiver) = crossbeam_channel::bounded(cap);
(
MergerSender {
sender,
send_count: Default::default(),
writer_contentious_count: Default::default(),
merger_contentious_count: Default::default(),
},
WriterReceiver(receiver),
)
2024-08-29 15:07:59 +02:00
}
/// Creates the bounded channel going from the extractors to the merger.
///
/// `cap` is the capacity of the channel, currently expressed in number of messages.
pub fn extractors_merger_channels(cap: usize) -> (ExtractorSender, MergerReceiver) {
    let (tx, rx) = crossbeam_channel::bounded(cap);
    let extractor_sender = ExtractorSender(tx);
    let merger_receiver = MergerReceiver(rx);
    (extractor_sender, merger_receiver)
}
2024-09-04 14:30:09 +02:00
pub enum KeyValueEntry {
SmallInMemory { key_length: usize, data: Box<[u8]> },
LargeOnDisk { key: Box<[u8]>, value: Mmap },
2024-08-29 15:07:59 +02:00
}
impl KeyValueEntry {
2024-09-04 14:30:09 +02:00
pub fn from_small_key_value(key: &[u8], value: &[u8]) -> Self {
let mut data = Vec::with_capacity(key.len() + value.len());
data.extend_from_slice(key);
data.extend_from_slice(value);
2024-09-04 14:30:09 +02:00
KeyValueEntry::SmallInMemory { key_length: key.len(), data: data.into_boxed_slice() }
}
2024-10-09 14:39:27 +02:00
pub fn from_small_key_bitmap(key: &[u8], bitmap: RoaringBitmap) -> Self {
let mut data = Vec::with_capacity(key.len() + bitmap.serialized_size());
data.extend_from_slice(key);
bitmap.serialize_into(&mut data).unwrap();
KeyValueEntry::SmallInMemory { key_length: key.len(), data: data.into_boxed_slice() }
}
2024-09-04 14:30:09 +02:00
pub fn from_large_key_value(key: &[u8], value: Mmap) -> Self {
KeyValueEntry::LargeOnDisk { key: key.to_vec().into_boxed_slice(), value }
}
pub fn key(&self) -> &[u8] {
2024-09-04 14:30:09 +02:00
match self {
KeyValueEntry::SmallInMemory { key_length, data } => &data.as_ref()[..*key_length],
KeyValueEntry::LargeOnDisk { key, value: _ } => key.as_ref(),
}
}
pub fn value(&self) -> &[u8] {
2024-09-04 14:30:09 +02:00
match self {
KeyValueEntry::SmallInMemory { key_length, data } => &data.as_ref()[*key_length..],
KeyValueEntry::LargeOnDisk { key: _, value } => value.as_ref(),
}
2024-08-29 15:07:59 +02:00
}
}
/// An owned copy of a key.
pub struct KeyEntry {
    data: Box<[u8]>,
}

impl KeyEntry {
    /// Copies `key` into a new entry.
    pub fn from_key(key: &[u8]) -> Self {
        Self { data: Box::from(key) }
    }

    /// Returns the bytes of the key held by this entry.
    pub fn entry(&self) -> &[u8] {
        &self.data
    }
}
/// What to do with an entry: remove a key or write a key-value pair.
pub enum EntryOperation {
    /// Carries the key to delete.
    Delete(KeyEntry),
    /// Carries the key-value pair to write.
    Write(KeyValueEntry),
}
2024-08-29 15:07:59 +02:00
/// A document id paired with the raw bytes of the document content.
pub struct DocumentEntry {
    docid: DocumentId,
    content: Box<[u8]>,
}
impl DocumentEntry {
pub fn new_uncompressed(docid: DocumentId, content: Box<KvReaderFieldId>) -> Self {
DocumentEntry { docid, content: content.into() }
}
pub fn new_compressed(docid: DocumentId, content: Box<[u8]>) -> Self {
DocumentEntry { docid, content }
}
pub fn key(&self) -> [u8; 4] {
self.docid.to_be_bytes()
}
pub fn content(&self) -> &[u8] {
&self.content
2024-08-29 15:07:59 +02:00
}
}
/// Wraps the id of a document to delete.
pub struct DocumentDeletionEntry(DocumentId);

impl DocumentDeletionEntry {
    /// Returns the wrapped document id encoded as big-endian bytes.
    pub fn key(&self) -> [u8; 4] {
        let Self(docid) = self;
        docid.to_be_bytes()
    }
}
2024-10-29 17:43:36 +01:00
/// The message type carried by the merger-to-writer channel.
pub enum WriterOperation {
    /// An operation targeting one of the key-value databases.
    DbOperation(DbOperation),
    /// An operation about vectors.
    ArroyOperation(ArroyOperation),
}
/// The vector-related operations that can be sent to the writer.
pub enum ArroyOperation {
    /// TODO: call when deleting regular documents
    DeleteVectors { docid: DocumentId },
    /// Carries several embeddings for the document `docid` and the embedder `embedder_id`.
    SetVectors { docid: DocumentId, embedder_id: u8, embeddings: Vec<Embedding> },
    /// Carries a single embedding for the document `docid` and the embedder `embedder_id`.
    SetVector { docid: DocumentId, embedder_id: u8, embedding: Embedding },
    /// Final message, carrying a bitmap per name.
    // NOTE(review): the `String` keys are presumably embedder names — confirm against the consumer.
    Finish { user_provided: HashMap<String, RoaringBitmap> },
}
/// An entry operation together with the database it applies to.
pub struct DbOperation {
    /// The targeted database.
    database: Database,
    /// The deletion or write to perform.
    entry: EntryOperation,
}
/// Names the databases of an [`Index`] that an operation can target.
///
/// Each variant maps to one field of the index, see [`Database::database`].
#[derive(Debug)]
pub enum Database {
    Documents,
    ExternalDocumentsIds,
    ExactWordDocids,
    FidWordCountDocids,
    Main,
    WordDocids,
    WordFidDocids,
    WordPairProximityDocids,
    WordPositionDocids,
    FacetIdIsNullDocids,
    FacetIdIsEmptyDocids,
    FacetIdExistsDocids,
    FacetIdF64NumberDocids,
    FacetIdStringDocids,
}
2024-09-16 09:34:10 +02:00
impl Database {
2024-08-29 15:07:59 +02:00
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
2024-09-16 09:34:10 +02:00
match self {
Database::Documents => index.documents.remap_types(),
Database::ExternalDocumentsIds => index.external_documents_ids.remap_types(),
2024-09-04 12:17:13 +02:00
Database::ExactWordDocids => index.exact_word_docids.remap_types(),
Database::Main => index.main.remap_types(),
Database::WordDocids => index.word_docids.remap_types(),
Database::WordFidDocids => index.word_fid_docids.remap_types(),
2024-09-04 12:17:13 +02:00
Database::WordPositionDocids => index.word_position_docids.remap_types(),
Database::FidWordCountDocids => index.field_id_word_count_docids.remap_types(),
Database::WordPairProximityDocids => index.word_pair_proximity_docids.remap_types(),
2024-09-16 09:34:10 +02:00
Database::FacetIdIsNullDocids => index.facet_id_is_null_docids.remap_types(),
Database::FacetIdIsEmptyDocids => index.facet_id_is_empty_docids.remap_types(),
Database::FacetIdExistsDocids => index.facet_id_exists_docids.remap_types(),
Database::FacetIdF64NumberDocids => index.facet_id_f64_docids.remap_types(),
Database::FacetIdStringDocids => index.facet_id_string_docids.remap_types(),
2024-08-29 15:07:59 +02:00
}
}
2024-09-16 09:34:10 +02:00
}
impl From<FacetKind> for Database {
    /// Maps each facet kind to the facet database it is stored in.
    fn from(kind: FacetKind) -> Self {
        match kind {
            FacetKind::Number => Self::FacetIdF64NumberDocids,
            FacetKind::String => Self::FacetIdStringDocids,
            FacetKind::Null => Self::FacetIdIsNullDocids,
            FacetKind::Empty => Self::FacetIdIsEmptyDocids,
            FacetKind::Exists => Self::FacetIdExistsDocids,
        }
    }
}
2024-10-29 17:43:36 +01:00
impl DbOperation {
2024-09-16 09:34:10 +02:00
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
self.database.database(index)
}
pub fn entry(self) -> EntryOperation {
self.entry
}
2024-08-29 15:07:59 +02:00
}
/// The receiving end of the merger-to-writer channel.
pub struct WriterReceiver(Receiver<WriterOperation>);

impl IntoIterator for WriterReceiver {
    type Item = WriterOperation;
    type IntoIter = IntoIter<WriterOperation>;

    /// Consumes the receiver and iterates over the operations it receives.
    fn into_iter(self) -> Self::IntoIter {
        let Self(receiver) = self;
        receiver.into_iter()
    }
}
/// The sending end of the merger-to-writer channel, with usage counters
/// that are reported when the sender is dropped.
pub struct MergerSender {
    sender: Sender<WriterOperation>,
    /// The total number of messages we sent in the channel.
    send_count: std::cell::Cell<usize>,
    /// The number of times we sent something in a channel that was full.
    writer_contentious_count: std::cell::Cell<usize>,
    /// The number of times we sent something in a channel that was empty.
    merger_contentious_count: std::cell::Cell<usize>,
}
impl Drop for MergerSender {
fn drop(&mut self) {
2024-09-24 18:21:58 +02:00
eprintln!(
"Merger channel stats: {} sends, {} writer contentions ({}%), {} merger contentions ({}%)",
self.send_count.get(),
self.writer_contentious_count.get(),
(self.writer_contentious_count.get() as f32 / self.send_count.get() as f32) * 100.0,
self.merger_contentious_count.get(),
(self.merger_contentious_count.get() as f32 / self.send_count.get() as f32) * 100.0
)
}
}
2024-08-29 15:07:59 +02:00
impl MergerSender {
pub fn main(&self) -> MainSender<'_> {
MainSender(self)
}
2024-09-16 09:34:10 +02:00
pub fn docids<D: DatabaseType>(&self) -> WordDocidsSender<'_, D> {
WordDocidsSender { sender: self, _marker: PhantomData }
2024-09-16 09:34:10 +02:00
}
pub fn facet_docids(&self) -> FacetDocidsSender<'_> {
FacetDocidsSender { sender: self }
}
pub fn documents(&self) -> DocumentsSender<'_> {
DocumentsSender(self)
}
2024-10-09 14:39:27 +02:00
pub fn send_documents_ids(&self, documents_ids: RoaringBitmap) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_bitmap(
DOCUMENTS_IDS_KEY.as_bytes(),
2024-10-09 14:39:27 +02:00
documents_ids,
));
2024-10-29 17:43:36 +01:00
match self.send_db_operation(DbOperation { database: Database::Main, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
2024-10-29 17:43:36 +01:00
fn send_db_operation(&self, op: DbOperation) -> StdResult<(), SendError<()>> {
if self.sender.is_full() {
self.writer_contentious_count.set(self.writer_contentious_count.get() + 1);
}
if self.sender.is_empty() {
self.merger_contentious_count.set(self.merger_contentious_count.get() + 1);
}
self.send_count.set(self.send_count.get() + 1);
2024-10-29 17:43:36 +01:00
match self.sender.send(WriterOperation::DbOperation(op)) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub struct MainSender<'a>(&'a MergerSender);
impl MainSender<'_> {
2024-09-04 14:30:09 +02:00
pub fn write_words_fst(&self, value: Mmap) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_large_key_value(
WORDS_FST_KEY.as_bytes(),
value,
));
2024-10-29 17:43:36 +01:00
match self.0.send_db_operation(DbOperation { database: Database::Main, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn write_words_prefixes_fst(&self, value: Mmap) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_large_key_value(
WORDS_PREFIXES_FST_KEY.as_bytes(),
value,
));
2024-10-29 17:43:36 +01:00
match self.0.send_db_operation(DbOperation { database: Database::Main, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
2024-10-29 17:43:36 +01:00
match self.0.send_db_operation(DbOperation { database: Database::Main, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
2024-09-04 12:17:13 +02:00
/// Marker type standing for the exact word docids database; it has no value.
pub enum ExactWordDocids {}

/// Marker type standing for the field id word count docids database; it has no value.
pub enum FidWordCountDocids {}

/// Marker type standing for the word docids database; it has no value.
pub enum WordDocids {}

/// Marker type standing for the word field id docids database; it has no value.
pub enum WordFidDocids {}

/// Marker type standing for the word pair proximity docids database; it has no value.
pub enum WordPairProximityDocids {}
2024-09-04 12:17:13 +02:00
/// Marker type standing for the word position docids database; it has no value.
pub enum WordPositionDocids {}
2024-09-16 09:34:10 +02:00
/// Marker type standing for the facet docids; it has no value.
pub enum FacetDocids {}
pub trait DatabaseType {
2024-09-04 12:17:13 +02:00
const DATABASE: Database;
2024-09-16 09:34:10 +02:00
}
2024-09-04 12:17:13 +02:00
2024-09-16 09:34:10 +02:00
pub trait MergerOperationType {
2024-09-04 12:17:13 +02:00
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation;
}
impl DatabaseType for ExactWordDocids {
const DATABASE: Database = Database::ExactWordDocids;
2024-09-16 09:34:10 +02:00
}
2024-09-04 12:17:13 +02:00
2024-09-16 09:34:10 +02:00
impl MergerOperationType for ExactWordDocids {
2024-09-04 12:17:13 +02:00
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
MergerOperation::ExactWordDocidsMerger(merger)
2024-09-04 12:17:13 +02:00
}
}
impl DatabaseType for FidWordCountDocids {
const DATABASE: Database = Database::FidWordCountDocids;
2024-09-16 09:34:10 +02:00
}
2024-09-04 12:17:13 +02:00
2024-09-16 09:34:10 +02:00
impl MergerOperationType for FidWordCountDocids {
2024-09-04 12:17:13 +02:00
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
MergerOperation::FidWordCountDocidsMerger(merger)
}
}
impl DatabaseType for WordDocids {
const DATABASE: Database = Database::WordDocids;
2024-09-16 09:34:10 +02:00
}
2024-09-16 09:34:10 +02:00
impl MergerOperationType for WordDocids {
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
MergerOperation::WordDocidsMerger(merger)
}
}
impl DatabaseType for WordFidDocids {
2024-09-04 12:17:13 +02:00
const DATABASE: Database = Database::WordFidDocids;
2024-09-16 09:34:10 +02:00
}
2024-09-04 12:17:13 +02:00
2024-09-16 09:34:10 +02:00
impl MergerOperationType for WordFidDocids {
2024-09-04 12:17:13 +02:00
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
MergerOperation::WordFidDocidsMerger(merger)
}
}
impl DatabaseType for WordPairProximityDocids {
const DATABASE: Database = Database::WordPairProximityDocids;
2024-09-16 09:34:10 +02:00
}
2024-09-16 09:34:10 +02:00
impl MergerOperationType for WordPairProximityDocids {
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
MergerOperation::WordPairProximityDocidsMerger(merger)
}
}
2024-09-04 12:17:13 +02:00
impl DatabaseType for WordPositionDocids {
const DATABASE: Database = Database::WordPositionDocids;
2024-09-16 09:34:10 +02:00
}
2024-09-04 12:17:13 +02:00
2024-09-16 09:34:10 +02:00
impl MergerOperationType for WordPositionDocids {
2024-09-04 12:17:13 +02:00
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
MergerOperation::WordPositionDocidsMerger(merger)
}
}
2024-09-16 09:34:10 +02:00
impl MergerOperationType for FacetDocids {
    /// Wraps `merger` into [`MergerOperation::FacetDocidsMerger`].
    fn new_merger_operation(
        merger: Merger<File, MergeDeladdCboRoaringBitmaps>,
    ) -> MergerOperation {
        MergerOperation::FacetDocidsMerger(merger)
    }
}
/// Common interface of the handles that send docids entries.
pub trait DocidsSender {
    /// Sends `value` to be written under `key`.
    fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>>;

    /// Sends the deletion of `key`.
    fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>>;
}
/// A handle over a [`MergerSender`] tagged with a marker type `D`.
pub struct WordDocidsSender<'a, D> {
    sender: &'a MergerSender,
    /// `D` is only used at the type level, no value of it is stored.
    _marker: PhantomData<D>,
}
2024-09-16 09:34:10 +02:00
impl<D: DatabaseType> DocidsSender for WordDocidsSender<'_, D> {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
2024-09-04 14:30:09 +02:00
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
2024-10-29 17:43:36 +01:00
match self.sender.send_db_operation(DbOperation { database: D::DATABASE, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
2024-09-16 09:34:10 +02:00
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
2024-10-29 17:43:36 +01:00
match self.sender.send_db_operation(DbOperation { database: D::DATABASE, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
2024-09-16 09:34:10 +02:00
pub struct FacetDocidsSender<'a> {
sender: &'a MergerSender,
2024-09-16 09:34:10 +02:00
}
impl DocidsSender for FacetDocidsSender<'_> {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
let (facet_kind, key) = FacetKind::extract_from_key(key);
let database = Database::from(facet_kind);
// let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
let entry = match facet_kind {
// skip level group size
FacetKind::String | FacetKind::Number => {
// add facet group size
let value = [&[1], value].concat();
EntryOperation::Write(KeyValueEntry::from_small_key_value(key, &value))
}
_ => EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value)),
};
2024-10-29 17:43:36 +01:00
match self.sender.send_db_operation(DbOperation { database, entry }) {
2024-09-16 09:34:10 +02:00
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
let (facet_kind, key) = FacetKind::extract_from_key(key);
let database = Database::from(facet_kind);
2024-09-16 09:34:10 +02:00
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
2024-10-29 17:43:36 +01:00
match self.sender.send_db_operation(DbOperation { database, entry }) {
2024-09-16 09:34:10 +02:00
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub struct DocumentsSender<'a>(&'a MergerSender);
impl DocumentsSender<'_> {
/// TODO do that efficiently
pub fn uncompressed(
&self,
docid: DocumentId,
external_id: String,
document: &KvReaderFieldId,
) -> StdResult<(), SendError<()>> {
2024-09-04 14:30:09 +02:00
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(
&docid.to_be_bytes(),
document.as_bytes(),
));
2024-10-29 17:43:36 +01:00
match self.0.send_db_operation(DbOperation { database: Database::Documents, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}?;
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(
external_id.as_bytes(),
&docid.to_be_bytes(),
));
2024-10-29 17:43:36 +01:00
match self
.0
.send_db_operation(DbOperation { database: Database::ExternalDocumentsIds, entry })
{
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
2024-08-29 15:07:59 +02:00
pub fn delete(&self, docid: DocumentId, external_id: String) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(&docid.to_be_bytes()));
2024-10-29 17:43:36 +01:00
match self.0.send_db_operation(DbOperation { database: Database::Documents, entry }) {
2024-08-29 15:07:59 +02:00
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}?;
let entry = EntryOperation::Delete(KeyEntry::from_key(external_id.as_bytes()));
2024-10-29 17:43:36 +01:00
match self
.0
.send_db_operation(DbOperation { database: Database::ExternalDocumentsIds, entry })
{
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
2024-08-29 15:07:59 +02:00
}
}
}
2024-10-29 17:43:36 +01:00
pub struct EmbeddingSender<'a>(&'a Sender<WriterOperation>);
2024-10-21 10:35:56 +02:00
impl EmbeddingSender<'_> {
2024-10-28 14:23:02 +01:00
pub fn set_vectors(
&self,
docid: DocumentId,
embedder_id: u8,
embeddings: Vec<Embedding>,
) -> StdResult<(), SendError<()>> {
2024-10-29 17:43:36 +01:00
self.0
.send(WriterOperation::ArroyOperation(ArroyOperation::SetVectors {
docid,
embedder_id,
embeddings,
}))
.map_err(|_| SendError(()))
2024-10-28 14:23:02 +01:00
}
pub fn set_vector(
&self,
docid: DocumentId,
embedder_id: u8,
embedding: Embedding,
) -> StdResult<(), SendError<()>> {
2024-10-29 17:43:36 +01:00
self.0
.send(WriterOperation::ArroyOperation(ArroyOperation::SetVector {
docid,
embedder_id,
embedding,
}))
.map_err(|_| SendError(()))
}
/// Marks all embedders as "to be built"
pub fn finish(
self,
user_provided: HashMap<String, RoaringBitmap>,
2024-10-21 10:35:56 +02:00
) -> StdResult<(), SendError<()>> {
2024-10-29 17:43:36 +01:00
self.0
.send(WriterOperation::ArroyOperation(ArroyOperation::Finish { user_provided }))
.map_err(|_| SendError(()))
2024-10-21 10:35:56 +02:00
}
}
pub enum MergerOperation {
2024-09-04 12:17:13 +02:00
ExactWordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
FidWordCountDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
WordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
WordFidDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
WordPairProximityDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
2024-09-04 12:17:13 +02:00
WordPositionDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
2024-09-16 09:34:10 +02:00
FacetDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
DeleteDocument { docid: DocumentId, external_id: String },
InsertDocument { docid: DocumentId, external_id: String, document: Box<KvReaderFieldId> },
2024-09-12 18:01:02 +02:00
FinishedDocument,
}
/// The receiving end of the extractors-to-merger channel.
pub struct MergerReceiver(Receiver<MergerOperation>);

impl IntoIterator for MergerReceiver {
    type Item = MergerOperation;
    type IntoIter = IntoIter<MergerOperation>;

    /// Consumes the receiver and iterates over the operations it receives.
    fn into_iter(self) -> Self::IntoIter {
        let Self(receiver) = self;
        receiver.into_iter()
    }
}
pub struct ExtractorSender(Sender<MergerOperation>);
2024-09-03 11:02:39 +02:00
impl ExtractorSender {
2024-09-12 18:01:02 +02:00
pub fn document_sender(&self) -> DocumentSender<'_> {
DocumentSender(Some(&self.0))
2024-09-12 18:01:02 +02:00
}
2024-09-16 09:34:10 +02:00
pub fn send_searchable<D: MergerOperationType>(
2024-09-12 18:01:02 +02:00
&self,
merger: Merger<File, MergeDeladdCboRoaringBitmaps>,
) -> StdResult<(), SendError<()>> {
match self.0.send(D::new_merger_operation(merger)) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub struct DocumentSender<'a>(Option<&'a Sender<MergerOperation>>);
2024-09-12 18:01:02 +02:00
impl DocumentSender<'_> {
pub fn insert(
2024-09-03 11:02:39 +02:00
&self,
docid: DocumentId,
external_id: String,
document: Box<KvReaderFieldId>,
2024-09-03 11:02:39 +02:00
) -> StdResult<(), SendError<()>> {
let sender = self.0.unwrap();
match sender.send(MergerOperation::InsertDocument { docid, external_id, document }) {
2024-09-03 11:02:39 +02:00
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn delete(&self, docid: DocumentId, external_id: String) -> StdResult<(), SendError<()>> {
let sender = self.0.unwrap();
match sender.send(MergerOperation::DeleteDocument { docid, external_id }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
pub fn finish(mut self) -> StdResult<(), SendError<()>> {
let sender = self.0.take().unwrap();
match sender.send(MergerOperation::FinishedDocument) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
2024-09-12 18:01:02 +02:00
impl Drop for DocumentSender<'_> {
fn drop(&mut self) {
if let Some(sender) = self.0.take() {
2024-09-30 16:08:29 +02:00
let _ = sender.send(MergerOperation::FinishedDocument);
}
2024-09-12 18:01:02 +02:00
}
}