Implement mostly all senders

Clément Renault 2024-11-26 18:30:44 +01:00
parent 79671c9faa
commit 8442db8101
6 changed files with 398 additions and 304 deletions

@@ -1,14 +1,19 @@
use std::cell::RefCell;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::num::NonZeroU16;
use std::{mem, slice};
use bbqueue::framed::{FrameGrantR, FrameProducer};
use bytemuck::{NoUninit, CheckedBitPattern};
use crossbeam::sync::{Parker, Unparker};
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
use crossbeam_channel::{IntoIter, Receiver, SendError};
use heed::types::Bytes;
use heed::BytesDecode;
use memmap2::Mmap;
use roaring::RoaringBitmap;
use super::extract::FacetKind;
use super::ref_cell_ext::RefCellExt;
use super::thread_local::{FullySend, ThreadLocal};
use super::StdResult;
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
@@ -16,7 +21,7 @@ use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY};
use crate::index::{db_name, IndexEmbeddingConfig};
use crate::update::new::KvReaderFieldId;
use crate::vector::Embedding;
use crate::{DocumentId, Index};
use crate::{CboRoaringBitmapCodec, DocumentId, Index};
/// Creates a tuple of producer/receivers to be used by
/// the extractors and the writer loop.
@@ -26,125 +31,97 @@ use crate::{DocumentId, Index};
/// Panics if the number of provided BBBuffers is not exactly equal
/// to the number of available threads in the rayon threadpool.
pub fn extractor_writer_bbqueue(
bbqueue: &[bbqueue::BBBuffer],
bbbuffers: &[bbqueue::BBBuffer],
) -> (ExtractorBbqueueSender, WriterBbqueueReceiver) {
assert_eq!(
bbqueue.len(),
bbbuffers.len(),
rayon::current_num_threads(),
"You must provide as many BBBuffer as the available number of threads to extract"
);
let capacity = bbbuffers.first().unwrap().capacity();
let parker = Parker::new();
let extractors = ThreadLocal::with_capacity(bbqueue.len());
let extractors = ThreadLocal::with_capacity(bbbuffers.len());
let producers = rayon::broadcast(|bi| {
let bbqueue = &bbqueue[bi.index()];
let bbqueue = &bbbuffers[bi.index()];
let (producer, consumer) = bbqueue.try_split_framed().unwrap();
extractors.get_or(|| FullySend(producer));
extractors.get_or(|| FullySend(RefCell::new(producer)));
consumer
});
(
ExtractorBbqueueSender { inner: extractors, unparker: parker.unparker().clone() },
ExtractorBbqueueSender {
inner: extractors,
capacity: capacity.checked_sub(9).unwrap(),
unparker: parker.unparker().clone(),
},
WriterBbqueueReceiver { inner: producers, parker },
)
}
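
For context, a minimal setup sketch (not part of this diff). It assumes the bbqueue fork in use exposes a runtime-sized `BBBuffer::new(capacity)` and that the caller allocates exactly one buffer per rayon thread, as the assertion above requires:

```rust
// Hypothetical setup: one 100 KiB ring buffer per rayon thread.
let bbbuffers: Vec<bbqueue::BBBuffer> = (0..rayon::current_num_threads())
    .map(|_| bbqueue::BBBuffer::new(100 * 1024))
    .collect();
let (sender, mut receiver) = extractor_writer_bbqueue(&bbbuffers);
// Extractors use `sender` from inside the rayon pool while a dedicated
// writer thread drains `receiver` and writes the frames into LMDB.
```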
pub struct ExtractorBbqueueSender<'a> {
inner: ThreadLocal<FullySend<bbqueue::framed::FrameProducer<'a>>>,
/// Used to wake up the receiver thread;
/// it is signaled every time we write something to the producer.
unparker: Unparker,
}
pub struct WriterBbqueueReceiver<'a> {
inner: Vec<bbqueue::framed::FrameConsumer<'a>>,
/// Used to park when no more work is required
parker: Parker,
}
/// The capacity of the channel is currently in number of messages.
pub fn extractor_writer_channel(cap: usize) -> (ExtractorSender, WriterReceiver) {
let (sender, receiver) = crossbeam_channel::bounded(cap);
(
ExtractorSender {
sender,
send_count: Default::default(),
writer_contentious_count: Default::default(),
extractor_contentious_count: Default::default(),
},
WriterReceiver(receiver),
)
}
pub enum KeyValueEntry {
Small { key_length: usize, data: Box<[u8]> },
Large { key_entry: KeyEntry, data: Mmap },
}
impl KeyValueEntry {
pub fn from_small_key_value(key: &[u8], value: &[u8]) -> Self {
let mut data = Vec::with_capacity(key.len() + value.len());
data.extend_from_slice(key);
data.extend_from_slice(value);
KeyValueEntry::Small { key_length: key.len(), data: data.into_boxed_slice() }
}
fn from_large_key_value(key: &[u8], value: Mmap) -> Self {
KeyValueEntry::Large { key_entry: KeyEntry::from_key(key), data: value }
}
pub fn key(&self) -> &[u8] {
match self {
KeyValueEntry::Small { key_length, data } => &data[..*key_length],
KeyValueEntry::Large { key_entry, data: _ } => key_entry.entry(),
}
}
pub fn value(&self) -> &[u8] {
match self {
KeyValueEntry::Small { key_length, data } => &data[*key_length..],
KeyValueEntry::Large { key_entry: _, data } => &data[..],
impl<'a> WriterBbqueueReceiver<'a> {
    pub fn read(&mut self) -> Option<FrameWithHeader<'a>> {
        for consumer in &mut self.inner {
            if let Some(mut frame) = consumer.read() {
                // Mark the frame as auto release so that the space
                // is reclaimed as soon as the grant is dropped.
                frame.auto_release(true);
                // Decoding the header with bytemuck's checked,
                // unaligned read is an assumption of this sketch.
                let header = bytemuck::checked::pod_read_unaligned(
                    &frame[..mem::size_of::<EntryHeader>()],
                );
                return Some(FrameWithHeader { header, frame });
            }
        }
        None
    }
}
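
A sketch of the consuming side, assuming the writer thread parks on the receiver's `Parker` when `read()` returns `None` and that the frame exposes its decoded header (both are assumptions of this sketch):

```rust
// Hypothetical writer loop driven by `WriterBbqueueReceiver::read`.
loop {
    match receiver.read() {
        Some(frame) => match frame.header {
            EntryHeader::DbOperation { database, key_length } => {
                // Put or delete the key/value pair in `database`.
            }
            EntryHeader::ArroyDeleteVector { docid } => { /* delete the vectors */ }
            EntryHeader::ArroySetVector { docid, embedder_id } => { /* set the vector */ }
        },
        // Nothing to read: sleep until an extractor unparks us.
        None => parker.park(),
    }
}
```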
pub struct KeyEntry {
data: Box<[u8]>,
struct FrameWithHeader<'a> {
header: EntryHeader,
frame: FrameGrantR<'a>,
}
impl KeyEntry {
pub fn from_key(key: &[u8]) -> Self {
KeyEntry { data: key.to_vec().into_boxed_slice() }
#[derive(Debug, Clone, Copy, CheckedBitPattern)]
#[repr(u8)]
enum EntryHeader {
/// Whether to put the key/value pair or to delete the given key.
DbOperation {
/// The database on which to perform the operation.
database: Database,
/// The key length in the buffer.
///
/// If None, it means that the buffer is entirely dedicated
/// to the key and that this is therefore a deletion operation.
key_length: Option<NonZeroU16>,
},
ArroyDeleteVector {
docid: DocumentId,
},
/// The embedding fills the remaining space and represents a non-aligned [f32].
ArroySetVector {
docid: DocumentId,
embedder_id: u8,
},
}
impl EntryHeader {
fn delete_key_size(key_length: u16) -> usize {
mem::size_of::<Self>() + key_length as usize
}
pub fn entry(&self) -> &[u8] {
self.data.as_ref()
fn put_key_value_size(key_length: u16, value_length: usize) -> usize {
mem::size_of::<Self>() + key_length as usize + value_length
}
fn bytes_of(&self) -> &[u8] {
// TODO: do the variant matching ourselves
todo!()
}
}
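
The `todo!()` above exists because a fieldful `#[repr(u8)]` enum contains padding bytes, so it cannot implement `NoUninit` as a whole and `bytemuck::bytes_of` cannot be used on it directly. One possible shape for the hand-rolled variant matching, with a hypothetical `DbOperationHeader` payload struct (names and layout are illustrative, not this commit's final design):

```rust
// Hypothetical payload struct: every byte is covered by a field,
// so NoUninit can be derived and the struct viewed as raw bytes.
#[derive(Clone, Copy, NoUninit)]
#[repr(C)]
struct DbOperationHeader {
    database: u32,
    key_length: u16, // 0 encodes a missing length, i.e. a deletion
    _padding: u16,   // explicit padding keeps the struct free of uninit bytes
}

impl EntryHeader {
    // Illustrative counterpart of the `bytes_of` method above.
    fn serialize_into(&self, out: &mut [u8]) {
        match self {
            EntryHeader::DbOperation { database, key_length } => {
                out[0] = 0; // variant tag
                let payload = DbOperationHeader {
                    database: *database as u32,
                    key_length: key_length.map_or(0, NonZeroU16::get),
                    _padding: 0,
                };
                out[1..][..mem::size_of::<DbOperationHeader>()]
                    .copy_from_slice(bytemuck::bytes_of(&payload));
            }
            _ => todo!("the other variants follow the same tag + payload pattern"),
        }
    }
}
```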
pub enum EntryOperation {
Delete(KeyEntry),
Write(KeyValueEntry),
}
pub enum WriterOperation {
DbOperation(DbOperation),
ArroyOperation(ArroyOperation),
}
pub enum ArroyOperation {
DeleteVectors { docid: DocumentId },
SetVectors { docid: DocumentId, embedder_id: u8, embeddings: Vec<Embedding> },
SetVector { docid: DocumentId, embedder_id: u8, embedding: Embedding },
Finish { configs: Vec<IndexEmbeddingConfig> },
}
pub struct DbOperation {
database: Database,
entry: EntryOperation,
}
#[derive(Debug)]
#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)]
#[repr(u32)]
pub enum Database {
Main,
Documents,
@@ -220,82 +197,46 @@ impl From<FacetKind> for Database {
}
}
impl DbOperation {
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
self.database.database(index)
}
pub fn database_name(&self) -> &'static str {
self.database.database_name()
}
pub fn entry(self) -> EntryOperation {
self.entry
}
pub struct ExtractorBbqueueSender<'a> {
inner: ThreadLocal<FullySend<RefCell<FrameProducer<'a>>>>,
/// The capacity of this frame producer; it will never be able to store more than that.
///
/// Note that the FrameProducer requires up to 9 bytes to encode the frame
/// length, so the capacity has been shrunk accordingly.
///
/// <https://docs.rs/bbqueue/latest/bbqueue/framed/index.html#frame-header>
capacity: usize,
/// Used to wake up the receiver thread;
/// it is signaled every time we write something to the producer.
unparker: Unparker,
}
pub struct WriterReceiver(Receiver<WriterOperation>);
impl IntoIterator for WriterReceiver {
type Item = WriterOperation;
type IntoIter = IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
pub struct ExtractorSender {
sender: Sender<WriterOperation>,
/// The total number of messages we sent through the channel.
send_count: AtomicUsize,
/// The number of times we sent something in a channel that was full.
writer_contentious_count: AtomicUsize,
/// The number of times we sent something in a channel that was empty.
extractor_contentious_count: AtomicUsize,
}
impl Drop for ExtractorSender {
fn drop(&mut self) {
let send_count = *self.send_count.get_mut();
let writer_contentious_count = *self.writer_contentious_count.get_mut();
let extractor_contentious_count = *self.extractor_contentious_count.get_mut();
tracing::debug!(
"Extractor channel stats: {send_count} sends, \
{writer_contentious_count} writer contentions ({}%), \
{extractor_contentious_count} extractor contentions ({}%)",
(writer_contentious_count as f32 / send_count as f32) * 100.0,
(extractor_contentious_count as f32 / send_count as f32) * 100.0
)
}
}
impl ExtractorSender {
pub fn docids<D: DatabaseType>(&self) -> WordDocidsSender<'_, D> {
impl<'b> ExtractorBbqueueSender<'b> {
pub fn docids<'a, D: DatabaseType>(&'a self) -> WordDocidsSender<'a, 'b, D> {
WordDocidsSender { sender: self, _marker: PhantomData }
}
pub fn facet_docids(&self) -> FacetDocidsSender<'_> {
pub fn facet_docids<'a>(&'a self) -> FacetDocidsSender<'a, 'b> {
FacetDocidsSender { sender: self }
}
pub fn field_id_docid_facet_sender(&self) -> FieldIdDocidFacetSender<'_> {
FieldIdDocidFacetSender(self)
pub fn field_id_docid_facet_sender<'a>(&'a self) -> FieldIdDocidFacetSender<'a, 'b> {
FieldIdDocidFacetSender(&self)
}
pub fn documents(&self) -> DocumentsSender<'_> {
DocumentsSender(self)
pub fn documents<'a>(&'a self) -> DocumentsSender<'a, 'b> {
DocumentsSender(&self)
}
pub fn embeddings(&self) -> EmbeddingSender<'_> {
EmbeddingSender(&self.sender)
pub fn embeddings<'a>(&'a self) -> EmbeddingSender<'a, 'b> {
EmbeddingSender(&self)
}
pub fn geo(&self) -> GeoSender<'_> {
GeoSender(&self.sender)
pub fn geo<'a>(&'a self) -> GeoSender<'a, 'b> {
GeoSender(&self)
}
fn send_delete_vector(&self, docid: DocumentId) -> StdResult<(), SendError<()>> {
fn send_delete_vector(&self, docid: DocumentId) -> crate::Result<()> {
match self
.sender
.send(WriterOperation::ArroyOperation(ArroyOperation::DeleteVectors { docid }))
@@ -305,18 +246,69 @@ impl ExtractorSender {
}
}
fn send_db_operation(&self, op: DbOperation) -> StdResult<(), SendError<()>> {
if self.sender.is_full() {
self.writer_contentious_count.fetch_add(1, Ordering::SeqCst);
}
if self.sender.is_empty() {
self.extractor_contentious_count.fetch_add(1, Ordering::SeqCst);
fn write_key_value(&self, database: Database, key: &[u8], value: &[u8]) -> crate::Result<()> {
let capacity = self.capacity;
let refcell = self.inner.get().unwrap();
let mut producer = refcell.0.borrow_mut_or_yield();
let key_length = key.len().try_into().unwrap();
let value_length = value.len();
let total_length = EntryHeader::put_key_value_size(key_length, value_length);
if total_length > capacity {
unreachable!("entry larger that the bbqueue capacity");
}
self.send_count.fetch_add(1, Ordering::SeqCst);
match self.sender.send(WriterOperation::DbOperation(op)) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
let payload_header =
EntryHeader::DbOperation { database, key_length: NonZeroU16::new(key_length) };
loop {
        let mut grant = match producer.grant(total_length) {
            Ok(grant) => grant,
            // Spin until the receiver reads enough frames to free the space.
            Err(bbqueue::Error::InsufficientSize) => continue,
            Err(e) => unreachable!("{e:?}"),
        };
let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>());
header.copy_from_slice(payload_header.bytes_of());
let (key_out, value_out) = remaining.split_at_mut(key.len());
key_out.copy_from_slice(key);
value_out.copy_from_slice(value);
// We could commit only the used memory.
grant.commit(total_length);
break Ok(());
}
}
fn delete_entry(&self, database: Database, key: &[u8]) -> crate::Result<()> {
let capacity = self.capacity;
let refcell = self.inner.get().unwrap();
let mut producer = refcell.0.borrow_mut_or_yield();
let key_length = key.len().try_into().unwrap();
let total_length = EntryHeader::delete_key_size(key_length);
if total_length > capacity {
unreachable!("entry larger that the bbqueue capacity");
}
let payload_header = EntryHeader::DbOperation { database, key_length: None };
loop {
let mut grant = match producer.grant(total_length) {
Ok(grant) => grant,
Err(bbqueue::Error::InsufficientSize) => continue,
Err(e) => unreachable!("{e:?}"),
};
let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>());
header.copy_from_slice(payload_header.bytes_of());
remaining.copy_from_slice(key);
// We could commit only the used memory.
grant.commit(total_length);
break Ok(());
}
}
}
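
Both helpers lay a frame out as [EntryHeader][key][value]. A small sanity check of the size arithmetic, as a sketch (the concrete `size_of::<EntryHeader>()` depends on the enum layout):

```rust
#[test]
fn frame_sizes_add_up() {
    let header = mem::size_of::<EntryHeader>();
    // A deletion frame carries only the header and the key...
    assert_eq!(EntryHeader::delete_key_size(11), header + 11);
    // ...while a put frame appends the value right after the key.
    assert_eq!(EntryHeader::put_key_value_size(11, 42), header + 11 + 42);
}
```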
@@ -356,159 +348,237 @@ impl DatabaseType for WordPositionDocids {
const DATABASE: Database = Database::WordPositionDocids;
}
pub trait DocidsSender {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>>;
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>>;
}
pub struct WordDocidsSender<'a, D> {
sender: &'a ExtractorSender,
pub struct WordDocidsSender<'a, 'b, D> {
sender: &'a ExtractorBbqueueSender<'b>,
_marker: PhantomData<D>,
}
impl<D: DatabaseType> DocidsSender for WordDocidsSender<'_, D> {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
match self.sender.send_db_operation(DbOperation { database: D::DATABASE, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
impl<D: DatabaseType> WordDocidsSender<'_, '_, D> {
pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> {
let capacity = self.sender.capacity;
let refcell = self.sender.inner.get().unwrap();
let mut producer = refcell.0.borrow_mut_or_yield();
let key_length = key.len().try_into().unwrap();
let value_length = CboRoaringBitmapCodec::serialized_size(bitmap);
let total_length = EntryHeader::put_key_value_size(key_length, value_length);
if total_length > capacity {
unreachable!("entry larger that the bbqueue capacity");
}
}
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
match self.sender.send_db_operation(DbOperation { database: D::DATABASE, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
}
}
pub struct FacetDocidsSender<'a> {
sender: &'a ExtractorSender,
}
impl DocidsSender for FacetDocidsSender<'_> {
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
let (facet_kind, key) = FacetKind::extract_from_key(key);
let database = Database::from(facet_kind);
let entry = match facet_kind {
// skip level group size
FacetKind::String | FacetKind::Number => {
// add facet group size
let value = [&[1], value].concat();
EntryOperation::Write(KeyValueEntry::from_small_key_value(key, &value))
}
_ => EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value)),
let payload_header = EntryHeader::DbOperation {
database: D::DATABASE,
key_length: NonZeroU16::new(key_length),
};
match self.sender.send_db_operation(DbOperation { database, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
loop {
let mut grant = match producer.grant(total_length) {
Ok(grant) => grant,
Err(bbqueue::Error::InsufficientSize) => continue,
Err(e) => unreachable!("{e:?}"),
};
let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>());
header.copy_from_slice(payload_header.bytes_of());
let (key_out, value_out) = remaining.split_at_mut(key.len());
key_out.copy_from_slice(key);
CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?;
// We could commit only the used memory.
grant.commit(total_length);
break Ok(());
}
}
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
pub fn delete(&self, key: &[u8]) -> crate::Result<()> {
let capacity = self.sender.capacity;
let refcell = self.sender.inner.get().unwrap();
let mut producer = refcell.0.borrow_mut_or_yield();
let key_length = key.len().try_into().unwrap();
let total_length = EntryHeader::delete_key_size(key_length);
if total_length > capacity {
unreachable!("entry larger that the bbqueue capacity");
}
let payload_header = EntryHeader::DbOperation { database: D::DATABASE, key_length: None };
loop {
let mut grant = match producer.grant(total_length) {
Ok(grant) => grant,
Err(bbqueue::Error::InsufficientSize) => continue,
Err(e) => unreachable!("{e:?}"),
};
let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>());
header.copy_from_slice(payload_header.bytes_of());
remaining.copy_from_slice(key);
// We could commit only the used memory.
grant.commit(total_length);
break Ok(());
}
}
}
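
A usage sketch for this sender, assuming a `WordDocids` marker type implements `DatabaseType` like `WordPositionDocids` above:

```rust
// Hypothetical extractor-side usage: serialize a posting list for the
// word "cat" straight into the thread-local ring buffer.
let docids = RoaringBitmap::from_iter([1u32, 2, 3]);
sender.docids::<WordDocids>().write(b"cat", &docids)?;
// Remove an obsolete posting list.
sender.docids::<WordDocids>().delete(b"dog")?;
```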
pub struct FacetDocidsSender<'a, 'b> {
sender: &'a ExtractorBbqueueSender<'b>,
}
impl FacetDocidsSender<'_, '_> {
pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> {
let capacity = self.sender.capacity;
let refcell = self.sender.inner.get().unwrap();
let mut producer = refcell.0.borrow_mut_or_yield();
let (facet_kind, key) = FacetKind::extract_from_key(key);
let database = Database::from(facet_kind);
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
match self.sender.send_db_operation(DbOperation { database, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
let key_length = key.len().try_into().unwrap();
let value_length = CboRoaringBitmapCodec::serialized_size(bitmap);
let value_length = match facet_kind {
// We must take the facet group size into account
// when we serialize strings and numbers.
FacetKind::Number | FacetKind::String => value_length + 1,
FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_length,
};
let total_length = EntryHeader::put_key_value_size(key_length, value_length);
if total_length > capacity {
unreachable!("entry larger that the bbqueue capacity");
}
let payload_header = EntryHeader::DbOperation {
database: Database::from(facet_kind),
key_length: NonZeroU16::new(key_length),
};
loop {
let mut grant = match producer.grant(total_length) {
Ok(grant) => grant,
Err(bbqueue::Error::InsufficientSize) => continue,
Err(e) => unreachable!("{e:?}"),
};
let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>());
header.copy_from_slice(payload_header.bytes_of());
let (key_out, value_out) = remaining.split_at_mut(key.len());
key_out.copy_from_slice(key);
let value_out = match facet_kind {
// We must take the facet group size into account
// when we serialize strings and numbers.
FacetKind::String | FacetKind::Number => {
let (first, remaining) = value_out.split_first_mut().unwrap();
*first = 1;
remaining
}
FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_out,
};
CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?;
// We could commit only the used memory.
grant.commit(total_length);
break Ok(());
}
}
pub fn delete(&self, key: &[u8]) -> crate::Result<()> {
let capacity = self.sender.capacity;
let refcell = self.sender.inner.get().unwrap();
let mut producer = refcell.0.borrow_mut_or_yield();
let (facet_kind, key) = FacetKind::extract_from_key(key);
let key_length = key.len().try_into().unwrap();
let total_length = EntryHeader::delete_key_size(key_length);
if total_length > capacity {
unreachable!("entry larger that the bbqueue capacity");
}
let payload_header =
EntryHeader::DbOperation { database: Database::from(facet_kind), key_length: None };
loop {
let mut grant = match producer.grant(total_length) {
Ok(grant) => grant,
Err(bbqueue::Error::InsufficientSize) => continue,
Err(e) => unreachable!("{e:?}"),
};
let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>());
header.copy_from_slice(payload_header.bytes_of());
remaining.copy_from_slice(key);
// We could commit only the used memory.
grant.commit(total_length);
break Ok(());
}
}
}
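
For the string and number facet databases, the value is the one-byte facet group size (always 1 at level 0) followed by the CBO-encoded bitmap. A reader-side sketch of that layout, assuming `CboRoaringBitmapCodec::deserialize_from` is available:

```rust
// Hypothetical decoding of a level-0 string/number facet value.
let (group_size, bitmap_bytes) = value.split_first().unwrap();
assert_eq!(*group_size, 1); // level-0 groups always contain a single entry
let bitmap = CboRoaringBitmapCodec::deserialize_from(bitmap_bytes)?;
```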
pub struct FieldIdDocidFacetSender<'a>(&'a ExtractorSender);
pub struct FieldIdDocidFacetSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);
impl FieldIdDocidFacetSender<'_> {
pub fn write_facet_string(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
impl FieldIdDocidFacetSender<'_, '_> {
pub fn write_facet_string(&self, key: &[u8], value: &[u8]) -> crate::Result<()> {
debug_assert!(FieldDocIdFacetStringCodec::bytes_decode(key).is_ok());
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
self.0
.send_db_operation(DbOperation { database: Database::FieldIdDocidFacetStrings, entry })
self.0.write_key_value(Database::FieldIdDocidFacetStrings, key, value)
}
pub fn write_facet_f64(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
pub fn write_facet_f64(&self, key: &[u8]) -> crate::Result<()> {
debug_assert!(FieldDocIdFacetF64Codec::bytes_decode(key).is_ok());
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, &[]));
self.0.send_db_operation(DbOperation { database: Database::FieldIdDocidFacetF64s, entry })
self.0.write_key_value(Database::FieldIdDocidFacetF64s, key, &[])
}
pub fn delete_facet_string(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
pub fn delete_facet_string(&self, key: &[u8]) -> crate::Result<()> {
debug_assert!(FieldDocIdFacetStringCodec::bytes_decode(key).is_ok());
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
self.0
.send_db_operation(DbOperation { database: Database::FieldIdDocidFacetStrings, entry })
self.0.delete_entry(Database::FieldIdDocidFacetStrings, key)
}
pub fn delete_facet_f64(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
pub fn delete_facet_f64(&self, key: &[u8]) -> crate::Result<()> {
debug_assert!(FieldDocIdFacetF64Codec::bytes_decode(key).is_ok());
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
self.0.send_db_operation(DbOperation { database: Database::FieldIdDocidFacetF64s, entry })
self.0.delete_entry(Database::FieldIdDocidFacetF64s, key)
}
}
pub struct DocumentsSender<'a>(&'a ExtractorSender);
pub struct DocumentsSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);
impl DocumentsSender<'_> {
impl DocumentsSender<'_, '_> {
/// TODO do that efficiently
pub fn uncompressed(
&self,
docid: DocumentId,
external_id: String,
document: &KvReaderFieldId,
) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(
&docid.to_be_bytes(),
document.as_bytes(),
));
match self.0.send_db_operation(DbOperation { database: Database::Documents, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}?;
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(
) -> crate::Result<()> {
self.0.write_key_value(Database::Documents, &docid.to_be_bytes(), document.as_bytes())?;
self.0.write_key_value(
Database::ExternalDocumentsIds,
external_id.as_bytes(),
&docid.to_be_bytes(),
));
match self
.0
.send_db_operation(DbOperation { database: Database::ExternalDocumentsIds, entry })
{
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
)
}
pub fn delete(&self, docid: DocumentId, external_id: String) -> StdResult<(), SendError<()>> {
let entry = EntryOperation::Delete(KeyEntry::from_key(&docid.to_be_bytes()));
match self.0.send_db_operation(DbOperation { database: Database::Documents, entry }) {
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}?;
pub fn delete(&self, docid: DocumentId, external_id: String) -> crate::Result<()> {
self.0.delete_entry(Database::Documents, &docid.to_be_bytes())?;
self.0.send_delete_vector(docid)?;
let entry = EntryOperation::Delete(KeyEntry::from_key(external_id.as_bytes()));
match self
.0
.send_db_operation(DbOperation { database: Database::ExternalDocumentsIds, entry })
{
Ok(()) => Ok(()),
Err(SendError(_)) => Err(SendError(())),
}
self.0.delete_entry(Database::ExternalDocumentsIds, external_id.as_bytes())
}
}
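
A usage sketch for the documents sender (the docid, external id, and document value are illustrative):

```rust
// Hypothetical usage: store the document under its internal docid and
// register the external -> internal id mapping in the same pass.
sender.documents().uncompressed(42, "movie-1337".to_string(), &document)?;
// Deleting reverts both entries and also drops the document's vectors.
sender.documents().delete(42, "movie-1337".to_string())?;
```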
pub struct EmbeddingSender<'a>(&'a Sender<WriterOperation>);
pub struct EmbeddingSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);
impl EmbeddingSender<'_> {
impl EmbeddingSender<'_, '_> {
pub fn set_vectors(
&self,
docid: DocumentId,
embedder_id: u8,
embeddings: Vec<Embedding>,
) -> StdResult<(), SendError<()>> {
) -> crate::Result<()> {
self.0
.send(WriterOperation::ArroyOperation(ArroyOperation::SetVectors {
docid,
@@ -541,33 +611,36 @@ impl EmbeddingSender<'_> {
}
}
pub struct GeoSender<'a>(&'a Sender<WriterOperation>);
pub struct GeoSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);
impl GeoSender<'_> {
impl GeoSender<'_, '_> {
pub fn set_rtree(&self, value: Mmap) -> StdResult<(), SendError<()>> {
self.0
.send(WriterOperation::DbOperation(DbOperation {
database: Database::Main,
entry: EntryOperation::Write(KeyValueEntry::from_large_key_value(
GEO_RTREE_KEY.as_bytes(),
value,
)),
}))
.map_err(|_| SendError(()))
todo!("set rtree from file")
// self.0
// .send(WriterOperation::DbOperation(DbOperation {
// database: Database::Main,
// entry: EntryOperation::Write(KeyValueEntry::from_large_key_value(
// GEO_RTREE_KEY.as_bytes(),
// value,
// )),
// }))
// .map_err(|_| SendError(()))
}
pub fn set_geo_faceted(&self, bitmap: &RoaringBitmap) -> StdResult<(), SendError<()>> {
let mut buffer = Vec::new();
bitmap.serialize_into(&mut buffer).unwrap();
todo!("serialize directly into bbqueue (as a real roaringbitmap not a cbo)")
self.0
.send(WriterOperation::DbOperation(DbOperation {
database: Database::Main,
entry: EntryOperation::Write(KeyValueEntry::from_small_key_value(
GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(),
&buffer,
)),
}))
.map_err(|_| SendError(()))
// let mut buffer = Vec::new();
// bitmap.serialize_into(&mut buffer).unwrap();
// self.0
// .send(WriterOperation::DbOperation(DbOperation {
// database: Database::Main,
// entry: EntryOperation::Write(KeyValueEntry::from_small_key_value(
// GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(),
// &buffer,
// )),
// }))
// .map_err(|_| SendError(()))
}
}
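
A possible completion of `set_geo_faceted` following the same grant pattern as the other senders. It assumes the plain (non-CBO) roaring encoding via `RoaringBitmap::serialized_size` and `serialize_into`, and elides the `InsufficientSize` retry loop:

```rust
// Hypothetical sketch: serialize the bitmap directly into the grant.
let key = GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes();
let key_length: u16 = key.len().try_into().unwrap();
let value_length = bitmap.serialized_size();
let total_length = EntryHeader::put_key_value_size(key_length, value_length);
let payload_header = EntryHeader::DbOperation {
    database: Database::Main,
    key_length: NonZeroU16::new(key_length),
};
let mut grant = producer.grant(total_length).unwrap(); // retry loop elided
let (header, remaining) = grant.split_at_mut(mem::size_of::<EntryHeader>());
header.copy_from_slice(payload_header.bytes_of());
let (key_out, value_out) = remaining.split_at_mut(key.len());
key_out.copy_from_slice(key);
bitmap.serialize_into(value_out).unwrap(); // &mut [u8] implements io::Write
grant.commit(total_length);
```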