Replace obkv with the temporary new version of it

This commit is contained in:
Clément Renault 2024-08-29 19:20:10 +02:00
parent 27df9e6c73
commit 0c57cf7565
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
37 changed files with 142 additions and 223 deletions

13
Cargo.lock generated
View File

@ -3434,7 +3434,7 @@ dependencies = [
"mimalloc", "mimalloc",
"mime", "mime",
"num_cpus", "num_cpus",
"obkv 0.2.2", "obkv",
"once_cell", "once_cell",
"ordered-float", "ordered-float",
"parking_lot", "parking_lot",
@ -3601,8 +3601,7 @@ dependencies = [
"memchr", "memchr",
"memmap2", "memmap2",
"mimalloc", "mimalloc",
"obkv 0.2.2", "obkv",
"obkv 0.3.0",
"once_cell", "once_cell",
"ordered-float", "ordered-float",
"rand", "rand",
@ -3849,16 +3848,10 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "obkv"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2e27bcfe835a379d32352112f6b8dbae2d99d16a5fff42abe6e5ba5386c1e5a"
[[package]] [[package]]
name = "obkv" name = "obkv"
version = "0.3.0" version = "0.3.0"
source = "git+https://github.com/kerollmops/obkv?branch=unsized-kvreader#5289a6658cd471f4212c1edc1a40b2a3c3d11fe0" source = "git+https://github.com/kerollmops/obkv?branch=unsized-kvreader#9c2900d106fa84e7079b288e7f7c366ec7cae948"
[[package]] [[package]]
name = "once_cell" name = "once_cell"

View File

@ -57,7 +57,7 @@ meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17" mime = "0.3.17"
num_cpus = "1.16.0" num_cpus = "1.16.0"
obkv = "0.2.2" obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
once_cell = "1.19.0" once_cell = "1.19.0"
ordered-float = "4.2.1" ordered-float = "4.2.1"
parking_lot = "0.12.3" parking_lot = "0.12.3"

View File

@ -1247,7 +1247,7 @@ impl<'a> HitMaker<'a> {
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?; self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
// First generate a document with all the displayed fields // First generate a document with all the displayed fields
let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?; let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, &obkv)?;
let add_vectors_fid = let add_vectors_fid =
self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve); self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);

View File

@ -12,6 +12,7 @@ readme.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] } bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3" bincode = "1.3.3"
bstr = "1.9.1" bstr = "1.9.1"
@ -44,8 +45,7 @@ levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
lru = "0.12.3" lru = "0.12.3"
memchr = "2.5.0" memchr = "2.5.0"
memmap2 = "0.9.4" memmap2 = "0.9.4"
obkv = "0.2.2" obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
obkv2 = { package = "obkv", git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
once_cell = "1.19.0" once_cell = "1.19.0"
ordered-float = "4.2.1" ordered-float = "4.2.1"
rayon = "1.10.0" rayon = "1.10.0"
@ -94,7 +94,6 @@ rayon-par-bridge = "0.1.0"
[dev-dependencies] [dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.43", default-features = false }
big_s = "1.0.2"
insta = "1.39.0" insta = "1.39.0"
maplit = "1.0.2" maplit = "1.0.2"
md5 = "0.7.0" md5 = "0.7.0"

View File

@ -69,7 +69,7 @@ impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct EnrichedDocument<'a> { pub struct EnrichedDocument<'a> {
pub document: KvReader<'a, FieldId>, pub document: &'a KvReader<FieldId>,
pub document_id: DocumentId, pub document_id: DocumentId,
} }

View File

@ -27,7 +27,7 @@ use crate::{FieldId, Object, Result};
const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes(); const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
/// Helper function to convert an obkv reader into a JSON object. /// Helper function to convert an obkv reader into a JSON object.
pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> { pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
obkv.iter() obkv.iter()
.map(|(field_id, value)| { .map(|(field_id, value)| {
let field_name = index let field_name = index
@ -76,7 +76,7 @@ impl DocumentsBatchIndex {
self.0.get_by_right(name).cloned() self.0.get_by_right(name).cloned()
} }
pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> { pub fn recreate_json(&self, document: &obkv::KvReaderU16) -> Result<Object> {
let mut map = Object::new(); let mut map = Object::new();
for (k, v) in document.iter() { for (k, v) in document.iter() {

View File

@ -52,7 +52,7 @@ impl<'a> PrimaryKey<'a> {
pub fn document_id( pub fn document_id(
&self, &self,
document: &obkv::KvReader<'_, FieldId>, document: &obkv::KvReader<FieldId>,
fields: &impl FieldIdMapper, fields: &impl FieldIdMapper,
) -> Result<StdResult<String, DocumentIdExtractionError>> { ) -> Result<StdResult<String, DocumentIdExtractionError>> {
match self { match self {

View File

@ -76,11 +76,9 @@ impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
pub fn get( pub fn get(
&mut self, &mut self,
offset: u32, offset: u32,
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> { ) -> Result<Option<&KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_key_equal_to(offset.to_be_bytes())? { match self.cursor.move_on_key_equal_to(offset.to_be_bytes())? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => { Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => Ok(Some(value.into())),
Ok(Some(KvReader::new(value)))
}
_otherwise => Ok(None), _otherwise => Ok(None),
} }
} }
@ -89,11 +87,9 @@ impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
/// `next_document` advance the document reader until all the documents have been read. /// `next_document` advance the document reader until all the documents have been read.
pub fn next_document( pub fn next_document(
&mut self, &mut self,
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> { ) -> Result<Option<&KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_next()? { match self.cursor.move_on_next()? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => { Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => Ok(Some(value.into())),
Ok(Some(KvReader::new(value)))
}
_otherwise => Ok(None), _otherwise => Ok(None),
} }
} }

View File

@ -6,10 +6,10 @@ use obkv::{KvReaderU16, KvWriterU16};
pub struct ObkvCodec; pub struct ObkvCodec;
impl<'a> heed::BytesDecode<'a> for ObkvCodec { impl<'a> heed::BytesDecode<'a> for ObkvCodec {
type DItem = KvReaderU16<'a>; type DItem = &'a KvReaderU16;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(KvReaderU16::new(bytes)) Ok(KvReaderU16::from_slice(bytes))
} }
} }

View File

@ -122,7 +122,7 @@ impl CboRoaringBitmapCodec {
/// Merges a DelAdd delta into a CboRoaringBitmap. /// Merges a DelAdd delta into a CboRoaringBitmap.
pub fn merge_deladd_into<'a>( pub fn merge_deladd_into<'a>(
deladd: KvReaderDelAdd<'_>, deladd: &KvReaderDelAdd,
previous: &[u8], previous: &[u8],
buffer: &'a mut Vec<u8>, buffer: &'a mut Vec<u8>,
) -> io::Result<Option<&'a [u8]>> { ) -> io::Result<Option<&'a [u8]>> {

View File

@ -1252,7 +1252,7 @@ impl Index {
/* documents */ /* documents */
/// Returns a document by using the document id. /// Returns a document by using the document id.
pub fn document<'t>(&self, rtxn: &'t RoTxn, id: DocumentId) -> Result<obkv::KvReaderU16<'t>> { pub fn document<'t>(&self, rtxn: &'t RoTxn, id: DocumentId) -> Result<&'t obkv::KvReaderU16> {
self.documents self.documents
.get(rtxn, &id)? .get(rtxn, &id)?
.ok_or(UserError::UnknownInternalDocumentId { document_id: id }) .ok_or(UserError::UnknownInternalDocumentId { document_id: id })
@ -1264,7 +1264,7 @@ impl Index {
&'a self, &'a self,
rtxn: &'t RoTxn<'t>, rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId> + 'a, ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> { ) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
Ok(ids.into_iter().map(move |id| { Ok(ids.into_iter().map(move |id| {
let kv = self let kv = self
.documents .documents
@ -1279,7 +1279,7 @@ impl Index {
&self, &self,
rtxn: &'t RoTxn<'t>, rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId>, ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> { ) -> Result<Vec<(DocumentId, &'t obkv::KvReaderU16)>> {
self.iter_documents(rtxn, ids)?.collect() self.iter_documents(rtxn, ids)?.collect()
} }
@ -1287,7 +1287,7 @@ impl Index {
pub fn all_documents<'a, 't: 'a>( pub fn all_documents<'a, 't: 'a>(
&'a self, &'a self,
rtxn: &'t RoTxn<'t>, rtxn: &'t RoTxn<'t>,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> { ) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
self.iter_documents(rtxn, self.documents_ids(rtxn)?) self.iter_documents(rtxn, self.documents_ids(rtxn)?)
} }

View File

@ -214,7 +214,7 @@ pub fn bucketed_position(relative: u16) -> u16 {
pub fn obkv_to_json( pub fn obkv_to_json(
displayed_fields: &[FieldId], displayed_fields: &[FieldId],
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16<'_>, obkv: &obkv::KvReaderU16,
) -> Result<Object> { ) -> Result<Object> {
displayed_fields displayed_fields
.iter() .iter()
@ -232,10 +232,7 @@ pub fn obkv_to_json(
} }
/// Transform every field of a raw obkv store into a JSON Object. /// Transform every field of a raw obkv store into a JSON Object.
pub fn all_obkv_to_json( pub fn all_obkv_to_json(obkv: &obkv::KvReaderU16, fields_ids_map: &FieldsIdsMap) -> Result<Object> {
obkv: obkv::KvReaderU16<'_>,
fields_ids_map: &FieldsIdsMap,
) -> Result<Object> {
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>(); let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
obkv_to_json(all_keys.as_slice(), fields_ids_map, obkv) obkv_to_json(all_keys.as_slice(), fields_ids_map, obkv)
} }

View File

@ -30,13 +30,13 @@ impl ParsedValue {
impl<'a> Document<'a> { impl<'a> Document<'a> {
pub fn new( pub fn new(
data: obkv::KvReaderU16<'a>, data: &'a obkv::KvReaderU16,
side: DelAdd, side: DelAdd,
inverted_field_map: &'a FieldsIdsMap, inverted_field_map: &'a FieldsIdsMap,
) -> Self { ) -> Self {
let mut out_data = BTreeMap::new(); let mut out_data = BTreeMap::new();
for (fid, raw) in data { for (fid, raw) in data {
let obkv = KvReaderDelAdd::new(raw); let obkv = KvReaderDelAdd::from_slice(raw);
let Some(raw) = obkv.get(side) else { let Some(raw) = obkv.get(side) else {
continue; continue;
}; };

View File

@ -91,7 +91,7 @@ impl Prompt {
pub fn render( pub fn render(
&self, &self,
document: obkv::KvReaderU16<'_>, document: &obkv::KvReaderU16,
side: DelAdd, side: DelAdd,
field_id_map: &FieldsIdsMap, field_id_map: &FieldsIdsMap,
) -> Result<String, RenderPromptError> { ) -> Result<String, RenderPromptError> {

View File

@ -1,7 +1,7 @@
use obkv::Key; use obkv::Key;
pub type KvWriterDelAdd<W> = obkv::KvWriter<W, DelAdd>; pub type KvWriterDelAdd<W> = obkv::KvWriter<W, DelAdd>;
pub type KvReaderDelAdd<'a> = obkv::KvReader<'a, DelAdd>; pub type KvReaderDelAdd = obkv::KvReader<DelAdd>;
/// DelAdd defines the new value to add in the database and old value to delete from the database. /// DelAdd defines the new value to add in the database and old value to delete from the database.
/// ///
@ -30,31 +30,13 @@ impl Key for DelAdd {
} }
} }
// TODO remove this implementation
impl obkv2::Key for DelAdd {
const BYTES_SIZE: usize = std::mem::size_of::<DelAdd>();
type BYTES = [u8; <Self as obkv2::Key>::BYTES_SIZE];
fn to_be_bytes(&self) -> Self::BYTES {
u8::to_be_bytes(*self as u8)
}
fn from_be_bytes(array: Self::BYTES) -> Self {
match u8::from_be_bytes(array) {
0 => Self::Deletion,
1 => Self::Addition,
otherwise => unreachable!("DelAdd has only 2 variants, unknown variant: {}", otherwise),
}
}
}
/// Creates a Kv<K, Kv<DelAdd, value>> from Kv<K, value> /// Creates a Kv<K, Kv<DelAdd, value>> from Kv<K, value>
/// ///
/// Deletion: put all the values under DelAdd::Deletion /// Deletion: put all the values under DelAdd::Deletion
/// Addition: put all the values under DelAdd::Addition, /// Addition: put all the values under DelAdd::Addition,
/// DeletionAndAddition: put all the values under DelAdd::Deletion and DelAdd::Addition, /// DeletionAndAddition: put all the values under DelAdd::Deletion and DelAdd::Addition,
pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>( pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
reader: obkv::KvReader<'_, K>, reader: &obkv::KvReader<K>,
operation: DelAddOperation, operation: DelAddOperation,
buffer: &mut Vec<u8>, buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
@ -64,7 +46,7 @@ pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
/// Akin to the [into_del_add_obkv] function but lets you /// Akin to the [into_del_add_obkv] function but lets you
/// conditionally define the `DelAdd` variant based on the obkv key. /// conditionally define the `DelAdd` variant based on the obkv key.
pub fn into_del_add_obkv_conditional_operation<K, F>( pub fn into_del_add_obkv_conditional_operation<K, F>(
reader: obkv::KvReader<'_, K>, reader: &obkv::KvReader<K>,
buffer: &mut Vec<u8>, buffer: &mut Vec<u8>,
operation: F, operation: F,
) -> std::io::Result<()> ) -> std::io::Result<()>
@ -104,8 +86,8 @@ pub enum DelAddOperation {
/// putting each deletion obkv's keys under an DelAdd::Deletion /// putting each deletion obkv's keys under an DelAdd::Deletion
/// and putting each addition obkv's keys under an DelAdd::Addition /// and putting each addition obkv's keys under an DelAdd::Addition
pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>( pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
deletion: &obkv::KvReader<'_, K>, deletion: &obkv::KvReader<K>,
addition: &obkv::KvReader<'_, K>, addition: &obkv::KvReader<K>,
buffer: &mut Vec<u8>, buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
use itertools::merge_join_by; use itertools::merge_join_by;
@ -139,7 +121,7 @@ pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
writer.finish() writer.finish()
} }
pub fn is_noop_del_add_obkv(del_add: KvReaderDelAdd<'_>) -> bool { pub fn is_noop_del_add_obkv(del_add: &KvReaderDelAdd) -> bool {
del_add.get(DelAdd::Deletion) == del_add.get(DelAdd::Addition) del_add.get(DelAdd::Deletion) == del_add.get(DelAdd::Addition)
} }
@ -154,5 +136,5 @@ pub fn deladd_serialize_add_side<'a>(
obkv: &'a [u8], obkv: &'a [u8],
_buffer: &mut Vec<u8>, _buffer: &mut Vec<u8>,
) -> crate::Result<&'a [u8]> { ) -> crate::Result<&'a [u8]> {
Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default()) Ok(KvReaderDelAdd::from_slice(obkv).get(DelAdd::Addition).unwrap_or_default())
} }

View File

@ -135,7 +135,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
if !valid_lmdb_key(key) { if !valid_lmdb_key(key) {
continue; continue;
} }
let value = KvReaderDelAdd::new(value); let value = KvReaderDelAdd::from_slice(value);
// DB is empty, it is safe to ignore Del operations // DB is empty, it is safe to ignore Del operations
let Some(value) = value.get(DelAdd::Addition) else { let Some(value) = value.get(DelAdd::Addition) else {
@ -161,7 +161,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
continue; continue;
} }
let value = KvReaderDelAdd::new(value); let value = KvReaderDelAdd::from_slice(value);
// the value is a CboRoaringBitmap, but I still need to prepend the // the value is a CboRoaringBitmap, but I still need to prepend the
// group size for level 0 (= 1) to it // group size for level 0 (= 1) to it

View File

@ -109,7 +109,7 @@ impl FacetsUpdateIncremental {
} }
current_field_id = Some(key.field_id); current_field_id = Some(key.field_id);
let value = KvReader::new(value); let value = KvReader::from_slice(value);
let docids_to_delete = value let docids_to_delete = value
.get(DelAdd::Deletion) .get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode) .map(CboRoaringBitmapCodec::bytes_decode)

View File

@ -187,7 +187,7 @@ fn index_facet_search(
) -> Result<()> { ) -> Result<()> {
let mut iter = normalized_delta_data.into_stream_merger_iter()?; let mut iter = normalized_delta_data.into_stream_merger_iter()?;
while let Some((key_bytes, delta_bytes)) = iter.next()? { while let Some((key_bytes, delta_bytes)) = iter.next()? {
let deladd_reader = KvReaderDelAdd::new(delta_bytes); let deladd_reader = KvReaderDelAdd::from_slice(delta_bytes);
let database_set = index let database_set = index
.facet_id_normalized_string_strings .facet_id_normalized_string_strings

View File

@ -145,7 +145,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document) #[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document)
target = "indexing::documents")] target = "indexing::documents")]
fn fetch_or_generate_document_id( fn fetch_or_generate_document_id(
document: &obkv::KvReader<'_, FieldId>, document: &obkv::KvReader<FieldId>,
documents_batch_index: &DocumentsBatchIndex, documents_batch_index: &DocumentsBatchIndex,
primary_key: PrimaryKey<'_>, primary_key: PrimaryKey<'_>,
autogenerate_docids: bool, autogenerate_docids: bool,

View File

@ -80,7 +80,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.try_into() .try_into()
.map(u32::from_be_bytes) .map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?; .map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value); let obkv = KvReader::<FieldId>::from_slice(value);
// if the searchable fields didn't change, skip the searchable indexing for this document. // if the searchable fields didn't change, skip the searchable indexing for this document.
if !force_reindexing && !searchable_fields_changed(&obkv, settings_diff) { if !force_reindexing && !searchable_fields_changed(&obkv, settings_diff) {
@ -126,13 +126,13 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
// transforming two KV<FieldId, KV<u16, String>> into one KV<FieldId, KV<DelAdd, KV<u16, String>>> // transforming two KV<FieldId, KV<u16, String>> into one KV<FieldId, KV<DelAdd, KV<u16, String>>>
value_buffer.clear(); value_buffer.clear();
del_add_from_two_obkvs( del_add_from_two_obkvs(
&KvReader::<FieldId>::new(del_obkv), &KvReader::<FieldId>::from_slice(del_obkv),
&KvReader::<FieldId>::new(add_obkv), &KvReader::<FieldId>::from_slice(add_obkv),
&mut value_buffer, &mut value_buffer,
)?; )?;
// write each KV<DelAdd, KV<u16, String>> into the sorter, field by field. // write each KV<DelAdd, KV<u16, String>> into the sorter, field by field.
let obkv = KvReader::<FieldId>::new(&value_buffer); let obkv = KvReader::<FieldId>::from_slice(&value_buffer);
for (field_id, value) in obkv.iter() { for (field_id, value) in obkv.iter() {
key_buffer.truncate(mem::size_of::<u32>()); key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(&field_id.to_be_bytes()); key_buffer.extend_from_slice(&field_id.to_be_bytes());
@ -146,13 +146,13 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
/// Check if any searchable fields of a document changed. /// Check if any searchable fields of a document changed.
fn searchable_fields_changed( fn searchable_fields_changed(
obkv: &KvReader<'_, FieldId>, obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff, settings_diff: &InnerIndexSettingsDiff,
) -> bool { ) -> bool {
let searchable_fields = &settings_diff.new.searchable_fields_ids; let searchable_fields = &settings_diff.new.searchable_fields_ids;
for (field_id, field_bytes) in obkv.iter() { for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.contains(&field_id) { if searchable_fields.contains(&field_id) {
let del_add = KvReaderDelAdd::new(field_bytes); let del_add = KvReaderDelAdd::from_slice(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) { match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field. // if both fields are None, check the next field.
(None, None) => (), (None, None) => (),
@ -189,7 +189,7 @@ fn tokenizer_builder<'a>(
/// Extract words mapped with their positions of a document. /// Extract words mapped with their positions of a document.
fn tokens_from_document<'a>( fn tokens_from_document<'a>(
obkv: &KvReader<'a, FieldId>, obkv: &'a KvReader<FieldId>,
settings: &InnerIndexSettings, settings: &InnerIndexSettings,
tokenizer: &Tokenizer<'_>, tokenizer: &Tokenizer<'_>,
max_positions_per_attributes: u32, max_positions_per_attributes: u32,
@ -202,7 +202,7 @@ fn tokens_from_document<'a>(
// if field is searchable. // if field is searchable.
if settings.searchable_fields_ids.contains(&field_id) { if settings.searchable_fields_ids.contains(&field_id) {
// extract deletion or addition only. // extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) { if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) {
// parse json. // parse json.
let value = let value =
serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?; serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;

View File

@ -45,7 +45,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
buffer.clear(); buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer); let mut obkv = KvWriterDelAdd::new(&mut buffer);
for (deladd_key, _) in KvReaderDelAdd::new(deladd_obkv_bytes).iter() { for (deladd_key, _) in KvReaderDelAdd::from_slice(deladd_obkv_bytes).iter() {
obkv.insert(deladd_key, document_id.to_ne_bytes())?; obkv.insert(deladd_key, document_id.to_ne_bytes())?;
} }
obkv.finish()?; obkv.finish()?;

View File

@ -75,7 +75,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let mut buffer = Vec::new(); let mut buffer = Vec::new();
let mut cursor = docid_fid_facet_string.into_cursor()?; let mut cursor = docid_fid_facet_string.into_cursor()?;
while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? { while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes); let deladd_reader = KvReaderDelAdd::from_slice(deladd_original_value_bytes);
let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some() let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
&& deladd_reader.get(DelAdd::Addition).is_some(); && deladd_reader.get(DelAdd::Addition).is_some();
@ -163,7 +163,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let mut buffer = Vec::new(); let mut buffer = Vec::new();
let mut cursor = docid_fid_facet_string.into_cursor()?; let mut cursor = docid_fid_facet_string.into_cursor()?;
while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? { while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes); let deladd_reader = KvReaderDelAdd::from_slice(deladd_original_value_bytes);
let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some() let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
&& deladd_reader.get(DelAdd::Addition).is_some(); && deladd_reader.get(DelAdd::Addition).is_some();

View File

@ -83,10 +83,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids { if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids {
let mut cursor = obkv_documents.into_cursor()?; let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? { while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value); let obkv = obkv::KvReader::from_slice(value);
let get_document_json_value = move |field_id, side| { let get_document_json_value = move |field_id, side| {
obkv.get(field_id) obkv.get(field_id)
.map(KvReaderDelAdd::new) .map(KvReaderDelAdd::from_slice)
.and_then(|kv| kv.get(side)) .and_then(|kv| kv.get(side))
.map(from_slice) .map(from_slice)
.transpose() .transpose()

View File

@ -45,19 +45,23 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes); let document_id = u32::from_be_bytes(document_id_bytes);
let del_add_reader = KvReaderDelAdd::new(value); let del_add_reader = KvReaderDelAdd::from_slice(value);
let deletion = del_add_reader let deletion = del_add_reader
// get deleted words // get deleted words
.get(DelAdd::Deletion) .get(DelAdd::Deletion)
// count deleted words // count deleted words
.map(|deletion| KvReaderU16::new(deletion).iter().take(MAX_COUNTED_WORDS + 1).count()) .map(|deletion| {
KvReaderU16::from_slice(deletion).iter().take(MAX_COUNTED_WORDS + 1).count()
})
// keep the count if under or equal to MAX_COUNTED_WORDS // keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS); .filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
let addition = del_add_reader let addition = del_add_reader
// get added words // get added words
.get(DelAdd::Addition) .get(DelAdd::Addition)
// count added words // count added words
.map(|addition| KvReaderU16::new(addition).iter().take(MAX_COUNTED_WORDS + 1).count()) .map(|addition| {
KvReaderU16::from_slice(addition).iter().take(MAX_COUNTED_WORDS + 1).count()
})
// keep the count if under or equal to MAX_COUNTED_WORDS // keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS); .filter(|&word_count| word_count <= MAX_COUNTED_WORDS);

View File

@ -29,11 +29,11 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
let mut cursor = obkv_documents.into_cursor()?; let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? { while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value); let obkv = obkv::KvReader::from_slice(value);
// since we only need the primary key when we throw an error // since we only need the primary key when we throw an error
// we create this getter to lazily get it when needed // we create this getter to lazily get it when needed
let document_id = || -> Value { let document_id = || -> Value {
let reader = KvReaderDelAdd::new(obkv.get(primary_key_id).unwrap()); let reader = KvReaderDelAdd::from_slice(obkv.get(primary_key_id).unwrap());
let document_id = let document_id =
reader.get(DelAdd::Deletion).or(reader.get(DelAdd::Addition)).unwrap(); reader.get(DelAdd::Deletion).or(reader.get(DelAdd::Addition)).unwrap();
serde_json::from_slice(document_id).unwrap() serde_json::from_slice(document_id).unwrap()
@ -68,15 +68,17 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
/// Extract the finite floats lat and lng from two bytes slices. /// Extract the finite floats lat and lng from two bytes slices.
fn extract_lat_lng( fn extract_lat_lng(
document: &obkv::KvReader<'_, FieldId>, document: &obkv::KvReader<FieldId>,
settings: &InnerIndexSettings, settings: &InnerIndexSettings,
deladd: DelAdd, deladd: DelAdd,
document_id: impl Fn() -> Value, document_id: impl Fn() -> Value,
) -> Result<Option<[f64; 2]>> { ) -> Result<Option<[f64; 2]>> {
match settings.geo_fields_ids { match settings.geo_fields_ids {
Some((lat_fid, lng_fid)) => { Some((lat_fid, lng_fid)) => {
let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd)); let lat =
let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd)); document.get(lat_fid).map(KvReaderDelAdd::from_slice).and_then(|r| r.get(deladd));
let lng =
document.get(lng_fid).map(KvReaderDelAdd::from_slice).and_then(|r| r.get(deladd));
let (lat, lng) = match (lat, lng) { let (lat, lng) = match (lat, lng) {
(Some(lat), Some(lng)) => (lat, lng), (Some(lat), Some(lng)) => (lat, lng),
(Some(_), None) => { (Some(_), None) => {

View File

@ -307,7 +307,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
debug_assert!(from_utf8(external_id_bytes).is_ok()); debug_assert!(from_utf8(external_id_bytes).is_ok());
let docid = DocumentId::from_be_bytes(docid_bytes); let docid = DocumentId::from_be_bytes(docid_bytes);
let obkv = obkv::KvReader::new(value); let obkv = obkv::KvReader::from_slice(value);
key_buffer.clear(); key_buffer.clear();
key_buffer.extend_from_slice(docid_bytes.as_slice()); key_buffer.extend_from_slice(docid_bytes.as_slice());
@ -475,7 +475,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
#[allow(clippy::too_many_arguments)] // feel free to find efficient way to factor arguments #[allow(clippy::too_many_arguments)] // feel free to find efficient way to factor arguments
fn extract_vector_document_diff( fn extract_vector_document_diff(
docid: DocumentId, docid: DocumentId,
obkv: obkv::KvReader<'_, FieldId>, obkv: &obkv::KvReader<FieldId>,
prompt: &Prompt, prompt: &Prompt,
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap), (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
(old, new): (VectorState, VectorState), (old, new): (VectorState, VectorState),
@ -517,7 +517,7 @@ fn extract_vector_document_diff(
// Do we keep this document? // Do we keep this document?
let document_is_kept = obkv let document_is_kept = obkv
.iter() .iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd)) .map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some()); .any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept { if document_is_kept {
@ -553,7 +553,7 @@ fn extract_vector_document_diff(
// Do we keep this document? // Do we keep this document?
let document_is_kept = obkv let document_is_kept = obkv
.iter() .iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd)) .map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some()); .any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept { if document_is_kept {
if embedder_is_manual { if embedder_is_manual {
@ -579,7 +579,7 @@ fn extract_vector_document_diff(
// Do we keep this document? // Do we keep this document?
let document_is_kept = obkv let document_is_kept = obkv
.iter() .iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd)) .map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some()); .any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept { if document_is_kept {
// if the new version of documents has the vectors in the DB, // if the new version of documents has the vectors in the DB,
@ -597,7 +597,7 @@ fn extract_vector_document_diff(
} }
fn regenerate_if_prompt_changed( fn regenerate_if_prompt_changed(
obkv: obkv::KvReader<'_, FieldId>, obkv: &obkv::KvReader<FieldId>,
(old_prompt, new_prompt): (&Prompt, &Prompt), (old_prompt, new_prompt): (&Prompt, &Prompt),
(old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap),
) -> Result<VectorStateDelta> { ) -> Result<VectorStateDelta> {
@ -612,7 +612,7 @@ fn regenerate_if_prompt_changed(
} }
fn regenerate_prompt( fn regenerate_prompt(
obkv: obkv::KvReader<'_, FieldId>, obkv: &obkv::KvReader<FieldId>,
prompt: &Prompt, prompt: &Prompt,
new_fields_ids_map: &FieldsIdsMap, new_fields_ids_map: &FieldsIdsMap,
) -> Result<VectorStateDelta> { ) -> Result<VectorStateDelta> {

View File

@ -58,17 +58,17 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let document_id = u32::from_be_bytes(document_id_bytes); let document_id = u32::from_be_bytes(document_id_bytes);
let fid = u16::from_be_bytes(fid_bytes); let fid = u16::from_be_bytes(fid_bytes);
let del_add_reader = KvReaderDelAdd::new(value); let del_add_reader = KvReaderDelAdd::from_slice(value);
// extract all unique words to remove. // extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) { if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (_pos, word) in KvReaderU16::new(deletion).iter() { for (_pos, word) in KvReaderU16::from_slice(deletion).iter() {
del_words.insert(word.to_vec()); del_words.insert(word.to_vec());
} }
} }
// extract all unique additional words. // extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) { if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (_pos, word) in KvReaderU16::new(addition).iter() { for (_pos, word) in KvReaderU16::from_slice(addition).iter() {
add_words.insert(word.to_vec()); add_words.insert(word.to_vec());
} }
} }
@ -115,7 +115,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
// NOTE: replacing sorters by bitmap merging is less efficient, so, use sorters. // NOTE: replacing sorters by bitmap merging is less efficient, so, use sorters.
while let Some((key, value)) = iter.next()? { while let Some((key, value)) = iter.next()? {
// only keep the value if their is a change to apply in the DB. // only keep the value if their is a change to apply in the DB.
if !is_noop_del_add_obkv(KvReaderDelAdd::new(value)) { if !is_noop_del_add_obkv(KvReaderDelAdd::from_slice(value)) {
word_fid_docids_writer.insert(key, value)?; word_fid_docids_writer.insert(key, value)?;
} }
@ -123,7 +123,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
.map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; .map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
// merge all deletions // merge all deletions
let obkv = KvReaderDelAdd::new(value); let obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = obkv.get(DelAdd::Deletion) { if let Some(value) = obkv.get(DelAdd::Deletion) {
let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid); let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid);
buffer.clear(); buffer.clear();

View File

@ -92,8 +92,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
} }
// deletions // deletions
if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) { if let Some(deletion) = KvReaderDelAdd::from_slice(value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::new(deletion).iter() { for (position, word) in KvReaderU16::from_slice(deletion).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting. // drain the proximity window until the head word is considered close to the word we are inserting.
while del_word_positions.front().map_or(false, |(_w, p)| { while del_word_positions.front().map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
@ -125,8 +125,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
} }
// additions // additions
if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) { if let Some(addition) = KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::new(addition).iter() { for (position, word) in KvReaderU16::from_slice(addition).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting. // drain the proximity window until the head word is considered close to the word we are inserting.
while add_word_positions.front().map_or(false, |(_w, p)| { while add_word_positions.front().map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE index_proximity(*p as u32, position as u32) >= MAX_DISTANCE

View File

@ -60,10 +60,10 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
current_document_id = Some(document_id); current_document_id = Some(document_id);
let del_add_reader = KvReaderDelAdd::new(value); let del_add_reader = KvReaderDelAdd::from_slice(value);
// extract all unique words to remove. // extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) { if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (position, word_bytes) in KvReaderU16::new(deletion).iter() { for (position, word_bytes) in KvReaderU16::from_slice(deletion).iter() {
let position = bucketed_position(position); let position = bucketed_position(position);
del_word_positions.insert((position, word_bytes.to_vec())); del_word_positions.insert((position, word_bytes.to_vec()));
} }
@ -71,7 +71,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
// extract all unique additional words. // extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) { if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (position, word_bytes) in KvReaderU16::new(addition).iter() { for (position, word_bytes) in KvReaderU16::from_slice(addition).iter() {
let position = bucketed_position(position); let position = bucketed_position(position);
add_word_positions.insert((position, word_bytes.to_vec())); add_word_positions.insert((position, word_bytes.to_vec()));
} }

View File

@ -45,8 +45,8 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<
} }
pub fn merge_two_del_add_obkvs( pub fn merge_two_del_add_obkvs(
base: obkv::KvReaderU16<'_>, base: &obkv::KvReaderU16,
update: obkv::KvReaderU16<'_>, update: &obkv::KvReaderU16,
merge_additions: bool, merge_additions: bool,
buffer: &mut Vec<u8>, buffer: &mut Vec<u8>,
) { ) {
@ -66,7 +66,7 @@ pub fn merge_two_del_add_obkvs(
// If merge_additions is false, recreate an obkv keeping the deletions only. // If merge_additions is false, recreate an obkv keeping the deletions only.
value_buffer.clear(); value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer); let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let base_reader = KvReaderDelAdd::new(v); let base_reader = KvReaderDelAdd::from_slice(v);
if let Some(deletion) = base_reader.get(DelAdd::Deletion) { if let Some(deletion) = base_reader.get(DelAdd::Deletion) {
value_writer.insert(DelAdd::Deletion, deletion).unwrap(); value_writer.insert(DelAdd::Deletion, deletion).unwrap();
@ -80,8 +80,8 @@ pub fn merge_two_del_add_obkvs(
// merge deletions and additions. // merge deletions and additions.
value_buffer.clear(); value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer); let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let base_reader = KvReaderDelAdd::new(base); let base_reader = KvReaderDelAdd::from_slice(base);
let update_reader = KvReaderDelAdd::new(update); let update_reader = KvReaderDelAdd::from_slice(update);
// keep newest deletion. // keep newest deletion.
if let Some(deletion) = update_reader if let Some(deletion) = update_reader
@ -131,8 +131,8 @@ fn inner_merge_del_add_obkvs<'a>(
break; break;
} }
let newest = obkv::KvReader::new(&acc); let newest = obkv::KvReader::from_slice(&acc);
let oldest = obkv::KvReader::new(&current[1..]); let oldest = obkv::KvReader::from_slice(&current[1..]);
merge_two_del_add_obkvs(oldest, newest, merge_additions, &mut buffer); merge_two_del_add_obkvs(oldest, newest, merge_additions, &mut buffer);
// we want the result of the merge into our accumulator. // we want the result of the merge into our accumulator.
@ -187,7 +187,7 @@ pub fn merge_deladd_cbo_roaring_bitmaps<'a>(
let mut del_bitmaps_bytes = Vec::new(); let mut del_bitmaps_bytes = Vec::new();
let mut add_bitmaps_bytes = Vec::new(); let mut add_bitmaps_bytes = Vec::new();
for value in values { for value in values {
let obkv = KvReaderDelAdd::new(value); let obkv = KvReaderDelAdd::from_slice(value);
if let Some(bitmap_bytes) = obkv.get(DelAdd::Deletion) { if let Some(bitmap_bytes) = obkv.get(DelAdd::Deletion) {
del_bitmaps_bytes.push(bitmap_bytes); del_bitmaps_bytes.push(bitmap_bytes);
} }
@ -217,7 +217,7 @@ pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
buffer: &'a mut Vec<u8>, buffer: &'a mut Vec<u8>,
) -> Result<Option<&'a [u8]>> { ) -> Result<Option<&'a [u8]>> {
Ok(CboRoaringBitmapCodec::merge_deladd_into( Ok(CboRoaringBitmapCodec::merge_deladd_into(
KvReaderDelAdd::new(deladd_obkv), KvReaderDelAdd::from_slice(deladd_obkv),
previous, previous,
buffer, buffer,
)?) )?)
@ -236,7 +236,7 @@ pub fn merge_deladd_btreeset_string<'a>(
let mut del_set = BTreeSet::new(); let mut del_set = BTreeSet::new();
let mut add_set = BTreeSet::new(); let mut add_set = BTreeSet::new();
for value in values { for value in values {
let obkv = KvReaderDelAdd::new(value); let obkv = KvReaderDelAdd::from_slice(value);
if let Some(bytes) = obkv.get(DelAdd::Deletion) { if let Some(bytes) = obkv.get(DelAdd::Deletion) {
let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap(); let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap();
for value in set { for value in set {

View File

@ -31,14 +31,14 @@ impl<'t> ImmutableObkvs<'t> {
} }
/// Returns the OBKVs identified by the given ID. /// Returns the OBKVs identified by the given ID.
pub fn obkv(&self, docid: DocumentId) -> heed::Result<Option<KvReaderU16<'t>>> { pub fn obkv(&self, docid: DocumentId) -> heed::Result<Option<&'t KvReaderU16>> {
match self match self
.ids .ids
.rank(docid) .rank(docid)
.checked_sub(1) .checked_sub(1)
.and_then(|offset| self.slices.get(offset as usize)) .and_then(|offset| self.slices.get(offset as usize))
{ {
Some(bytes) => Ok(Some(KvReaderU16::new(bytes))), Some(&bytes) => Ok(Some(bytes.into())),
None => Ok(None), None => Ok(None),
} }
} }

View File

@ -278,13 +278,13 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear(); document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8); document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(base_obkv), KvReaderU16::from_slice(base_obkv),
deladd_operation, deladd_operation,
&mut document_sorter_value_buffer, &mut document_sorter_value_buffer,
)?; )?;
self.original_sorter self.original_sorter
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?; .insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
let base_obkv = KvReader::new(base_obkv); let base_obkv = KvReader::from_slice(base_obkv);
if let Some(flattened_obkv) = if let Some(flattened_obkv) =
Self::flatten_from_fields_ids_map(&base_obkv, &mut self.fields_ids_map)? Self::flatten_from_fields_ids_map(&base_obkv, &mut self.fields_ids_map)?
{ {
@ -292,7 +292,7 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear(); document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8); document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(&flattened_obkv), KvReaderU16::from_slice(&flattened_obkv),
deladd_operation, deladd_operation,
&mut document_sorter_value_buffer, &mut document_sorter_value_buffer,
)?; )?;
@ -311,7 +311,7 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear(); document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8); document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(&obkv_buffer), KvReaderU16::from_slice(&obkv_buffer),
DelAddOperation::Addition, DelAddOperation::Addition,
&mut document_sorter_value_buffer, &mut document_sorter_value_buffer,
)?; )?;
@ -319,14 +319,14 @@ impl<'a, 'i> Transform<'a, 'i> {
self.original_sorter self.original_sorter
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?; .insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
let flattened_obkv = KvReader::new(&obkv_buffer); let flattened_obkv = KvReader::from_slice(&obkv_buffer);
if let Some(obkv) = if let Some(obkv) =
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)? Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)?
{ {
document_sorter_value_buffer.clear(); document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8); document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(&obkv), KvReaderU16::from_slice(&obkv),
DelAddOperation::Addition, DelAddOperation::Addition,
&mut document_sorter_value_buffer, &mut document_sorter_value_buffer,
)? )?
@ -519,14 +519,14 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear(); document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Deletion as u8); document_sorter_value_buffer.push(Operation::Deletion as u8);
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(base_obkv), KvReaderU16::from_slice(base_obkv),
DelAddOperation::Deletion, DelAddOperation::Deletion,
document_sorter_value_buffer, document_sorter_value_buffer,
)?; )?;
self.original_sorter.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?; self.original_sorter.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
// flatten it and push it as to delete in the flattened_sorter // flatten it and push it as to delete in the flattened_sorter
let flattened_obkv = KvReader::new(base_obkv); let flattened_obkv = KvReader::from_slice(base_obkv);
if let Some(obkv) = if let Some(obkv) =
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)? Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)?
{ {
@ -534,7 +534,7 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear(); document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Deletion as u8); document_sorter_value_buffer.push(Operation::Deletion as u8);
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(&obkv), KvReaderU16::from_slice(&obkv),
DelAddOperation::Deletion, DelAddOperation::Deletion,
document_sorter_value_buffer, document_sorter_value_buffer,
)?; )?;
@ -552,7 +552,7 @@ impl<'a, 'i> Transform<'a, 'i> {
target = "indexing::transform" target = "indexing::transform"
)] )]
fn flatten_from_fields_ids_map( fn flatten_from_fields_ids_map(
obkv: &KvReader<'_, FieldId>, obkv: &KvReader<FieldId>,
fields_ids_map: &mut FieldsIdsMap, fields_ids_map: &mut FieldsIdsMap,
) -> Result<Option<Vec<u8>>> { ) -> Result<Option<Vec<u8>>> {
if obkv if obkv
@ -720,10 +720,10 @@ impl<'a, 'i> Transform<'a, 'i> {
total_documents: self.documents_count, total_documents: self.documents_count,
}); });
for (key, value) in KvReader::new(val) { for (key, value) in KvReader::from_slice(val) {
let reader = KvReaderDelAdd::new(value); let reader = KvReaderDelAdd::from_slice(value);
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) { match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {} (None, None) => (),
(None, Some(_)) => { (None, Some(_)) => {
// New field // New field
let name = self.fields_ids_map.name(key).ok_or( let name = self.fields_ids_map.name(key).ok_or(
@ -837,7 +837,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// then fill the provided buffers with delta documents using KvWritterDelAdd. /// then fill the provided buffers with delta documents using KvWritterDelAdd.
#[allow(clippy::too_many_arguments)] // need the vectors + fid, feel free to create a struct xo xo #[allow(clippy::too_many_arguments)] // need the vectors + fid, feel free to create a struct xo xo
fn rebind_existing_document( fn rebind_existing_document(
old_obkv: KvReader<'_, FieldId>, old_obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff, settings_diff: &InnerIndexSettingsDiff,
modified_faceted_fields: &HashSet<String>, modified_faceted_fields: &HashSet<String>,
mut injected_vectors: serde_json::Map<String, serde_json::Value>, mut injected_vectors: serde_json::Map<String, serde_json::Value>,
@ -925,7 +925,7 @@ impl<'a, 'i> Transform<'a, 'i> {
} }
let data = obkv_writer.into_inner()?; let data = obkv_writer.into_inner()?;
let obkv = KvReader::<FieldId>::new(&data); let obkv = KvReader::<FieldId>::from_slice(&data);
if let Some(original_obkv_buffer) = original_obkv_buffer { if let Some(original_obkv_buffer) = original_obkv_buffer {
original_obkv_buffer.clear(); original_obkv_buffer.clear();
@ -936,7 +936,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// take the non-flattened version if flatten_from_fields_ids_map returns None. // take the non-flattened version if flatten_from_fields_ids_map returns None.
let mut fields_ids_map = settings_diff.new.fields_ids_map.clone(); let mut fields_ids_map = settings_diff.new.fields_ids_map.clone();
let flattened = Self::flatten_from_fields_ids_map(&obkv, &mut fields_ids_map)?; let flattened = Self::flatten_from_fields_ids_map(&obkv, &mut fields_ids_map)?;
let flattened = flattened.as_deref().map_or(obkv, KvReader::new); let flattened = flattened.as_deref().map_or(obkv, KvReader::from_slice);
flattened_obkv_buffer.clear(); flattened_obkv_buffer.clear();
into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| { into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| {
@ -1173,21 +1173,21 @@ mod test {
kv_writer.insert(0_u8, [0]).unwrap(); kv_writer.insert(0_u8, [0]).unwrap();
let buffer = kv_writer.into_inner().unwrap(); let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(&buffer), KvReaderU16::from_slice(&buffer),
DelAddOperation::Addition, DelAddOperation::Addition,
&mut additive_doc_0, &mut additive_doc_0,
) )
.unwrap(); .unwrap();
additive_doc_0.insert(0, Operation::Addition as u8); additive_doc_0.insert(0, Operation::Addition as u8);
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(&buffer), KvReaderU16::from_slice(&buffer),
DelAddOperation::Deletion, DelAddOperation::Deletion,
&mut deletive_doc_0, &mut deletive_doc_0,
) )
.unwrap(); .unwrap();
deletive_doc_0.insert(0, Operation::Deletion as u8); deletive_doc_0.insert(0, Operation::Deletion as u8);
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(&buffer), KvReaderU16::from_slice(&buffer),
DelAddOperation::DeletionAndAddition, DelAddOperation::DeletionAndAddition,
&mut del_add_doc_0, &mut del_add_doc_0,
) )
@ -1199,7 +1199,7 @@ mod test {
kv_writer.insert(1_u8, [1]).unwrap(); kv_writer.insert(1_u8, [1]).unwrap();
let buffer = kv_writer.into_inner().unwrap(); let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(&buffer), KvReaderU16::from_slice(&buffer),
DelAddOperation::Addition, DelAddOperation::Addition,
&mut additive_doc_1, &mut additive_doc_1,
) )
@ -1212,7 +1212,7 @@ mod test {
kv_writer.insert(1_u8, [1]).unwrap(); kv_writer.insert(1_u8, [1]).unwrap();
let buffer = kv_writer.into_inner().unwrap(); let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv( into_del_add_obkv(
KvReaderU16::new(&buffer), KvReaderU16::from_slice(&buffer),
DelAddOperation::Addition, DelAddOperation::Addition,
&mut additive_doc_0_1, &mut additive_doc_0_1,
) )

View File

@ -162,7 +162,7 @@ pub(crate) fn write_typed_chunk_into_index(
let mut vectors_buffer = Vec::new(); let mut vectors_buffer = Vec::new();
while let Some((key, reader)) = iter.next()? { while let Some((key, reader)) = iter.next()? {
let mut writer: KvWriter<_, FieldId> = KvWriter::memory(); let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
let reader: KvReader<'_, FieldId> = KvReader::new(reader); let reader: &KvReader<FieldId> = reader.into();
let (document_id_bytes, external_id_bytes) = try_split_array_at(key) let (document_id_bytes, external_id_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCUMENTS) })?; .ok_or(SerializationError::Decoding { db_name: Some(DOCUMENTS) })?;
@ -170,7 +170,7 @@ pub(crate) fn write_typed_chunk_into_index(
let external_id = std::str::from_utf8(external_id_bytes)?; let external_id = std::str::from_utf8(external_id_bytes)?;
for (field_id, value) in reader.iter() { for (field_id, value) in reader.iter() {
let del_add_reader = KvReaderDelAdd::new(value); let del_add_reader = KvReaderDelAdd::from_slice(value);
if let Some(addition) = del_add_reader.get(DelAdd::Addition) { if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
let addition = if vectors_fid == Some(field_id) { let addition = if vectors_fid == Some(field_id) {
@ -529,7 +529,7 @@ pub(crate) fn write_typed_chunk_into_index(
index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>(); index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
let mut iter = merger.into_stream_merger_iter()?; let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? { while let Some((key, value)) = iter.next()? {
let reader = KvReaderDelAdd::new(value); let reader = KvReaderDelAdd::from_slice(value);
if valid_lmdb_key(key) { if valid_lmdb_key(key) {
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) { match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {} (None, None) => {}
@ -563,7 +563,7 @@ pub(crate) fn write_typed_chunk_into_index(
index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>(); index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
let mut iter = merger.into_stream_merger_iter()?; let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? { while let Some((key, value)) = iter.next()? {
let reader = KvReaderDelAdd::new(value); let reader = KvReaderDelAdd::from_slice(value);
if valid_lmdb_key(key) { if valid_lmdb_key(key) {
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) { match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {} (None, None) => {}
@ -600,7 +600,7 @@ pub(crate) fn write_typed_chunk_into_index(
// convert the key back to a u32 (4 bytes) // convert the key back to a u32 (4 bytes)
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap(); let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
let deladd_obkv = KvReaderDelAdd::new(value); let deladd_obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = deladd_obkv.get(DelAdd::Deletion) { if let Some(value) = deladd_obkv.get(DelAdd::Deletion) {
let geopoint = extract_geo_point(value, docid); let geopoint = extract_geo_point(value, docid);
rtree.remove(&geopoint); rtree.remove(&geopoint);
@ -723,7 +723,7 @@ pub(crate) fn write_typed_chunk_into_index(
let (left, _index) = try_split_array_at(key).unwrap(); let (left, _index) = try_split_array_at(key).unwrap();
let docid = DocumentId::from_be_bytes(left); let docid = DocumentId::from_be_bytes(left);
let vector_deladd_obkv = KvReaderDelAdd::new(value); let vector_deladd_obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = vector_deladd_obkv.get(DelAdd::Deletion) { if let Some(value) = vector_deladd_obkv.get(DelAdd::Deletion) {
let vector: Vec<f32> = pod_collect_to_vec(value); let vector: Vec<f32> = pod_collect_to_vec(value);
@ -852,7 +852,7 @@ where
if valid_lmdb_key(key) { if valid_lmdb_key(key) {
let (proximity_to_insert, word1, word2) = let (proximity_to_insert, word1, word2) =
U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?; U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
let data_to_insert = match KvReaderDelAdd::new(value).get(DelAdd::Addition) { let data_to_insert = match KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
Some(value) => { Some(value) => {
CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)? CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
} }

View File

@ -1,4 +1,3 @@
use core::slice::SlicePattern;
use std::fs::File; use std::fs::File;
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender}; use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
@ -44,11 +43,11 @@ impl KeyValueEntry {
} }
pub fn key(&self) -> &[u8] { pub fn key(&self) -> &[u8] {
&self.data.as_slice()[..self.key_length] &self.data.as_ref()[..self.key_length]
} }
pub fn value(&self) -> &[u8] { pub fn value(&self) -> &[u8] {
&self.data.as_slice()[self.key_length..] &self.data.as_ref()[self.key_length..]
} }
} }

View File

@ -1,5 +1,5 @@
use heed::RoTxn; use heed::RoTxn;
use obkv2::KvReader; use obkv::KvReader;
use super::indexer::KvReaderFieldId; use super::indexer::KvReaderFieldId;
use crate::{DocumentId, FieldId}; use crate::{DocumentId, FieldId};

View File

@ -4,7 +4,8 @@ mod channel;
mod items_pool; mod items_pool;
mod merge; mod merge;
mod global_fields_ids_map; /// TODO remove this
// mod global_fields_ids_map;
pub type StdResult<T, E> = std::result::Result<T, E>; pub type StdResult<T, E> = std::result::Result<T, E>;
@ -27,8 +28,7 @@ mod indexer {
use super::channel::{ use super::channel::{
extractors_merger_channels, merger_writer_channels, EntryOperation, extractors_merger_channels, merger_writer_channels, EntryOperation,
ExtractorsMergerChannels, MergerReceiver, MergerSender, MergerWriterChannels, ExtractorsMergerChannels, MergerReceiver, MergerSender, WriterOperation,
WriterOperation,
}; };
use super::document_change::{Deletion, DocumentChange, Insertion, Update}; use super::document_change::{Deletion, DocumentChange, Insertion, Update};
use super::items_pool::ItemsPool; use super::items_pool::ItemsPool;
@ -44,10 +44,10 @@ mod indexer {
Result, UserError, Result, UserError,
}; };
pub type KvReaderFieldId = obkv2::KvReader<FieldId>; pub type KvReaderFieldId = obkv::KvReader<FieldId>;
pub type KvReaderDelAdd = obkv2::KvReader<DelAdd>; pub type KvReaderDelAdd = obkv::KvReader<DelAdd>;
pub type KvWriterFieldId<W> = obkv2::KvWriter<W, FieldId>; pub type KvWriterFieldId<W> = obkv::KvWriter<W, FieldId>;
pub type KvWriterDelAdd<W> = obkv2::KvWriter<W, DelAdd>; pub type KvWriterDelAdd<W> = obkv::KvWriter<W, DelAdd>;
pub struct DocumentOperationIndexer { pub struct DocumentOperationIndexer {
operations: Vec<Payload>, operations: Vec<Payload>,
@ -105,7 +105,7 @@ mod indexer {
rtxn: &'a RoTxn, rtxn: &'a RoTxn,
mut fields_ids_map: FieldsIdsMap, mut fields_ids_map: FieldsIdsMap,
primary_key: &'a PrimaryKey<'a>, primary_key: &'a PrimaryKey<'a>,
) -> Result<impl ParallelIterator<Item = DocumentChange> + 'a> { ) -> Result<impl ParallelIterator<Item = Result<Option<DocumentChange>>> + 'a> {
let documents_ids = index.documents_ids(rtxn)?; let documents_ids = index.documents_ids(rtxn)?;
let mut available_docids = AvailableDocumentsIds::from_documents_ids(&documents_ids); let mut available_docids = AvailableDocumentsIds::from_documents_ids(&documents_ids);
let mut docids_version_offsets = HashMap::<String, _>::new(); let mut docids_version_offsets = HashMap::<String, _>::new();
@ -198,7 +198,7 @@ mod indexer {
} }
let items = Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from))); let items = Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from)));
docids_version_offsets.into_par_iter().map_with( Ok(docids_version_offsets.into_par_iter().map_with(
items, items,
|context_pool, (external_docid, (internal_docid, operations))| { |context_pool, (external_docid, (internal_docid, operations))| {
context_pool.with(|rtxn| match self.method { context_pool.with(|rtxn| match self.method {
@ -221,58 +221,7 @@ mod indexer {
), ),
}) })
}, },
); ))
Ok(vec![].into_par_iter())
// let mut file_count: usize = 0;
// for result in WalkDir::new(update_files_path)
// // TODO handle errors
// .sort_by_key(|entry| entry.metadata().unwrap().created().unwrap())
// {
// let entry = result?;
// if !entry.file_type().is_file() {
// continue;
// }
// let file = File::open(entry.path())
// .with_context(|| format!("While opening {}", entry.path().display()))?;
// let content = unsafe {
// Mmap::map(&file)
// .map(Arc::new)
// .with_context(|| format!("While memory mapping {}", entry.path().display()))?
// };
// let reader =
// crate::documents::DocumentsBatchReader::from_reader(Cursor::new(content.as_ref()))?;
// let (mut batch_cursor, batch_index) = reader.into_cursor_and_fields_index();
// batch_index.iter().for_each(|(_, name)| {
// fields_ids_map.insert(name);
// });
// let mut offset: u32 = 0;
// while let Some(document) = batch_cursor.next_document()? {
// let primary_key = batch_index.id(primary_key).unwrap();
// let document_id = document.get(primary_key).unwrap();
// let document_id = std::str::from_utf8(document_id).unwrap();
// let document_offset = DocumentOffset { content: content.clone(), offset };
// match docids_version_offsets.get_mut(document_id) {
// None => {
// let docid = match maindb.external_documents_ids.get(rtxn, document_id)? {
// Some(docid) => docid,
// None => sequential_docids.next().context("no more available docids")?,
// };
// docids_version_offsets
// .insert(document_id.into(), (docid, smallvec![document_offset]));
// }
// Some((_, offsets)) => offsets.push(document_offset),
// }
// offset += 1;
// p.inc(1);
// }
// file_count += 1;
// }
} }
} }

View File

@ -109,14 +109,13 @@ impl ParsedVectorsDiff {
pub fn new( pub fn new(
docid: DocumentId, docid: DocumentId,
embedders_configs: &[IndexEmbeddingConfig], embedders_configs: &[IndexEmbeddingConfig],
documents_diff: KvReader<'_, FieldId>, documents_diff: &KvReader<FieldId>,
old_vectors_fid: Option<FieldId>, old_vectors_fid: Option<FieldId>,
new_vectors_fid: Option<FieldId>, new_vectors_fid: Option<FieldId>,
) -> Result<Self, Error> { ) -> Result<Self, Error> {
let mut old = match old_vectors_fid let mut old = match old_vectors_fid
.and_then(|vectors_fid| documents_diff.get(vectors_fid)) .and_then(|vectors_fid| documents_diff.get(vectors_fid))
.map(KvReaderDelAdd::new) .map(|bytes| to_vector_map(bytes.into(), DelAdd::Deletion))
.map(|obkv| to_vector_map(obkv, DelAdd::Deletion))
.transpose() .transpose()
{ {
Ok(del) => del, Ok(del) => del,
@ -143,8 +142,7 @@ impl ParsedVectorsDiff {
let Some(bytes) = documents_diff.get(new_vectors_fid) else { let Some(bytes) = documents_diff.get(new_vectors_fid) else {
break 'new VectorsState::NoVectorsFieldInDocument; break 'new VectorsState::NoVectorsFieldInDocument;
}; };
let obkv = KvReaderDelAdd::new(bytes); match to_vector_map(bytes.into(), DelAdd::Addition)? {
match to_vector_map(obkv, DelAdd::Addition)? {
Some(new) => VectorsState::Vectors(new), Some(new) => VectorsState::Vectors(new),
None => VectorsState::NoVectorsFieldInDocument, None => VectorsState::NoVectorsFieldInDocument,
} }
@ -239,7 +237,7 @@ impl Error {
} }
fn to_vector_map( fn to_vector_map(
obkv: KvReaderDelAdd<'_>, obkv: &KvReaderDelAdd,
side: DelAdd, side: DelAdd,
) -> Result<Option<BTreeMap<String, Vectors>>, Error> { ) -> Result<Option<BTreeMap<String, Vectors>>, Error> {
Ok(if let Some(value) = obkv.get(side) { Ok(if let Some(value) = obkv.get(side) {