Replace obkv with the temporary new version of it

Clément Renault 2024-08-29 19:20:10 +02:00
parent 27df9e6c73
commit 0c57cf7565
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
37 changed files with 142 additions and 223 deletions
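
This commit swaps the crates.io obkv 0.2.2 for Kerollmops' unsized-kvreader branch, on which KvReader<K> is an unsized view type that only ever exists behind a reference, instead of a lifetime-parameterized struct built by value. Across the diff the migration is mechanical: KvReader<'a, K> becomes &'a KvReader<K>, KvReader::new(bytes) becomes KvReader::from_slice(bytes) (or a plain bytes.into()), and the temporary obkv2 alias used during the transition is removed. A minimal sketch of the API difference, assuming the branch exposes from_slice as the hunks below show:

    use obkv::KvReaderU16;

    // obkv 0.2.2: the reader was a value carrying the borrow's lifetime.
    // let reader: KvReaderU16<'_> = KvReaderU16::new(bytes);

    // unsized-kvreader branch: the reader is a thin unsized view over the
    // bytes, handled behind a reference, so storage layers can hand it out
    // without constructing a wrapper value.
    fn first_field(bytes: &[u8]) -> Option<&[u8]> {
        let reader: &KvReaderU16 = KvReaderU16::from_slice(bytes);
        reader.get(0)
    }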

Cargo.lock (generated)

@@ -3434,7 +3434,7 @@ dependencies = [
"mimalloc",
"mime",
"num_cpus",
"obkv 0.2.2",
"obkv",
"once_cell",
"ordered-float",
"parking_lot",
@@ -3601,8 +3601,7 @@ dependencies = [
"memchr",
"memmap2",
"mimalloc",
"obkv 0.2.2",
"obkv 0.3.0",
"obkv",
"once_cell",
"ordered-float",
"rand",
@@ -3849,16 +3848,10 @@ dependencies = [
"memchr",
]
[[package]]
name = "obkv"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2e27bcfe835a379d32352112f6b8dbae2d99d16a5fff42abe6e5ba5386c1e5a"
[[package]]
name = "obkv"
version = "0.3.0"
source = "git+https://github.com/kerollmops/obkv?branch=unsized-kvreader#5289a6658cd471f4212c1edc1a40b2a3c3d11fe0"
source = "git+https://github.com/kerollmops/obkv?branch=unsized-kvreader#9c2900d106fa84e7079b288e7f7c366ec7cae948"
[[package]]
name = "once_cell"


@@ -57,7 +57,7 @@ meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.2"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
once_cell = "1.19.0"
ordered-float = "4.2.1"
parking_lot = "0.12.3"


@@ -1247,7 +1247,7 @@ impl<'a> HitMaker<'a> {
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
// First generate a document with all the displayed fields
let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?;
let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, &obkv)?;
let add_vectors_fid =
self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);


@@ -12,6 +12,7 @@ readme.workspace = true
license.workspace = true
[dependencies]
big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.1"
@@ -44,8 +45,7 @@ levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
lru = "0.12.3"
memchr = "2.5.0"
memmap2 = "0.9.4"
obkv = "0.2.2"
obkv2 = { package = "obkv", git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
once_cell = "1.19.0"
ordered-float = "4.2.1"
rayon = "1.10.0"
@@ -94,7 +94,6 @@ rayon-par-bridge = "0.1.0"
[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }
big_s = "1.0.2"
insta = "1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"


@@ -69,7 +69,7 @@ impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
#[derive(Debug, Clone)]
pub struct EnrichedDocument<'a> {
pub document: KvReader<'a, FieldId>,
pub document: &'a KvReader<FieldId>,
pub document_id: DocumentId,
}


@@ -27,7 +27,7 @@ use crate::{FieldId, Object, Result};
const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();
/// Helper function to convert an obkv reader into a JSON object.
pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
obkv.iter()
.map(|(field_id, value)| {
let field_name = index
@@ -76,7 +76,7 @@ impl DocumentsBatchIndex {
self.0.get_by_right(name).cloned()
}
pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> {
pub fn recreate_json(&self, document: &obkv::KvReaderU16) -> Result<Object> {
let mut map = Object::new();
for (k, v) in document.iter() {


@@ -52,7 +52,7 @@ impl<'a> PrimaryKey<'a> {
pub fn document_id(
&self,
document: &obkv::KvReader<'_, FieldId>,
document: &obkv::KvReader<FieldId>,
fields: &impl FieldIdMapper,
) -> Result<StdResult<String, DocumentIdExtractionError>> {
match self {


@@ -76,11 +76,9 @@ impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
pub fn get(
&mut self,
offset: u32,
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> {
) -> Result<Option<&KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_key_equal_to(offset.to_be_bytes())? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
Ok(Some(KvReader::new(value)))
}
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => Ok(Some(value.into())),
_otherwise => Ok(None),
}
}
@@ -89,11 +87,9 @@ impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
/// `next_document` advance the document reader until all the documents have been read.
pub fn next_document(
&mut self,
) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> {
) -> Result<Option<&KvReader<FieldId>>, DocumentsBatchCursorError> {
match self.cursor.move_on_next()? {
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
Ok(Some(KvReader::new(value)))
}
Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => Ok(Some(value.into())),
_otherwise => Ok(None),
}
}
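
For reference, a typical consumer of these new signatures drains the cursor and borrows each document straight out of its internal buffer. A sketch assuming a DocumentsBatchReader named reader (error handling elided):

    let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
    while let Some(document) = cursor.next_document()? {
        // document is a &KvReader<FieldId> borrowing the cursor's buffer.
        let object = obkv_to_object(document, &fields_index)?;
        println!("{object:?}");
    }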


@@ -6,10 +6,10 @@ use obkv::{KvReaderU16, KvWriterU16};
pub struct ObkvCodec;
impl<'a> heed::BytesDecode<'a> for ObkvCodec {
type DItem = KvReaderU16<'a>;
type DItem = &'a KvReaderU16;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(KvReaderU16::new(bytes))
Ok(KvReaderU16::from_slice(bytes))
}
}
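
This codec is where the unsized reader pays off with heed: bytes_decode is now a zero-copy cast, so a lookup returns a reader that borrows the LMDB page directly. A sketch of what a database typed with ObkvCodec yields, assuming a documents database handle and an open read transaction as elsewhere in this commit:

    // No allocation or copy: the &KvReaderU16 borrows transaction-owned memory.
    let obkv: Option<&KvReaderU16> = documents.get(rtxn, &docid)?;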


@@ -122,7 +122,7 @@ impl CboRoaringBitmapCodec {
/// Merges a DelAdd delta into a CboRoaringBitmap.
pub fn merge_deladd_into<'a>(
deladd: KvReaderDelAdd<'_>,
deladd: &KvReaderDelAdd,
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> io::Result<Option<&'a [u8]>> {


@@ -1252,7 +1252,7 @@ impl Index {
/* documents */
/// Returns a document by using the document id.
pub fn document<'t>(&self, rtxn: &'t RoTxn, id: DocumentId) -> Result<obkv::KvReaderU16<'t>> {
pub fn document<'t>(&self, rtxn: &'t RoTxn, id: DocumentId) -> Result<&'t obkv::KvReaderU16> {
self.documents
.get(rtxn, &id)?
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })
@@ -1264,7 +1264,7 @@
&'a self,
rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
Ok(ids.into_iter().map(move |id| {
let kv = self
.documents
@@ -1279,7 +1279,7 @@
&self,
rtxn: &'t RoTxn<'t>,
ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
) -> Result<Vec<(DocumentId, &'t obkv::KvReaderU16)>> {
self.iter_documents(rtxn, ids)?.collect()
}
@@ -1287,7 +1287,7 @@
pub fn all_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn<'t>,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
) -> Result<impl Iterator<Item = Result<(DocumentId, &'t obkv::KvReaderU16)>> + 'a> {
self.iter_documents(rtxn, self.documents_ids(rtxn)?)
}


@@ -214,7 +214,7 @@ pub fn bucketed_position(relative: u16) -> u16 {
pub fn obkv_to_json(
displayed_fields: &[FieldId],
fields_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16<'_>,
obkv: &obkv::KvReaderU16,
) -> Result<Object> {
displayed_fields
.iter()
@@ -232,10 +232,7 @@ pub fn obkv_to_json(
}
/// Transform every field of a raw obkv store into a JSON Object.
pub fn all_obkv_to_json(
obkv: obkv::KvReaderU16<'_>,
fields_ids_map: &FieldsIdsMap,
) -> Result<Object> {
pub fn all_obkv_to_json(obkv: &obkv::KvReaderU16, fields_ids_map: &FieldsIdsMap) -> Result<Object> {
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
obkv_to_json(all_keys.as_slice(), fields_ids_map, obkv)
}
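
End to end, reading a stored document back as JSON now flows through borrowed readers only. A sketch using the accessors touched by this commit, assuming an open milli Index and a known internal docid:

    let rtxn = index.read_txn()?;
    let fields_ids_map = index.fields_ids_map(&rtxn)?;
    let obkv: &obkv::KvReaderU16 = index.document(&rtxn, docid)?;
    let object = all_obkv_to_json(obkv, &fields_ids_map)?;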


@@ -30,13 +30,13 @@ impl ParsedValue {
impl<'a> Document<'a> {
pub fn new(
data: obkv::KvReaderU16<'a>,
data: &'a obkv::KvReaderU16,
side: DelAdd,
inverted_field_map: &'a FieldsIdsMap,
) -> Self {
let mut out_data = BTreeMap::new();
for (fid, raw) in data {
let obkv = KvReaderDelAdd::new(raw);
let obkv = KvReaderDelAdd::from_slice(raw);
let Some(raw) = obkv.get(side) else {
continue;
};


@@ -91,7 +91,7 @@ impl Prompt {
pub fn render(
&self,
document: obkv::KvReaderU16<'_>,
document: &obkv::KvReaderU16,
side: DelAdd,
field_id_map: &FieldsIdsMap,
) -> Result<String, RenderPromptError> {


@@ -1,7 +1,7 @@
use obkv::Key;
pub type KvWriterDelAdd<W> = obkv::KvWriter<W, DelAdd>;
pub type KvReaderDelAdd<'a> = obkv::KvReader<'a, DelAdd>;
pub type KvReaderDelAdd = obkv::KvReader<DelAdd>;
/// DelAdd defines the new value to add in the database and old value to delete from the database.
///
@@ -30,31 +30,13 @@ impl Key for DelAdd {
}
}
// TODO remove this implementation
impl obkv2::Key for DelAdd {
const BYTES_SIZE: usize = std::mem::size_of::<DelAdd>();
type BYTES = [u8; <Self as obkv2::Key>::BYTES_SIZE];
fn to_be_bytes(&self) -> Self::BYTES {
u8::to_be_bytes(*self as u8)
}
fn from_be_bytes(array: Self::BYTES) -> Self {
match u8::from_be_bytes(array) {
0 => Self::Deletion,
1 => Self::Addition,
otherwise => unreachable!("DelAdd has only 2 variants, unknown variant: {}", otherwise),
}
}
}
/// Creates a Kv<K, Kv<DelAdd, value>> from Kv<K, value>
///
/// Deletion: put all the values under DelAdd::Deletion
/// Addition: put all the values under DelAdd::Addition,
/// DeletionAndAddition: put all the values under DelAdd::Deletion and DelAdd::Addition,
pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
reader: obkv::KvReader<'_, K>,
reader: &obkv::KvReader<K>,
operation: DelAddOperation,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
@@ -64,7 +46,7 @@ pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
/// Akin to the [into_del_add_obkv] function but lets you
/// conditionally define the `DelAdd` variant based on the obkv key.
pub fn into_del_add_obkv_conditional_operation<K, F>(
reader: obkv::KvReader<'_, K>,
reader: &obkv::KvReader<K>,
buffer: &mut Vec<u8>,
operation: F,
) -> std::io::Result<()>
@@ -104,8 +86,8 @@ pub enum DelAddOperation {
/// putting each deletion obkv's keys under an DelAdd::Deletion
/// and putting each addition obkv's keys under an DelAdd::Addition
pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
deletion: &obkv::KvReader<'_, K>,
addition: &obkv::KvReader<'_, K>,
deletion: &obkv::KvReader<K>,
addition: &obkv::KvReader<K>,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
use itertools::merge_join_by;
@ -139,7 +121,7 @@ pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
writer.finish()
}
pub fn is_noop_del_add_obkv(del_add: KvReaderDelAdd<'_>) -> bool {
pub fn is_noop_del_add_obkv(del_add: &KvReaderDelAdd) -> bool {
del_add.get(DelAdd::Deletion) == del_add.get(DelAdd::Addition)
}
@@ -154,5 +136,5 @@ pub fn deladd_serialize_add_side<'a>(
obkv: &'a [u8],
_buffer: &mut Vec<u8>,
) -> crate::Result<&'a [u8]> {
Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default())
Ok(KvReaderDelAdd::from_slice(obkv).get(DelAdd::Addition).unwrap_or_default())
}
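
As a usage sketch of the helpers above (crate-internal names as they appear in this diff): build a one-field document, wrap each value under DelAdd::Addition, then read it back.

    // KV<u16, value>: a document with a single field 0.
    let mut writer = obkv::KvWriterU16::memory();
    writer.insert(0u16, b"kefir")?;
    let document = writer.into_inner()?;

    // Re-wrap it as KV<u16, KV<DelAdd, value>>.
    let mut buffer = Vec::new();
    into_del_add_obkv(
        obkv::KvReaderU16::from_slice(&document),
        DelAddOperation::Addition,
        &mut buffer,
    )?;

    // Each field value is now itself an obkv keyed by DelAdd.
    let outer = obkv::KvReaderU16::from_slice(&buffer);
    let field = KvReaderDelAdd::from_slice(outer.get(0).unwrap());
    assert!(field.get(DelAdd::Addition).is_some());
    assert!(field.get(DelAdd::Deletion).is_none());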


@@ -135,7 +135,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
if !valid_lmdb_key(key) {
continue;
}
let value = KvReaderDelAdd::new(value);
let value = KvReaderDelAdd::from_slice(value);
// DB is empty, it is safe to ignore Del operations
let Some(value) = value.get(DelAdd::Addition) else {
@@ -161,7 +161,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
continue;
}
let value = KvReaderDelAdd::new(value);
let value = KvReaderDelAdd::from_slice(value);
// the value is a CboRoaringBitmap, but I still need to prepend the
// group size for level 0 (= 1) to it


@@ -109,7 +109,7 @@ impl FacetsUpdateIncremental {
}
current_field_id = Some(key.field_id);
let value = KvReader::new(value);
let value = KvReader::from_slice(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)


@@ -187,7 +187,7 @@ fn index_facet_search(
) -> Result<()> {
let mut iter = normalized_delta_data.into_stream_merger_iter()?;
while let Some((key_bytes, delta_bytes)) = iter.next()? {
let deladd_reader = KvReaderDelAdd::new(delta_bytes);
let deladd_reader = KvReaderDelAdd::from_slice(delta_bytes);
let database_set = index
.facet_id_normalized_string_strings


@@ -145,7 +145,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document)
target = "indexing::documents")]
fn fetch_or_generate_document_id(
document: &obkv::KvReader<'_, FieldId>,
document: &obkv::KvReader<FieldId>,
documents_batch_index: &DocumentsBatchIndex,
primary_key: PrimaryKey<'_>,
autogenerate_docids: bool,


@@ -80,7 +80,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value);
let obkv = KvReader::<FieldId>::from_slice(value);
// if the searchable fields didn't change, skip the searchable indexing for this document.
if !force_reindexing && !searchable_fields_changed(&obkv, settings_diff) {
@@ -126,13 +126,13 @@
// transforming two KV<FieldId, KV<u16, String>> into one KV<FieldId, KV<DelAdd, KV<u16, String>>>
value_buffer.clear();
del_add_from_two_obkvs(
&KvReader::<FieldId>::new(del_obkv),
&KvReader::<FieldId>::new(add_obkv),
&KvReader::<FieldId>::from_slice(del_obkv),
&KvReader::<FieldId>::from_slice(add_obkv),
&mut value_buffer,
)?;
// write each KV<DelAdd, KV<u16, String>> into the sorter, field by field.
let obkv = KvReader::<FieldId>::new(&value_buffer);
let obkv = KvReader::<FieldId>::from_slice(&value_buffer);
for (field_id, value) in obkv.iter() {
key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(&field_id.to_be_bytes());
@@ -146,13 +146,13 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
/// Check if any searchable fields of a document changed.
fn searchable_fields_changed(
obkv: &KvReader<'_, FieldId>,
obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
) -> bool {
let searchable_fields = &settings_diff.new.searchable_fields_ids;
for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.contains(&field_id) {
let del_add = KvReaderDelAdd::new(field_bytes);
let del_add = KvReaderDelAdd::from_slice(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
(None, None) => (),
@@ -189,7 +189,7 @@ fn tokenizer_builder<'a>(
/// Extract words mapped with their positions of a document.
fn tokens_from_document<'a>(
obkv: &KvReader<'a, FieldId>,
obkv: &'a KvReader<FieldId>,
settings: &InnerIndexSettings,
tokenizer: &Tokenizer<'_>,
max_positions_per_attributes: u32,
@@ -202,7 +202,7 @@
// if field is searchable.
if settings.searchable_fields_ids.contains(&field_id) {
// extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) {
// parse json.
let value =
serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;


@@ -45,7 +45,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
for (deladd_key, _) in KvReaderDelAdd::new(deladd_obkv_bytes).iter() {
for (deladd_key, _) in KvReaderDelAdd::from_slice(deladd_obkv_bytes).iter() {
obkv.insert(deladd_key, document_id.to_ne_bytes())?;
}
obkv.finish()?;


@@ -75,7 +75,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let mut buffer = Vec::new();
let mut cursor = docid_fid_facet_string.into_cursor()?;
while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes);
let deladd_reader = KvReaderDelAdd::from_slice(deladd_original_value_bytes);
let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
&& deladd_reader.get(DelAdd::Addition).is_some();
@@ -163,7 +163,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let mut buffer = Vec::new();
let mut cursor = docid_fid_facet_string.into_cursor()?;
while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes);
let deladd_reader = KvReaderDelAdd::from_slice(deladd_original_value_bytes);
let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
&& deladd_reader.get(DelAdd::Addition).is_some();


@@ -83,10 +83,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids {
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
let obkv = obkv::KvReader::from_slice(value);
let get_document_json_value = move |field_id, side| {
obkv.get(field_id)
.map(KvReaderDelAdd::new)
.map(KvReaderDelAdd::from_slice)
.and_then(|kv| kv.get(side))
.map(from_slice)
.transpose()


@@ -45,19 +45,23 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let del_add_reader = KvReaderDelAdd::new(value);
let del_add_reader = KvReaderDelAdd::from_slice(value);
let deletion = del_add_reader
// get deleted words
.get(DelAdd::Deletion)
// count deleted words
.map(|deletion| KvReaderU16::new(deletion).iter().take(MAX_COUNTED_WORDS + 1).count())
.map(|deletion| {
KvReaderU16::from_slice(deletion).iter().take(MAX_COUNTED_WORDS + 1).count()
})
// keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
let addition = del_add_reader
// get added words
.get(DelAdd::Addition)
// count added words
.map(|addition| KvReaderU16::new(addition).iter().take(MAX_COUNTED_WORDS + 1).count())
.map(|addition| {
KvReaderU16::from_slice(addition).iter().take(MAX_COUNTED_WORDS + 1).count()
})
// keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS);


@@ -29,11 +29,11 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
let obkv = obkv::KvReader::from_slice(value);
// since we only need the primary key when we throw an error
// we create this getter to lazily get it when needed
let document_id = || -> Value {
let reader = KvReaderDelAdd::new(obkv.get(primary_key_id).unwrap());
let reader = KvReaderDelAdd::from_slice(obkv.get(primary_key_id).unwrap());
let document_id =
reader.get(DelAdd::Deletion).or(reader.get(DelAdd::Addition)).unwrap();
serde_json::from_slice(document_id).unwrap()
@@ -68,15 +68,17 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
/// Extract the finite floats lat and lng from two bytes slices.
fn extract_lat_lng(
document: &obkv::KvReader<'_, FieldId>,
document: &obkv::KvReader<FieldId>,
settings: &InnerIndexSettings,
deladd: DelAdd,
document_id: impl Fn() -> Value,
) -> Result<Option<[f64; 2]>> {
match settings.geo_fields_ids {
Some((lat_fid, lng_fid)) => {
let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let lat =
document.get(lat_fid).map(KvReaderDelAdd::from_slice).and_then(|r| r.get(deladd));
let lng =
document.get(lng_fid).map(KvReaderDelAdd::from_slice).and_then(|r| r.get(deladd));
let (lat, lng) = match (lat, lng) {
(Some(lat), Some(lng)) => (lat, lng),
(Some(_), None) => {


@@ -307,7 +307,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
debug_assert!(from_utf8(external_id_bytes).is_ok());
let docid = DocumentId::from_be_bytes(docid_bytes);
let obkv = obkv::KvReader::new(value);
let obkv = obkv::KvReader::from_slice(value);
key_buffer.clear();
key_buffer.extend_from_slice(docid_bytes.as_slice());
@@ -475,7 +475,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
#[allow(clippy::too_many_arguments)] // feel free to find efficient way to factor arguments
fn extract_vector_document_diff(
docid: DocumentId,
obkv: obkv::KvReader<'_, FieldId>,
obkv: &obkv::KvReader<FieldId>,
prompt: &Prompt,
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
(old, new): (VectorState, VectorState),
@@ -517,7 +517,7 @@ fn extract_vector_document_diff(
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
@@ -553,7 +553,7 @@ fn extract_vector_document_diff(
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
if embedder_is_manual {
@@ -579,7 +579,7 @@ fn extract_vector_document_diff(
// Do we keep this document?
let document_is_kept = obkv
.iter()
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
.map(|(_, deladd)| KvReaderDelAdd::from_slice(deladd))
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
if document_is_kept {
// if the new version of documents has the vectors in the DB,
@@ -597,7 +597,7 @@ fn extract_vector_document_diff(
}
fn regenerate_if_prompt_changed(
obkv: obkv::KvReader<'_, FieldId>,
obkv: &obkv::KvReader<FieldId>,
(old_prompt, new_prompt): (&Prompt, &Prompt),
(old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap),
) -> Result<VectorStateDelta> {
@@ -612,7 +612,7 @@ fn regenerate_if_prompt_changed(
}
fn regenerate_prompt(
obkv: obkv::KvReader<'_, FieldId>,
obkv: &obkv::KvReader<FieldId>,
prompt: &Prompt,
new_fields_ids_map: &FieldsIdsMap,
) -> Result<VectorStateDelta> {


@@ -58,17 +58,17 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let document_id = u32::from_be_bytes(document_id_bytes);
let fid = u16::from_be_bytes(fid_bytes);
let del_add_reader = KvReaderDelAdd::new(value);
let del_add_reader = KvReaderDelAdd::from_slice(value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (_pos, word) in KvReaderU16::new(deletion).iter() {
for (_pos, word) in KvReaderU16::from_slice(deletion).iter() {
del_words.insert(word.to_vec());
}
}
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (_pos, word) in KvReaderU16::new(addition).iter() {
for (_pos, word) in KvReaderU16::from_slice(addition).iter() {
add_words.insert(word.to_vec());
}
}
@@ -115,7 +115,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
// NOTE: replacing sorters by bitmap merging is less efficient, so, use sorters.
while let Some((key, value)) = iter.next()? {
// only keep the value if their is a change to apply in the DB.
if !is_noop_del_add_obkv(KvReaderDelAdd::new(value)) {
if !is_noop_del_add_obkv(KvReaderDelAdd::from_slice(value)) {
word_fid_docids_writer.insert(key, value)?;
}
@@ -123,7 +123,7 @@
.map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
// merge all deletions
let obkv = KvReaderDelAdd::new(value);
let obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = obkv.get(DelAdd::Deletion) {
let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid);
buffer.clear();


@@ -92,8 +92,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
}
// deletions
if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::new(deletion).iter() {
if let Some(deletion) = KvReaderDelAdd::from_slice(value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::from_slice(deletion).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while del_word_positions.front().map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
@@ -125,8 +125,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
}
// additions
if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::new(addition).iter() {
if let Some(addition) = KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::from_slice(addition).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while add_word_positions.front().map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE


@@ -60,10 +60,10 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
current_document_id = Some(document_id);
let del_add_reader = KvReaderDelAdd::new(value);
let del_add_reader = KvReaderDelAdd::from_slice(value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (position, word_bytes) in KvReaderU16::new(deletion).iter() {
for (position, word_bytes) in KvReaderU16::from_slice(deletion).iter() {
let position = bucketed_position(position);
del_word_positions.insert((position, word_bytes.to_vec()));
}
@@ -71,7 +71,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (position, word_bytes) in KvReaderU16::new(addition).iter() {
for (position, word_bytes) in KvReaderU16::from_slice(addition).iter() {
let position = bucketed_position(position);
add_word_positions.insert((position, word_bytes.to_vec()));
}


@@ -45,8 +45,8 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<
}
pub fn merge_two_del_add_obkvs(
base: obkv::KvReaderU16<'_>,
update: obkv::KvReaderU16<'_>,
base: &obkv::KvReaderU16,
update: &obkv::KvReaderU16,
merge_additions: bool,
buffer: &mut Vec<u8>,
) {
@@ -66,7 +66,7 @@ pub fn merge_two_del_add_obkvs(
// If merge_additions is false, recreate an obkv keeping the deletions only.
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let base_reader = KvReaderDelAdd::new(v);
let base_reader = KvReaderDelAdd::from_slice(v);
if let Some(deletion) = base_reader.get(DelAdd::Deletion) {
value_writer.insert(DelAdd::Deletion, deletion).unwrap();
@@ -80,8 +80,8 @@ pub fn merge_two_del_add_obkvs(
// merge deletions and additions.
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let base_reader = KvReaderDelAdd::new(base);
let update_reader = KvReaderDelAdd::new(update);
let base_reader = KvReaderDelAdd::from_slice(base);
let update_reader = KvReaderDelAdd::from_slice(update);
// keep newest deletion.
if let Some(deletion) = update_reader
@@ -131,8 +131,8 @@ fn inner_merge_del_add_obkvs<'a>(
break;
}
let newest = obkv::KvReader::new(&acc);
let oldest = obkv::KvReader::new(&current[1..]);
let newest = obkv::KvReader::from_slice(&acc);
let oldest = obkv::KvReader::from_slice(&current[1..]);
merge_two_del_add_obkvs(oldest, newest, merge_additions, &mut buffer);
// we want the result of the merge into our accumulator.
@@ -187,7 +187,7 @@ pub fn merge_deladd_cbo_roaring_bitmaps<'a>(
let mut del_bitmaps_bytes = Vec::new();
let mut add_bitmaps_bytes = Vec::new();
for value in values {
let obkv = KvReaderDelAdd::new(value);
let obkv = KvReaderDelAdd::from_slice(value);
if let Some(bitmap_bytes) = obkv.get(DelAdd::Deletion) {
del_bitmaps_bytes.push(bitmap_bytes);
}
@@ -217,7 +217,7 @@ pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
buffer: &'a mut Vec<u8>,
) -> Result<Option<&'a [u8]>> {
Ok(CboRoaringBitmapCodec::merge_deladd_into(
KvReaderDelAdd::new(deladd_obkv),
KvReaderDelAdd::from_slice(deladd_obkv),
previous,
buffer,
)?)
@@ -236,7 +236,7 @@ pub fn merge_deladd_btreeset_string<'a>(
let mut del_set = BTreeSet::new();
let mut add_set = BTreeSet::new();
for value in values {
let obkv = KvReaderDelAdd::new(value);
let obkv = KvReaderDelAdd::from_slice(value);
if let Some(bytes) = obkv.get(DelAdd::Deletion) {
let set = serde_json::from_slice::<BTreeSet<String>>(bytes).unwrap();
for value in set {


@@ -31,14 +31,14 @@ impl<'t> ImmutableObkvs<'t> {
}
/// Returns the OBKVs identified by the given ID.
pub fn obkv(&self, docid: DocumentId) -> heed::Result<Option<KvReaderU16<'t>>> {
pub fn obkv(&self, docid: DocumentId) -> heed::Result<Option<&'t KvReaderU16>> {
match self
.ids
.rank(docid)
.checked_sub(1)
.and_then(|offset| self.slices.get(offset as usize))
{
Some(bytes) => Ok(Some(KvReaderU16::new(bytes))),
Some(&bytes) => Ok(Some(bytes.into())),
None => Ok(None),
}
}


@@ -278,13 +278,13 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(base_obkv),
KvReaderU16::from_slice(base_obkv),
deladd_operation,
&mut document_sorter_value_buffer,
)?;
self.original_sorter
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
let base_obkv = KvReader::new(base_obkv);
let base_obkv = KvReader::from_slice(base_obkv);
if let Some(flattened_obkv) =
Self::flatten_from_fields_ids_map(&base_obkv, &mut self.fields_ids_map)?
{
@@ -292,7 +292,7 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&flattened_obkv),
KvReaderU16::from_slice(&flattened_obkv),
deladd_operation,
&mut document_sorter_value_buffer,
)?;
@@ -311,7 +311,7 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&obkv_buffer),
KvReaderU16::from_slice(&obkv_buffer),
DelAddOperation::Addition,
&mut document_sorter_value_buffer,
)?;
@@ -319,14 +319,14 @@ impl<'a, 'i> Transform<'a, 'i> {
self.original_sorter
.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
let flattened_obkv = KvReader::new(&obkv_buffer);
let flattened_obkv = KvReader::from_slice(&obkv_buffer);
if let Some(obkv) =
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)?
{
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&obkv),
KvReaderU16::from_slice(&obkv),
DelAddOperation::Addition,
&mut document_sorter_value_buffer,
)?
@@ -519,14 +519,14 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::new(base_obkv),
KvReaderU16::from_slice(base_obkv),
DelAddOperation::Deletion,
document_sorter_value_buffer,
)?;
self.original_sorter.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
// flatten it and push it as to delete in the flattened_sorter
let flattened_obkv = KvReader::new(base_obkv);
let flattened_obkv = KvReader::from_slice(base_obkv);
if let Some(obkv) =
Self::flatten_from_fields_ids_map(&flattened_obkv, &mut self.fields_ids_map)?
{
@@ -534,7 +534,7 @@ impl<'a, 'i> Transform<'a, 'i> {
document_sorter_value_buffer.clear();
document_sorter_value_buffer.push(Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::new(&obkv),
KvReaderU16::from_slice(&obkv),
DelAddOperation::Deletion,
document_sorter_value_buffer,
)?;
@@ -552,7 +552,7 @@ impl<'a, 'i> Transform<'a, 'i> {
target = "indexing::transform"
)]
fn flatten_from_fields_ids_map(
obkv: &KvReader<'_, FieldId>,
obkv: &KvReader<FieldId>,
fields_ids_map: &mut FieldsIdsMap,
) -> Result<Option<Vec<u8>>> {
if obkv
@@ -720,10 +720,10 @@ impl<'a, 'i> Transform<'a, 'i> {
total_documents: self.documents_count,
});
for (key, value) in KvReader::new(val) {
let reader = KvReaderDelAdd::new(value);
for (key, value) in KvReader::from_slice(val) {
let reader = KvReaderDelAdd::from_slice(value);
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {}
(None, None) => (),
(None, Some(_)) => {
// New field
let name = self.fields_ids_map.name(key).ok_or(
@@ -837,7 +837,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// then fill the provided buffers with delta documents using KvWritterDelAdd.
#[allow(clippy::too_many_arguments)] // need the vectors + fid, feel free to create a struct xo xo
fn rebind_existing_document(
old_obkv: KvReader<'_, FieldId>,
old_obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
modified_faceted_fields: &HashSet<String>,
mut injected_vectors: serde_json::Map<String, serde_json::Value>,
@@ -925,7 +925,7 @@ impl<'a, 'i> Transform<'a, 'i> {
}
let data = obkv_writer.into_inner()?;
let obkv = KvReader::<FieldId>::new(&data);
let obkv = KvReader::<FieldId>::from_slice(&data);
if let Some(original_obkv_buffer) = original_obkv_buffer {
original_obkv_buffer.clear();
@@ -936,7 +936,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// take the non-flattened version if flatten_from_fields_ids_map returns None.
let mut fields_ids_map = settings_diff.new.fields_ids_map.clone();
let flattened = Self::flatten_from_fields_ids_map(&obkv, &mut fields_ids_map)?;
let flattened = flattened.as_deref().map_or(obkv, KvReader::new);
let flattened = flattened.as_deref().map_or(obkv, KvReader::from_slice);
flattened_obkv_buffer.clear();
into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| {
@@ -1173,21 +1173,21 @@ mod test {
kv_writer.insert(0_u8, [0]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(
KvReaderU16::new(&buffer),
KvReaderU16::from_slice(&buffer),
DelAddOperation::Addition,
&mut additive_doc_0,
)
.unwrap();
additive_doc_0.insert(0, Operation::Addition as u8);
into_del_add_obkv(
KvReaderU16::new(&buffer),
KvReaderU16::from_slice(&buffer),
DelAddOperation::Deletion,
&mut deletive_doc_0,
)
.unwrap();
deletive_doc_0.insert(0, Operation::Deletion as u8);
into_del_add_obkv(
KvReaderU16::new(&buffer),
KvReaderU16::from_slice(&buffer),
DelAddOperation::DeletionAndAddition,
&mut del_add_doc_0,
)
@@ -1199,7 +1199,7 @@ mod test {
kv_writer.insert(1_u8, [1]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(
KvReaderU16::new(&buffer),
KvReaderU16::from_slice(&buffer),
DelAddOperation::Addition,
&mut additive_doc_1,
)
@@ -1212,7 +1212,7 @@ mod test {
kv_writer.insert(1_u8, [1]).unwrap();
let buffer = kv_writer.into_inner().unwrap();
into_del_add_obkv(
KvReaderU16::new(&buffer),
KvReaderU16::from_slice(&buffer),
DelAddOperation::Addition,
&mut additive_doc_0_1,
)


@@ -162,7 +162,7 @@ pub(crate) fn write_typed_chunk_into_index(
let mut vectors_buffer = Vec::new();
while let Some((key, reader)) = iter.next()? {
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
let reader: KvReader<'_, FieldId> = KvReader::new(reader);
let reader: &KvReader<FieldId> = reader.into();
let (document_id_bytes, external_id_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCUMENTS) })?;
@@ -170,7 +170,7 @@
let external_id = std::str::from_utf8(external_id_bytes)?;
for (field_id, value) in reader.iter() {
let del_add_reader = KvReaderDelAdd::new(value);
let del_add_reader = KvReaderDelAdd::from_slice(value);
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
let addition = if vectors_fid == Some(field_id) {
@@ -529,7 +529,7 @@ pub(crate) fn write_typed_chunk_into_index(
index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
let reader = KvReaderDelAdd::new(value);
let reader = KvReaderDelAdd::from_slice(value);
if valid_lmdb_key(key) {
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {}
@@ -563,7 +563,7 @@ pub(crate) fn write_typed_chunk_into_index(
index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
let reader = KvReaderDelAdd::new(value);
let reader = KvReaderDelAdd::from_slice(value);
if valid_lmdb_key(key) {
match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
(None, None) => {}
@@ -600,7 +600,7 @@ pub(crate) fn write_typed_chunk_into_index(
// convert the key back to a u32 (4 bytes)
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
let deladd_obkv = KvReaderDelAdd::new(value);
let deladd_obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = deladd_obkv.get(DelAdd::Deletion) {
let geopoint = extract_geo_point(value, docid);
rtree.remove(&geopoint);
@@ -723,7 +723,7 @@ pub(crate) fn write_typed_chunk_into_index(
let (left, _index) = try_split_array_at(key).unwrap();
let docid = DocumentId::from_be_bytes(left);
let vector_deladd_obkv = KvReaderDelAdd::new(value);
let vector_deladd_obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = vector_deladd_obkv.get(DelAdd::Deletion) {
let vector: Vec<f32> = pod_collect_to_vec(value);
@@ -852,7 +852,7 @@ where
if valid_lmdb_key(key) {
let (proximity_to_insert, word1, word2) =
U8StrStrCodec::bytes_decode(key).map_err(heed::Error::Decoding)?;
let data_to_insert = match KvReaderDelAdd::new(value).get(DelAdd::Addition) {
let data_to_insert = match KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
Some(value) => {
CboRoaringBitmapCodec::bytes_decode(value).map_err(heed::Error::Decoding)?
}


@@ -1,4 +1,3 @@
use core::slice::SlicePattern;
use std::fs::File;
use crossbeam_channel::{IntoIter, Receiver, SendError, Sender};
@@ -44,11 +43,11 @@ impl KeyValueEntry {
}
pub fn key(&self) -> &[u8] {
&self.data.as_slice()[..self.key_length]
&self.data.as_ref()[..self.key_length]
}
pub fn value(&self) -> &[u8] {
&self.data.as_slice()[self.key_length..]
&self.data.as_ref()[self.key_length..]
}
}


@@ -1,5 +1,5 @@
use heed::RoTxn;
use obkv2::KvReader;
use obkv::KvReader;
use super::indexer::KvReaderFieldId;
use crate::{DocumentId, FieldId};


@@ -4,7 +4,8 @@ mod channel;
mod items_pool;
mod merge;
mod global_fields_ids_map;
/// TODO remove this
// mod global_fields_ids_map;
pub type StdResult<T, E> = std::result::Result<T, E>;
@@ -27,8 +28,7 @@ mod indexer {
use super::channel::{
extractors_merger_channels, merger_writer_channels, EntryOperation,
ExtractorsMergerChannels, MergerReceiver, MergerSender, MergerWriterChannels,
WriterOperation,
ExtractorsMergerChannels, MergerReceiver, MergerSender, WriterOperation,
};
use super::document_change::{Deletion, DocumentChange, Insertion, Update};
use super::items_pool::ItemsPool;
@@ -44,10 +44,10 @@ mod indexer {
Result, UserError,
};
pub type KvReaderFieldId = obkv2::KvReader<FieldId>;
pub type KvReaderDelAdd = obkv2::KvReader<DelAdd>;
pub type KvWriterFieldId<W> = obkv2::KvWriter<W, FieldId>;
pub type KvWriterDelAdd<W> = obkv2::KvWriter<W, DelAdd>;
pub type KvReaderFieldId = obkv::KvReader<FieldId>;
pub type KvReaderDelAdd = obkv::KvReader<DelAdd>;
pub type KvWriterFieldId<W> = obkv::KvWriter<W, FieldId>;
pub type KvWriterDelAdd<W> = obkv::KvWriter<W, DelAdd>;
pub struct DocumentOperationIndexer {
operations: Vec<Payload>,
@@ -105,7 +105,7 @@ mod indexer {
rtxn: &'a RoTxn,
mut fields_ids_map: FieldsIdsMap,
primary_key: &'a PrimaryKey<'a>,
) -> Result<impl ParallelIterator<Item = DocumentChange> + 'a> {
) -> Result<impl ParallelIterator<Item = Result<Option<DocumentChange>>> + 'a> {
let documents_ids = index.documents_ids(rtxn)?;
let mut available_docids = AvailableDocumentsIds::from_documents_ids(&documents_ids);
let mut docids_version_offsets = HashMap::<String, _>::new();
@@ -198,7 +198,7 @@ mod indexer {
}
let items = Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from)));
docids_version_offsets.into_par_iter().map_with(
Ok(docids_version_offsets.into_par_iter().map_with(
items,
|context_pool, (external_docid, (internal_docid, operations))| {
context_pool.with(|rtxn| match self.method {
@@ -221,58 +221,7 @@
),
})
},
);
Ok(vec![].into_par_iter())
// let mut file_count: usize = 0;
// for result in WalkDir::new(update_files_path)
// // TODO handle errors
// .sort_by_key(|entry| entry.metadata().unwrap().created().unwrap())
// {
// let entry = result?;
// if !entry.file_type().is_file() {
// continue;
// }
// let file = File::open(entry.path())
// .with_context(|| format!("While opening {}", entry.path().display()))?;
// let content = unsafe {
// Mmap::map(&file)
// .map(Arc::new)
// .with_context(|| format!("While memory mapping {}", entry.path().display()))?
// };
// let reader =
// crate::documents::DocumentsBatchReader::from_reader(Cursor::new(content.as_ref()))?;
// let (mut batch_cursor, batch_index) = reader.into_cursor_and_fields_index();
// batch_index.iter().for_each(|(_, name)| {
// fields_ids_map.insert(name);
// });
// let mut offset: u32 = 0;
// while let Some(document) = batch_cursor.next_document()? {
// let primary_key = batch_index.id(primary_key).unwrap();
// let document_id = document.get(primary_key).unwrap();
// let document_id = std::str::from_utf8(document_id).unwrap();
// let document_offset = DocumentOffset { content: content.clone(), offset };
// match docids_version_offsets.get_mut(document_id) {
// None => {
// let docid = match maindb.external_documents_ids.get(rtxn, document_id)? {
// Some(docid) => docid,
// None => sequential_docids.next().context("no more available docids")?,
// };
// docids_version_offsets
// .insert(document_id.into(), (docid, smallvec![document_offset]));
// }
// Some((_, offsets)) => offsets.push(document_offset),
// }
// offset += 1;
// p.inc(1);
// }
// file_count += 1;
// }
))
}
}


@@ -109,14 +109,13 @@ impl ParsedVectorsDiff {
pub fn new(
docid: DocumentId,
embedders_configs: &[IndexEmbeddingConfig],
documents_diff: KvReader<'_, FieldId>,
documents_diff: &KvReader<FieldId>,
old_vectors_fid: Option<FieldId>,
new_vectors_fid: Option<FieldId>,
) -> Result<Self, Error> {
let mut old = match old_vectors_fid
.and_then(|vectors_fid| documents_diff.get(vectors_fid))
.map(KvReaderDelAdd::new)
.map(|obkv| to_vector_map(obkv, DelAdd::Deletion))
.map(|bytes| to_vector_map(bytes.into(), DelAdd::Deletion))
.transpose()
{
Ok(del) => del,
@@ -143,8 +142,7 @@ impl ParsedVectorsDiff {
let Some(bytes) = documents_diff.get(new_vectors_fid) else {
break 'new VectorsState::NoVectorsFieldInDocument;
};
let obkv = KvReaderDelAdd::new(bytes);
match to_vector_map(obkv, DelAdd::Addition)? {
match to_vector_map(bytes.into(), DelAdd::Addition)? {
Some(new) => VectorsState::Vectors(new),
None => VectorsState::NoVectorsFieldInDocument,
}
@@ -239,7 +237,7 @@ impl Error {
}
fn to_vector_map(
obkv: KvReaderDelAdd<'_>,
obkv: &KvReaderDelAdd,
side: DelAdd,
) -> Result<Option<BTreeMap<String, Vectors>>, Error> {
Ok(if let Some(value) = obkv.get(side) {