From d35278320edad8149b38c96cd00022527d3f0ca2 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 28 May 2024 14:22:19 +0200 Subject: [PATCH] Add support functions for accessing arroy writers and readers --- milli/src/index.rs | 16 ++++++++++++++++ milli/src/search/new/vector_sort.rs | 15 ++------------- milli/src/update/index_documents/mod.rs | 6 ++---- milli/src/update/index_documents/typed_chunk.rs | 11 ++--------- milli/src/vector/mod.rs | 6 ++++++ 5 files changed, 28 insertions(+), 26 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 982be0139..3c502d541 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1595,6 +1595,22 @@ impl Index { .unwrap_or_default()) } + pub fn arroy_readers<'a>( + &'a self, + rtxn: &'a RoTxn<'a>, + embedder_id: u8, + ) -> impl Iterator>> + 'a { + crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { + arroy::Reader::open(rtxn, k, self.vector_arroy) + .map(Some) + .or_else(|e| match e { + arroy::Error::MissingMetadata => Ok(None), + e => Err(e.into()), + }) + .transpose() + }) + } + pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> { self.main.remap_types::().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff) } diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index de272ed47..cd69b6c47 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -49,19 +49,8 @@ impl VectorSort { ctx: &mut SearchContext<'_>, vector_candidates: &RoaringBitmap, ) -> Result<()> { - let writer_index = (self.embedder_index as u16) << 8; - let readers: std::result::Result, _> = (0..=u8::MAX) - .map_while(|k| { - arroy::Reader::open(ctx.txn, writer_index | (k as u16), ctx.index.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata => Ok(None), - e => Err(e), - }) - .transpose() - }) - .collect(); - + let readers: std::result::Result, _> = + 
ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect(); let readers = readers?; let target = &self.target; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index f281becd6..11caa91eb 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -561,10 +561,8 @@ where )?; pool.install(|| { - let writer_index = (embedder_index as u16) << 8; - for k in 0..=u8::MAX { - let writer = - arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension); + for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { + let writer = arroy::Writer::new(vector_arroy, k, dimension); if writer.is_empty(wtxn)? { break; } diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 27f760c2a..2ef7a8990 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -634,16 +634,9 @@ pub(crate) fn write_typed_chunk_into_index( let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; - let writer_index = (embedder_index as u16) << 8; // FIXME: allow customizing distance - let writers: Vec<_> = (0..=u8::MAX) - .map(|k| { - arroy::Writer::new( - index.vector_arroy, - writer_index | (k as u16), - expected_dimension, - ) - }) + let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) + .map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension)) .collect(); // remove vectors for docids we want them removed diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 1922bb389..553c8c3c1 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -442,3 +442,9 @@ impl DistributionShift { pub const fn is_cuda_enabled() -> bool { cfg!(feature = "cuda") } + +pub fn arroy_db_range_for_embedder(embedder_id: u8) -> 
impl Iterator<Item = u16> { + let embedder_id = (embedder_id as u16) << 8; + + (0..=u8::MAX).map(move |k| embedder_id | (k as u16)) +}