rename the ArroyReader to an ArroyWrapper since it can read and write

This commit is contained in:
Tamo 2024-09-19 10:35:17 +02:00
parent 79f29eed3c
commit 2b6952eda1
5 changed files with 13 additions and 13 deletions

View File

@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
use crate::order_by_map::OrderByMap; use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision; use crate::proximity::ProximityPrecision;
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
use crate::vector::{ArroyReader, Embedding, EmbeddingConfig}; use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
use crate::{ use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@ -1615,9 +1615,9 @@ impl Index {
rtxn: &'a RoTxn<'a>, rtxn: &'a RoTxn<'a>,
embedder_id: u8, embedder_id: u8,
quantized: bool, quantized: bool,
) -> impl Iterator<Item = Result<ArroyReader>> + 'a { ) -> impl Iterator<Item = Result<ArroyWrapper>> + 'a {
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
let reader = ArroyReader::new(self.vector_arroy, k, quantized); let reader = ArroyWrapper::new(self.vector_arroy, k, quantized);
// Here we don't care about the dimensions, but we want to know if we can read // Here we don't care about the dimensions, but we want to know if we can read
// in the database or if its metadata are missing. // in the database or if its metadata are missing.
match reader.dimensions(rtxn) { match reader.dimensions(rtxn) {
@ -1654,7 +1654,7 @@ impl Index {
let mut embeddings = Vec::new(); let mut embeddings = Vec::new();
'vectors: for i in 0..=u8::MAX { 'vectors: for i in 0..=u8::MAX {
let reader = ArroyReader::new( let reader = ArroyWrapper::new(
self.vector_arroy, self.vector_arroy,
embedder_id | (i as u16), embedder_id | (i as u16),
config.config.quantized(), config.config.quantized(),

View File

@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs;
use crate::update::{ use crate::update::{
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
}; };
use crate::vector::{ArroyReader, EmbeddingConfigs}; use crate::vector::{ArroyWrapper, EmbeddingConfigs};
use crate::{CboRoaringBitmapCodec, Index, Object, Result}; use crate::{CboRoaringBitmapCodec, Index, Object, Result};
static MERGED_DATABASE_COUNT: usize = 7; static MERGED_DATABASE_COUNT: usize = 7;
@ -691,7 +691,7 @@ where
)?; )?;
let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap(); let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap();
let reader = let reader =
ArroyReader::new(self.index.vector_arroy, first_id, action.was_quantized); ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized);
let dim = reader.dimensions(self.wtxn)?; let dim = reader.dimensions(self.wtxn)?;
dimension.insert(name.to_string(), dim); dimension.insert(name.to_string(), dim);
} }
@ -710,7 +710,7 @@ where
pool.install(|| { pool.install(|| {
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
let mut writer = ArroyReader::new(vector_arroy, k, was_quantized); let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized);
if is_quantizing { if is_quantizing {
writer.quantize(wtxn, k, dimension)?; writer.quantize(wtxn, k, dimension)?;
} }

View File

@ -29,7 +29,7 @@ use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::WriteBackToDocuments; use crate::vector::settings::WriteBackToDocuments;
use crate::vector::ArroyReader; use crate::vector::ArroyWrapper;
use crate::{ use crate::{
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
}; };
@ -990,7 +990,7 @@ impl<'a, 'i> Transform<'a, 'i> {
None None
}; };
let readers: Result<BTreeMap<&str, (Vec<ArroyReader>, &RoaringBitmap)>> = settings_diff let readers: Result<BTreeMap<&str, (Vec<ArroyWrapper>, &RoaringBitmap)>> = settings_diff
.embedding_config_updates .embedding_config_updates
.iter() .iter()
.filter_map(|(name, action)| { .filter_map(|(name, action)| {

View File

@ -27,7 +27,7 @@ use crate::update::index_documents::helpers::{
as_cloneable_grenad, keep_latest_obkv, try_split_array_at, as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
}; };
use crate::update::settings::InnerIndexSettingsDiff; use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::ArroyReader; use crate::vector::ArroyWrapper;
use crate::{ use crate::{
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec, Result, SerializationError, U8StrStrCodec,
@ -673,7 +673,7 @@ pub(crate) fn write_typed_chunk_into_index(
.map_or(false, |conf| conf.was_quantized); .map_or(false, |conf| conf.was_quantized);
// FIXME: allow customizing distance // FIXME: allow customizing distance
let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
.map(|k| ArroyReader::new(index.vector_arroy, k, binary_quantized)) .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized))
.collect(); .collect();
// remove the vectors of the docids that must be removed

View File

@ -30,13 +30,13 @@ pub type Embedding = Vec<f32>;
pub const REQUEST_PARALLELISM: usize = 40; pub const REQUEST_PARALLELISM: usize = 40;
pub struct ArroyReader { pub struct ArroyWrapper {
quantized: bool, quantized: bool,
index: u16, index: u16,
database: arroy::Database<Unspecified>, database: arroy::Database<Unspecified>,
} }
impl ArroyReader { impl ArroyWrapper {
pub fn new(database: arroy::Database<Unspecified>, index: u16, quantized: bool) -> Self { pub fn new(database: arroy::Database<Unspecified>, index: u16, quantized: bool) -> Self {
Self { database, index, quantized } Self { database, index, quantized }
} }