mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 08:14:26 +01:00
rename the embedder index for clarity
This commit is contained in:
parent
1e4d4e69c4
commit
79d8a7a51a
@ -32,17 +32,21 @@ pub const REQUEST_PARALLELISM: usize = 40;
|
|||||||
|
|
||||||
pub struct ArroyWrapper {
|
pub struct ArroyWrapper {
|
||||||
quantized: bool,
|
quantized: bool,
|
||||||
index: u8,
|
embedder_index: u8,
|
||||||
database: arroy::Database<Unspecified>,
|
database: arroy::Database<Unspecified>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ArroyWrapper {
|
impl ArroyWrapper {
|
||||||
pub fn new(database: arroy::Database<Unspecified>, index: u8, quantized: bool) -> Self {
|
pub fn new(
|
||||||
Self { database, index, quantized }
|
database: arroy::Database<Unspecified>,
|
||||||
|
embedder_index: u8,
|
||||||
|
quantized: bool,
|
||||||
|
) -> Self {
|
||||||
|
Self { database, embedder_index, quantized }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn index(&self) -> u8 {
|
pub fn index(&self) -> u8 {
|
||||||
self.index
|
self.embedder_index
|
||||||
}
|
}
|
||||||
|
|
||||||
fn readers<'a, D: arroy::Distance>(
|
fn readers<'a, D: arroy::Distance>(
|
||||||
@ -50,7 +54,7 @@ impl ArroyWrapper {
|
|||||||
rtxn: &'a RoTxn<'a>,
|
rtxn: &'a RoTxn<'a>,
|
||||||
db: arroy::Database<D>,
|
db: arroy::Database<D>,
|
||||||
) -> impl Iterator<Item = Result<arroy::Reader<D>, arroy::Error>> + 'a {
|
) -> impl Iterator<Item = Result<arroy::Reader<D>, arroy::Error>> + 'a {
|
||||||
arroy_db_range_for_embedder(self.index).map_while(move |index| {
|
arroy_db_range_for_embedder(self.embedder_index).map_while(move |index| {
|
||||||
match arroy::Reader::open(rtxn, index, db) {
|
match arroy::Reader::open(rtxn, index, db) {
|
||||||
Ok(reader) => Some(Ok(reader)),
|
Ok(reader) => Some(Ok(reader)),
|
||||||
Err(arroy::Error::MissingMetadata(_)) => None,
|
Err(arroy::Error::MissingMetadata(_)) => None,
|
||||||
@ -60,7 +64,7 @@ impl ArroyWrapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> {
|
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> {
|
||||||
let first_id = arroy_db_range_for_embedder(self.index).next().unwrap();
|
let first_id = arroy_db_range_for_embedder(self.embedder_index).next().unwrap();
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
Ok(arroy::Reader::open(rtxn, first_id, self.quantized_db())?.dimensions())
|
Ok(arroy::Reader::open(rtxn, first_id, self.quantized_db())?.dimensions())
|
||||||
} else {
|
} else {
|
||||||
@ -70,7 +74,7 @@ impl ArroyWrapper {
|
|||||||
|
|
||||||
pub fn quantize(&mut self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
|
pub fn quantize(&mut self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
|
||||||
if !self.quantized {
|
if !self.quantized {
|
||||||
for index in arroy_db_range_for_embedder(self.index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
||||||
writer.prepare_changing_distance::<BinaryQuantizedAngular>(wtxn)?;
|
writer.prepare_changing_distance::<BinaryQuantizedAngular>(wtxn)?;
|
||||||
}
|
}
|
||||||
@ -81,7 +85,7 @@ impl ArroyWrapper {
|
|||||||
|
|
||||||
// TODO: We can stop early when we find an empty DB
|
// TODO: We can stop early when we find an empty DB
|
||||||
pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
||||||
for index in arroy_db_range_for_embedder(self.index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
let need_build = if self.quantized {
|
let need_build = if self.quantized {
|
||||||
arroy::Writer::new(self.quantized_db(), index, dimension).need_build(rtxn)
|
arroy::Writer::new(self.quantized_db(), index, dimension).need_build(rtxn)
|
||||||
} else {
|
} else {
|
||||||
@ -101,7 +105,7 @@ impl ArroyWrapper {
|
|||||||
rng: &mut R,
|
rng: &mut R,
|
||||||
dimension: usize,
|
dimension: usize,
|
||||||
) -> Result<(), arroy::Error> {
|
) -> Result<(), arroy::Error> {
|
||||||
for index in arroy_db_range_for_embedder(self.index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
arroy::Writer::new(self.quantized_db(), index, dimension).build(wtxn, rng, None)?
|
arroy::Writer::new(self.quantized_db(), index, dimension).build(wtxn, rng, None)?
|
||||||
} else {
|
} else {
|
||||||
@ -119,7 +123,9 @@ impl ArroyWrapper {
|
|||||||
embeddings: &Embeddings<f32>,
|
embeddings: &Embeddings<f32>,
|
||||||
) -> Result<(), arroy::Error> {
|
) -> Result<(), arroy::Error> {
|
||||||
let dimension = embeddings.dimension();
|
let dimension = embeddings.dimension();
|
||||||
for (index, vector) in arroy_db_range_for_embedder(self.index).zip(embeddings.iter()) {
|
for (index, vector) in
|
||||||
|
arroy_db_range_for_embedder(self.embedder_index).zip(embeddings.iter())
|
||||||
|
{
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
arroy::Writer::new(self.quantized_db(), index, dimension)
|
arroy::Writer::new(self.quantized_db(), index, dimension)
|
||||||
.add_item(wtxn, item_id, vector)?
|
.add_item(wtxn, item_id, vector)?
|
||||||
@ -154,7 +160,7 @@ impl ArroyWrapper {
|
|||||||
) -> Result<(), arroy::Error> {
|
) -> Result<(), arroy::Error> {
|
||||||
let dimension = vector.len();
|
let dimension = vector.len();
|
||||||
|
|
||||||
for index in arroy_db_range_for_embedder(self.index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
let writer = arroy::Writer::new(db, index, dimension);
|
let writer = arroy::Writer::new(db, index, dimension);
|
||||||
if !writer.contains_item(wtxn, item_id)? {
|
if !writer.contains_item(wtxn, item_id)? {
|
||||||
writer.add_item(wtxn, item_id, vector)?;
|
writer.add_item(wtxn, item_id, vector)?;
|
||||||
@ -172,7 +178,7 @@ impl ArroyWrapper {
|
|||||||
dimension: usize,
|
dimension: usize,
|
||||||
item_id: arroy::ItemId,
|
item_id: arroy::ItemId,
|
||||||
) -> Result<bool, arroy::Error> {
|
) -> Result<bool, arroy::Error> {
|
||||||
for index in arroy_db_range_for_embedder(self.index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
||||||
if writer.del_item(wtxn, item_id)? {
|
if writer.del_item(wtxn, item_id)? {
|
||||||
@ -213,7 +219,7 @@ impl ArroyWrapper {
|
|||||||
let dimension = vector.len();
|
let dimension = vector.len();
|
||||||
let mut deleted_index = None;
|
let mut deleted_index = None;
|
||||||
|
|
||||||
for index in arroy_db_range_for_embedder(self.index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
let writer = arroy::Writer::new(db, index, dimension);
|
let writer = arroy::Writer::new(db, index, dimension);
|
||||||
let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
|
let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
|
||||||
// uses invariant: vectors are packed in the first writers.
|
// uses invariant: vectors are packed in the first writers.
|
||||||
@ -228,7 +234,9 @@ impl ArroyWrapper {
|
|||||||
// 🥲 enforce invariant: vectors are packed in the first writers.
|
// 🥲 enforce invariant: vectors are packed in the first writers.
|
||||||
if let Some(deleted_index) = deleted_index {
|
if let Some(deleted_index) = deleted_index {
|
||||||
let mut last_index_with_a_vector = None;
|
let mut last_index_with_a_vector = None;
|
||||||
for index in arroy_db_range_for_embedder(self.index).skip(deleted_index as usize) {
|
for index in
|
||||||
|
arroy_db_range_for_embedder(self.embedder_index).skip(deleted_index as usize)
|
||||||
|
{
|
||||||
let writer = arroy::Writer::new(db, index, dimension);
|
let writer = arroy::Writer::new(db, index, dimension);
|
||||||
let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
|
let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
|
||||||
break;
|
break;
|
||||||
@ -247,7 +255,7 @@ impl ArroyWrapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
|
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
|
||||||
for index in arroy_db_range_for_embedder(self.index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
arroy::Writer::new(self.quantized_db(), index, dimension).clear(wtxn)?;
|
arroy::Writer::new(self.quantized_db(), index, dimension).clear(wtxn)?;
|
||||||
} else {
|
} else {
|
||||||
@ -258,7 +266,7 @@ impl ArroyWrapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
||||||
for index in arroy_db_range_for_embedder(self.index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
let empty = if self.quantized {
|
let empty = if self.quantized {
|
||||||
arroy::Writer::new(self.quantized_db(), index, dimension).is_empty(rtxn)?
|
arroy::Writer::new(self.quantized_db(), index, dimension).is_empty(rtxn)?
|
||||||
} else {
|
} else {
|
||||||
@ -277,7 +285,7 @@ impl ArroyWrapper {
|
|||||||
dimension: usize,
|
dimension: usize,
|
||||||
item: arroy::ItemId,
|
item: arroy::ItemId,
|
||||||
) -> Result<bool, arroy::Error> {
|
) -> Result<bool, arroy::Error> {
|
||||||
for index in arroy_db_range_for_embedder(self.index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
let contains = if self.quantized {
|
let contains = if self.quantized {
|
||||||
arroy::Writer::new(self.quantized_db(), index, dimension)
|
arroy::Writer::new(self.quantized_db(), index, dimension)
|
||||||
.contains_item(rtxn, item)?
|
.contains_item(rtxn, item)?
|
||||||
|
Loading…
Reference in New Issue
Block a user