mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-05 19:23:31 +01:00
Fix a bug around deleting all the vectors of a doc
This commit is contained in:
parent
cc4bd54669
commit
096a28656e
@ -146,15 +146,13 @@ pub struct LargeVectors {
|
||||
pub docid: DocumentId,
|
||||
/// The embedder id in which to insert the large embedding.
|
||||
pub embedder_id: u8,
|
||||
/// The dimensions of the embeddings in this payload.
|
||||
pub dimensions: u16,
|
||||
/// The large embedding that must be written.
|
||||
pub embeddings: Mmap,
|
||||
}
|
||||
|
||||
impl LargeVectors {
|
||||
pub fn read_embeddings(&self) -> impl Iterator<Item = &[f32]> {
|
||||
self.embeddings.chunks_exact(self.dimensions as usize).map(bytemuck::cast_slice)
|
||||
pub fn read_embeddings(&self, dimensions: usize) -> impl Iterator<Item = &[f32]> {
|
||||
self.embeddings.chunks_exact(dimensions).map(bytemuck::cast_slice)
|
||||
}
|
||||
}
|
||||
|
||||
@ -241,15 +239,18 @@ impl ArroySetVector {
|
||||
&self,
|
||||
frame: &FrameGrantR<'_>,
|
||||
vec: &'v mut Vec<f32>,
|
||||
) -> &'v [f32] {
|
||||
) -> Option<&'v [f32]> {
|
||||
vec.clear();
|
||||
let skip = EntryHeader::variant_size() + mem::size_of::<Self>();
|
||||
let bytes = &frame[skip..];
|
||||
if bytes.is_empty() {
|
||||
return None;
|
||||
}
|
||||
bytes.chunks_exact(mem::size_of::<f32>()).for_each(|bytes| {
|
||||
let f = bytes.try_into().map(f32::from_ne_bytes).unwrap();
|
||||
vec.push(f);
|
||||
});
|
||||
&vec[..]
|
||||
Some(&vec[..])
|
||||
}
|
||||
}
|
||||
|
||||
@ -259,9 +260,8 @@ impl ArroySetVector {
|
||||
/// non-aligned [f32] each with dimensions f32s.
|
||||
pub struct ArroySetVectors {
|
||||
pub docid: DocumentId,
|
||||
pub dimensions: u16,
|
||||
pub embedder_id: u8,
|
||||
_padding: u8,
|
||||
_padding: [u8; 3],
|
||||
}
|
||||
|
||||
impl ArroySetVectors {
|
||||
@ -270,30 +270,6 @@ impl ArroySetVectors {
|
||||
&frame[skip..]
|
||||
}
|
||||
|
||||
// /// The number of embeddings in this payload.
|
||||
// pub fn embedding_count(&self, frame: &FrameGrantR<'_>) -> usize {
|
||||
// let bytes = Self::remaining_bytes(frame);
|
||||
// bytes.len().checked_div(self.dimensions as usize).unwrap()
|
||||
// }
|
||||
|
||||
/// Read the embedding at `index` or `None` if out of bounds.
|
||||
pub fn read_embedding_into_vec<'v>(
|
||||
&self,
|
||||
frame: &FrameGrantR<'_>,
|
||||
index: usize,
|
||||
vec: &'v mut Vec<f32>,
|
||||
) -> Option<&'v [f32]> {
|
||||
vec.clear();
|
||||
let bytes = Self::remaining_bytes(frame);
|
||||
let embedding_size = self.dimensions as usize * mem::size_of::<f32>();
|
||||
let embedding_bytes = bytes.chunks_exact(embedding_size).nth(index)?;
|
||||
embedding_bytes.chunks_exact(mem::size_of::<f32>()).for_each(|bytes| {
|
||||
let f = bytes.try_into().map(f32::from_ne_bytes).unwrap();
|
||||
vec.push(f);
|
||||
});
|
||||
Some(&vec[..])
|
||||
}
|
||||
|
||||
/// Read all the embeddings and write them into an aligned `f32` Vec.
|
||||
pub fn read_all_embeddings_into_vec<'v>(
|
||||
&self,
|
||||
@ -607,18 +583,14 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
let refcell = self.producers.get().unwrap();
|
||||
let mut producer = refcell.0.borrow_mut_or_yield();
|
||||
|
||||
// If there are no vector we specify the dimensions
|
||||
// to zero to allocate no extra space at all
|
||||
let dimensions = match embeddings.first() {
|
||||
Some(embedding) => embedding.len(),
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
let arroy_set_vector = ArroySetVectors {
|
||||
docid,
|
||||
dimensions: dimensions.try_into().unwrap(),
|
||||
embedder_id,
|
||||
_padding: 0,
|
||||
None => 0,
|
||||
};
|
||||
|
||||
let arroy_set_vector = ArroySetVectors { docid, embedder_id, _padding: [0; 3] };
|
||||
let payload_header = EntryHeader::ArroySetVectors(arroy_set_vector);
|
||||
let total_length = EntryHeader::total_set_vectors_size(embeddings.len(), dimensions);
|
||||
if total_length > capacity {
|
||||
@ -632,13 +604,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
value_file.sync_all()?;
|
||||
let embeddings = unsafe { Mmap::map(&value_file)? };
|
||||
|
||||
let large_vectors = LargeVectors {
|
||||
docid,
|
||||
embedder_id,
|
||||
dimensions: dimensions.try_into().unwrap(),
|
||||
embeddings,
|
||||
};
|
||||
|
||||
let large_vectors = LargeVectors { docid, embedder_id, embeddings };
|
||||
self.sender.send(ReceiverAction::LargeVectors(large_vectors)).unwrap();
|
||||
|
||||
return Ok(());
|
||||
@ -657,9 +623,11 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
let (header_bytes, remaining) = grant.split_at_mut(header_size);
|
||||
payload_header.serialize_into(header_bytes);
|
||||
|
||||
let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>());
|
||||
for (embedding, output) in embeddings.iter().zip(output_iter) {
|
||||
output.copy_from_slice(bytemuck::cast_slice(embedding));
|
||||
if dimensions != 0 {
|
||||
let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>());
|
||||
for (embedding, output) in embeddings.iter().zip(output_iter) {
|
||||
output.copy_from_slice(bytemuck::cast_slice(embedding));
|
||||
}
|
||||
}
|
||||
|
||||
// We could commit only the used memory.
|
||||
|
@ -443,7 +443,7 @@ where
|
||||
let (_, _, writer, dimensions) =
|
||||
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
|
||||
let mut embeddings = Embeddings::new(*dimensions);
|
||||
for embedding in large_vectors.read_embeddings() {
|
||||
for embedding in large_vectors.read_embeddings(*dimensions) {
|
||||
embeddings.push(embedding.to_vec()).unwrap();
|
||||
}
|
||||
writer.del_items(wtxn, *dimensions, docid)?;
|
||||
@ -597,11 +597,12 @@ fn write_from_bbqueue(
|
||||
EntryHeader::ArroySetVector(asv) => {
|
||||
let ArroySetVector { docid, embedder_id, .. } = asv;
|
||||
let frame = frame_with_header.frame();
|
||||
let embedding = asv.read_embedding_into_vec(frame, aligned_embedding);
|
||||
let (_, _, writer, dimensions) =
|
||||
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
|
||||
writer.del_items(wtxn, *dimensions, docid)?;
|
||||
writer.add_item(wtxn, docid, embedding)?;
|
||||
if let Some(embedding) = asv.read_embedding_into_vec(frame, aligned_embedding) {
|
||||
writer.add_item(wtxn, docid, embedding)?;
|
||||
}
|
||||
}
|
||||
EntryHeader::ArroySetVectors(asvs) => {
|
||||
let ArroySetVectors { docid, embedder_id, .. } = asvs;
|
||||
|
@ -5,6 +5,7 @@ pub trait RefCellExt<T: ?Sized> {
|
||||
&self,
|
||||
) -> std::result::Result<RefMut<'_, T>, std::cell::BorrowMutError>;
|
||||
|
||||
#[track_caller]
|
||||
fn borrow_mut_or_yield(&self) -> RefMut<'_, T> {
|
||||
self.try_borrow_mut_or_yield().unwrap()
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user