diff --git a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs index 677ff93c9..9604c4823 100644 --- a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -1259,7 +1259,7 @@ impl<'doc> OnEmbed<'doc> for WriteGrenadOnEmbed<'_> { error: crate::vector::error::EmbedError, embedder_name: &'doc str, unused_vectors_distribution: &crate::vector::error::UnusedVectorsDistribution, - _metadata: &[crate::vector::session::Metadata<'doc>], + _metadata: bumpalo::collections::Vec<'doc, crate::vector::session::Metadata<'doc>>, ) -> crate::Error { if let FaultSource::Bug = error.fault { crate::Error::InternalError(crate::InternalError::VectorEmbeddingError(error.into())) diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs index f8e0e7cb5..72a07dea6 100644 --- a/crates/milli/src/update/new/extract/vectors/mod.rs +++ b/crates/milli/src/update/new/extract/vectors/mod.rs @@ -478,7 +478,7 @@ impl<'doc> OnEmbed<'doc> for OnEmbeddingDocumentUpdates<'doc, '_> { error: crate::vector::hf::EmbedError, embedder_name: &'doc str, unused_vectors_distribution: &UnusedVectorsDistributionBump, - metadata: &[Metadata<'doc>], + metadata: BVec<'doc, Metadata<'doc>>, ) -> crate::Error { if let FaultSource::Bug = error.fault { crate::Error::InternalError(crate::InternalError::VectorEmbeddingError(error.into())) diff --git a/crates/milli/src/vector/session.rs b/crates/milli/src/vector/session.rs index 5f6d68879..b582bd840 100644 --- a/crates/milli/src/vector/session.rs +++ b/crates/milli/src/vector/session.rs @@ -28,7 +28,7 @@ pub trait OnEmbed<'doc> { error: EmbedError, embedder_name: &'doc str, unused_vectors_distribution: &Self::ErrorMetadata, - metadata: &[Metadata<'doc>], + metadata: BVec<'doc, Metadata<'doc>>, ) -> crate::Error; } @@ -143,12 +143,19 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> { Ok(()) } Err(error) => { + // reset metadata and inputs, and send metadata to the error processing. + let doc_alloc = self.metadata.bump(); + let metadata = std::mem::replace( + &mut self.metadata, + BVec::with_capacity_in(self.inputs.capacity(), doc_alloc), + ); + self.inputs.clear(); return Err(self.on_embed.process_embedding_error( error, self.embedder_name, unused_vectors_distribution, - &self.metadata, - )) + metadata, + )); } }; self.inputs.clear();