From 980921e078f05b35c9907a38843d406b4851f95c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 12 Nov 2024 16:31:22 +0100 Subject: [PATCH] Vector fixes --- crates/milli/src/update/new/extract/vectors/mod.rs | 7 ++----- crates/milli/src/update/new/vector_document.rs | 8 +++++++- crates/milli/src/vector/ollama.rs | 2 +- crates/milli/src/vector/openai.rs | 2 +- crates/milli/src/vector/rest.rs | 2 +- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs index df8e2ed09..514791a65 100644 --- a/crates/milli/src/update/new/extract/vectors/mod.rs +++ b/crates/milli/src/update/new/extract/vectors/mod.rs @@ -115,11 +115,8 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> { new_vectors.vectors_for_key(embedder_name).transpose() }) { let new_vectors = new_vectors?; - match (old_vectors.regenerate, new_vectors.regenerate) { - (true, true) | (false, false) => todo!(), - _ => { - chunks.set_regenerate(update.docid(), new_vectors.regenerate); - } + if old_vectors.regenerate != new_vectors.regenerate { + chunks.set_regenerate(update.docid(), new_vectors.regenerate); } // do we have set embeddings? if let Some(embeddings) = new_vectors.embeddings { diff --git a/crates/milli/src/update/new/vector_document.rs b/crates/milli/src/update/new/vector_document.rs index e96e29053..381c4dab6 100644 --- a/crates/milli/src/update/new/vector_document.rs +++ b/crates/milli/src/update/new/vector_document.rs @@ -180,7 +180,13 @@ fn entry_from_raw_value( }, RawVectors::ImplicitlyUserProvided(value) => VectorEntry { has_configured_embedder, - embeddings: value.map(Embeddings::FromJsonImplicityUserProvided), + // implicitly user provided always provide embeddings + // `None` here means that there are no embeddings + embeddings: Some( + value + .map(Embeddings::FromJsonImplicityUserProvided) + .unwrap_or(Embeddings::FromDb(Default::default())), + ), regenerate: false, implicit: true, }, diff --git a/crates/milli/src/vector/ollama.rs b/crates/milli/src/vector/ollama.rs index 65fd05416..263d9d3c9 100644 --- a/crates/milli/src/vector/ollama.rs +++ b/crates/milli/src/vector/ollama.rs @@ -113,7 +113,7 @@ impl Embedder { threads .install(move || { let embeddings: Result>, _> = texts - .par_chunks(self.chunk_count_hint()) + .par_chunks(self.prompt_count_in_chunk_hint()) .map(move |chunk| self.embed(chunk)) .collect(); diff --git a/crates/milli/src/vector/openai.rs b/crates/milli/src/vector/openai.rs index 466fd1660..375b2878a 100644 --- a/crates/milli/src/vector/openai.rs +++ b/crates/milli/src/vector/openai.rs @@ -266,7 +266,7 @@ impl Embedder { threads .install(move || { let embeddings: Result>, _> = texts - .par_chunks(self.chunk_count_hint()) + .par_chunks(self.prompt_count_in_chunk_hint()) .map(move |chunk| self.embed(chunk)) .collect(); diff --git a/crates/milli/src/vector/rest.rs b/crates/milli/src/vector/rest.rs index dc2ab95f9..eeb5b16af 100644 --- a/crates/milli/src/vector/rest.rs +++ b/crates/milli/src/vector/rest.rs @@ -193,7 +193,7 @@ impl Embedder { threads .install(move || { let embeddings: Result>, _> = texts - .par_chunks(self.chunk_count_hint()) + .par_chunks(self.prompt_count_in_chunk_hint()) .map(move |chunk| self.embed_ref(chunk)) .collect();