Vector fixes

This commit is contained in:
Louis Dureuil 2024-11-12 16:31:22 +01:00
parent 1fcd5f091e
commit 980921e078
No known key found for this signature in database
5 changed files with 12 additions and 9 deletions

View File

@ -115,12 +115,9 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
new_vectors.vectors_for_key(embedder_name).transpose()
}) {
let new_vectors = new_vectors?;
match (old_vectors.regenerate, new_vectors.regenerate) {
(true, true) | (false, false) => todo!(),
_ => {
if old_vectors.regenerate != new_vectors.regenerate {
chunks.set_regenerate(update.docid(), new_vectors.regenerate);
}
}
// do we have set embeddings?
if let Some(embeddings) = new_vectors.embeddings {
chunks.set_vectors(

View File

@ -180,7 +180,13 @@ fn entry_from_raw_value(
},
RawVectors::ImplicitlyUserProvided(value) => VectorEntry {
has_configured_embedder,
embeddings: value.map(Embeddings::FromJsonImplicityUserProvided),
// implicitly user-provided vectors always provide embeddings:
// a `None` value here means that there are no embeddings
embeddings: Some(
value
.map(Embeddings::FromJsonImplicityUserProvided)
.unwrap_or(Embeddings::FromDb(Default::default())),
),
regenerate: false,
implicit: true,
},

View File

@ -113,7 +113,7 @@ impl Embedder {
threads
.install(move || {
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
.par_chunks(self.chunk_count_hint())
.par_chunks(self.prompt_count_in_chunk_hint())
.map(move |chunk| self.embed(chunk))
.collect();

View File

@ -266,7 +266,7 @@ impl Embedder {
threads
.install(move || {
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
.par_chunks(self.chunk_count_hint())
.par_chunks(self.prompt_count_in_chunk_hint())
.map(move |chunk| self.embed(chunk))
.collect();

View File

@ -193,7 +193,7 @@ impl Embedder {
threads
.install(move || {
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
.par_chunks(self.chunk_count_hint())
.par_chunks(self.prompt_count_in_chunk_hint())
.map(move |chunk| self.embed_ref(chunk))
.collect();