mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-06-15 12:31:35 +02:00
Merge pull request #5668 from meilisearch/fix-must-regenerate
Various fixes to embedding regeneration
This commit is contained in:
commit
e2b549c5ee
@ -111,6 +111,8 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
||||
let prompt = chunks.prompt();
|
||||
|
||||
let old_vectors = old_vectors.vectors_for_key(embedder_name)?.unwrap();
|
||||
|
||||
// case where we have a `_vectors` field in the updated document
|
||||
if let Some(new_vectors) = new_vectors.as_ref().and_then(|new_vectors| {
|
||||
new_vectors.vectors_for_key(embedder_name).transpose()
|
||||
}) {
|
||||
@ -130,18 +132,9 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
||||
error: error.to_string(),
|
||||
})?,
|
||||
)?;
|
||||
// regenerate if the new `_vectors` field has its `regenerate` flag set.
|
||||
} else if new_vectors.regenerate {
|
||||
let new_rendered = prompt.render_document(
|
||||
update.external_document_id(),
|
||||
update.current(
|
||||
&context.rtxn,
|
||||
context.index,
|
||||
context.db_fields_ids_map,
|
||||
)?,
|
||||
context.new_fields_ids_map,
|
||||
&context.doc_alloc,
|
||||
)?;
|
||||
let old_rendered = prompt.render_document(
|
||||
update.external_document_id(),
|
||||
update.merged(
|
||||
&context.rtxn,
|
||||
@ -151,7 +144,31 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
||||
context.new_fields_ids_map,
|
||||
&context.doc_alloc,
|
||||
)?;
|
||||
if new_rendered != old_rendered {
|
||||
let must_regenerate = if !old_vectors.regenerate {
|
||||
// we just enabled `regenerate`
|
||||
true
|
||||
} else {
|
||||
let old_rendered = prompt.render_document(
|
||||
update.external_document_id(),
|
||||
update.current(
|
||||
&context.rtxn,
|
||||
context.index,
|
||||
context.db_fields_ids_map,
|
||||
)?,
|
||||
context.new_fields_ids_map,
|
||||
&context.doc_alloc,
|
||||
);
|
||||
|
||||
if let Ok(old_rendered) = old_rendered {
|
||||
// must regenerate if the rendered document changed
|
||||
new_rendered != old_rendered
|
||||
} else {
|
||||
// the previous version could not be rendered for comparison, so it is safer to regenerate
|
||||
true
|
||||
}
|
||||
};
|
||||
|
||||
if must_regenerate {
|
||||
chunks.set_autogenerated(
|
||||
update.docid(),
|
||||
update.external_document_id(),
|
||||
@ -160,17 +177,8 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
||||
)?;
|
||||
}
|
||||
}
|
||||
// no `_vectors` field, so only regenerate if the document is already set to regenerate in the DB.
|
||||
} else if old_vectors.regenerate {
|
||||
let old_rendered = prompt.render_document(
|
||||
update.external_document_id(),
|
||||
update.current(
|
||||
&context.rtxn,
|
||||
context.index,
|
||||
context.db_fields_ids_map,
|
||||
)?,
|
||||
context.new_fields_ids_map,
|
||||
&context.doc_alloc,
|
||||
)?;
|
||||
let new_rendered = prompt.render_document(
|
||||
update.external_document_id(),
|
||||
update.merged(
|
||||
@ -181,7 +189,28 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
||||
context.new_fields_ids_map,
|
||||
&context.doc_alloc,
|
||||
)?;
|
||||
if new_rendered != old_rendered {
|
||||
|
||||
let must_regenerate = {
|
||||
let old_rendered = prompt.render_document(
|
||||
update.external_document_id(),
|
||||
update.current(
|
||||
&context.rtxn,
|
||||
context.index,
|
||||
context.db_fields_ids_map,
|
||||
)?,
|
||||
context.new_fields_ids_map,
|
||||
&context.doc_alloc,
|
||||
);
|
||||
if let Ok(old_rendered) = old_rendered {
|
||||
// regenerate if the rendered version changed
|
||||
new_rendered != old_rendered
|
||||
} else {
|
||||
// if we cannot render the previous version of the documents, let's regenerate
|
||||
true
|
||||
}
|
||||
};
|
||||
|
||||
if must_regenerate {
|
||||
chunks.set_autogenerated(
|
||||
update.docid(),
|
||||
update.external_document_id(),
|
||||
|
Loading…
x
Reference in New Issue
Block a user