mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-06-15 20:42:24 +02:00
Merge pull request #5668 from meilisearch/fix-must-regenerate
Various fixes to embedding regeneration
This commit is contained in:
commit
e2b549c5ee
@ -111,6 +111,8 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
|||||||
let prompt = chunks.prompt();
|
let prompt = chunks.prompt();
|
||||||
|
|
||||||
let old_vectors = old_vectors.vectors_for_key(embedder_name)?.unwrap();
|
let old_vectors = old_vectors.vectors_for_key(embedder_name)?.unwrap();
|
||||||
|
|
||||||
|
// case where we have a `_vectors` field in the updated document
|
||||||
if let Some(new_vectors) = new_vectors.as_ref().and_then(|new_vectors| {
|
if let Some(new_vectors) = new_vectors.as_ref().and_then(|new_vectors| {
|
||||||
new_vectors.vectors_for_key(embedder_name).transpose()
|
new_vectors.vectors_for_key(embedder_name).transpose()
|
||||||
}) {
|
}) {
|
||||||
@ -130,18 +132,9 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
|||||||
error: error.to_string(),
|
error: error.to_string(),
|
||||||
})?,
|
})?,
|
||||||
)?;
|
)?;
|
||||||
|
// regenerate if the new `_vectors` field is set to regenerate.
|
||||||
} else if new_vectors.regenerate {
|
} else if new_vectors.regenerate {
|
||||||
let new_rendered = prompt.render_document(
|
let new_rendered = prompt.render_document(
|
||||||
update.external_document_id(),
|
|
||||||
update.current(
|
|
||||||
&context.rtxn,
|
|
||||||
context.index,
|
|
||||||
context.db_fields_ids_map,
|
|
||||||
)?,
|
|
||||||
context.new_fields_ids_map,
|
|
||||||
&context.doc_alloc,
|
|
||||||
)?;
|
|
||||||
let old_rendered = prompt.render_document(
|
|
||||||
update.external_document_id(),
|
update.external_document_id(),
|
||||||
update.merged(
|
update.merged(
|
||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
@ -151,7 +144,31 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
|||||||
context.new_fields_ids_map,
|
context.new_fields_ids_map,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
if new_rendered != old_rendered {
|
let must_regenerate = if !old_vectors.regenerate {
|
||||||
|
// we just enabled `regenerate`
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
let old_rendered = prompt.render_document(
|
||||||
|
update.external_document_id(),
|
||||||
|
update.current(
|
||||||
|
&context.rtxn,
|
||||||
|
context.index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
)?,
|
||||||
|
context.new_fields_ids_map,
|
||||||
|
&context.doc_alloc,
|
||||||
|
);
|
||||||
|
|
||||||
|
if let Ok(old_rendered) = old_rendered {
|
||||||
|
// must regenerate if the rendered version changed
|
||||||
|
new_rendered != old_rendered
|
||||||
|
} else {
|
||||||
|
// cannot check previous rendered, better regenerate
|
||||||
|
true
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if must_regenerate {
|
||||||
chunks.set_autogenerated(
|
chunks.set_autogenerated(
|
||||||
update.docid(),
|
update.docid(),
|
||||||
update.external_document_id(),
|
update.external_document_id(),
|
||||||
@ -160,17 +177,8 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// no `_vectors` field, so only regenerate if the document is already set to regenerate in the DB.
|
||||||
} else if old_vectors.regenerate {
|
} else if old_vectors.regenerate {
|
||||||
let old_rendered = prompt.render_document(
|
|
||||||
update.external_document_id(),
|
|
||||||
update.current(
|
|
||||||
&context.rtxn,
|
|
||||||
context.index,
|
|
||||||
context.db_fields_ids_map,
|
|
||||||
)?,
|
|
||||||
context.new_fields_ids_map,
|
|
||||||
&context.doc_alloc,
|
|
||||||
)?;
|
|
||||||
let new_rendered = prompt.render_document(
|
let new_rendered = prompt.render_document(
|
||||||
update.external_document_id(),
|
update.external_document_id(),
|
||||||
update.merged(
|
update.merged(
|
||||||
@ -181,7 +189,28 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
|||||||
context.new_fields_ids_map,
|
context.new_fields_ids_map,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
if new_rendered != old_rendered {
|
|
||||||
|
let must_regenerate = {
|
||||||
|
let old_rendered = prompt.render_document(
|
||||||
|
update.external_document_id(),
|
||||||
|
update.current(
|
||||||
|
&context.rtxn,
|
||||||
|
context.index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
)?,
|
||||||
|
context.new_fields_ids_map,
|
||||||
|
&context.doc_alloc,
|
||||||
|
);
|
||||||
|
if let Ok(old_rendered) = old_rendered {
|
||||||
|
// regenerate if the rendered version changed
|
||||||
|
new_rendered != old_rendered
|
||||||
|
} else {
|
||||||
|
// if we cannot render the previous version of the document, let's regenerate
|
||||||
|
true
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if must_regenerate {
|
||||||
chunks.set_autogenerated(
|
chunks.set_autogenerated(
|
||||||
update.docid(),
|
update.docid(),
|
||||||
update.external_document_id(),
|
update.external_document_id(),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user