Merge pull request #5674 from meilisearch/release-v1.15.2

Bring back v1.15.2 to main
This commit is contained in:
Clément Renault 2025-06-16 11:36:07 +00:00 committed by GitHub
commit abb399b802
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 262 additions and 97 deletions

View file

@ -29,7 +29,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
let max_positions_per_attributes = max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
let max_memory = indexer.max_memory_by_thread();
let force_reindexing = settings_diff.reindex_searchable();
// initialize destination values.
let mut documents_ids = RoaringBitmap::new();
@ -43,6 +42,12 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
true,
);
let force_reindexing = settings_diff.reindex_searchable();
let skip_indexing = !force_reindexing && settings_diff.settings_update_only();
if skip_indexing {
return sorter_into_reader(docid_word_positions_sorter, indexer);
}
// initialize buffers.
let mut del_buffers = Buffers::default();
let mut add_buffers = Buffers::default();

View file

@ -111,6 +111,8 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
let prompt = chunks.prompt();
let old_vectors = old_vectors.vectors_for_key(embedder_name)?.unwrap();
// case where we have a `_vectors` field in the updated document
if let Some(new_vectors) = new_vectors.as_ref().and_then(|new_vectors| {
new_vectors.vectors_for_key(embedder_name).transpose()
}) {
@ -130,18 +132,9 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
error: error.to_string(),
})?,
)?;
// regenerate if the new `_vectors` fields is set to.
} else if new_vectors.regenerate {
let new_rendered = prompt.render_document(
update.external_document_id(),
update.current(
&context.rtxn,
context.index,
context.db_fields_ids_map,
)?,
context.new_fields_ids_map,
&context.doc_alloc,
)?;
let old_rendered = prompt.render_document(
update.external_document_id(),
update.merged(
&context.rtxn,
@ -151,7 +144,31 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
context.new_fields_ids_map,
&context.doc_alloc,
)?;
if new_rendered != old_rendered {
let must_regenerate = if !old_vectors.regenerate {
// we just enabled `regenerate`
true
} else {
let old_rendered = prompt.render_document(
update.external_document_id(),
update.current(
&context.rtxn,
context.index,
context.db_fields_ids_map,
)?,
context.new_fields_ids_map,
&context.doc_alloc,
);
if let Ok(old_rendered) = old_rendered {
// must regenerate if the rendered changed
new_rendered != old_rendered
} else {
// cannot check previous rendered, better regenerate
true
}
};
if must_regenerate {
chunks.set_autogenerated(
update.docid(),
update.external_document_id(),
@ -160,17 +177,8 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
)?;
}
}
// no `_vectors` field, so only regenerate if the document is already set to in the DB.
} else if old_vectors.regenerate {
let old_rendered = prompt.render_document(
update.external_document_id(),
update.current(
&context.rtxn,
context.index,
context.db_fields_ids_map,
)?,
context.new_fields_ids_map,
&context.doc_alloc,
)?;
let new_rendered = prompt.render_document(
update.external_document_id(),
update.merged(
@ -181,7 +189,28 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
context.new_fields_ids_map,
&context.doc_alloc,
)?;
if new_rendered != old_rendered {
let must_regenerate = {
let old_rendered = prompt.render_document(
update.external_document_id(),
update.current(
&context.rtxn,
context.index,
context.db_fields_ids_map,
)?,
context.new_fields_ids_map,
&context.doc_alloc,
);
if let Ok(old_rendered) = old_rendered {
// regenerate if the rendered version changed
new_rendered != old_rendered
} else {
// if we cannot render the previous version of the documents, let's regenerate
true
}
};
if must_regenerate {
chunks.set_autogenerated(
update.docid(),
update.external_document_id(),