mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-28 22:28:05 +01:00
Merge #5092
5092: Precise spans for new indexer r=dureuill a=dureuill - Separate extract and merge spans - Add span around commit Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
fb66fec398
@ -1024,7 +1024,13 @@ impl IndexScheduler {
|
||||
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
|
||||
let _entered = span.enter();
|
||||
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
|
||||
// if the update processed successfully, we're going to store the new
|
||||
// stats of the index. Since the tasks have already been processed and
|
||||
|
@ -109,11 +109,14 @@ where
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
|
||||
// document but we need to create a function that collects and compresses documents.
|
||||
let document_sender = extractor_sender.documents();
|
||||
let document_extractor = DocumentsExtractor::new(&document_sender, embedders);
|
||||
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents");
|
||||
let _entered = span.enter();
|
||||
extract(document_changes,
|
||||
&document_extractor,
|
||||
indexing_context,
|
||||
@ -121,7 +124,10 @@ where
|
||||
&datastore,
|
||||
Step::ExtractingDocuments,
|
||||
)?;
|
||||
|
||||
}
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents");
|
||||
let _entered = span.enter();
|
||||
for document_extractor_data in datastore {
|
||||
let document_extractor_data = document_extractor_data.0.into_inner();
|
||||
for (field, delta) in document_extractor_data.field_distribution_delta {
|
||||
@ -133,14 +139,15 @@ where
|
||||
}
|
||||
|
||||
field_distribution.retain(|_, v| *v != 0);
|
||||
}
|
||||
|
||||
let facet_field_ids_delta;
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted");
|
||||
let caches = {
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted");
|
||||
let _entered = span.enter();
|
||||
|
||||
facet_field_ids_delta = merge_and_send_facet_docids(
|
||||
FacetedDocidsExtractor::run_extraction(
|
||||
grenad_parameters,
|
||||
document_changes,
|
||||
@ -148,16 +155,25 @@ where
|
||||
&mut extractor_allocs,
|
||||
&extractor_sender.field_id_docid_facet_sender(),
|
||||
Step::ExtractingFacets
|
||||
)?,
|
||||
)?
|
||||
};
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
|
||||
let _entered = span.enter();
|
||||
|
||||
facet_field_ids_delta = merge_and_send_facet_docids(
|
||||
caches,
|
||||
FacetDatabases::new(index),
|
||||
index,
|
||||
extractor_sender.facet_docids(),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
|
||||
let _entered = span.enter();
|
||||
|
||||
|
||||
|
||||
|
||||
let WordDocidsCaches {
|
||||
@ -166,15 +182,19 @@ where
|
||||
exact_word_docids,
|
||||
word_position_docids,
|
||||
fid_word_count_docids,
|
||||
} = WordDocidsExtractors::run_extraction(
|
||||
} = {
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
|
||||
let _entered = span.enter();
|
||||
|
||||
WordDocidsExtractors::run_extraction(
|
||||
grenad_parameters,
|
||||
document_changes,
|
||||
indexing_context,
|
||||
&mut extractor_allocs,
|
||||
Step::ExtractingWords
|
||||
)?;
|
||||
)?
|
||||
};
|
||||
|
||||
// TODO Word Docids Merger
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
|
||||
let _entered = span.enter();
|
||||
@ -187,7 +207,6 @@ where
|
||||
)?;
|
||||
}
|
||||
|
||||
// Word Fid Docids Merging
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
|
||||
let _entered = span.enter();
|
||||
@ -200,7 +219,6 @@ where
|
||||
)?;
|
||||
}
|
||||
|
||||
// Exact Word Docids Merging
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
|
||||
let _entered = span.enter();
|
||||
@ -213,7 +231,6 @@ where
|
||||
)?;
|
||||
}
|
||||
|
||||
// Word Position Docids Merging
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
|
||||
let _entered = span.enter();
|
||||
@ -226,7 +243,6 @@ where
|
||||
)?;
|
||||
}
|
||||
|
||||
// Fid Word Count Docids Merging
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
|
||||
let _entered = span.enter();
|
||||
@ -244,17 +260,22 @@ where
|
||||
// this works only if the settings didn't change during this transaction.
|
||||
let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
|
||||
if proximity_precision == ProximityPrecision::ByWord {
|
||||
let caches = {
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
|
||||
let _entered = span.enter();
|
||||
|
||||
|
||||
let caches = <WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
|
||||
<WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
|
||||
grenad_parameters,
|
||||
document_changes,
|
||||
indexing_context,
|
||||
&mut extractor_allocs,
|
||||
Step::ExtractingWordProximity,
|
||||
)?;
|
||||
)?
|
||||
};
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
|
||||
let _entered = span.enter();
|
||||
|
||||
merge_and_send_docids(
|
||||
caches,
|
||||
@ -264,10 +285,9 @@ where
|
||||
&indexing_context.must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
'vectors: {
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
|
||||
let _entered = span.enter();
|
||||
|
||||
let mut index_embeddings = index.embedding_configs(&rtxn)?;
|
||||
if index_embeddings.is_empty() {
|
||||
@ -277,7 +297,15 @@ where
|
||||
let embedding_sender = extractor_sender.embeddings();
|
||||
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
|
||||
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
|
||||
let _entered = span.enter();
|
||||
|
||||
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?;
|
||||
}
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
|
||||
let _entered = span.enter();
|
||||
|
||||
for config in &mut index_embeddings {
|
||||
'data: for data in datastore.iter_mut() {
|
||||
@ -286,18 +314,21 @@ where
|
||||
deladd.apply_to(&mut config.user_provided);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
embedding_sender.finish(index_embeddings).unwrap();
|
||||
}
|
||||
|
||||
'geo: {
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
|
||||
let _entered = span.enter();
|
||||
|
||||
let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
|
||||
break 'geo;
|
||||
};
|
||||
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
|
||||
let _entered = span.enter();
|
||||
|
||||
extract(
|
||||
document_changes,
|
||||
&extractor,
|
||||
@ -306,6 +337,7 @@ where
|
||||
&datastore,
|
||||
Step::WritingGeoPoints
|
||||
)?;
|
||||
}
|
||||
|
||||
merge_and_send_rtree(
|
||||
datastore,
|
||||
@ -316,11 +348,7 @@ where
|
||||
)?;
|
||||
}
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH");
|
||||
let _entered = span.enter();
|
||||
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
|
||||
}
|
||||
|
||||
Result::Ok(facet_field_ids_delta)
|
||||
})?;
|
||||
@ -352,6 +380,10 @@ where
|
||||
.collect();
|
||||
|
||||
let mut arroy_writers = arroy_writers?;
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::write_db", "all");
|
||||
let _entered = span.enter();
|
||||
|
||||
for operation in writer_receiver {
|
||||
match operation {
|
||||
WriterOperation::DbOperation(db_operation) => {
|
||||
@ -362,11 +394,13 @@ where
|
||||
Ok(false) => unreachable!("We tried to delete an unknown key"),
|
||||
Ok(_) => (),
|
||||
Err(error) => {
|
||||
return Err(Error::InternalError(InternalError::StoreDeletion {
|
||||
return Err(Error::InternalError(
|
||||
InternalError::StoreDeletion {
|
||||
database_name,
|
||||
key: e.entry().to_owned(),
|
||||
error,
|
||||
}));
|
||||
},
|
||||
));
|
||||
}
|
||||
},
|
||||
EntryOperation::Write(e) => {
|
||||
@ -383,8 +417,10 @@ where
|
||||
}
|
||||
WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation {
|
||||
ArroyOperation::DeleteVectors { docid } => {
|
||||
for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in
|
||||
&mut arroy_writers
|
||||
for (
|
||||
_embedder_index,
|
||||
(_embedder_name, _embedder, writer, dimensions),
|
||||
) in &mut arroy_writers
|
||||
{
|
||||
let dimensions = *dimensions;
|
||||
writer.del_items(wtxn, dimensions, docid)?;
|
||||
@ -395,9 +431,10 @@ where
|
||||
embedder_id,
|
||||
embeddings: raw_embeddings,
|
||||
} => {
|
||||
let (_, _, writer, dimensions) =
|
||||
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
|
||||
// TODO: switch to Embeddings
|
||||
let (_, _, writer, dimensions) = arroy_writers
|
||||
.get(&embedder_id)
|
||||
.expect("requested a missing embedder");
|
||||
|
||||
let mut embeddings = Embeddings::new(*dimensions);
|
||||
for embedding in raw_embeddings {
|
||||
embeddings.append(embedding).unwrap();
|
||||
@ -407,8 +444,9 @@ where
|
||||
writer.add_items(wtxn, docid, &embeddings)?;
|
||||
}
|
||||
ArroyOperation::SetVector { docid, embedder_id, embedding } => {
|
||||
let (_, _, writer, dimensions) =
|
||||
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
|
||||
let (_, _, writer, dimensions) = arroy_writers
|
||||
.get(&embedder_id)
|
||||
.expect("requested a missing embedder");
|
||||
writer.del_items(wtxn, *dimensions, docid)?;
|
||||
writer.add_item(wtxn, docid, &embedding)?;
|
||||
}
|
||||
@ -420,8 +458,10 @@ where
|
||||
Step::WritingEmbeddingsToDatabase,
|
||||
));
|
||||
|
||||
for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in
|
||||
&mut arroy_writers
|
||||
for (
|
||||
_embedder_index,
|
||||
(_embedder_name, _embedder, writer, dimensions),
|
||||
) in &mut arroy_writers
|
||||
{
|
||||
let dimensions = *dimensions;
|
||||
writer.build_and_quantize(
|
||||
@ -438,6 +478,7 @@ where
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user