mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-28 23:41:38 +01:00
Add more precise spans
This commit is contained in:
parent
5560452ef9
commit
aa460819a7
@ -109,11 +109,14 @@ where
|
|||||||
|
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
|
|
||||||
|
|
||||||
// document but we need to create a function that collects and compresses documents.
|
// document but we need to create a function that collects and compresses documents.
|
||||||
let document_sender = extractor_sender.documents();
|
let document_sender = extractor_sender.documents();
|
||||||
let document_extractor = DocumentsExtractor::new(&document_sender, embedders);
|
let document_extractor = DocumentsExtractor::new(&document_sender, embedders);
|
||||||
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||||
|
{
|
||||||
|
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents");
|
||||||
|
let _entered = span.enter();
|
||||||
extract(document_changes,
|
extract(document_changes,
|
||||||
&document_extractor,
|
&document_extractor,
|
||||||
indexing_context,
|
indexing_context,
|
||||||
@ -121,7 +124,10 @@ where
|
|||||||
&datastore,
|
&datastore,
|
||||||
Step::ExtractingDocuments,
|
Step::ExtractingDocuments,
|
||||||
)?;
|
)?;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents");
|
||||||
|
let _entered = span.enter();
|
||||||
for document_extractor_data in datastore {
|
for document_extractor_data in datastore {
|
||||||
let document_extractor_data = document_extractor_data.0.into_inner();
|
let document_extractor_data = document_extractor_data.0.into_inner();
|
||||||
for (field, delta) in document_extractor_data.field_distribution_delta {
|
for (field, delta) in document_extractor_data.field_distribution_delta {
|
||||||
@ -133,14 +139,15 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
field_distribution.retain(|_, v| *v != 0);
|
field_distribution.retain(|_, v| *v != 0);
|
||||||
|
}
|
||||||
|
|
||||||
let facet_field_ids_delta;
|
let facet_field_ids_delta;
|
||||||
|
|
||||||
{
|
{
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted");
|
let caches = {
|
||||||
|
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
|
|
||||||
facet_field_ids_delta = merge_and_send_facet_docids(
|
|
||||||
FacetedDocidsExtractor::run_extraction(
|
FacetedDocidsExtractor::run_extraction(
|
||||||
grenad_parameters,
|
grenad_parameters,
|
||||||
document_changes,
|
document_changes,
|
||||||
@ -148,16 +155,25 @@ where
|
|||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&extractor_sender.field_id_docid_facet_sender(),
|
&extractor_sender.field_id_docid_facet_sender(),
|
||||||
Step::ExtractingFacets
|
Step::ExtractingFacets
|
||||||
)?,
|
)?
|
||||||
|
};
|
||||||
|
|
||||||
|
{
|
||||||
|
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
|
||||||
|
let _entered = span.enter();
|
||||||
|
|
||||||
|
facet_field_ids_delta = merge_and_send_facet_docids(
|
||||||
|
caches,
|
||||||
FacetDatabases::new(index),
|
FacetDatabases::new(index),
|
||||||
index,
|
index,
|
||||||
extractor_sender.facet_docids(),
|
extractor_sender.facet_docids(),
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
|
|
||||||
let _entered = span.enter();
|
|
||||||
|
|
||||||
|
|
||||||
let WordDocidsCaches {
|
let WordDocidsCaches {
|
||||||
@ -166,15 +182,19 @@ where
|
|||||||
exact_word_docids,
|
exact_word_docids,
|
||||||
word_position_docids,
|
word_position_docids,
|
||||||
fid_word_count_docids,
|
fid_word_count_docids,
|
||||||
} = WordDocidsExtractors::run_extraction(
|
} = {
|
||||||
|
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
|
||||||
|
let _entered = span.enter();
|
||||||
|
|
||||||
|
WordDocidsExtractors::run_extraction(
|
||||||
grenad_parameters,
|
grenad_parameters,
|
||||||
document_changes,
|
document_changes,
|
||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
Step::ExtractingWords
|
Step::ExtractingWords
|
||||||
)?;
|
)?
|
||||||
|
};
|
||||||
|
|
||||||
// TODO Word Docids Merger
|
|
||||||
{
|
{
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
|
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
@ -187,7 +207,6 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Word Fid Docids Merging
|
|
||||||
{
|
{
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
|
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
@ -200,7 +219,6 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exact Word Docids Merging
|
|
||||||
{
|
{
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
|
let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
@ -213,7 +231,6 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Word Position Docids Merging
|
|
||||||
{
|
{
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
|
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
@ -226,7 +243,6 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fid Word Count Docids Merging
|
|
||||||
{
|
{
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
|
let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
@ -244,17 +260,22 @@ where
|
|||||||
// this works only if the settings didn't change during this transaction.
|
// this works only if the settings didn't change during this transaction.
|
||||||
let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
|
let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
|
||||||
if proximity_precision == ProximityPrecision::ByWord {
|
if proximity_precision == ProximityPrecision::ByWord {
|
||||||
|
let caches = {
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
|
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
|
||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
|
|
||||||
|
<WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
|
||||||
let caches = <WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
|
|
||||||
grenad_parameters,
|
grenad_parameters,
|
||||||
document_changes,
|
document_changes,
|
||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
Step::ExtractingWordProximity,
|
Step::ExtractingWordProximity,
|
||||||
)?;
|
)?
|
||||||
|
};
|
||||||
|
|
||||||
|
{
|
||||||
|
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
|
||||||
|
let _entered = span.enter();
|
||||||
|
|
||||||
merge_and_send_docids(
|
merge_and_send_docids(
|
||||||
caches,
|
caches,
|
||||||
@ -264,10 +285,9 @@ where
|
|||||||
&indexing_context.must_stop_processing,
|
&indexing_context.must_stop_processing,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
'vectors: {
|
'vectors: {
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
|
|
||||||
let _entered = span.enter();
|
|
||||||
|
|
||||||
let mut index_embeddings = index.embedding_configs(&rtxn)?;
|
let mut index_embeddings = index.embedding_configs(&rtxn)?;
|
||||||
if index_embeddings.is_empty() {
|
if index_embeddings.is_empty() {
|
||||||
@ -277,7 +297,15 @@ where
|
|||||||
let embedding_sender = extractor_sender.embeddings();
|
let embedding_sender = extractor_sender.embeddings();
|
||||||
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
|
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
|
||||||
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||||
|
{
|
||||||
|
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
|
||||||
|
let _entered = span.enter();
|
||||||
|
|
||||||
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?;
|
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
|
||||||
|
let _entered = span.enter();
|
||||||
|
|
||||||
for config in &mut index_embeddings {
|
for config in &mut index_embeddings {
|
||||||
'data: for data in datastore.iter_mut() {
|
'data: for data in datastore.iter_mut() {
|
||||||
@ -286,18 +314,21 @@ where
|
|||||||
deladd.apply_to(&mut config.user_provided);
|
deladd.apply_to(&mut config.user_provided);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
embedding_sender.finish(index_embeddings).unwrap();
|
embedding_sender.finish(index_embeddings).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
'geo: {
|
'geo: {
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
|
|
||||||
let _entered = span.enter();
|
|
||||||
|
|
||||||
let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
|
let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
|
||||||
break 'geo;
|
break 'geo;
|
||||||
};
|
};
|
||||||
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||||
|
|
||||||
|
{
|
||||||
|
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
|
||||||
|
let _entered = span.enter();
|
||||||
|
|
||||||
extract(
|
extract(
|
||||||
document_changes,
|
document_changes,
|
||||||
&extractor,
|
&extractor,
|
||||||
@ -306,6 +337,7 @@ where
|
|||||||
&datastore,
|
&datastore,
|
||||||
Step::WritingGeoPoints
|
Step::WritingGeoPoints
|
||||||
)?;
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
merge_and_send_rtree(
|
merge_and_send_rtree(
|
||||||
datastore,
|
datastore,
|
||||||
@ -316,11 +348,7 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
|
||||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH");
|
|
||||||
let _entered = span.enter();
|
|
||||||
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
|
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
|
||||||
}
|
|
||||||
|
|
||||||
Result::Ok(facet_field_ids_delta)
|
Result::Ok(facet_field_ids_delta)
|
||||||
})?;
|
})?;
|
||||||
@ -352,6 +380,10 @@ where
|
|||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let mut arroy_writers = arroy_writers?;
|
let mut arroy_writers = arroy_writers?;
|
||||||
|
{
|
||||||
|
let span = tracing::trace_span!(target: "indexing::write_db", "all");
|
||||||
|
let _entered = span.enter();
|
||||||
|
|
||||||
for operation in writer_receiver {
|
for operation in writer_receiver {
|
||||||
match operation {
|
match operation {
|
||||||
WriterOperation::DbOperation(db_operation) => {
|
WriterOperation::DbOperation(db_operation) => {
|
||||||
@ -362,11 +394,13 @@ where
|
|||||||
Ok(false) => unreachable!("We tried to delete an unknown key"),
|
Ok(false) => unreachable!("We tried to delete an unknown key"),
|
||||||
Ok(_) => (),
|
Ok(_) => (),
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
return Err(Error::InternalError(InternalError::StoreDeletion {
|
return Err(Error::InternalError(
|
||||||
|
InternalError::StoreDeletion {
|
||||||
database_name,
|
database_name,
|
||||||
key: e.entry().to_owned(),
|
key: e.entry().to_owned(),
|
||||||
error,
|
error,
|
||||||
}));
|
},
|
||||||
|
));
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
EntryOperation::Write(e) => {
|
EntryOperation::Write(e) => {
|
||||||
@ -383,8 +417,10 @@ where
|
|||||||
}
|
}
|
||||||
WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation {
|
WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation {
|
||||||
ArroyOperation::DeleteVectors { docid } => {
|
ArroyOperation::DeleteVectors { docid } => {
|
||||||
for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in
|
for (
|
||||||
&mut arroy_writers
|
_embedder_index,
|
||||||
|
(_embedder_name, _embedder, writer, dimensions),
|
||||||
|
) in &mut arroy_writers
|
||||||
{
|
{
|
||||||
let dimensions = *dimensions;
|
let dimensions = *dimensions;
|
||||||
writer.del_items(wtxn, dimensions, docid)?;
|
writer.del_items(wtxn, dimensions, docid)?;
|
||||||
@ -395,9 +431,10 @@ where
|
|||||||
embedder_id,
|
embedder_id,
|
||||||
embeddings: raw_embeddings,
|
embeddings: raw_embeddings,
|
||||||
} => {
|
} => {
|
||||||
let (_, _, writer, dimensions) =
|
let (_, _, writer, dimensions) = arroy_writers
|
||||||
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
|
.get(&embedder_id)
|
||||||
// TODO: switch to Embeddings
|
.expect("requested a missing embedder");
|
||||||
|
|
||||||
let mut embeddings = Embeddings::new(*dimensions);
|
let mut embeddings = Embeddings::new(*dimensions);
|
||||||
for embedding in raw_embeddings {
|
for embedding in raw_embeddings {
|
||||||
embeddings.append(embedding).unwrap();
|
embeddings.append(embedding).unwrap();
|
||||||
@ -407,8 +444,9 @@ where
|
|||||||
writer.add_items(wtxn, docid, &embeddings)?;
|
writer.add_items(wtxn, docid, &embeddings)?;
|
||||||
}
|
}
|
||||||
ArroyOperation::SetVector { docid, embedder_id, embedding } => {
|
ArroyOperation::SetVector { docid, embedder_id, embedding } => {
|
||||||
let (_, _, writer, dimensions) =
|
let (_, _, writer, dimensions) = arroy_writers
|
||||||
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
|
.get(&embedder_id)
|
||||||
|
.expect("requested a missing embedder");
|
||||||
writer.del_items(wtxn, *dimensions, docid)?;
|
writer.del_items(wtxn, *dimensions, docid)?;
|
||||||
writer.add_item(wtxn, docid, &embedding)?;
|
writer.add_item(wtxn, docid, &embedding)?;
|
||||||
}
|
}
|
||||||
@ -420,8 +458,10 @@ where
|
|||||||
Step::WritingEmbeddingsToDatabase,
|
Step::WritingEmbeddingsToDatabase,
|
||||||
));
|
));
|
||||||
|
|
||||||
for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in
|
for (
|
||||||
&mut arroy_writers
|
_embedder_index,
|
||||||
|
(_embedder_name, _embedder, writer, dimensions),
|
||||||
|
) in &mut arroy_writers
|
||||||
{
|
{
|
||||||
let dimensions = *dimensions;
|
let dimensions = *dimensions;
|
||||||
writer.build_and_quantize(
|
writer.build_and_quantize(
|
||||||
@ -438,6 +478,7 @@ where
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));
|
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user