mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Implement incremental document database stats computing
This commit is contained in:
parent
d9642ec916
commit
9f3663e768
9 changed files with 116 additions and 53 deletions
|
@ -711,15 +711,17 @@ impl DelAddRoaringBitmap {
|
|||
DelAddRoaringBitmap { del, add }
|
||||
}
|
||||
|
||||
pub fn apply_to(&self, documents_ids: &mut RoaringBitmap) {
|
||||
pub fn apply_to(&self, documents_ids: &mut RoaringBitmap, modified_docids: &mut RoaringBitmap) {
|
||||
let DelAddRoaringBitmap { del, add } = self;
|
||||
|
||||
if let Some(del) = del {
|
||||
*documents_ids -= del;
|
||||
*modified_docids |= del;
|
||||
}
|
||||
|
||||
if let Some(add) = add {
|
||||
*documents_ids |= add;
|
||||
*modified_docids |= add;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ pub(super) fn extract_all<'pl, 'extractor, DC, MSP>(
|
|||
field_distribution: &mut BTreeMap<String, u64>,
|
||||
mut index_embeddings: Vec<IndexEmbeddingConfig>,
|
||||
document_ids: &mut RoaringBitmap,
|
||||
modified_docids: &mut RoaringBitmap,
|
||||
) -> Result<(FacetFieldIdsDelta, Vec<IndexEmbeddingConfig>)>
|
||||
where
|
||||
DC: DocumentChanges<'pl>,
|
||||
|
@ -70,7 +71,7 @@ where
|
|||
// adding the delta should never cause a negative result, as we are removing fields that previously existed.
|
||||
*current = current.saturating_add_signed(delta);
|
||||
}
|
||||
document_extractor_data.docids_delta.apply_to(document_ids);
|
||||
document_extractor_data.docids_delta.apply_to(document_ids, modified_docids);
|
||||
}
|
||||
|
||||
field_distribution.retain(|_, v| *v != 0);
|
||||
|
@ -256,7 +257,7 @@ where
|
|||
let Some(deladd) = data.remove(&config.name) else {
|
||||
continue 'data;
|
||||
};
|
||||
deladd.apply_to(&mut config.user_provided);
|
||||
deladd.apply_to(&mut config.user_provided, modified_docids);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -130,6 +130,7 @@ where
|
|||
let index_embeddings = index.embedding_configs(wtxn)?;
|
||||
let mut field_distribution = index.field_distribution(wtxn)?;
|
||||
let mut document_ids = index.documents_ids(wtxn)?;
|
||||
let mut modified_docids = roaring::RoaringBitmap::new();
|
||||
|
||||
let congestion = thread::scope(|s| -> Result<ChannelCongestion> {
|
||||
let indexer_span = tracing::Span::current();
|
||||
|
@ -138,6 +139,7 @@ where
|
|||
// prevent moving the field_distribution and document_ids in the inner closure...
|
||||
let field_distribution = &mut field_distribution;
|
||||
let document_ids = &mut document_ids;
|
||||
let modified_docids = &mut modified_docids;
|
||||
let extractor_handle =
|
||||
Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
|
||||
pool.install(move || {
|
||||
|
@ -152,6 +154,7 @@ where
|
|||
field_distribution,
|
||||
index_embeddings,
|
||||
document_ids,
|
||||
modified_docids,
|
||||
)
|
||||
})
|
||||
.unwrap()
|
||||
|
@ -227,6 +230,7 @@ where
|
|||
embedders,
|
||||
field_distribution,
|
||||
document_ids,
|
||||
modified_docids,
|
||||
)?;
|
||||
|
||||
Ok(congestion)
|
||||
|
|
|
@ -129,6 +129,7 @@ pub fn update_index(
|
|||
embedders: EmbeddingConfigs,
|
||||
field_distribution: std::collections::BTreeMap<String, u64>,
|
||||
document_ids: roaring::RoaringBitmap,
|
||||
modified_docids: roaring::RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?;
|
||||
if let Some(new_primary_key) = new_primary_key {
|
||||
|
@ -140,6 +141,7 @@ pub fn update_index(
|
|||
index.put_field_distribution(wtxn, &field_distribution)?;
|
||||
index.put_documents_ids(wtxn, &document_ids)?;
|
||||
index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
|
||||
index.update_documents_stats(wtxn, modified_docids)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue