mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 03:47:02 +02:00
Merge #5341
5341: Embeddings stats r=ManyTheFish a=ManyTheFish # Pull Request ## Related issue Fixes #5321 ## What does this PR do? - Add embedding stats - force dumpless upgrade to recompute stats - add tests Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
885710a07b
12 changed files with 626 additions and 72 deletions
|
@ -410,8 +410,43 @@ impl ArroyWrapper {
|
|||
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> {
|
||||
self.database.remap_data_type()
|
||||
}
|
||||
|
||||
pub fn aggregate_stats(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
stats: &mut ArroyStats,
|
||||
) -> Result<(), arroy::Error> {
|
||||
if self.quantized {
|
||||
for reader in self.readers(rtxn, self.quantized_db()) {
|
||||
let reader = reader?;
|
||||
let documents = reader.item_ids();
|
||||
if documents.is_empty() {
|
||||
break;
|
||||
}
|
||||
stats.documents |= documents;
|
||||
stats.number_of_embeddings += documents.len();
|
||||
}
|
||||
} else {
|
||||
for reader in self.readers(rtxn, self.angular_db()) {
|
||||
let reader = reader?;
|
||||
let documents = reader.item_ids();
|
||||
if documents.is_empty() {
|
||||
break;
|
||||
}
|
||||
stats.documents |= documents;
|
||||
stats.number_of_embeddings += documents.len();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct ArroyStats {
|
||||
pub number_of_embeddings: u64,
|
||||
pub documents: RoaringBitmap,
|
||||
}
|
||||
/// One or multiple embeddings stored consecutively in a flat vector.
|
||||
pub struct Embeddings<F> {
|
||||
data: Vec<F>,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue