From 3b8965bc762d278863aee5838001fca430759f60 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 22 Apr 2025 18:07:26 +0200 Subject: [PATCH] Display and sum the time spent in arroy --- crates/milli/src/search/mod.rs | 8 +++++++- crates/milli/src/search/new/mod.rs | 23 ++++++++++++++++++++++ crates/milli/src/search/new/vector_sort.rs | 8 ++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index def00ec92..37b1aaf09 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -8,7 +8,7 @@ use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; -use self::new::{execute_vector_search, PartialSearchResult}; +use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats}; use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::Embedder; @@ -269,6 +269,12 @@ impl<'a> Search<'a> { )?, }; + if let Some(VectorStoreStats { total_time, total_queries, total_results }) = + ctx.vector_store_stats + { + tracing::debug!("Vector store stats: total_time={total_time:.02?}, total_queries={total_queries}, total_results={total_results}"); + } + // consume context and located_query_terms to build MatchingWords. 
let matching_words = match located_query_terms { Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms), diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index 8b04d8c6a..6e794ef53 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -22,6 +22,8 @@ mod vector_sort; mod tests; use std::collections::HashSet; +use std::ops::AddAssign; +use std::time::Duration; use bucket_sort::{bucket_sort, BucketSortOutput}; use charabia::{Language, TokenizerBuilder}; @@ -72,6 +74,7 @@ pub struct SearchContext<'ctx> { pub phrase_docids: PhraseDocIdsCache, pub restricted_fids: Option, pub prefix_search: PrefixSearch, + pub vector_store_stats: Option<VectorStoreStats>, } impl<'ctx> SearchContext<'ctx> { @@ -101,6 +104,7 @@ impl<'ctx> SearchContext<'ctx> { phrase_docids: <_>::default(), restricted_fids: None, prefix_search, + vector_store_stats: None, }) } @@ -166,6 +170,25 @@ impl<'ctx> SearchContext<'ctx> { } } +#[derive(Debug, Default)] +pub struct VectorStoreStats { + /// The total time spent on vector search. + pub total_time: Duration, + /// The number of searches performed. + pub total_queries: usize, + /// The number of nearest neighbors found.
+ pub total_results: usize, +} + +impl AddAssign for VectorStoreStats { + fn add_assign(&mut self, other: Self) { + let Self { total_time, total_queries, total_results } = self; + *total_time += other.total_time; + *total_queries += other.total_queries; + *total_results += other.total_results; + } +} + #[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)] pub enum Word { Original(Interned), diff --git a/crates/milli/src/search/new/vector_sort.rs b/crates/milli/src/search/new/vector_sort.rs index a25605cfc..834f97384 100644 --- a/crates/milli/src/search/new/vector_sort.rs +++ b/crates/milli/src/search/new/vector_sort.rs @@ -1,8 +1,10 @@ use std::iter::FromIterator; +use std::time::Instant; use roaring::RoaringBitmap; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; +use super::VectorStoreStats; use crate::score_details::{self, ScoreDetails}; use crate::vector::{ArroyWrapper, DistributionShift, Embedder}; use crate::{DocumentId, Result, SearchContext, SearchLogger}; @@ -53,9 +55,15 @@ impl VectorSort { ) -> Result<()> { let target = &self.target; + let before = Instant::now(); let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized); let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; self.cached_sorted_docids = results.into_iter(); + *ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats { + total_time: before.elapsed(), + total_queries: 1, + total_results: self.cached_sorted_docids.len(), + }; Ok(()) }