Display and sum the time spent in arroy

This commit is contained in:
Kerollmops 2025-04-22 18:07:26 +02:00
parent 9fd9fcb03e
commit 3b8965bc76
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 38 additions and 1 deletion

View File

@ -8,7 +8,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult};
use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::Embedder;
@ -269,6 +269,12 @@ impl<'a> Search<'a> {
)?,
};
if let Some(VectorStoreStats { total_time, total_queries, total_results }) =
ctx.vector_store_stats
{
tracing::debug!("Vector store stats: total_time={total_time:.02?}, total_queries={total_queries}, total_results={total_results}");
}
// consume context and located_query_terms to build MatchingWords.
let matching_words = match located_query_terms {
Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),

View File

@ -22,6 +22,8 @@ mod vector_sort;
mod tests;
use std::collections::HashSet;
use std::ops::AddAssign;
use std::time::Duration;
use bucket_sort::{bucket_sort, BucketSortOutput};
use charabia::{Language, TokenizerBuilder};
@ -72,6 +74,7 @@ pub struct SearchContext<'ctx> {
pub phrase_docids: PhraseDocIdsCache,
pub restricted_fids: Option<RestrictedFids>,
pub prefix_search: PrefixSearch,
pub vector_store_stats: Option<VectorStoreStats>,
}
impl<'ctx> SearchContext<'ctx> {
@ -101,6 +104,7 @@ impl<'ctx> SearchContext<'ctx> {
phrase_docids: <_>::default(),
restricted_fids: None,
prefix_search,
vector_store_stats: None,
})
}
@ -166,6 +170,25 @@ impl<'ctx> SearchContext<'ctx> {
}
}
/// Accumulated counters describing vector searches.
///
/// Values can be merged with `+=`, which sums each field with its counterpart.
#[derive(Debug, Default)]
pub struct VectorStoreStats {
    /// Cumulative time spent performing vector searches.
    pub total_time: Duration,
    /// How many vector searches were executed.
    pub total_queries: usize,
    /// How many nearest neighbors were found in total.
    pub total_results: usize,
}

impl AddAssign for VectorStoreStats {
    /// Adds every counter of `other` to the matching counter of `self`.
    fn add_assign(&mut self, other: Self) {
        // Destructure the right-hand side exhaustively so that a field added
        // to the struct later cannot be silently left out of the sum.
        let Self {
            total_time: extra_time,
            total_queries: extra_queries,
            total_results: extra_results,
        } = other;

        self.total_time += extra_time;
        self.total_queries += extra_queries;
        self.total_results += extra_results;
    }
}
#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
pub enum Word {
Original(Interned<String>),

View File

@ -1,8 +1,10 @@
use std::iter::FromIterator;
use std::time::Instant;
use roaring::RoaringBitmap;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use super::VectorStoreStats;
use crate::score_details::{self, ScoreDetails};
use crate::vector::{ArroyWrapper, DistributionShift, Embedder};
use crate::{DocumentId, Result, SearchContext, SearchLogger};
@ -53,9 +55,15 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
) -> Result<()> {
let target = &self.target;
let before = Instant::now();
let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized);
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
self.cached_sorted_docids = results.into_iter();
*ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {
total_time: before.elapsed(),
total_queries: 1,
total_results: self.cached_sorted_docids.len(),
};
Ok(())
}