Add analytics for the negative operator

This commit is contained in:
Clément Renault 2024-03-26 18:01:27 +01:00
parent 1da9e0f246
commit 34262c7a0d
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
5 changed files with 33 additions and 2 deletions

View File

@ -580,6 +580,7 @@ pub struct SearchAggregator {
total_received: usize, total_received: usize,
total_succeeded: usize, total_succeeded: usize,
total_degraded: usize, total_degraded: usize,
total_used_negative_operator: usize,
time_spent: BinaryHeap<usize>, time_spent: BinaryHeap<usize>,
// sort // sort
@ -760,12 +761,16 @@ impl SearchAggregator {
facet_distribution: _, facet_distribution: _,
facet_stats: _, facet_stats: _,
degraded, degraded,
used_negative_operator,
} = result; } = result;
self.total_succeeded = self.total_succeeded.saturating_add(1); self.total_succeeded = self.total_succeeded.saturating_add(1);
if *degraded { if *degraded {
self.total_degraded = self.total_degraded.saturating_add(1); self.total_degraded = self.total_degraded.saturating_add(1);
} }
if *used_negative_operator {
self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1);
}
self.time_spent.push(*processing_time_ms as usize); self.time_spent.push(*processing_time_ms as usize);
} }
@ -808,6 +813,7 @@ impl SearchAggregator {
embedder, embedder,
hybrid, hybrid,
total_degraded, total_degraded,
total_used_negative_operator,
} = other; } = other;
if self.timestamp.is_none() { if self.timestamp.is_none() {
@ -823,6 +829,8 @@ impl SearchAggregator {
self.total_received = self.total_received.saturating_add(total_received); self.total_received = self.total_received.saturating_add(total_received);
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
self.total_degraded = self.total_degraded.saturating_add(total_degraded); self.total_degraded = self.total_degraded.saturating_add(total_degraded);
self.total_used_negative_operator =
self.total_used_negative_operator.saturating_add(total_used_negative_operator);
self.time_spent.append(time_spent); self.time_spent.append(time_spent);
// sort // sort
@ -929,6 +937,7 @@ impl SearchAggregator {
embedder, embedder,
hybrid, hybrid,
total_degraded, total_degraded,
total_used_negative_operator,
} = self; } = self;
if total_received == 0 { if total_received == 0 {
@ -949,6 +958,7 @@ impl SearchAggregator {
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received, "total_received": total_received,
"total_degraded": total_degraded, "total_degraded": total_degraded,
"total_used_negative_operator": total_used_negative_operator,
}, },
"sort": { "sort": {
"with_geoPoint": sort_with_geo_point, "with_geoPoint": sort_with_geo_point,

View File

@ -324,9 +324,11 @@ pub struct SearchResult {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>, pub facet_stats: Option<BTreeMap<String, FacetStats>>,
// This information is only used for analytics purposes // These fields are only used for analytics purposes
#[serde(skip)] #[serde(skip)]
pub degraded: bool, pub degraded: bool,
#[serde(skip)]
pub used_negative_operator: bool,
} }
#[derive(Serialize, Debug, Clone, PartialEq)] #[derive(Serialize, Debug, Clone, PartialEq)]
@ -512,6 +514,7 @@ pub fn perform_search(
candidates, candidates,
document_scores, document_scores,
degraded, degraded,
used_negative_operator,
.. ..
} = match &query.hybrid { } = match &query.hybrid {
Some(hybrid) => match *hybrid.semantic_ratio { Some(hybrid) => match *hybrid.semantic_ratio {
@ -717,6 +720,7 @@ pub fn perform_search(
facet_distribution, facet_distribution,
facet_stats, facet_stats,
degraded, degraded,
used_negative_operator,
}; };
Ok(result) Ok(result)
} }

View File

@ -11,6 +11,7 @@ struct ScoreWithRatioResult {
candidates: RoaringBitmap, candidates: RoaringBitmap,
document_scores: Vec<(u32, ScoreWithRatio)>, document_scores: Vec<(u32, ScoreWithRatio)>,
degraded: bool, degraded: bool,
used_negative_operator: bool,
} }
type ScoreWithRatio = (Vec<ScoreDetails>, f32); type ScoreWithRatio = (Vec<ScoreDetails>, f32);
@ -78,6 +79,7 @@ impl ScoreWithRatioResult {
candidates: results.candidates, candidates: results.candidates,
document_scores, document_scores,
degraded: results.degraded, degraded: results.degraded,
used_negative_operator: results.used_negative_operator,
} }
} }
@ -113,6 +115,7 @@ impl ScoreWithRatioResult {
documents_ids, documents_ids,
document_scores, document_scores,
degraded: left.degraded | right.degraded, degraded: left.degraded | right.degraded,
used_negative_operator: left.used_negative_operator | right.used_negative_operator,
} }
} }
} }

View File

@ -183,6 +183,7 @@ impl<'a> Search<'a> {
documents_ids, documents_ids,
document_scores, document_scores,
degraded, degraded,
used_negative_operator,
} = match self.vector.as_ref() { } = match self.vector.as_ref() {
Some(vector) => execute_vector_search( Some(vector) => execute_vector_search(
&mut ctx, &mut ctx,
@ -221,7 +222,14 @@ impl<'a> Search<'a> {
None => MatchingWords::default(), None => MatchingWords::default(),
}; };
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids, degraded }) Ok(SearchResult {
matching_words,
candidates,
document_scores,
documents_ids,
degraded,
used_negative_operator,
})
} }
} }
@ -272,6 +280,7 @@ pub struct SearchResult {
pub documents_ids: Vec<DocumentId>, pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>, pub document_scores: Vec<Vec<ScoreDetails>>,
pub degraded: bool, pub degraded: bool,
pub used_negative_operator: bool,
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]

View File

@ -571,6 +571,7 @@ pub fn execute_vector_search(
documents_ids: docids, documents_ids: docids,
located_query_terms: None, located_query_terms: None,
degraded, degraded,
used_negative_operator: false,
}) })
} }
@ -594,6 +595,7 @@ pub fn execute_search(
) -> Result<PartialSearchResult> { ) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?; check_sort_criteria(ctx, sort_criteria.as_ref())?;
let mut used_negative_operator = false;
let mut located_query_terms = None; let mut located_query_terms = None;
let query_terms = if let Some(query) = query { let query_terms = if let Some(query) = query {
let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder"); let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
@ -636,6 +638,7 @@ pub fn execute_search(
let (query_terms, negative_words) = let (query_terms, negative_words) =
located_query_terms_from_tokens(ctx, tokens, words_limit)?; located_query_terms_from_tokens(ctx, tokens, words_limit)?;
used_negative_operator = !negative_words.is_empty();
let ignored_documents = resolve_negative_words(ctx, &negative_words)?; let ignored_documents = resolve_negative_words(ctx, &negative_words)?;
universe -= ignored_documents; universe -= ignored_documents;
@ -710,6 +713,7 @@ pub fn execute_search(
documents_ids: docids, documents_ids: docids,
located_query_terms, located_query_terms,
degraded, degraded,
used_negative_operator,
}) })
} }
@ -772,4 +776,5 @@ pub struct PartialSearchResult {
pub document_scores: Vec<Vec<ScoreDetails>>, pub document_scores: Vec<Vec<ScoreDetails>>,
pub degraded: bool, pub degraded: bool,
pub used_negative_operator: bool,
} }