Add analytics for the negative operator

This commit is contained in:
Clément Renault 2024-03-26 18:01:27 +01:00
parent 1da9e0f246
commit 34262c7a0d
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
5 changed files with 33 additions and 2 deletions

View File

@ -580,6 +580,7 @@ pub struct SearchAggregator {
total_received: usize,
total_succeeded: usize,
total_degraded: usize,
total_used_negative_operator: usize,
time_spent: BinaryHeap<usize>,
// sort
@ -760,12 +761,16 @@ impl SearchAggregator {
facet_distribution: _,
facet_stats: _,
degraded,
used_negative_operator,
} = result;
self.total_succeeded = self.total_succeeded.saturating_add(1);
if *degraded {
self.total_degraded = self.total_degraded.saturating_add(1);
}
if *used_negative_operator {
self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1);
}
self.time_spent.push(*processing_time_ms as usize);
}
@ -808,6 +813,7 @@ impl SearchAggregator {
embedder,
hybrid,
total_degraded,
total_used_negative_operator,
} = other;
if self.timestamp.is_none() {
@ -823,6 +829,8 @@ impl SearchAggregator {
self.total_received = self.total_received.saturating_add(total_received);
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
self.total_degraded = self.total_degraded.saturating_add(total_degraded);
self.total_used_negative_operator =
self.total_used_negative_operator.saturating_add(total_used_negative_operator);
self.time_spent.append(time_spent);
// sort
@ -929,6 +937,7 @@ impl SearchAggregator {
embedder,
hybrid,
total_degraded,
total_used_negative_operator,
} = self;
if total_received == 0 {
@ -949,6 +958,7 @@ impl SearchAggregator {
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic
"total_received": total_received,
"total_degraded": total_degraded,
"total_used_negative_operator": total_used_negative_operator,
},
"sort": {
"with_geoPoint": sort_with_geo_point,

View File

@ -324,9 +324,11 @@ pub struct SearchResult {
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
// This information is only used for analytics purposes
// These fields are only used for analytics purposes
#[serde(skip)]
pub degraded: bool,
#[serde(skip)]
pub used_negative_operator: bool,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
@ -512,6 +514,7 @@ pub fn perform_search(
candidates,
document_scores,
degraded,
used_negative_operator,
..
} = match &query.hybrid {
Some(hybrid) => match *hybrid.semantic_ratio {
@ -717,6 +720,7 @@ pub fn perform_search(
facet_distribution,
facet_stats,
degraded,
used_negative_operator,
};
Ok(result)
}

View File

@ -11,6 +11,7 @@ struct ScoreWithRatioResult {
candidates: RoaringBitmap,
document_scores: Vec<(u32, ScoreWithRatio)>,
degraded: bool,
used_negative_operator: bool,
}
type ScoreWithRatio = (Vec<ScoreDetails>, f32);
@ -78,6 +79,7 @@ impl ScoreWithRatioResult {
candidates: results.candidates,
document_scores,
degraded: results.degraded,
used_negative_operator: results.used_negative_operator,
}
}
@ -113,6 +115,7 @@ impl ScoreWithRatioResult {
documents_ids,
document_scores,
degraded: left.degraded | right.degraded,
used_negative_operator: left.used_negative_operator | right.used_negative_operator,
}
}
}

View File

@ -183,6 +183,7 @@ impl<'a> Search<'a> {
documents_ids,
document_scores,
degraded,
used_negative_operator,
} = match self.vector.as_ref() {
Some(vector) => execute_vector_search(
&mut ctx,
@ -221,7 +222,14 @@ impl<'a> Search<'a> {
None => MatchingWords::default(),
};
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids, degraded })
Ok(SearchResult {
matching_words,
candidates,
document_scores,
documents_ids,
degraded,
used_negative_operator,
})
}
}
@ -272,6 +280,7 @@ pub struct SearchResult {
pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>,
pub degraded: bool,
pub used_negative_operator: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]

View File

@ -571,6 +571,7 @@ pub fn execute_vector_search(
documents_ids: docids,
located_query_terms: None,
degraded,
used_negative_operator: false,
})
}
@ -594,6 +595,7 @@ pub fn execute_search(
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
let mut used_negative_operator = false;
let mut located_query_terms = None;
let query_terms = if let Some(query) = query {
let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
@ -636,6 +638,7 @@ pub fn execute_search(
let (query_terms, negative_words) =
located_query_terms_from_tokens(ctx, tokens, words_limit)?;
used_negative_operator = !negative_words.is_empty();
let ignored_documents = resolve_negative_words(ctx, &negative_words)?;
universe -= ignored_documents;
@ -710,6 +713,7 @@ pub fn execute_search(
documents_ids: docids,
located_query_terms,
degraded,
used_negative_operator,
})
}
@ -772,4 +776,5 @@ pub struct PartialSearchResult {
pub document_scores: Vec<Vec<ScoreDetails>>,
pub degraded: bool,
pub used_negative_operator: bool,
}