implements a first version of the cutoff without settings

Tamo 2024-03-05 11:21:46 +01:00
parent 10d053cd2f
commit 4a467739cd
11 changed files with 210 additions and 52 deletions
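
At a glance: the diff adds a TimeBudget type to milli, threads it from the HTTP handlers (perform_search and perform_facet_search hard-code a 150 ms budget for now, hence "without settings" in the title) down into bucket_sort, and reports a degraded: bool on results whose ranking was cut short. A minimal sketch of the resulting API, assuming an already-populated index (only the identifiers come from this diff; the setup and error handling are placeholders):

    use std::time::Duration;

    use milli::{Index, Search, TimeBudget};

    fn search_with_cutoff(index: &Index, query: &str) {
        let rtxn = index.read_txn().unwrap();

        let mut search = Search::new(&rtxn, index);
        search.query(query);
        // The same 150 ms budget that `perform_search` hard-codes below.
        search.time_budget(TimeBudget::new(Duration::from_millis(150)));

        let result = search.execute().unwrap();
        if result.degraded {
            // Ranking stopped early: the hits are valid but possibly not fully sorted.
            println!("degraded search returned {} hits", result.documents_ids.len());
        }
    }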

View File

@@ -579,6 +579,7 @@ pub struct SearchAggregator {
     // requests
     total_received: usize,
     total_succeeded: usize,
+    total_degraded: usize,
     time_spent: BinaryHeap<usize>,

     // sort
@@ -758,9 +759,13 @@ impl SearchAggregator {
             hits_info: _,
             facet_distribution: _,
             facet_stats: _,
+            degraded,
         } = result;

         self.total_succeeded = self.total_succeeded.saturating_add(1);
+        if *degraded {
+            self.total_degraded = self.total_degraded.saturating_add(1);
+        }
         self.time_spent.push(*processing_time_ms as usize);
     }
@@ -802,6 +807,7 @@ impl SearchAggregator {
             semantic_ratio,
             embedder,
             hybrid,
+            total_degraded,
         } = other;

         if self.timestamp.is_none() {
@@ -816,6 +822,7 @@ impl SearchAggregator {
         // request
         self.total_received = self.total_received.saturating_add(total_received);
         self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
+        self.total_degraded = self.total_degraded.saturating_add(total_degraded);
         self.time_spent.append(time_spent);

         // sort
@@ -921,6 +928,7 @@ impl SearchAggregator {
             semantic_ratio,
             embedder,
             hybrid,
+            total_degraded,
         } = self;

         if total_received == 0 {
@@ -940,6 +948,7 @@ impl SearchAggregator {
                 "total_succeeded": total_succeeded,
                 "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic
                 "total_received": total_received,
+                "total_degraded": total_degraded,
             },
             "sort": {
                 "with_geoPoint": sort_with_geo_point,

View File

@@ -1,7 +1,7 @@
 use std::cmp::min;
 use std::collections::{BTreeMap, BTreeSet, HashSet};
 use std::str::FromStr;
-use std::time::Instant;
+use std::time::{Duration, Instant};

 use deserr::Deserr;
 use either::Either;
@@ -14,7 +14,7 @@ use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy};
 use meilisearch_types::milli::vector::DistributionShift;
-use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues};
+use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
 use milli::tokenizer::TokenizerBuilder;
@@ -323,6 +323,9 @@ pub struct SearchResult {
     pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub facet_stats: Option<BTreeMap<String, FacetStats>>,
+
+    #[serde(skip_serializing_if = "std::ops::Not::not")]
+    pub degraded: bool,
 }

 #[derive(Serialize, Debug, Clone, PartialEq)]
@@ -382,8 +385,10 @@ fn prepare_search<'t>(
     query: &'t SearchQuery,
     features: RoFeatures,
     distribution: Option<DistributionShift>,
+    time_budget: TimeBudget,
 ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
     let mut search = index.search(rtxn);
+    search.time_budget(time_budget);

     if query.vector.is_some() {
         features.check_vector("Passing `vector` as a query parameter")?;
@@ -491,19 +496,26 @@ pub fn perform_search(
     distribution: Option<DistributionShift>,
 ) -> Result<SearchResult, MeilisearchHttpError> {
     let before_search = Instant::now();
+    let time_budget = TimeBudget::new(Duration::from_millis(150));
     let rtxn = index.read_txn()?;

     let (search, is_finite_pagination, max_total_hits, offset) =
-        prepare_search(index, &rtxn, &query, features, distribution)?;
+        prepare_search(index, &rtxn, &query, features, distribution, time_budget)?;

-    let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
-        match &query.hybrid {
-            Some(hybrid) => match *hybrid.semantic_ratio {
-                ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
-                ratio => search.execute_hybrid(ratio)?,
-            },
-            None => search.execute()?,
-        };
+    let milli::SearchResult {
+        documents_ids,
+        matching_words,
+        candidates,
+        document_scores,
+        degraded,
+        ..
+    } = match &query.hybrid {
+        Some(hybrid) => match *hybrid.semantic_ratio {
+            ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
+            ratio => search.execute_hybrid(ratio)?,
+        },
+        None => search.execute()?,
+    };

     let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -700,6 +712,7 @@ pub fn perform_search(
         processing_time_ms: before_search.elapsed().as_millis(),
         facet_distribution,
         facet_stats,
+        degraded,
     };
     Ok(result)
 }
@@ -712,9 +725,11 @@ pub fn perform_facet_search(
     features: RoFeatures,
 ) -> Result<FacetSearchResult, MeilisearchHttpError> {
     let before_search = Instant::now();
+    let time_budget = TimeBudget::new(Duration::from_millis(150));
     let rtxn = index.read_txn()?;

-    let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?;
+    let (search, _, _, _) =
+        prepare_search(index, &rtxn, &search_query, features, None, time_budget)?;

     let mut facet_search =
         SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some());
     if let Some(facet_query) = &facet_query {
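
Note the serde attribute on the new field: skip_serializing_if = "std::ops::Not::not" means the field is skipped whenever !degraded is true, so degraded only ever appears in the JSON payload as "degraded": true and untouched responses keep their exact previous shape. A self-contained illustration of that pattern (the Response type here is hypothetical, not from this commit):

    use serde::Serialize;

    #[derive(Serialize)]
    struct Response {
        hits: usize,
        // serde calls `Not::not(&self.degraded)` and skips the field when
        // that returns true, i.e. when the search was not degraded.
        #[serde(skip_serializing_if = "std::ops::Not::not")]
        degraded: bool,
    }

    fn main() {
        let fast = Response { hits: 10, degraded: false };
        let slow = Response { hits: 10, degraded: true };
        assert_eq!(serde_json::to_string(&fast).unwrap(), r#"{"hits":10}"#);
        assert_eq!(serde_json::to_string(&slow).unwrap(), r#"{"hits":10,"degraded":true}"#);
    }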

View File

@@ -6,7 +6,7 @@ use std::time::Instant;
 use heed::EnvOpenOptions;
 use milli::{
     execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext,
-    SearchLogger, TermsMatchingStrategy,
+    SearchLogger, TermsMatchingStrategy, TimeBudget,
 };

 #[global_allocator]
@@ -65,6 +65,7 @@ fn main() -> Result<(), Box<dyn Error>> {
             None,
             &mut DefaultSearchLogger,
             logger,
+            TimeBudget::max(),
         )?;

         if let Some((logger, dir)) = detailed_logger {
             logger.finish(&mut ctx, Path::new(dir))?;

View File

@@ -2421,6 +2421,7 @@ pub(crate) mod tests {
             candidates: _,
             document_scores: _,
             mut documents_ids,
+            degraded: _,
         } = search.execute().unwrap();
         let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
         documents_ids.sort_unstable();

View File

@@ -30,6 +30,7 @@ pub mod snapshot_tests;

 use std::collections::{BTreeMap, HashMap};
 use std::convert::{TryFrom, TryInto};
+use std::fmt;
 use std::hash::BuildHasherDefault;

 use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
@@ -104,6 +105,40 @@ pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
 pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;

+#[derive(Clone, Copy)]
+pub struct TimeBudget {
+    started_at: std::time::Instant,
+    budget: std::time::Duration,
+}
+
+impl fmt::Debug for TimeBudget {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("TimeBudget")
+            .field("started_at", &self.started_at)
+            .field("budget", &self.budget)
+            .field("left", &(self.budget - self.started_at.elapsed()))
+            .finish()
+    }
+}
+
+impl TimeBudget {
+    pub fn new(budget: std::time::Duration) -> Self {
+        Self { started_at: std::time::Instant::now(), budget }
+    }
+
+    pub fn max() -> Self {
+        Self::new(std::time::Duration::from_secs(u64::MAX))
+    }
+
+    pub fn exceeded(&self) -> bool {
+        self.must_stop()
+    }
+
+    pub fn must_stop(&self) -> bool {
+        self.started_at.elapsed() > self.budget
+    }
+}
+
 // Convert an absolute word position into a relative position.
 // Return the field id of the attribute related to the absolute position
 // and the relative position in the attribute.
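
TimeBudget is deliberately tiny: new() stamps an Instant, must_stop() compares elapsed time against the budget, exceeded() is an alias of must_stop(), and max() approximates "no cutoff" with a u64::MAX-second budget (roughly 584 billion years). One caveat worth noting: the Debug impl computes the "left" field with an unchecked budget - elapsed, and Duration subtraction panics on underflow, so formatting an already-expired budget with {:?} will panic; Duration::saturating_sub would sidestep that. The typical call-site shape, as a sketch (the loop body is a placeholder):

    use std::time::Duration;

    use milli::TimeBudget;

    fn main() {
        let budget = TimeBudget::new(Duration::from_millis(150));
        for docid in 0..10_000_000u64 {
            if budget.exceeded() {
                // Callers flag the output as degraded and return what they have.
                println!("budget exhausted at doc {docid}");
                break;
            }
            // ... rank one document ...
        }

        // Effectively unlimited; used wherever a cutoff makes no sense.
        assert!(!TimeBudget::max().must_stop());
    }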

View File

@@ -106,6 +106,7 @@ impl ScoreWithRatioResult {
             candidates: left.candidates | right.candidates,
             documents_ids,
             document_scores,
+            degraded: false,
         }
     }
 }
@@ -131,6 +132,7 @@ impl<'a> Search<'a> {
             index: self.index,
             distribution_shift: self.distribution_shift,
             embedder_name: self.embedder_name.clone(),
+            time_budget: self.time_budget,
         };

         let vector_query = search.vector.take();

View File

@@ -11,7 +11,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::vector::DistributionShift;
 use crate::{
     execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result,
-    SearchContext,
+    SearchContext, TimeBudget,
 };

 // Building these factories is not free.
@@ -43,6 +43,8 @@ pub struct Search<'a> {
     index: &'a Index,
     distribution_shift: Option<DistributionShift>,
     embedder_name: Option<String>,
+
+    time_budget: TimeBudget,
 }

 impl<'a> Search<'a> {
@@ -64,6 +66,7 @@ impl<'a> Search<'a> {
             index,
             distribution_shift: None,
             embedder_name: None,
+            time_budget: TimeBudget::max(),
         }
     }
@@ -143,6 +146,11 @@ impl<'a> Search<'a> {
         self
     }

+    pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
+        self.time_budget = time_budget;
+        self
+    }
+
     pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
         if has_vector_search {
             let ctx = SearchContext::new(self.index, self.rtxn);
@@ -169,36 +177,43 @@ impl<'a> Search<'a> {
         }

         let universe = filtered_universe(&ctx, &self.filter)?;
-        let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
-            match self.vector.as_ref() {
-                Some(vector) => execute_vector_search(
-                    &mut ctx,
-                    vector,
-                    self.scoring_strategy,
-                    universe,
-                    &self.sort_criteria,
-                    self.geo_strategy,
-                    self.offset,
-                    self.limit,
-                    self.distribution_shift,
-                    embedder_name,
-                )?,
-                None => execute_search(
-                    &mut ctx,
-                    self.query.as_deref(),
-                    self.terms_matching_strategy,
-                    self.scoring_strategy,
-                    self.exhaustive_number_hits,
-                    universe,
-                    &self.sort_criteria,
-                    self.geo_strategy,
-                    self.offset,
-                    self.limit,
-                    Some(self.words_limit),
-                    &mut DefaultSearchLogger,
-                    &mut DefaultSearchLogger,
-                )?,
-            };
+        let PartialSearchResult {
+            located_query_terms,
+            candidates,
+            documents_ids,
+            document_scores,
+            degraded,
+        } = match self.vector.as_ref() {
+            Some(vector) => execute_vector_search(
+                &mut ctx,
+                vector,
+                self.scoring_strategy,
+                universe,
+                &self.sort_criteria,
+                self.geo_strategy,
+                self.offset,
+                self.limit,
+                self.distribution_shift,
+                embedder_name,
+                self.time_budget,
+            )?,
+            None => execute_search(
+                &mut ctx,
+                self.query.as_deref(),
+                self.terms_matching_strategy,
+                self.scoring_strategy,
+                self.exhaustive_number_hits,
+                universe,
+                &self.sort_criteria,
+                self.geo_strategy,
+                self.offset,
+                self.limit,
+                Some(self.words_limit),
+                &mut DefaultSearchLogger,
+                &mut DefaultSearchLogger,
+                self.time_budget,
+            )?,
+        };

         // consume context and located_query_terms to build MatchingWords.
         let matching_words = match located_query_terms {
@@ -206,7 +221,7 @@ impl<'a> Search<'a> {
             None => MatchingWords::default(),
         };

-        Ok(SearchResult { matching_words, candidates, document_scores, documents_ids })
+        Ok(SearchResult { matching_words, candidates, document_scores, documents_ids, degraded })
     }
 }
@@ -229,6 +244,7 @@ impl fmt::Debug for Search<'_> {
             index: _,
             distribution_shift,
             embedder_name,
+            time_budget,
         } = self;
         f.debug_struct("Search")
             .field("query", query)
@@ -244,6 +260,7 @@ impl fmt::Debug for Search<'_> {
             .field("words_limit", words_limit)
             .field("distribution_shift", distribution_shift)
             .field("embedder_name", embedder_name)
+            .field("time_budget", time_budget)
             .finish()
     }
 }
@@ -254,6 +271,7 @@ pub struct SearchResult {
     pub candidates: RoaringBitmap,
     pub documents_ids: Vec<DocumentId>,
     pub document_scores: Vec<Vec<ScoreDetails>>,
+    pub degraded: bool,
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq)]

View File

@@ -5,12 +5,14 @@ use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
 use super::SearchContext;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
-use crate::Result;
+use crate::{Result, TimeBudget};

 pub struct BucketSortOutput {
     pub docids: Vec<u32>,
     pub scores: Vec<Vec<ScoreDetails>>,
     pub all_candidates: RoaringBitmap,
+
+    pub degraded: bool,
 }

 // TODO: would probably be good to regroup some of these inside of a struct?
@@ -25,6 +27,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
     length: usize,
     scoring_strategy: ScoringStrategy,
     logger: &mut dyn SearchLogger<Q>,
+    time_budget: TimeBudget,
 ) -> Result<BucketSortOutput> {
     logger.initial_query(query);
     logger.ranking_rules(&ranking_rules);
@@ -41,6 +44,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             docids: vec![],
             scores: vec![],
             all_candidates: universe.clone(),
+            degraded: false,
         });
     }
     if ranking_rules.is_empty() {
@@ -74,6 +78,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                 scores: vec![Default::default(); results.len()],
                 docids: results,
                 all_candidates,
+                degraded: false,
             });
         } else {
             let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
@@ -81,6 +86,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                 scores: vec![Default::default(); docids.len()],
                 docids,
                 all_candidates: universe.clone(),
+                degraded: false,
             });
         };
     }
@@ -154,6 +160,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
     }

     while valid_docids.len() < length {
+        if time_budget.exceeded() {
+            let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
+            maybe_add_to_results!(bucket);
+
+            return Ok(BucketSortOutput {
+                scores: vec![Default::default(); valid_docids.len()],
+                docids: valid_docids,
+                all_candidates,
+                degraded: true,
+            });
+        }
+
         // The universe for this bucket is zero, so we don't need to sort
         // anything, just go back to the parent ranking rule.
         if ranking_rule_universes[cur_ranking_rule_index].is_empty()
@@ -219,7 +237,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
         )?;
     }

-    Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates })
+    Ok(BucketSortOutput {
+        docids: valid_docids,
+        scores: valid_scores,
+        all_candidates,
+        degraded: false,
+    })
 }

 /// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
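
The heart of the feature is the new check at the top of the accumulation loop: once the budget is exceeded, the remaining universe of the current ranking rule is handed to maybe_add_to_results! as one unranked bucket, every hit gets a default (empty) score, and the output is returned immediately with degraded: true instead of descending further through the ranking rules. A simplified, self-contained model of that early exit (the names and shapes here are hypothetical; the real code works on RoaringBitmaps and a stack of ranking rules):

    use std::time::{Duration, Instant};

    /// Accumulate ranked ids bucket by bucket until `length` hits are found or
    /// the deadline passes, in which case the current candidate set is appended
    /// unranked and the result is marked degraded.
    fn bucket_sort_model(
        mut buckets: impl Iterator<Item = Vec<u32>>,
        current_universe: &mut Vec<u32>,
        length: usize,
        budget: Duration,
    ) -> (Vec<u32>, bool) {
        let started_at = Instant::now();
        let mut valid_docids = Vec::new();

        while valid_docids.len() < length {
            if started_at.elapsed() > budget {
                // Flush what is left without ranking it any further.
                valid_docids.append(current_universe);
                valid_docids.truncate(length);
                return (valid_docids, true); // degraded
            }
            match buckets.next() {
                Some(bucket) => valid_docids.extend(bucket),
                None => break,
            }
        }
        valid_docids.truncate(length);
        (valid_docids, false)
    }

    fn main() {
        let buckets = vec![vec![3, 1], vec![4]].into_iter();
        let mut rest = vec![9, 2, 6];
        let (ids, degraded) = bucket_sort_model(buckets, &mut rest, 5, Duration::from_secs(1));
        assert_eq!((ids, degraded), (vec![3, 1, 4], false));
    }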

View File

@@ -502,7 +502,7 @@ mod tests {
     use super::*;
     use crate::index::tests::TempIndex;
-    use crate::{execute_search, filtered_universe, SearchContext};
+    use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};

     impl<'a> MatcherBuilder<'a> {
         fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
@@ -522,6 +522,7 @@ mod tests {
                 Some(10),
                 &mut crate::DefaultSearchLogger,
                 &mut crate::DefaultSearchLogger,
+                TimeBudget::max(),
             )
             .unwrap();

View File

@@ -52,7 +52,8 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::search::new::distinct::apply_distinct_rule;
 use crate::vector::DistributionShift;
 use crate::{
-    AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError,
+    AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
+    UserError,
 };

 /// A structure used throughout the execution of a search query.
@@ -518,6 +519,7 @@ pub fn execute_vector_search(
     length: usize,
     distribution_shift: Option<DistributionShift>,
     embedder_name: &str,
+    time_budget: TimeBudget,
 ) -> Result<PartialSearchResult> {
     check_sort_criteria(ctx, sort_criteria.as_ref())?;
@@ -537,7 +539,7 @@ pub fn execute_vector_search(
     let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> =
         &mut placeholder_search_logger;

-    let BucketSortOutput { docids, scores, all_candidates } = bucket_sort(
+    let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort(
         ctx,
         ranking_rules,
         &PlaceholderQuery,
@@ -546,6 +548,7 @@ pub fn execute_vector_search(
         length,
         scoring_strategy,
         placeholder_search_logger,
+        time_budget,
     )?;

     Ok(PartialSearchResult {
@@ -553,6 +556,7 @@ pub fn execute_vector_search(
         document_scores: scores,
         documents_ids: docids,
         located_query_terms: None,
+        degraded,
     })
 }
@@ -572,6 +576,7 @@ pub fn execute_search(
     words_limit: Option<usize>,
     placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
     query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
+    time_budget: TimeBudget,
 ) -> Result<PartialSearchResult> {
     check_sort_criteria(ctx, sort_criteria.as_ref())?;
@@ -648,6 +653,7 @@ pub fn execute_search(
             length,
             scoring_strategy,
             query_graph_logger,
+            time_budget,
         )?
     } else {
         let ranking_rules =
@@ -661,10 +667,11 @@ pub fn execute_search(
             length,
             scoring_strategy,
             placeholder_search_logger,
+            time_budget,
         )?
     };

-    let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output;
+    let BucketSortOutput { docids, scores, mut all_candidates, degraded } = bucket_sort_output;

     let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;

     // The candidates is the universe unless the exhaustive number of hits
@@ -682,6 +689,7 @@ pub fn execute_search(
         document_scores: scores,
         documents_ids: docids,
         located_query_terms,
+        degraded,
     })
 }
@@ -742,4 +750,6 @@ pub struct PartialSearchResult {
     pub candidates: RoaringBitmap,
     pub documents_ids: Vec<DocumentId>,
     pub document_scores: Vec<Vec<ScoreDetails>>,
+
+    pub degraded: bool,
 }

View File

@@ -1,14 +1,19 @@
 use std::cmp::Reverse;
 use std::collections::HashSet;
 use std::io::Cursor;
+use std::time::Duration;

 use big_s::S;
 use either::{Either, Left, Right};
 use heed::EnvOpenOptions;
 use maplit::{btreemap, hashset};
+use meili_snap::snapshot;
 use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
-use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
+use milli::{
+    AscDesc, Criterion, DocumentId, Filter, Index, Member, Object, Search, TermsMatchingStrategy,
+    TimeBudget,
+};
 use serde::{Deserialize, Deserializer};
 use slice_group_by::GroupBy;
@@ -349,3 +354,41 @@ where
     let result = serde_json::Value::deserialize(deserializer)?;
     Ok(Some(result))
 }
+
+#[test]
+fn basic_degraded_search() {
+    use Criterion::*;
+
+    let criteria = vec![Words, Typo, Proximity, Attribute, Exactness];
+    let index = setup_search_index_with_criteria(&criteria);
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query(TEST_QUERY);
+    search.limit(EXTERNAL_DOCUMENTS_IDS.len());
+    search.time_budget(TimeBudget::new(Duration::from_millis(0)));
+
+    let result = search.execute().unwrap();
+    assert!(result.degraded);
+}
+
+#[test]
+fn degraded_search_cannot_skip_filter() {
+    use Criterion::*;
+
+    let criteria = vec![Words, Typo, Proximity, Attribute, Exactness];
+    let index = setup_search_index_with_criteria(&criteria);
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query(TEST_QUERY);
+    search.limit(EXTERNAL_DOCUMENTS_IDS.len());
+    search.time_budget(TimeBudget::new(Duration::from_millis(0)));
+    let filter_condition = Filter::from_str("tag = etiopia").unwrap().unwrap();
+    search.filter(filter_condition);
+
+    let result = search.execute().unwrap();
+    assert!(result.degraded);
+    snapshot!(format!("{:?}\n{:?}", result.candidates, result.documents_ids), @r###"
+    RoaringBitmap<[0, 2, 5, 8, 11, 14]>
+    [0, 2, 5, 8, 11, 14]
+    "###);
+}