mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 05:00:06 +01:00
implements a first version of the cutoff without settings
This commit is contained in:
parent
10d053cd2f
commit
4a467739cd
@ -579,6 +579,7 @@ pub struct SearchAggregator {
|
|||||||
// requests
|
// requests
|
||||||
total_received: usize,
|
total_received: usize,
|
||||||
total_succeeded: usize,
|
total_succeeded: usize,
|
||||||
|
total_degraded: usize,
|
||||||
time_spent: BinaryHeap<usize>,
|
time_spent: BinaryHeap<usize>,
|
||||||
|
|
||||||
// sort
|
// sort
|
||||||
@ -758,9 +759,13 @@ impl SearchAggregator {
|
|||||||
hits_info: _,
|
hits_info: _,
|
||||||
facet_distribution: _,
|
facet_distribution: _,
|
||||||
facet_stats: _,
|
facet_stats: _,
|
||||||
|
degraded,
|
||||||
} = result;
|
} = result;
|
||||||
|
|
||||||
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
||||||
|
if *degraded {
|
||||||
|
self.total_degraded = self.total_degraded.saturating_add(1);
|
||||||
|
}
|
||||||
self.time_spent.push(*processing_time_ms as usize);
|
self.time_spent.push(*processing_time_ms as usize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -802,6 +807,7 @@ impl SearchAggregator {
|
|||||||
semantic_ratio,
|
semantic_ratio,
|
||||||
embedder,
|
embedder,
|
||||||
hybrid,
|
hybrid,
|
||||||
|
total_degraded,
|
||||||
} = other;
|
} = other;
|
||||||
|
|
||||||
if self.timestamp.is_none() {
|
if self.timestamp.is_none() {
|
||||||
@ -816,6 +822,7 @@ impl SearchAggregator {
|
|||||||
// request
|
// request
|
||||||
self.total_received = self.total_received.saturating_add(total_received);
|
self.total_received = self.total_received.saturating_add(total_received);
|
||||||
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
|
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
|
||||||
|
self.total_degraded = self.total_degraded.saturating_add(total_degraded);
|
||||||
self.time_spent.append(time_spent);
|
self.time_spent.append(time_spent);
|
||||||
|
|
||||||
// sort
|
// sort
|
||||||
@ -921,6 +928,7 @@ impl SearchAggregator {
|
|||||||
semantic_ratio,
|
semantic_ratio,
|
||||||
embedder,
|
embedder,
|
||||||
hybrid,
|
hybrid,
|
||||||
|
total_degraded,
|
||||||
} = self;
|
} = self;
|
||||||
|
|
||||||
if total_received == 0 {
|
if total_received == 0 {
|
||||||
@ -940,6 +948,7 @@ impl SearchAggregator {
|
|||||||
"total_succeeded": total_succeeded,
|
"total_succeeded": total_succeeded,
|
||||||
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
|
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
|
||||||
"total_received": total_received,
|
"total_received": total_received,
|
||||||
|
"total_degraded": total_degraded,
|
||||||
},
|
},
|
||||||
"sort": {
|
"sort": {
|
||||||
"with_geoPoint": sort_with_geo_point,
|
"with_geoPoint": sort_with_geo_point,
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use std::cmp::min;
|
use std::cmp::min;
|
||||||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::time::Instant;
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
use either::Either;
|
use either::Either;
|
||||||
@ -14,7 +14,7 @@ use meilisearch_types::heed::RoTxn;
|
|||||||
use meilisearch_types::index_uid::IndexUid;
|
use meilisearch_types::index_uid::IndexUid;
|
||||||
use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy};
|
use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy};
|
||||||
use meilisearch_types::milli::vector::DistributionShift;
|
use meilisearch_types::milli::vector::DistributionShift;
|
||||||
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues};
|
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
|
||||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||||
use meilisearch_types::{milli, Document};
|
use meilisearch_types::{milli, Document};
|
||||||
use milli::tokenizer::TokenizerBuilder;
|
use milli::tokenizer::TokenizerBuilder;
|
||||||
@ -323,6 +323,9 @@ pub struct SearchResult {
|
|||||||
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
|
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
||||||
|
|
||||||
|
#[serde(skip_serializing_if = "std::ops::Not::not")]
|
||||||
|
pub degraded: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||||
@ -382,8 +385,10 @@ fn prepare_search<'t>(
|
|||||||
query: &'t SearchQuery,
|
query: &'t SearchQuery,
|
||||||
features: RoFeatures,
|
features: RoFeatures,
|
||||||
distribution: Option<DistributionShift>,
|
distribution: Option<DistributionShift>,
|
||||||
|
time_budget: TimeBudget,
|
||||||
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
|
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
|
||||||
let mut search = index.search(rtxn);
|
let mut search = index.search(rtxn);
|
||||||
|
search.time_budget(time_budget);
|
||||||
|
|
||||||
if query.vector.is_some() {
|
if query.vector.is_some() {
|
||||||
features.check_vector("Passing `vector` as a query parameter")?;
|
features.check_vector("Passing `vector` as a query parameter")?;
|
||||||
@ -491,19 +496,26 @@ pub fn perform_search(
|
|||||||
distribution: Option<DistributionShift>,
|
distribution: Option<DistributionShift>,
|
||||||
) -> Result<SearchResult, MeilisearchHttpError> {
|
) -> Result<SearchResult, MeilisearchHttpError> {
|
||||||
let before_search = Instant::now();
|
let before_search = Instant::now();
|
||||||
|
let time_budget = TimeBudget::new(Duration::from_millis(150));
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
|
|
||||||
let (search, is_finite_pagination, max_total_hits, offset) =
|
let (search, is_finite_pagination, max_total_hits, offset) =
|
||||||
prepare_search(index, &rtxn, &query, features, distribution)?;
|
prepare_search(index, &rtxn, &query, features, distribution, time_budget)?;
|
||||||
|
|
||||||
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
|
let milli::SearchResult {
|
||||||
match &query.hybrid {
|
documents_ids,
|
||||||
Some(hybrid) => match *hybrid.semantic_ratio {
|
matching_words,
|
||||||
ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
|
candidates,
|
||||||
ratio => search.execute_hybrid(ratio)?,
|
document_scores,
|
||||||
},
|
degraded,
|
||||||
None => search.execute()?,
|
..
|
||||||
};
|
} = match &query.hybrid {
|
||||||
|
Some(hybrid) => match *hybrid.semantic_ratio {
|
||||||
|
ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
|
||||||
|
ratio => search.execute_hybrid(ratio)?,
|
||||||
|
},
|
||||||
|
None => search.execute()?,
|
||||||
|
};
|
||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
|
|
||||||
@ -700,6 +712,7 @@ pub fn perform_search(
|
|||||||
processing_time_ms: before_search.elapsed().as_millis(),
|
processing_time_ms: before_search.elapsed().as_millis(),
|
||||||
facet_distribution,
|
facet_distribution,
|
||||||
facet_stats,
|
facet_stats,
|
||||||
|
degraded,
|
||||||
};
|
};
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
@ -712,9 +725,11 @@ pub fn perform_facet_search(
|
|||||||
features: RoFeatures,
|
features: RoFeatures,
|
||||||
) -> Result<FacetSearchResult, MeilisearchHttpError> {
|
) -> Result<FacetSearchResult, MeilisearchHttpError> {
|
||||||
let before_search = Instant::now();
|
let before_search = Instant::now();
|
||||||
|
let time_budget = TimeBudget::new(Duration::from_millis(150));
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
|
|
||||||
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?;
|
let (search, _, _, _) =
|
||||||
|
prepare_search(index, &rtxn, &search_query, features, None, time_budget)?;
|
||||||
let mut facet_search =
|
let mut facet_search =
|
||||||
SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some());
|
SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some());
|
||||||
if let Some(facet_query) = &facet_query {
|
if let Some(facet_query) = &facet_query {
|
||||||
|
@ -6,7 +6,7 @@ use std::time::Instant;
|
|||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::{
|
use milli::{
|
||||||
execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext,
|
execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext,
|
||||||
SearchLogger, TermsMatchingStrategy,
|
SearchLogger, TermsMatchingStrategy, TimeBudget,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
@ -65,6 +65,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
None,
|
None,
|
||||||
&mut DefaultSearchLogger,
|
&mut DefaultSearchLogger,
|
||||||
logger,
|
logger,
|
||||||
|
TimeBudget::max(),
|
||||||
)?;
|
)?;
|
||||||
if let Some((logger, dir)) = detailed_logger {
|
if let Some((logger, dir)) = detailed_logger {
|
||||||
logger.finish(&mut ctx, Path::new(dir))?;
|
logger.finish(&mut ctx, Path::new(dir))?;
|
||||||
|
@ -2421,6 +2421,7 @@ pub(crate) mod tests {
|
|||||||
candidates: _,
|
candidates: _,
|
||||||
document_scores: _,
|
document_scores: _,
|
||||||
mut documents_ids,
|
mut documents_ids,
|
||||||
|
degraded: _,
|
||||||
} = search.execute().unwrap();
|
} = search.execute().unwrap();
|
||||||
let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
|
let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
|
||||||
documents_ids.sort_unstable();
|
documents_ids.sort_unstable();
|
||||||
|
@ -30,6 +30,7 @@ pub mod snapshot_tests;
|
|||||||
|
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap};
|
||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
|
use std::fmt;
|
||||||
use std::hash::BuildHasherDefault;
|
use std::hash::BuildHasherDefault;
|
||||||
|
|
||||||
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
|
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
|
||||||
@ -104,6 +105,40 @@ pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
|
|||||||
|
|
||||||
pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;
|
pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;
|
||||||
|
|
||||||
|
/// A wall-clock allowance granted to a single search.
///
/// The clock starts ticking when the value is created with [`TimeBudget::new`].
/// Callers poll [`TimeBudget::exceeded`] (or its alias [`TimeBudget::must_stop`])
/// to find out whether more time than the allowance has elapsed since then.
#[derive(Clone, Copy)]
pub struct TimeBudget {
    /// Instant at which this budget was created.
    started_at: std::time::Instant,
    /// Amount of time allowed to elapse after `started_at`.
    budget: std::time::Duration,
}

impl fmt::Debug for TimeBudget {
    /// Prints the start instant, the total budget, and the time that is left.
    ///
    /// The remaining time is clamped to zero once the budget is exceeded.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `Duration - Duration` panics when the result would be negative, which is
        // precisely the case once the budget is exceeded. Saturate instead so that
        // formatting an exhausted budget can never panic.
        let left = self.budget.saturating_sub(self.started_at.elapsed());

        f.debug_struct("TimeBudget")
            .field("started_at", &self.started_at)
            .field("budget", &self.budget)
            .field("left", &left)
            .finish()
    }
}

impl TimeBudget {
    /// Creates a budget of `budget` that starts counting down immediately.
    pub fn new(budget: std::time::Duration) -> Self {
        Self { started_at: std::time::Instant::now(), budget }
    }

    /// Creates a budget of `u64::MAX` seconds, i.e. one that is never exceeded in practice.
    pub fn max() -> Self {
        Self::new(std::time::Duration::from_secs(u64::MAX))
    }

    /// Returns `true` once strictly more time than the budget has elapsed.
    ///
    /// This is an alias of [`TimeBudget::must_stop`].
    pub fn exceeded(&self) -> bool {
        self.must_stop()
    }

    /// Returns `true` once strictly more time than the budget has elapsed since creation.
    pub fn must_stop(&self) -> bool {
        self.started_at.elapsed() > self.budget
    }
}
|
||||||
|
|
||||||
// Convert an absolute word position into a relative position.
|
// Convert an absolute word position into a relative position.
|
||||||
// Return the field id of the attribute related to the absolute position
|
// Return the field id of the attribute related to the absolute position
|
||||||
// and the relative position in the attribute.
|
// and the relative position in the attribute.
|
||||||
|
@ -106,6 +106,7 @@ impl ScoreWithRatioResult {
|
|||||||
candidates: left.candidates | right.candidates,
|
candidates: left.candidates | right.candidates,
|
||||||
documents_ids,
|
documents_ids,
|
||||||
document_scores,
|
document_scores,
|
||||||
|
degraded: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -131,6 +132,7 @@ impl<'a> Search<'a> {
|
|||||||
index: self.index,
|
index: self.index,
|
||||||
distribution_shift: self.distribution_shift,
|
distribution_shift: self.distribution_shift,
|
||||||
embedder_name: self.embedder_name.clone(),
|
embedder_name: self.embedder_name.clone(),
|
||||||
|
time_budget: self.time_budget,
|
||||||
};
|
};
|
||||||
|
|
||||||
let vector_query = search.vector.take();
|
let vector_query = search.vector.take();
|
||||||
|
@ -11,7 +11,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
|
|||||||
use crate::vector::DistributionShift;
|
use crate::vector::DistributionShift;
|
||||||
use crate::{
|
use crate::{
|
||||||
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result,
|
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result,
|
||||||
SearchContext,
|
SearchContext, TimeBudget,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
@ -43,6 +43,8 @@ pub struct Search<'a> {
|
|||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
distribution_shift: Option<DistributionShift>,
|
distribution_shift: Option<DistributionShift>,
|
||||||
embedder_name: Option<String>,
|
embedder_name: Option<String>,
|
||||||
|
|
||||||
|
time_budget: TimeBudget,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Search<'a> {
|
impl<'a> Search<'a> {
|
||||||
@ -64,6 +66,7 @@ impl<'a> Search<'a> {
|
|||||||
index,
|
index,
|
||||||
distribution_shift: None,
|
distribution_shift: None,
|
||||||
embedder_name: None,
|
embedder_name: None,
|
||||||
|
time_budget: TimeBudget::max(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,6 +146,11 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Replaces the time budget stored on this search.
///
/// Returns `self` so that further builder-style calls can be chained.
pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
    self.time_budget = time_budget;
    self
}
|
||||||
|
|
||||||
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
||||||
if has_vector_search {
|
if has_vector_search {
|
||||||
let ctx = SearchContext::new(self.index, self.rtxn);
|
let ctx = SearchContext::new(self.index, self.rtxn);
|
||||||
@ -169,36 +177,43 @@ impl<'a> Search<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let universe = filtered_universe(&ctx, &self.filter)?;
|
let universe = filtered_universe(&ctx, &self.filter)?;
|
||||||
let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
|
let PartialSearchResult {
|
||||||
match self.vector.as_ref() {
|
located_query_terms,
|
||||||
Some(vector) => execute_vector_search(
|
candidates,
|
||||||
&mut ctx,
|
documents_ids,
|
||||||
vector,
|
document_scores,
|
||||||
self.scoring_strategy,
|
degraded,
|
||||||
universe,
|
} = match self.vector.as_ref() {
|
||||||
&self.sort_criteria,
|
Some(vector) => execute_vector_search(
|
||||||
self.geo_strategy,
|
&mut ctx,
|
||||||
self.offset,
|
vector,
|
||||||
self.limit,
|
self.scoring_strategy,
|
||||||
self.distribution_shift,
|
universe,
|
||||||
embedder_name,
|
&self.sort_criteria,
|
||||||
)?,
|
self.geo_strategy,
|
||||||
None => execute_search(
|
self.offset,
|
||||||
&mut ctx,
|
self.limit,
|
||||||
self.query.as_deref(),
|
self.distribution_shift,
|
||||||
self.terms_matching_strategy,
|
embedder_name,
|
||||||
self.scoring_strategy,
|
self.time_budget,
|
||||||
self.exhaustive_number_hits,
|
)?,
|
||||||
universe,
|
None => execute_search(
|
||||||
&self.sort_criteria,
|
&mut ctx,
|
||||||
self.geo_strategy,
|
self.query.as_deref(),
|
||||||
self.offset,
|
self.terms_matching_strategy,
|
||||||
self.limit,
|
self.scoring_strategy,
|
||||||
Some(self.words_limit),
|
self.exhaustive_number_hits,
|
||||||
&mut DefaultSearchLogger,
|
universe,
|
||||||
&mut DefaultSearchLogger,
|
&self.sort_criteria,
|
||||||
)?,
|
self.geo_strategy,
|
||||||
};
|
self.offset,
|
||||||
|
self.limit,
|
||||||
|
Some(self.words_limit),
|
||||||
|
&mut DefaultSearchLogger,
|
||||||
|
&mut DefaultSearchLogger,
|
||||||
|
self.time_budget,
|
||||||
|
)?,
|
||||||
|
};
|
||||||
|
|
||||||
// consume context and located_query_terms to build MatchingWords.
|
// consume context and located_query_terms to build MatchingWords.
|
||||||
let matching_words = match located_query_terms {
|
let matching_words = match located_query_terms {
|
||||||
@ -206,7 +221,7 @@ impl<'a> Search<'a> {
|
|||||||
None => MatchingWords::default(),
|
None => MatchingWords::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids })
|
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids, degraded })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -229,6 +244,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
index: _,
|
index: _,
|
||||||
distribution_shift,
|
distribution_shift,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
|
time_budget,
|
||||||
} = self;
|
} = self;
|
||||||
f.debug_struct("Search")
|
f.debug_struct("Search")
|
||||||
.field("query", query)
|
.field("query", query)
|
||||||
@ -244,6 +260,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
.field("words_limit", words_limit)
|
.field("words_limit", words_limit)
|
||||||
.field("distribution_shift", distribution_shift)
|
.field("distribution_shift", distribution_shift)
|
||||||
.field("embedder_name", embedder_name)
|
.field("embedder_name", embedder_name)
|
||||||
|
.field("time_bduget", time_budget)
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -254,6 +271,7 @@ pub struct SearchResult {
|
|||||||
pub candidates: RoaringBitmap,
|
pub candidates: RoaringBitmap,
|
||||||
pub documents_ids: Vec<DocumentId>,
|
pub documents_ids: Vec<DocumentId>,
|
||||||
pub document_scores: Vec<Vec<ScoreDetails>>,
|
pub document_scores: Vec<Vec<ScoreDetails>>,
|
||||||
|
pub degraded: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
@ -5,12 +5,14 @@ use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
|
|||||||
use super::SearchContext;
|
use super::SearchContext;
|
||||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||||
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
|
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
|
||||||
use crate::Result;
|
use crate::{Result, TimeBudget};
|
||||||
|
|
||||||
pub struct BucketSortOutput {
|
pub struct BucketSortOutput {
|
||||||
pub docids: Vec<u32>,
|
pub docids: Vec<u32>,
|
||||||
pub scores: Vec<Vec<ScoreDetails>>,
|
pub scores: Vec<Vec<ScoreDetails>>,
|
||||||
pub all_candidates: RoaringBitmap,
|
pub all_candidates: RoaringBitmap,
|
||||||
|
|
||||||
|
pub degraded: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: would probably be good to regroup some of these inside of a struct?
|
// TODO: would probably be good to regroup some of these inside of a struct?
|
||||||
@ -25,6 +27,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
length: usize,
|
length: usize,
|
||||||
scoring_strategy: ScoringStrategy,
|
scoring_strategy: ScoringStrategy,
|
||||||
logger: &mut dyn SearchLogger<Q>,
|
logger: &mut dyn SearchLogger<Q>,
|
||||||
|
time_budget: TimeBudget,
|
||||||
) -> Result<BucketSortOutput> {
|
) -> Result<BucketSortOutput> {
|
||||||
logger.initial_query(query);
|
logger.initial_query(query);
|
||||||
logger.ranking_rules(&ranking_rules);
|
logger.ranking_rules(&ranking_rules);
|
||||||
@ -41,6 +44,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
docids: vec![],
|
docids: vec![],
|
||||||
scores: vec![],
|
scores: vec![],
|
||||||
all_candidates: universe.clone(),
|
all_candidates: universe.clone(),
|
||||||
|
degraded: false,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if ranking_rules.is_empty() {
|
if ranking_rules.is_empty() {
|
||||||
@ -74,6 +78,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
scores: vec![Default::default(); results.len()],
|
scores: vec![Default::default(); results.len()],
|
||||||
docids: results,
|
docids: results,
|
||||||
all_candidates,
|
all_candidates,
|
||||||
|
degraded: false,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
|
let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
|
||||||
@ -81,6 +86,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
scores: vec![Default::default(); docids.len()],
|
scores: vec![Default::default(); docids.len()],
|
||||||
docids,
|
docids,
|
||||||
all_candidates: universe.clone(),
|
all_candidates: universe.clone(),
|
||||||
|
degraded: false,
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -154,6 +160,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
while valid_docids.len() < length {
|
while valid_docids.len() < length {
|
||||||
|
if time_budget.exceeded() {
|
||||||
|
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
||||||
|
maybe_add_to_results!(bucket);
|
||||||
|
|
||||||
|
return Ok(BucketSortOutput {
|
||||||
|
scores: vec![Default::default(); valid_docids.len()],
|
||||||
|
docids: valid_docids,
|
||||||
|
all_candidates,
|
||||||
|
degraded: true,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// The universe for this bucket is zero, so we don't need to sort
|
// The universe for this bucket is zero, so we don't need to sort
|
||||||
// anything, just go back to the parent ranking rule.
|
// anything, just go back to the parent ranking rule.
|
||||||
if ranking_rule_universes[cur_ranking_rule_index].is_empty()
|
if ranking_rule_universes[cur_ranking_rule_index].is_empty()
|
||||||
@ -219,7 +237,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates })
|
Ok(BucketSortOutput {
|
||||||
|
docids: valid_docids,
|
||||||
|
scores: valid_scores,
|
||||||
|
all_candidates,
|
||||||
|
degraded: false,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
|
/// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
|
||||||
|
@ -502,7 +502,7 @@ mod tests {
|
|||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
use crate::{execute_search, filtered_universe, SearchContext};
|
use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};
|
||||||
|
|
||||||
impl<'a> MatcherBuilder<'a> {
|
impl<'a> MatcherBuilder<'a> {
|
||||||
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
|
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
|
||||||
@ -522,6 +522,7 @@ mod tests {
|
|||||||
Some(10),
|
Some(10),
|
||||||
&mut crate::DefaultSearchLogger,
|
&mut crate::DefaultSearchLogger,
|
||||||
&mut crate::DefaultSearchLogger,
|
&mut crate::DefaultSearchLogger,
|
||||||
|
TimeBudget::max(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -52,7 +52,8 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
|
|||||||
use crate::search::new::distinct::apply_distinct_rule;
|
use crate::search::new::distinct::apply_distinct_rule;
|
||||||
use crate::vector::DistributionShift;
|
use crate::vector::DistributionShift;
|
||||||
use crate::{
|
use crate::{
|
||||||
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError,
|
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
|
||||||
|
UserError,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// A structure used throughout the execution of a search query.
|
/// A structure used throughout the execution of a search query.
|
||||||
@ -518,6 +519,7 @@ pub fn execute_vector_search(
|
|||||||
length: usize,
|
length: usize,
|
||||||
distribution_shift: Option<DistributionShift>,
|
distribution_shift: Option<DistributionShift>,
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
|
time_budget: TimeBudget,
|
||||||
) -> Result<PartialSearchResult> {
|
) -> Result<PartialSearchResult> {
|
||||||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
||||||
|
|
||||||
@ -537,7 +539,7 @@ pub fn execute_vector_search(
|
|||||||
let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> =
|
let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> =
|
||||||
&mut placeholder_search_logger;
|
&mut placeholder_search_logger;
|
||||||
|
|
||||||
let BucketSortOutput { docids, scores, all_candidates } = bucket_sort(
|
let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort(
|
||||||
ctx,
|
ctx,
|
||||||
ranking_rules,
|
ranking_rules,
|
||||||
&PlaceholderQuery,
|
&PlaceholderQuery,
|
||||||
@ -546,6 +548,7 @@ pub fn execute_vector_search(
|
|||||||
length,
|
length,
|
||||||
scoring_strategy,
|
scoring_strategy,
|
||||||
placeholder_search_logger,
|
placeholder_search_logger,
|
||||||
|
time_budget,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok(PartialSearchResult {
|
Ok(PartialSearchResult {
|
||||||
@ -553,6 +556,7 @@ pub fn execute_vector_search(
|
|||||||
document_scores: scores,
|
document_scores: scores,
|
||||||
documents_ids: docids,
|
documents_ids: docids,
|
||||||
located_query_terms: None,
|
located_query_terms: None,
|
||||||
|
degraded,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -572,6 +576,7 @@ pub fn execute_search(
|
|||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
|
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
|
||||||
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
|
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
|
time_budget: TimeBudget,
|
||||||
) -> Result<PartialSearchResult> {
|
) -> Result<PartialSearchResult> {
|
||||||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
||||||
|
|
||||||
@ -648,6 +653,7 @@ pub fn execute_search(
|
|||||||
length,
|
length,
|
||||||
scoring_strategy,
|
scoring_strategy,
|
||||||
query_graph_logger,
|
query_graph_logger,
|
||||||
|
time_budget,
|
||||||
)?
|
)?
|
||||||
} else {
|
} else {
|
||||||
let ranking_rules =
|
let ranking_rules =
|
||||||
@ -661,10 +667,11 @@ pub fn execute_search(
|
|||||||
length,
|
length,
|
||||||
scoring_strategy,
|
scoring_strategy,
|
||||||
placeholder_search_logger,
|
placeholder_search_logger,
|
||||||
|
time_budget,
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output;
|
let BucketSortOutput { docids, scores, mut all_candidates, degraded } = bucket_sort_output;
|
||||||
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
|
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
|
||||||
|
|
||||||
// The candidates is the universe unless the exhaustive number of hits
|
// The candidates is the universe unless the exhaustive number of hits
|
||||||
@ -682,6 +689,7 @@ pub fn execute_search(
|
|||||||
document_scores: scores,
|
document_scores: scores,
|
||||||
documents_ids: docids,
|
documents_ids: docids,
|
||||||
located_query_terms,
|
located_query_terms,
|
||||||
|
degraded,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -742,4 +750,6 @@ pub struct PartialSearchResult {
|
|||||||
pub candidates: RoaringBitmap,
|
pub candidates: RoaringBitmap,
|
||||||
pub documents_ids: Vec<DocumentId>,
|
pub documents_ids: Vec<DocumentId>,
|
||||||
pub document_scores: Vec<Vec<ScoreDetails>>,
|
pub document_scores: Vec<Vec<ScoreDetails>>,
|
||||||
|
|
||||||
|
pub degraded: bool,
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,19 @@
|
|||||||
use std::cmp::Reverse;
|
use std::cmp::Reverse;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::io::Cursor;
|
use std::io::Cursor;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
use either::{Either, Left, Right};
|
use either::{Either, Left, Right};
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use maplit::{btreemap, hashset};
|
use maplit::{btreemap, hashset};
|
||||||
|
use meili_snap::snapshot;
|
||||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
|
use milli::{
|
||||||
|
AscDesc, Criterion, DocumentId, Filter, Index, Member, Object, Search, TermsMatchingStrategy,
|
||||||
|
TimeBudget,
|
||||||
|
};
|
||||||
use serde::{Deserialize, Deserializer};
|
use serde::{Deserialize, Deserializer};
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
@ -349,3 +354,41 @@ where
|
|||||||
let result = serde_json::Value::deserialize(deserializer)?;
|
let result = serde_json::Value::deserialize(deserializer)?;
|
||||||
Ok(Some(result))
|
Ok(Some(result))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A search whose time budget is already spent must flag its result as degraded.
#[test]
fn basic_degraded_search() {
    let ranking_rules = vec![
        Criterion::Words,
        Criterion::Typo,
        Criterion::Proximity,
        Criterion::Attribute,
        Criterion::Exactness,
    ];
    let index = setup_search_index_with_criteria(&ranking_rules);
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query(TEST_QUERY);
    search.limit(EXTERNAL_DOCUMENTS_IDS.len());
    // A zero-length budget is exceeded as soon as any time has passed.
    search.time_budget(TimeBudget::new(Duration::from_millis(0)));

    let outcome = search.execute().unwrap();
    assert!(outcome.degraded);
}
|
||||||
|
|
||||||
|
/// Even when the time budget is already spent, the filter must still be applied:
/// the degraded result may only contain documents matching `tag = etiopia`.
#[test]
fn degraded_search_cannot_skip_filter() {
    let ranking_rules = vec![
        Criterion::Words,
        Criterion::Typo,
        Criterion::Proximity,
        Criterion::Attribute,
        Criterion::Exactness,
    ];
    let index = setup_search_index_with_criteria(&ranking_rules);
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query(TEST_QUERY);
    search.limit(EXTERNAL_DOCUMENTS_IDS.len());
    // A zero-length budget is exceeded as soon as any time has passed.
    search.time_budget(TimeBudget::new(Duration::from_millis(0)));
    let tag_filter = Filter::from_str("tag = etiopia").unwrap().unwrap();
    search.filter(tag_filter);

    let outcome = search.execute().unwrap();
    assert!(outcome.degraded);
    snapshot!(format!("{:?}\n{:?}", outcome.candidates, outcome.documents_ids), @r###"
    RoaringBitmap<[0, 2, 5, 8, 11, 14]>
    [0, 2, 5, 8, 11, 14]
    "###);
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user