Mirror of https://github.com/meilisearch/MeiliSearch
Synced 2024-11-22 21:04:27 +01:00
Merge #4466
4466: Implements the search cutoff r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4488

## What does this PR do?
- Adds a cutoff to the bucket sort after 150ms has been spent
- Adds a new setting to customize the default value of 150ms
- When the time is exceeded, we exit early with what we had the time to sort
- If the cutoff has been reached, the search details are updated with a new `skipped` ranking detail for the ranking rules that were skipped
- Adds analytics to measure the total number of degraded search requests
- Adds the number of degraded search requests to the Prometheus metrics and Grafana dashboard
- The cutoff **must not** skip the filters; otherwise, we would leak documents to people who don't have the right to see them

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
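For illustration, a minimal sketch of how the feature is exercised, written in the style of this PR's own integration-test helpers (`Server`, `Index`, `json!`, `wait_task`); the field name `searchCutoffMs` and the 150ms default come from the diff below, while the query itself is a hypothetical example:

```rust
// Hedged sketch, not an official usage guide: configure the cutoff, then
// search. A degraded search still answers normally; skipped ranking rules
// only become visible through `_rankingScoreDetails` and the analytics.
#[actix_rt::test]
async fn cutoff_can_be_configured() {
    let server = Server::new().await;
    let index = server.index("test");

    // `searchCutoffMs` is the new settings field; 150 is the default the PR adds.
    let (task, _status) = index.update_settings(json!({ "searchCutoffMs": 150 })).await;
    index.wait_task(task.uid()).await;

    index
        .search(json!({ "q": "hello", "showRankingScoreDetails": true }), |_response, code| {
            assert_eq!(code, 200);
        })
        .await;
}
```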
This commit is contained in: fc1c3f4a29
@@ -238,6 +238,70 @@
         "title": "Total Searches (1h)",
         "type": "gauge"
       },
+      {
+        "datasource": {
+          "type": "prometheus"
+        },
+        "fieldConfig": {
+          "defaults": {
+            "color": {
+              "mode": "thresholds"
+            },
+            "mappings": [],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {
+                  "color": "green",
+                  "value": null
+                },
+                {
+                  "color": "red",
+                  "value": 80
+                }
+              ]
+            }
+          },
+          "overrides": []
+        },
+        "gridPos": {
+          "h": 6,
+          "w": 4,
+          "x": 8,
+          "y": 1
+        },
+        "id": 26,
+        "options": {
+          "orientation": "auto",
+          "reduceOptions": {
+            "calcs": [
+              "lastNotNull"
+            ],
+            "fields": "",
+            "values": false
+          },
+          "showThresholdLabels": false,
+          "showThresholdMarkers": true,
+          "text": {}
+        },
+        "pluginVersion": "9.5.2",
+        "targets": [
+          {
+            "datasource": {
+              "type": "prometheus"
+            },
+            "editorMode": "builder",
+            "exemplar": true,
+            "expr": "round(increase(meilisearch_degraded_search_requests{job=\"$job\"}[1h]))",
+            "interval": "",
+            "legendFormat": "",
+            "range": true,
+            "refId": "A"
+          }
+        ],
+        "title": "Total Degraded Searches (1h)",
+        "type": "gauge"
+      },
       {
         "datasource": {
           "type": "prometheus"
@@ -277,6 +277,7 @@ pub(crate) mod test {
            }),
            pagination: Setting::NotSet,
            embedders: Setting::NotSet,
+           search_cutoff_ms: Setting::NotSet,
            _kind: std::marker::PhantomData,
        };
        settings.check()

@@ -379,6 +379,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
                v5::Setting::NotSet => v6::Setting::NotSet,
            },
            embedders: v6::Setting::NotSet,
+           search_cutoff_ms: v6::Setting::NotSet,
            _kind: std::marker::PhantomData,
        }
    }

@@ -259,6 +259,7 @@ InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
+InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;

@@ -202,6 +202,9 @@ pub struct Settings<T> {
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
     pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default, error = DeserrJsonError<InvalidSettingsSearchCutoffMs>)]
+    pub search_cutoff_ms: Setting<u64>,
 
     #[serde(skip)]
     #[deserr(skip)]

@@ -227,6 +230,7 @@ impl Settings<Checked> {
            faceting: Setting::Reset,
            pagination: Setting::Reset,
            embedders: Setting::Reset,
+           search_cutoff_ms: Setting::Reset,
            _kind: PhantomData,
        }
    }

@@ -249,6 +253,7 @@ impl Settings<Checked> {
            faceting,
            pagination,
            embedders,
+           search_cutoff_ms,
            ..
        } = self;
 
@@ -269,6 +274,7 @@ impl Settings<Checked> {
            faceting,
            pagination,
            embedders,
+           search_cutoff_ms,
            _kind: PhantomData,
        }
    }

@@ -315,6 +321,7 @@ impl Settings<Unchecked> {
            faceting: self.faceting,
            pagination: self.pagination,
            embedders: self.embedders,
+           search_cutoff_ms: self.search_cutoff_ms,
            _kind: PhantomData,
        }
    }

@@ -347,19 +354,40 @@ pub fn apply_settings_to_builder(
     settings: &Settings<Checked>,
     builder: &mut milli::update::Settings,
 ) {
-    match settings.searchable_attributes {
+    let Settings {
+        displayed_attributes,
+        searchable_attributes,
+        filterable_attributes,
+        sortable_attributes,
+        ranking_rules,
+        stop_words,
+        non_separator_tokens,
+        separator_tokens,
+        dictionary,
+        synonyms,
+        distinct_attribute,
+        proximity_precision,
+        typo_tolerance,
+        faceting,
+        pagination,
+        embedders,
+        search_cutoff_ms,
+        _kind,
+    } = settings;
+
+    match searchable_attributes {
         Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
         Setting::Reset => builder.reset_searchable_fields(),
         Setting::NotSet => (),
     }
 
-    match settings.displayed_attributes {
+    match displayed_attributes {
         Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
         Setting::Reset => builder.reset_displayed_fields(),
         Setting::NotSet => (),
     }
 
-    match settings.filterable_attributes {
+    match filterable_attributes {
         Setting::Set(ref facets) => {
             builder.set_filterable_fields(facets.clone().into_iter().collect())
         }
@@ -367,13 +395,13 @@ pub fn apply_settings_to_builder(
         Setting::NotSet => (),
     }
 
-    match settings.sortable_attributes {
+    match sortable_attributes {
         Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()),
         Setting::Reset => builder.reset_sortable_fields(),
         Setting::NotSet => (),
     }
 
-    match settings.ranking_rules {
+    match ranking_rules {
         Setting::Set(ref criteria) => {
             builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect())
         }
@@ -381,13 +409,13 @@ pub fn apply_settings_to_builder(
         Setting::NotSet => (),
     }
 
-    match settings.stop_words {
+    match stop_words {
         Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
         Setting::Reset => builder.reset_stop_words(),
         Setting::NotSet => (),
     }
 
-    match settings.non_separator_tokens {
+    match non_separator_tokens {
         Setting::Set(ref non_separator_tokens) => {
             builder.set_non_separator_tokens(non_separator_tokens.clone())
         }
@@ -395,7 +423,7 @@ pub fn apply_settings_to_builder(
         Setting::NotSet => (),
     }
 
-    match settings.separator_tokens {
+    match separator_tokens {
         Setting::Set(ref separator_tokens) => {
             builder.set_separator_tokens(separator_tokens.clone())
         }
@@ -403,31 +431,31 @@ pub fn apply_settings_to_builder(
         Setting::NotSet => (),
     }
 
-    match settings.dictionary {
+    match dictionary {
         Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
         Setting::Reset => builder.reset_dictionary(),
         Setting::NotSet => (),
     }
 
-    match settings.synonyms {
+    match synonyms {
         Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
         Setting::Reset => builder.reset_synonyms(),
         Setting::NotSet => (),
     }
 
-    match settings.distinct_attribute {
+    match distinct_attribute {
         Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
         Setting::Reset => builder.reset_distinct_field(),
         Setting::NotSet => (),
     }
 
-    match settings.proximity_precision {
+    match proximity_precision {
         Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
         Setting::Reset => builder.reset_proximity_precision(),
         Setting::NotSet => (),
     }
 
-    match settings.typo_tolerance {
+    match typo_tolerance {
         Setting::Set(ref value) => {
             match value.enabled {
                 Setting::Set(val) => builder.set_autorize_typos(val),
@@ -482,7 +510,7 @@ pub fn apply_settings_to_builder(
         Setting::NotSet => (),
     }
 
-    match &settings.faceting {
+    match faceting {
         Setting::Set(FacetingSettings { max_values_per_facet, sort_facet_values_by }) => {
             match max_values_per_facet {
                 Setting::Set(val) => builder.set_max_values_per_facet(*val),
@@ -504,7 +532,7 @@ pub fn apply_settings_to_builder(
         Setting::NotSet => (),
     }
 
-    match settings.pagination {
+    match pagination {
         Setting::Set(ref value) => match value.max_total_hits {
             Setting::Set(val) => builder.set_pagination_max_total_hits(val),
             Setting::Reset => builder.reset_pagination_max_total_hits(),
@@ -514,11 +542,17 @@ pub fn apply_settings_to_builder(
         Setting::NotSet => (),
     }
 
-    match settings.embedders.clone() {
-        Setting::Set(value) => builder.set_embedder_settings(value),
+    match embedders {
+        Setting::Set(value) => builder.set_embedder_settings(value.clone()),
         Setting::Reset => builder.reset_embedder_settings(),
         Setting::NotSet => (),
     }
+
+    match search_cutoff_ms {
+        Setting::Set(cutoff) => builder.set_search_cutoff(*cutoff),
+        Setting::Reset => builder.reset_search_cutoff(),
+        Setting::NotSet => (),
+    }
 }
 
 pub fn settings(
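A note on the rewrite above: replacing every `match settings.field` with a single destructuring `let Settings { ... } = settings;` (with no `..` catch-all) makes the compiler reject this function whenever a new settings field is added but not handled. A self-contained sketch of that trick, with a hypothetical two-field struct standing in for the real `Settings`:

```rust
// Exhaustiveness sketch: because the pattern names every field and has no
// `..`, adding a third field to `Settings` turns into a compile error here
// until the new field is explicitly handled.
struct Settings {
    pagination: Option<u32>,
    search_cutoff_ms: Option<u64>,
}

fn apply(settings: &Settings) {
    let Settings { pagination, search_cutoff_ms } = settings;

    if let Some(p) = pagination {
        println!("pagination: {p}");
    }
    if let Some(ms) = search_cutoff_ms {
        println!("cutoff: {ms} ms");
    }
}
```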
@@ -607,6 +641,8 @@ pub fn settings(
        .collect();
    let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
 
+   let search_cutoff_ms = index.search_cutoff(rtxn)?;
+
    Ok(Settings {
        displayed_attributes: match displayed_attributes {
            Some(attrs) => Setting::Set(attrs),
@@ -633,6 +669,10 @@ pub fn settings(
        faceting: Setting::Set(faceting),
        pagination: Setting::Set(pagination),
        embedders,
+       search_cutoff_ms: match search_cutoff_ms {
+           Some(cutoff) => Setting::Set(cutoff),
+           None => Setting::Reset,
+       },
        _kind: PhantomData,
    })
 }
@@ -783,6 +823,7 @@ pub(crate) mod test {
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            embedders: Setting::NotSet,
+           search_cutoff_ms: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };
 
@@ -809,6 +850,7 @@ pub(crate) mod test {
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            embedders: Setting::NotSet,
+           search_cutoff_ms: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };
 
@@ -579,6 +579,7 @@ pub struct SearchAggregator {
    // requests
    total_received: usize,
    total_succeeded: usize,
+   total_degraded: usize,
    time_spent: BinaryHeap<usize>,
 
    // sort
@@ -758,9 +759,13 @@ impl SearchAggregator {
            hits_info: _,
            facet_distribution: _,
            facet_stats: _,
+           degraded,
        } = result;
 
        self.total_succeeded = self.total_succeeded.saturating_add(1);
+       if *degraded {
+           self.total_degraded = self.total_degraded.saturating_add(1);
+       }
        self.time_spent.push(*processing_time_ms as usize);
    }
 
@@ -802,6 +807,7 @@ impl SearchAggregator {
            semantic_ratio,
            embedder,
            hybrid,
+           total_degraded,
        } = other;
 
        if self.timestamp.is_none() {
@@ -816,6 +822,7 @@ impl SearchAggregator {
        // request
        self.total_received = self.total_received.saturating_add(total_received);
        self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
+       self.total_degraded = self.total_degraded.saturating_add(total_degraded);
        self.time_spent.append(time_spent);
 
        // sort
@@ -921,6 +928,7 @@ impl SearchAggregator {
            semantic_ratio,
            embedder,
            hybrid,
+           total_degraded,
        } = self;
 
        if total_received == 0 {
@@ -940,6 +948,7 @@ impl SearchAggregator {
                "total_succeeded": total_succeeded,
                "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
                "total_received": total_received,
+               "total_degraded": total_degraded,
            },
            "sort": {
                "with_geoPoint": sort_with_geo_point,
@@ -22,6 +22,11 @@ lazy_static! {
        &["method", "path"]
    )
    .expect("Can't create a metric");
+   pub static ref MEILISEARCH_DEGRADED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
+       "meilisearch_degraded_search_requests",
+       "Meilisearch number of degraded search requests"
+   ))
+   .expect("Can't create a metric");
    pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
        register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
        .expect("Can't create a metric");
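The metric above follows the crate's existing `lazy_static` + `prometheus` pattern. A minimal standalone sketch of that pattern (the macro names `register_int_gauge!` and `opts!` are the real `prometheus` crate API; the metric name matches the diff, the helper function is illustrative):

```rust
// Register once at startup via lazy_static, then bump from the hot path.
use lazy_static::lazy_static;
use prometheus::{opts, register_int_gauge, IntGauge};

lazy_static! {
    static ref DEGRADED: IntGauge = register_int_gauge!(opts!(
        "meilisearch_degraded_search_requests",
        "Meilisearch number of degraded search requests"
    ))
    .expect("Can't create a metric");
}

fn record_degraded_search() {
    // The search route below calls `.inc()` whenever a response was degraded.
    DEGRADED.inc();
}
```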
@@ -17,6 +17,7 @@ use crate::analytics::{Analytics, SearchAggregator};
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
+use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
 use crate::search::{
     add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio,
     DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
@@ -247,6 +248,9 @@ pub async fn search_with_post(
    .await?;
    if let Ok(ref search_result) = search_result {
        aggregate.succeed(search_result);
+       if search_result.degraded {
+           MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
+       }
    }
    analytics.post_search(aggregate);
 
@@ -624,6 +624,25 @@ fn embedder_analytics(
    )
 }
 
+make_setting_route!(
+    "/search-cutoff-ms",
+    put,
+    u64,
+    meilisearch_types::deserr::DeserrJsonError<
+        meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs,
+    >,
+    search_cutoff_ms,
+    "searchCutoffMs",
+    analytics,
+    |setting: &Option<u64>, req: &HttpRequest| {
+        analytics.publish(
+            "Search Cutoff Updated".to_string(),
+            serde_json::json!({"search_cutoff_ms": setting }),
+            Some(req),
+        );
+    }
+);
+
 macro_rules! generate_configure {
     ($($mod:ident),*) => {
         pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -654,7 +673,8 @@ generate_configure!(
    typo_tolerance,
    pagination,
    faceting,
-   embedders
+   embedders,
+   search_cutoff_ms
 );
 
 pub async fn update_all(
@@ -765,7 +785,8 @@ pub async fn update_all(
            "synonyms": {
                "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
            },
-           "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set())
+           "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()),
+           "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(),
        }),
        Some(&req),
    );
@@ -1,7 +1,7 @@
 use std::cmp::min;
 use std::collections::{BTreeMap, BTreeSet, HashSet};
 use std::str::FromStr;
-use std::time::Instant;
+use std::time::{Duration, Instant};
 
 use deserr::Deserr;
 use either::Either;
@@ -14,7 +14,7 @@ use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy};
 use meilisearch_types::milli::vector::DistributionShift;
-use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues};
+use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
 use milli::tokenizer::TokenizerBuilder;
@@ -323,6 +323,10 @@ pub struct SearchResult {
    pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub facet_stats: Option<BTreeMap<String, FacetStats>>,
+
+   // This information is only used for analytics purposes
+   #[serde(skip)]
+   pub degraded: bool,
 }
 
 #[derive(Serialize, Debug, Clone, PartialEq)]
|
|||||||
query: &'t SearchQuery,
|
query: &'t SearchQuery,
|
||||||
features: RoFeatures,
|
features: RoFeatures,
|
||||||
distribution: Option<DistributionShift>,
|
distribution: Option<DistributionShift>,
|
||||||
|
time_budget: TimeBudget,
|
||||||
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
|
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
|
||||||
let mut search = index.search(rtxn);
|
let mut search = index.search(rtxn);
|
||||||
|
search.time_budget(time_budget);
|
||||||
|
|
||||||
if query.vector.is_some() {
|
if query.vector.is_some() {
|
||||||
features.check_vector("Passing `vector` as a query parameter")?;
|
features.check_vector("Passing `vector` as a query parameter")?;
|
||||||
@ -492,18 +498,28 @@ pub fn perform_search(
|
|||||||
) -> Result<SearchResult, MeilisearchHttpError> {
|
) -> Result<SearchResult, MeilisearchHttpError> {
|
||||||
let before_search = Instant::now();
|
let before_search = Instant::now();
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
|
let time_budget = match index.search_cutoff(&rtxn)? {
|
||||||
|
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||||
|
None => TimeBudget::default(),
|
||||||
|
};
|
||||||
|
|
||||||
let (search, is_finite_pagination, max_total_hits, offset) =
|
let (search, is_finite_pagination, max_total_hits, offset) =
|
||||||
prepare_search(index, &rtxn, &query, features, distribution)?;
|
prepare_search(index, &rtxn, &query, features, distribution, time_budget)?;
|
||||||
|
|
||||||
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
|
let milli::SearchResult {
|
||||||
match &query.hybrid {
|
documents_ids,
|
||||||
Some(hybrid) => match *hybrid.semantic_ratio {
|
matching_words,
|
||||||
ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
|
candidates,
|
||||||
ratio => search.execute_hybrid(ratio)?,
|
document_scores,
|
||||||
},
|
degraded,
|
||||||
None => search.execute()?,
|
..
|
||||||
};
|
} = match &query.hybrid {
|
||||||
|
Some(hybrid) => match *hybrid.semantic_ratio {
|
||||||
|
ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
|
||||||
|
ratio => search.execute_hybrid(ratio)?,
|
||||||
|
},
|
||||||
|
None => search.execute()?,
|
||||||
|
};
|
||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
|
|
||||||
@ -700,6 +716,7 @@ pub fn perform_search(
|
|||||||
processing_time_ms: before_search.elapsed().as_millis(),
|
processing_time_ms: before_search.elapsed().as_millis(),
|
||||||
facet_distribution,
|
facet_distribution,
|
||||||
facet_stats,
|
facet_stats,
|
||||||
|
degraded,
|
||||||
};
|
};
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
@ -713,8 +730,13 @@ pub fn perform_facet_search(
|
|||||||
) -> Result<FacetSearchResult, MeilisearchHttpError> {
|
) -> Result<FacetSearchResult, MeilisearchHttpError> {
|
||||||
let before_search = Instant::now();
|
let before_search = Instant::now();
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
|
let time_budget = match index.search_cutoff(&rtxn)? {
|
||||||
|
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||||
|
None => TimeBudget::default(),
|
||||||
|
};
|
||||||
|
|
||||||
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?;
|
let (search, _, _, _) =
|
||||||
|
prepare_search(index, &rtxn, &search_query, features, None, time_budget)?;
|
||||||
let mut facet_search =
|
let mut facet_search =
|
||||||
SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some());
|
SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some());
|
||||||
if let Some(facet_query) = &facet_query {
|
if let Some(facet_query) = &facet_query {
|
||||||
|
@@ -328,6 +328,11 @@ impl Index<'_> {
        self.service.patch_encoded(url, settings, self.encoder).await
    }
 
+   pub async fn update_settings_search_cutoff_ms(&self, settings: Value) -> (Value, StatusCode) {
+       let url = format!("/indexes/{}/settings/search-cutoff-ms", urlencode(self.uid.as_ref()));
+       self.service.put_encoded(url, settings, self.encoder).await
+   }
+
    pub async fn delete_settings(&self) -> (Value, StatusCode) {
        let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref()));
        self.service.delete(url).await

@@ -16,6 +16,7 @@ pub use server::{default_settings, Server};
 pub struct Value(pub serde_json::Value);
 
 impl Value {
+    #[track_caller]
     pub fn uid(&self) -> u64 {
         if let Some(uid) = self["uid"].as_u64() {
             uid

@@ -77,7 +77,8 @@ async fn import_dump_v1_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -238,7 +239,8 @@ async fn import_dump_v1_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -385,7 +387,8 @@ async fn import_dump_v1_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -518,7 +521,8 @@ async fn import_dump_v2_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -663,7 +667,8 @@ async fn import_dump_v2_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -807,7 +812,8 @@ async fn import_dump_v2_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -940,7 +946,8 @@ async fn import_dump_v3_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -1085,7 +1092,8 @@ async fn import_dump_v3_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -1229,7 +1237,8 @@ async fn import_dump_v3_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -1362,7 +1371,8 @@ async fn import_dump_v4_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -1507,7 +1517,8 @@ async fn import_dump_v4_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -1651,7 +1662,8 @@ async fn import_dump_v4_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###
    );
@@ -1895,7 +1907,8 @@ async fn import_dump_v6_containing_experimental_features() {
      },
      "pagination": {
        "maxTotalHits": 1000
-     }
+     },
+     "searchCutoffMs": null
    }
    "###);
 
@@ -834,6 +834,94 @@ async fn test_score_details() {
        .await;
 }
 
+#[actix_rt::test]
+async fn test_degraded_score_details() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = NESTED_DOCUMENTS.clone();
+
+    index.add_documents(json!(documents), None).await;
+    // We can't really use anything else than 0ms here; otherwise, the test will get flaky.
+    let (res, _code) = index.update_settings(json!({ "searchCutoffMs": 0 })).await;
+    index.wait_task(res.uid()).await;
+
+    index
+        .search(
+            json!({
+                "q": "b",
+                "attributesToRetrieve": ["doggos.name", "cattos"],
+                "showRankingScoreDetails": true,
+            }),
+            |response, code| {
+                meili_snap::snapshot!(code, @"200 OK");
+                meili_snap::snapshot!(meili_snap::json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###"
+                {
+                  "hits": [
+                    {
+                      "doggos": [
+                        {
+                          "name": "bobby"
+                        },
+                        {
+                          "name": "buddy"
+                        }
+                      ],
+                      "cattos": "pésti",
+                      "_rankingScoreDetails": {
+                        "skipped": {
+                          "order": 0
+                        }
+                      }
+                    },
+                    {
+                      "doggos": [
+                        {
+                          "name": "gros bill"
+                        }
+                      ],
+                      "cattos": [
+                        "simba",
+                        "pestiféré"
+                      ],
+                      "_rankingScoreDetails": {
+                        "skipped": {
+                          "order": 0
+                        }
+                      }
+                    },
+                    {
+                      "doggos": [
+                        {
+                          "name": "turbo"
+                        },
+                        {
+                          "name": "fast"
+                        }
+                      ],
+                      "cattos": [
+                        "moumoute",
+                        "gomez"
+                      ],
+                      "_rankingScoreDetails": {
+                        "skipped": {
+                          "order": 0
+                        }
+                      }
+                    }
+                  ],
+                  "query": "b",
+                  "processingTimeMs": "[duration]",
+                  "limit": 20,
+                  "offset": 0,
+                  "estimatedTotalHits": 3
+                }
+                "###);
+            },
+        )
+        .await;
+}
+
 #[actix_rt::test]
 async fn experimental_feature_vector_store() {
     let server = Server::new().await;
@@ -337,3 +337,31 @@ async fn settings_bad_pagination() {
    }
    "###);
 }
+
+#[actix_rt::test]
+async fn settings_bad_search_cutoff_ms() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let (response, code) = index.update_settings(json!({ "searchCutoffMs": "doggo" })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type at `.searchCutoffMs`: expected a positive integer, but found a string: `\"doggo\"`",
+      "code": "invalid_settings_search_cutoff_ms",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_settings_search_cutoff_ms"
+    }
+    "###);
+
+    let (response, code) = index.update_settings_search_cutoff_ms(json!("doggo")).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type: expected a positive integer, but found a string: `\"doggo\"`",
+      "code": "invalid_settings_search_cutoff_ms",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_settings_search_cutoff_ms"
+    }
+    "###);
+}

@@ -35,6 +35,7 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
            "maxTotalHits": json!(1000),
        }),
    );
+   map.insert("search_cutoff_ms", json!(null));
    map
 });
 
@@ -49,12 +50,12 @@ async fn get_settings_unexisting_index() {
 async fn get_settings() {
    let server = Server::new().await;
    let index = server.index("test");
-   index.create(None).await;
-   index.wait_task(0).await;
+   let (response, _code) = index.create(None).await;
+   index.wait_task(response.uid()).await;
    let (response, code) = index.settings().await;
    assert_eq!(code, 200);
    let settings = response.as_object().unwrap();
-   assert_eq!(settings.keys().len(), 15);
+   assert_eq!(settings.keys().len(), 16);
    assert_eq!(settings["displayedAttributes"], json!(["*"]));
    assert_eq!(settings["searchableAttributes"], json!(["*"]));
    assert_eq!(settings["filterableAttributes"], json!([]));
@@ -84,6 +85,7 @@ async fn get_settings() {
        })
    );
    assert_eq!(settings["proximityPrecision"], json!("byWord"));
+   assert_eq!(settings["searchCutoffMs"], json!(null));
 }
 
 #[actix_rt::test]
@@ -285,7 +287,8 @@ test_setting_routes!(
    ranking_rules put,
    synonyms put,
    pagination patch,
-   faceting patch
+   faceting patch,
+   search_cutoff_ms put
 );
 
 #[actix_rt::test]
@@ -6,7 +6,7 @@ use std::time::Instant;
 use heed::EnvOpenOptions;
 use milli::{
     execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext,
-    SearchLogger, TermsMatchingStrategy,
+    SearchLogger, TermsMatchingStrategy, TimeBudget,
 };
 
 #[global_allocator]
@@ -65,6 +65,7 @@ fn main() -> Result<(), Box<dyn Error>> {
            None,
            &mut DefaultSearchLogger,
            logger,
+           TimeBudget::max(),
        )?;
        if let Some((logger, dir)) = detailed_logger {
            logger.finish(&mut ctx, Path::new(dir))?;
@@ -67,6 +67,7 @@ pub mod main_key {
    pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
    pub const PROXIMITY_PRECISION: &str = "proximity-precision";
    pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
+   pub const SEARCH_CUTOFF: &str = "search_cutoff";
 }
 
 pub mod db_name {
@@ -1505,6 +1506,18 @@ impl Index {
            _ => "default".to_owned(),
        })
    }
+
+   pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
+       self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
+   }
+
+   pub fn search_cutoff(&self, rtxn: &RoTxn<'_>) -> Result<Option<u64>> {
+       Ok(self.main.remap_types::<Str, BEU64>().get(rtxn, main_key::SEARCH_CUTOFF)?)
+   }
+
+   pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
+       self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCH_CUTOFF)
+   }
 }
 
 #[cfg(test)]
@@ -2421,6 +2434,7 @@ pub(crate) mod tests {
            candidates: _,
            document_scores: _,
            mut documents_ids,
+           degraded: _,
        } = search.execute().unwrap();
        let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
        documents_ids.sort_unstable();
@@ -30,6 +30,7 @@ pub mod snapshot_tests;
 
 use std::collections::{BTreeMap, HashMap};
 use std::convert::{TryFrom, TryInto};
+use std::fmt;
 use std::hash::BuildHasherDefault;
 
 use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
@@ -104,6 +105,73 @@ pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
 
 pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;
 
+#[derive(Clone)]
+pub struct TimeBudget {
+    started_at: std::time::Instant,
+    budget: std::time::Duration,
+
+    /// When testing the time budget, ensuring we did more than one iteration of the bucket sort can be useful.
+    /// But to avoid being flaky, the only option is to add the ability to stop after a specific number of calls instead of a `Duration`.
+    #[cfg(test)]
+    stop_after: Option<(std::sync::Arc<std::sync::atomic::AtomicUsize>, usize)>,
+}
+
+impl fmt::Debug for TimeBudget {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("TimeBudget")
+            .field("started_at", &self.started_at)
+            .field("budget", &self.budget)
+            .field("left", &(self.budget - self.started_at.elapsed()))
+            .finish()
+    }
+}
+
+impl Default for TimeBudget {
+    fn default() -> Self {
+        Self::new(std::time::Duration::from_millis(150))
+    }
+}
+
+impl TimeBudget {
+    pub fn new(budget: std::time::Duration) -> Self {
+        Self {
+            started_at: std::time::Instant::now(),
+            budget,
+
+            #[cfg(test)]
+            stop_after: None,
+        }
+    }
+
+    pub fn max() -> Self {
+        Self::new(std::time::Duration::from_secs(u64::MAX))
+    }
+
+    #[cfg(test)]
+    pub fn with_stop_after(mut self, stop_after: usize) -> Self {
+        use std::sync::atomic::AtomicUsize;
+        use std::sync::Arc;
+
+        self.stop_after = Some((Arc::new(AtomicUsize::new(0)), stop_after));
+        self
+    }
+
+    pub fn exceeded(&self) -> bool {
+        #[cfg(test)]
+        if let Some((current, stop_after)) = &self.stop_after {
+            let current = current.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+            if current >= *stop_after {
+                return true;
+            } else {
+                // if a number has been specified then we ignore entirely the time budget
+                return false;
+            }
+        }
+
+        self.started_at.elapsed() > self.budget
+    }
+}
+
 // Convert an absolute word position into a relative position.
 // Return the field id of the attribute related to the absolute position
 // and the relative position in the attribute.
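`TimeBudget` is the core primitive of this PR: the clock starts when the budget is created, and callers poll `exceeded()` between units of work. A minimal consumption sketch (assumes the `TimeBudget` type above is in scope; `collect_with_budget` and its arguments are illustrative):

```rust
// Poll `exceeded()` between work items and return the partial, already-sorted
// prefix plus a `degraded` flag once the budget runs out, instead of erroring.
fn collect_with_budget(candidates: &[u32], budget: &TimeBudget) -> (Vec<u32>, bool) {
    let mut ranked = Vec::new();
    for &docid in candidates {
        if budget.exceeded() {
            return (ranked, true); // degraded: keep what was ranked in time
        }
        ranked.push(docid); // stands in for one ranking-rule iteration
    }
    (ranked, false)
}

// let budget = TimeBudget::new(std::time::Duration::from_millis(150));
// let (docs, degraded) = collect_with_budget(&[1, 2, 3], &budget);
```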
@@ -17,6 +17,9 @@ pub enum ScoreDetails {
    Sort(Sort),
    Vector(Vector),
    GeoSort(GeoSort),
+
+   /// Returned when we don't have the time to finish applying all the subsequent ranking-rules
+   Skipped,
 }
 
 #[derive(Clone, Copy)]
@@ -50,6 +53,7 @@ impl ScoreDetails {
            ScoreDetails::Sort(_) => None,
            ScoreDetails::GeoSort(_) => None,
            ScoreDetails::Vector(_) => None,
+           ScoreDetails::Skipped => Some(Rank { rank: 0, max_rank: 1 }),
        }
    }
 
@@ -97,6 +101,7 @@ impl ScoreDetails {
            ScoreDetails::Vector(vector) => RankOrValue::Score(
                vector.value_similarity.as_ref().map(|(_, s)| *s as f64).unwrap_or(0.0f64),
            ),
+           ScoreDetails::Skipped => RankOrValue::Rank(Rank { rank: 0, max_rank: 1 }),
        }
    }
 
@@ -256,6 +261,11 @@ impl ScoreDetails {
                    details_map.insert(vector, details);
                    order += 1;
                }
+               ScoreDetails::Skipped => {
+                   details_map
+                       .insert("skipped".to_string(), serde_json::json!({ "order": order }));
+                   order += 1;
+               }
            }
        }
        details_map
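The `Skipped` arm above is what produces the `_rankingScoreDetails` entry asserted by the degraded-search test earlier in this diff. A small sketch of the resulting shape (the helper function is illustrative):

```rust
// Builds the same JSON the `Skipped` arm inserts into the details map:
// {"skipped": {"order": N}} where N is the position of the first skipped rule.
fn skipped_detail(order: usize) -> serde_json::Value {
    serde_json::json!({ "skipped": { "order": order } })
}

// skipped_detail(0) == serde_json::json!({"skipped": {"order": 0}})
```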
@@ -10,6 +10,7 @@ struct ScoreWithRatioResult {
    matching_words: MatchingWords,
    candidates: RoaringBitmap,
    document_scores: Vec<(u32, ScoreWithRatio)>,
+   degraded: bool,
 }
 
 type ScoreWithRatio = (Vec<ScoreDetails>, f32);
@@ -49,8 +50,12 @@ fn compare_scores(
                order => return order,
            }
        }
-       (Some(ScoreValue::Score(_)), Some(_)) => return Ordering::Greater,
-       (Some(_), Some(ScoreValue::Score(_))) => return Ordering::Less,
+       (Some(ScoreValue::Score(x)), Some(_)) => {
+           return if x == 0. { Ordering::Less } else { Ordering::Greater }
+       }
+       (Some(_), Some(ScoreValue::Score(x))) => {
+           return if x == 0. { Ordering::Greater } else { Ordering::Less }
+       }
        // if we have this, we're bad
        (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
        | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
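The `compare_scores` change above is presumably motivated by the new `Skipped` detail: a skipped hit reports `Rank { rank: 0, max_rank: 1 }`, i.e. a global score of exactly 0, so in hybrid merging a raw score of 0.0 must now lose against any other score instead of unconditionally winning as before. A standalone sketch of that rule (function name is illustrative):

```rust
// Tie-break sketch: a score of exactly 0.0 (what a skipped/degraded hit
// contributes) sinks below the other side; any genuine score still wins.
use std::cmp::Ordering;

fn score_vs_other(score: f64) -> Ordering {
    if score == 0.0 {
        Ordering::Less // degraded hits sort after rank-scored hits
    } else {
        Ordering::Greater // real semantic scores still outrank rank values
    }
}
```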
@ -72,6 +77,7 @@ impl ScoreWithRatioResult {
|
|||||||
matching_words: results.matching_words,
|
matching_words: results.matching_words,
|
||||||
candidates: results.candidates,
|
candidates: results.candidates,
|
||||||
document_scores,
|
document_scores,
|
||||||
|
degraded: results.degraded,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -106,6 +112,7 @@ impl ScoreWithRatioResult {
|
|||||||
candidates: left.candidates | right.candidates,
|
candidates: left.candidates | right.candidates,
|
||||||
documents_ids,
|
documents_ids,
|
||||||
document_scores,
|
document_scores,
|
||||||
|
degraded: left.degraded | right.degraded,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -131,6 +138,7 @@ impl<'a> Search<'a> {
|
|||||||
index: self.index,
|
index: self.index,
|
||||||
distribution_shift: self.distribution_shift,
|
distribution_shift: self.distribution_shift,
|
||||||
embedder_name: self.embedder_name.clone(),
|
embedder_name: self.embedder_name.clone(),
|
||||||
|
time_budget: self.time_budget.clone(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let vector_query = search.vector.take();
|
let vector_query = search.vector.take();
|
||||||
|
@ -11,7 +11,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
|
|||||||
use crate::vector::DistributionShift;
|
use crate::vector::DistributionShift;
|
||||||
use crate::{
|
use crate::{
|
||||||
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result,
|
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result,
|
||||||
SearchContext,
|
SearchContext, TimeBudget,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
@ -43,6 +43,8 @@ pub struct Search<'a> {
|
|||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
distribution_shift: Option<DistributionShift>,
|
distribution_shift: Option<DistributionShift>,
|
||||||
embedder_name: Option<String>,
|
embedder_name: Option<String>,
|
||||||
|
|
||||||
|
time_budget: TimeBudget,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Search<'a> {
|
impl<'a> Search<'a> {
|
||||||
@ -64,6 +66,7 @@ impl<'a> Search<'a> {
|
|||||||
index,
|
index,
|
||||||
distribution_shift: None,
|
distribution_shift: None,
|
||||||
embedder_name: None,
|
embedder_name: None,
|
||||||
|
time_budget: TimeBudget::max(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,6 +146,11 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
|
||||||
|
self.time_budget = time_budget;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
||||||
if has_vector_search {
|
if has_vector_search {
|
||||||
let ctx = SearchContext::new(self.index, self.rtxn);
|
let ctx = SearchContext::new(self.index, self.rtxn);
|
||||||
@ -169,36 +177,43 @@ impl<'a> Search<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let universe = filtered_universe(&ctx, &self.filter)?;
|
let universe = filtered_universe(&ctx, &self.filter)?;
|
||||||
let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
|
let PartialSearchResult {
|
||||||
match self.vector.as_ref() {
|
located_query_terms,
|
||||||
Some(vector) => execute_vector_search(
|
candidates,
|
||||||
&mut ctx,
|
documents_ids,
|
||||||
vector,
|
document_scores,
|
||||||
self.scoring_strategy,
|
degraded,
|
||||||
universe,
|
} = match self.vector.as_ref() {
|
||||||
&self.sort_criteria,
|
Some(vector) => execute_vector_search(
|
||||||
self.geo_strategy,
|
&mut ctx,
|
||||||
self.offset,
|
vector,
|
||||||
self.limit,
|
self.scoring_strategy,
|
||||||
self.distribution_shift,
|
universe,
|
||||||
embedder_name,
|
&self.sort_criteria,
|
||||||
)?,
|
self.geo_strategy,
|
||||||
None => execute_search(
|
self.offset,
|
||||||
&mut ctx,
|
self.limit,
|
||||||
self.query.as_deref(),
|
self.distribution_shift,
|
||||||
self.terms_matching_strategy,
|
embedder_name,
|
||||||
self.scoring_strategy,
|
self.time_budget.clone(),
|
||||||
self.exhaustive_number_hits,
|
)?,
|
||||||
universe,
|
None => execute_search(
|
||||||
&self.sort_criteria,
|
&mut ctx,
|
||||||
self.geo_strategy,
|
self.query.as_deref(),
|
||||||
self.offset,
|
self.terms_matching_strategy,
|
||||||
self.limit,
|
self.scoring_strategy,
|
||||||
Some(self.words_limit),
|
self.exhaustive_number_hits,
|
||||||
&mut DefaultSearchLogger,
|
universe,
|
||||||
&mut DefaultSearchLogger,
|
&self.sort_criteria,
|
||||||
)?,
|
self.geo_strategy,
|
||||||
};
|
self.offset,
|
||||||
|
self.limit,
|
||||||
|
Some(self.words_limit),
|
||||||
|
&mut DefaultSearchLogger,
|
||||||
|
&mut DefaultSearchLogger,
|
||||||
|
self.time_budget.clone(),
|
||||||
|
)?,
|
||||||
|
};
|
||||||
|
|
||||||
// consume context and located_query_terms to build MatchingWords.
|
// consume context and located_query_terms to build MatchingWords.
|
||||||
let matching_words = match located_query_terms {
|
let matching_words = match located_query_terms {
|
||||||
@ -206,7 +221,7 @@ impl<'a> Search<'a> {
|
|||||||
None => MatchingWords::default(),
|
None => MatchingWords::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids })
|
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids, degraded })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -229,6 +244,7 @@ impl fmt::Debug for Search<'_> {
             index: _,
             distribution_shift,
             embedder_name,
+            time_budget,
         } = self;
         f.debug_struct("Search")
             .field("query", query)
@@ -244,6 +260,7 @@ impl fmt::Debug for Search<'_> {
             .field("words_limit", words_limit)
             .field("distribution_shift", distribution_shift)
             .field("embedder_name", embedder_name)
+            .field("time_budget", time_budget)
             .finish()
     }
 }
@@ -254,6 +271,7 @@ pub struct SearchResult {
     pub candidates: RoaringBitmap,
     pub documents_ids: Vec<DocumentId>,
     pub document_scores: Vec<Vec<ScoreDetails>>,
+    pub degraded: bool,
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
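Callers learn whether a cutoff fired through the new `degraded` flag. A minimal consumer sketch (a hypothetical helper, not from the diff; the HTTP layer that actually surfaces this is outside this excerpt):

```rust
use milli::SearchResult;

// Sketch: a caller inspecting the new flag, e.g. to feed a metrics counter.
fn log_if_degraded(result: &SearchResult) {
    if result.degraded {
        // Some ranking rules were skipped when the time budget ran out, so the
        // ordering is a best effort over what could be sorted in time.
        eprintln!("degraded search returned {} documents", result.documents_ids.len());
    }
}
```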

@@ -5,12 +5,14 @@ use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
 use super::SearchContext;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
-use crate::Result;
+use crate::{Result, TimeBudget};

 pub struct BucketSortOutput {
     pub docids: Vec<u32>,
     pub scores: Vec<Vec<ScoreDetails>>,
     pub all_candidates: RoaringBitmap,
+
+    pub degraded: bool,
 }

 // TODO: would probably be good to regroup some of these inside of a struct?
@@ -25,6 +27,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
     length: usize,
     scoring_strategy: ScoringStrategy,
     logger: &mut dyn SearchLogger<Q>,
+    time_budget: TimeBudget,
 ) -> Result<BucketSortOutput> {
     logger.initial_query(query);
     logger.ranking_rules(&ranking_rules);
@@ -41,6 +44,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             docids: vec![],
             scores: vec![],
             all_candidates: universe.clone(),
+            degraded: false,
         });
     }
     if ranking_rules.is_empty() {
@@ -74,6 +78,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                 scores: vec![Default::default(); results.len()],
                 docids: results,
                 all_candidates,
+                degraded: false,
             });
         } else {
             let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
@@ -81,6 +86,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                 scores: vec![Default::default(); docids.len()],
                 docids,
                 all_candidates: universe.clone(),
+                degraded: false,
             });
         };
     }
@@ -154,6 +160,28 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
     }

     while valid_docids.len() < length {
+        if time_budget.exceeded() {
+            loop {
+                let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
+                ranking_rule_scores.push(ScoreDetails::Skipped);
+                maybe_add_to_results!(bucket);
+                ranking_rule_scores.pop();
+
+                if cur_ranking_rule_index == 0 {
+                    break;
+                }
+
+                back!();
+            }
+
+            return Ok(BucketSortOutput {
+                scores: valid_scores,
+                docids: valid_docids,
+                all_candidates,
+                degraded: true,
+            });
+        }
+
         // The universe for this bucket is zero, so we don't need to sort
         // anything, just go back to the parent ranking rule.
         if ranking_rule_universes[cur_ranking_rule_index].is_empty()
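`TimeBudget` itself is defined outside this excerpt. From the call sites (`TimeBudget::new`, `TimeBudget::max`, `with_stop_after`, `exceeded`, and the `.clone()` when it is threaded into both search paths), its shape can be inferred roughly as in the sketch below; the field names and the `Arc<AtomicUsize>` test counter are assumptions, not the committed code:

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};

/// Sketch of the inferred TimeBudget shape; names are assumptions.
#[derive(Debug, Clone)]
pub struct TimeBudget {
    started_at: Instant,
    budget: Duration,
    /// Test hook: deterministically report exhaustion after N `exceeded()` calls.
    stop_after: Option<(Arc<AtomicUsize>, usize)>,
}

impl TimeBudget {
    pub fn new(budget: Duration) -> Self {
        Self { started_at: Instant::now(), budget, stop_after: None }
    }

    /// A budget that will effectively never be exceeded.
    pub fn max() -> Self {
        Self::new(Duration::from_secs(u64::MAX / 10))
    }

    /// Used by the cutoff tests to stop after an exact number of loop iterations.
    pub fn with_stop_after(mut self, calls: usize) -> Self {
        self.stop_after = Some((Arc::new(AtomicUsize::new(0)), calls));
        self
    }

    pub fn exceeded(&self) -> bool {
        match &self.stop_after {
            // Each check advances the shared counter; exhaustion is reported once
            // `calls` checks have happened, independently of wall-clock time.
            Some((count, calls)) => count.fetch_add(1, Ordering::Relaxed) >= *calls,
            None => self.started_at.elapsed() > self.budget,
        }
    }
}
```

Sharing the counter behind an `Arc` would keep the clones handed to the vector and keyword paths in sync, which is one plausible reason the budget is `Clone` rather than borrowed.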
@@ -219,7 +247,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
         )?;
     }

-    Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates })
+    Ok(BucketSortOutput {
+        docids: valid_docids,
+        scores: valid_scores,
+        all_candidates,
+        degraded: false,
+    })
 }

 /// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`

@@ -502,7 +502,7 @@ mod tests {

     use super::*;
     use crate::index::tests::TempIndex;
-    use crate::{execute_search, filtered_universe, SearchContext};
+    use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};

     impl<'a> MatcherBuilder<'a> {
         fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
@@ -522,6 +522,7 @@ mod tests {
                 Some(10),
                 &mut crate::DefaultSearchLogger,
                 &mut crate::DefaultSearchLogger,
+                TimeBudget::max(),
             )
             .unwrap();


@@ -52,7 +52,8 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::search::new::distinct::apply_distinct_rule;
 use crate::vector::DistributionShift;
 use crate::{
-    AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError,
+    AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
+    UserError,
 };

 /// A structure used throughout the execution of a search query.
@@ -518,6 +519,7 @@ pub fn execute_vector_search(
     length: usize,
     distribution_shift: Option<DistributionShift>,
     embedder_name: &str,
+    time_budget: TimeBudget,
 ) -> Result<PartialSearchResult> {
     check_sort_criteria(ctx, sort_criteria.as_ref())?;

@@ -537,7 +539,7 @@ pub fn execute_vector_search(
     let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> =
         &mut placeholder_search_logger;

-    let BucketSortOutput { docids, scores, all_candidates } = bucket_sort(
+    let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort(
         ctx,
         ranking_rules,
         &PlaceholderQuery,
@@ -546,6 +548,7 @@
         length,
         scoring_strategy,
         placeholder_search_logger,
+        time_budget,
     )?;

     Ok(PartialSearchResult {
@@ -553,6 +556,7 @@
         document_scores: scores,
         documents_ids: docids,
         located_query_terms: None,
+        degraded,
     })
 }

@@ -572,6 +576,7 @@ pub fn execute_search(
     words_limit: Option<usize>,
     placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
     query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
+    time_budget: TimeBudget,
 ) -> Result<PartialSearchResult> {
     check_sort_criteria(ctx, sort_criteria.as_ref())?;

@@ -648,6 +653,7 @@ pub fn execute_search(
             length,
             scoring_strategy,
             query_graph_logger,
+            time_budget,
         )?
     } else {
         let ranking_rules =
@@ -661,10 +667,11 @@ pub fn execute_search(
             length,
             scoring_strategy,
             placeholder_search_logger,
+            time_budget,
         )?
     };

-    let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output;
+    let BucketSortOutput { docids, scores, mut all_candidates, degraded } = bucket_sort_output;
     let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;

     // The candidates is the universe unless the exhaustive number of hits
@@ -682,6 +689,7 @@ pub fn execute_search(
         document_scores: scores,
         documents_ids: docids,
         located_query_terms,
+        degraded,
     })
 }

@@ -742,4 +750,6 @@ pub struct PartialSearchResult {
     pub candidates: RoaringBitmap,
     pub documents_ids: Vec<DocumentId>,
     pub document_scores: Vec<Vec<ScoreDetails>>,
+
+    pub degraded: bool,
 }

milli/src/search/new/tests/cutoff.rs (new file, 429 lines)
@@ -0,0 +1,429 @@
+//! This module tests the search cutoff and ensures a few things:
+//! 1. A basic test works and marks the search as degraded
+//! 2. A test that ensures the filters are effectively applied even with a cutoff of 0
+//! 3. A test that ensures the cutoff works well with the ranking scores
+
+use std::time::Duration;
+
+use big_s::S;
+use maplit::hashset;
+use meili_snap::snapshot;
+
+use crate::index::tests::TempIndex;
+use crate::score_details::{ScoreDetails, ScoringStrategy};
+use crate::{Criterion, Filter, Search, TimeBudget};
+
+fn create_index() -> TempIndex {
+    let index = TempIndex::new();
+
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_owned());
+            s.set_searchable_fields(vec!["text".to_owned()]);
+            s.set_filterable_fields(hashset! { S("id") });
+            s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
+        })
+        .unwrap();
+
+    // reverse the ID / insertion order so we can better tell what was actually sorted from what merely kept its insertion order
+    index
+        .add_documents(documents!([
+            {
+                "id": 4,
+                "text": "hella puppo kefir",
+            },
+            {
+                "id": 3,
+                "text": "hella puppy kefir",
+            },
+            {
+                "id": 2,
+                "text": "hello",
+            },
+            {
+                "id": 1,
+                "text": "hello puppy",
+            },
+            {
+                "id": 0,
+                "text": "hello puppy kefir",
+            },
+        ]))
+        .unwrap();
+    index
+}
+
+#[test]
+fn basic_degraded_search() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query("hello puppy kefir");
+    search.limit(3);
+    search.time_budget(TimeBudget::new(Duration::from_millis(0)));
+
+    let result = search.execute().unwrap();
+    assert!(result.degraded);
+}
+
+#[test]
+fn degraded_search_cannot_skip_filter() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query("hello puppy kefir");
+    search.limit(100);
+    search.time_budget(TimeBudget::new(Duration::from_millis(0)));
+    let filter_condition = Filter::from_str("id > 2").unwrap().unwrap();
+    search.filter(filter_condition);
+
+    let result = search.execute().unwrap();
+    assert!(result.degraded);
+    snapshot!(format!("{:?}\n{:?}", result.candidates, result.documents_ids), @r###"
+    RoaringBitmap<[0, 1]>
+    [0, 1]
+    "###);
+}
+
+#[test]
+#[allow(clippy::format_collect)] // the test is already quite big
+fn degraded_search_and_score_details() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query("hello puppy kefir");
+    search.limit(4);
+    search.scoring_strategy(ScoringStrategy::Detailed);
+    search.time_budget(TimeBudget::max());
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [4, 1, 0, 3]
+    Scores: 1.0000 0.9167 0.8333 0.6667
+    Score Details:
+    [
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 0,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 1,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 2,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 2,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 0,
+                    max_typo_count: 2,
+                },
+            ),
+        ],
+    ]
+    "###);
+
+    // Do ONE loop iteration. Not much can be deduced; almost every document matched the first bucket of the words ranking rule.
+    search.time_budget(TimeBudget::max().with_stop_after(1));
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [0, 1, 4, 2]
+    Scores: 0.6667 0.6667 0.6667 0.0000
+    Score Details:
+    [
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Skipped,
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Skipped,
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Skipped,
+        ],
+        [
+            Skipped,
+        ],
+    ]
+    "###);
+
+    // Do TWO loop iterations. The first document should be entirely sorted.
+    search.time_budget(TimeBudget::max().with_stop_after(2));
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [4, 0, 1, 2]
+    Scores: 1.0000 0.6667 0.6667 0.0000
+    Score Details:
+    [
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 0,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Skipped,
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Skipped,
+        ],
+        [
+            Skipped,
+        ],
+    ]
+    "###);
+
+    // Do THREE loop iterations. The second document should be entirely sorted as well.
+    search.time_budget(TimeBudget::max().with_stop_after(3));
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [4, 1, 0, 2]
+    Scores: 1.0000 0.9167 0.6667 0.0000
+    Score Details:
+    [
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 0,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 1,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Skipped,
+        ],
+        [
+            Skipped,
+        ],
+    ]
+    "###);
+
+    // Do FOUR loop iterations. The third document should be entirely sorted as well.
+    // The words bucket still has not progressed, thus the last document doesn't have any info yet.
+    search.time_budget(TimeBudget::max().with_stop_after(4));
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [4, 1, 0, 2]
+    Scores: 1.0000 0.9167 0.8333 0.0000
+    Score Details:
+    [
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 0,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 1,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 2,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Skipped,
+        ],
+    ]
+    "###);
+
+    // After SIX loop iterations, the words ranking rule gives us a new bucket.
+    // Since we reached the limit, we were able to exit early without checking the typo ranking rule.
+    search.time_budget(TimeBudget::max().with_stop_after(6));
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [4, 1, 0, 3]
+    Scores: 1.0000 0.9167 0.8333 0.3333
+    Score Details:
+    [
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 0,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 1,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 3,
+                    max_matching_words: 3,
+                },
+            ),
+            Typo(
+                Typo {
+                    typo_count: 2,
+                    max_typo_count: 3,
+                },
+            ),
+        ],
+        [
+            Words(
+                Words {
+                    matching_words: 2,
+                    max_matching_words: 3,
+                },
+            ),
+            Skipped,
+        ],
+    ]
+    "###);
+}

@@ -1,5 +1,6 @@
 pub mod attribute_fid;
 pub mod attribute_position;
+pub mod cutoff;
 pub mod distinct;
 pub mod exactness;
 pub mod geo_sort;

@@ -150,6 +150,7 @@ pub struct Settings<'a, 't, 'i> {
     pagination_max_total_hits: Setting<usize>,
     proximity_precision: Setting<ProximityPrecision>,
     embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
+    search_cutoff: Setting<u64>,
 }

 impl<'a, 't, 'i> Settings<'a, 't, 'i> {
@@ -183,6 +184,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
             pagination_max_total_hits: Setting::NotSet,
             proximity_precision: Setting::NotSet,
             embedder_settings: Setting::NotSet,
+            search_cutoff: Setting::NotSet,
             indexer_config,
         }
     }
@@ -373,6 +375,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         self.embedder_settings = Setting::Reset;
     }

+    pub fn set_search_cutoff(&mut self, value: u64) {
+        self.search_cutoff = Setting::Set(value);
+    }
+
+    pub fn reset_search_cutoff(&mut self) {
+        self.search_cutoff = Setting::Reset;
+    }
+
     #[tracing::instrument(
         level = "trace",
         skip(self, progress_callback, should_abort, old_fields_ids_map),
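For reference, a sketch of driving the new setter through the `Settings` update pipeline; the wiring around it (`IndexerConfig::default()`, the write transaction, the no-op callbacks) is assumed from how the crate's settings tests drive `execute`, and the 50ms value is illustrative:

```rust
use milli::update::{IndexerConfig, Settings};
use milli::Index;

// Sketch: persist a 50ms search cutoff on an index, then commit.
fn store_cutoff(index: &Index) -> milli::Result<()> {
    let mut wtxn = index.write_txn()?;
    let config = IndexerConfig::default();

    let mut settings = Settings::new(&mut wtxn, index, &config);
    settings.set_search_cutoff(50);
    settings.execute(|_| (), || false)?;

    wtxn.commit()?;
    Ok(())
}
```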
@@ -1026,6 +1036,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         Ok(update)
     }

+    fn update_search_cutoff(&mut self) -> Result<bool> {
+        let changed = match self.search_cutoff {
+            Setting::Set(new) => {
+                let old = self.index.search_cutoff(self.wtxn)?;
+                if old == Some(new) {
+                    false
+                } else {
+                    self.index.put_search_cutoff(self.wtxn, new)?;
+                    true
+                }
+            }
+            Setting::Reset => self.index.delete_search_cutoff(self.wtxn)?,
+            Setting::NotSet => false,
+        };
+
+        Ok(changed)
+    }
+
     pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
     where
         FP: Fn(UpdateIndexingStep) + Sync,
@@ -1079,6 +1107,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         // 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage
         let embedding_configs_updated = self.update_embedding_configs()?;

+        // never trigger re-indexing
+        self.update_search_cutoff()?;
+
         if stop_words_updated
             || non_separator_tokens_updated
             || separator_tokens_updated
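Design note: the `// never trigger re-indexing` call above is deliberate. The cutoff only bounds how long the query-time bucket sort may run and changes nothing in the stored index, so `update_search_cutoff` is kept out of the `if stop_words_updated || …` chain below, which decides whether documents must be re-indexed.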
@@ -2035,6 +2066,7 @@ mod tests {
             pagination_max_total_hits,
             proximity_precision,
             embedder_settings,
+            search_cutoff,
         } = settings;
         assert!(matches!(searchable_fields, Setting::NotSet));
         assert!(matches!(displayed_fields, Setting::NotSet));
@@ -2058,6 +2090,7 @@ mod tests {
             assert!(matches!(pagination_max_total_hits, Setting::NotSet));
             assert!(matches!(proximity_precision, Setting::NotSet));
             assert!(matches!(embedder_settings, Setting::NotSet));
+            assert!(matches!(search_cutoff, Setting::NotSet));
         })
         .unwrap();
     }