mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 13:34:30 +01:00
fix the search cutoff and add a test
This commit is contained in:
parent
b72495eb58
commit
b8cda6c300
@ -834,6 +834,115 @@ async fn test_score_details() {
|
|||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn test_degraded_score_details() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = NESTED_DOCUMENTS.clone();
|
||||||
|
|
||||||
|
index.add_documents(json!(documents), None).await;
|
||||||
|
// We can't really use anything else than 0ms here; otherwise, the test will get flaky.
|
||||||
|
let (res, _code) = index.update_settings(json!({ "searchCutoff": 0 })).await;
|
||||||
|
index.wait_task(res.uid()).await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.search(
|
||||||
|
json!({
|
||||||
|
"q": "b",
|
||||||
|
"showRankingScoreDetails": true,
|
||||||
|
}),
|
||||||
|
|response, code| {
|
||||||
|
meili_snap::snapshot!(code, @"200 OK");
|
||||||
|
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 852,
|
||||||
|
"father": "jean",
|
||||||
|
"mother": "michelle",
|
||||||
|
"doggos": [
|
||||||
|
{
|
||||||
|
"name": "bobby",
|
||||||
|
"age": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "buddy",
|
||||||
|
"age": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"cattos": "pésti",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"_rankingScoreDetails": {
|
||||||
|
"skipped": 0.0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 654,
|
||||||
|
"father": "pierre",
|
||||||
|
"mother": "sabine",
|
||||||
|
"doggos": [
|
||||||
|
{
|
||||||
|
"name": "gros bill",
|
||||||
|
"age": 8
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"cattos": [
|
||||||
|
"simba",
|
||||||
|
"pestiféré"
|
||||||
|
],
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
54
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"_rankingScoreDetails": {
|
||||||
|
"skipped": 0.0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 951,
|
||||||
|
"father": "jean-baptiste",
|
||||||
|
"mother": "sophie",
|
||||||
|
"doggos": [
|
||||||
|
{
|
||||||
|
"name": "turbo",
|
||||||
|
"age": 5
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "fast",
|
||||||
|
"age": 6
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"cattos": [
|
||||||
|
"moumoute",
|
||||||
|
"gomez"
|
||||||
|
],
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
10,
|
||||||
|
23,
|
||||||
|
32
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"_rankingScoreDetails": {
|
||||||
|
"skipped": 0.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn experimental_feature_vector_store() {
|
async fn experimental_feature_vector_store() {
|
||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
|
@ -105,10 +105,15 @@ pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
|
|||||||
|
|
||||||
pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;
|
pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone)]
|
||||||
pub struct TimeBudget {
|
pub struct TimeBudget {
|
||||||
started_at: std::time::Instant,
|
started_at: std::time::Instant,
|
||||||
budget: std::time::Duration,
|
budget: std::time::Duration,
|
||||||
|
|
||||||
|
/// When testing the time budget, ensuring we did more than one iteration of the bucket sort can be useful.
|
||||||
|
/// But to avoid being flaky, the only option is to add the ability to stop after a specific number of calls instead of a `Duration`.
|
||||||
|
#[cfg(test)]
|
||||||
|
stop_after: Option<(std::sync::Arc<std::sync::atomic::AtomicUsize>, usize)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for TimeBudget {
|
impl fmt::Debug for TimeBudget {
|
||||||
@ -129,18 +134,40 @@ impl Default for TimeBudget {
|
|||||||
|
|
||||||
impl TimeBudget {
|
impl TimeBudget {
|
||||||
pub fn new(budget: std::time::Duration) -> Self {
|
pub fn new(budget: std::time::Duration) -> Self {
|
||||||
Self { started_at: std::time::Instant::now(), budget }
|
Self {
|
||||||
|
started_at: std::time::Instant::now(),
|
||||||
|
budget,
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
stop_after: None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn max() -> Self {
|
pub fn max() -> Self {
|
||||||
Self::new(std::time::Duration::from_secs(u64::MAX))
|
Self::new(std::time::Duration::from_secs(u64::MAX))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn exceeded(&self) -> bool {
|
#[cfg(test)]
|
||||||
self.must_stop()
|
pub fn with_stop_after(mut self, stop_after: usize) -> Self {
|
||||||
|
use std::sync::atomic::AtomicUsize;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
self.stop_after = Some((Arc::new(AtomicUsize::new(0)), stop_after));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn exceeded(&self) -> bool {
|
||||||
|
#[cfg(test)]
|
||||||
|
if let Some((current, stop_after)) = &self.stop_after {
|
||||||
|
let current = current.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||||
|
if current >= *stop_after {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
// if a number has been specified then we entirely ignore the time budget
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn must_stop(&self) -> bool {
|
|
||||||
self.started_at.elapsed() > self.budget
|
self.started_at.elapsed() > self.budget
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,9 @@ pub enum ScoreDetails {
|
|||||||
Sort(Sort),
|
Sort(Sort),
|
||||||
Vector(Vector),
|
Vector(Vector),
|
||||||
GeoSort(GeoSort),
|
GeoSort(GeoSort),
|
||||||
|
|
||||||
|
/// Returned when we don't have the time to finish applying all the subsequent ranking-rules
|
||||||
|
Skipped,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
@ -50,6 +53,7 @@ impl ScoreDetails {
|
|||||||
ScoreDetails::Sort(_) => None,
|
ScoreDetails::Sort(_) => None,
|
||||||
ScoreDetails::GeoSort(_) => None,
|
ScoreDetails::GeoSort(_) => None,
|
||||||
ScoreDetails::Vector(_) => None,
|
ScoreDetails::Vector(_) => None,
|
||||||
|
ScoreDetails::Skipped => Some(Rank { rank: 0, max_rank: 1 }),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,6 +101,7 @@ impl ScoreDetails {
|
|||||||
ScoreDetails::Vector(vector) => RankOrValue::Score(
|
ScoreDetails::Vector(vector) => RankOrValue::Score(
|
||||||
vector.value_similarity.as_ref().map(|(_, s)| *s as f64).unwrap_or(0.0f64),
|
vector.value_similarity.as_ref().map(|(_, s)| *s as f64).unwrap_or(0.0f64),
|
||||||
),
|
),
|
||||||
|
ScoreDetails::Skipped => RankOrValue::Score(0.),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -256,6 +261,13 @@ impl ScoreDetails {
|
|||||||
details_map.insert(vector, details);
|
details_map.insert(vector, details);
|
||||||
order += 1;
|
order += 1;
|
||||||
}
|
}
|
||||||
|
ScoreDetails::Skipped => {
|
||||||
|
details_map.insert(
|
||||||
|
"skipped".to_string(),
|
||||||
|
serde_json::Number::from_f64(0.).unwrap().into(),
|
||||||
|
);
|
||||||
|
order += 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
details_map
|
details_map
|
||||||
|
@ -132,7 +132,7 @@ impl<'a> Search<'a> {
|
|||||||
index: self.index,
|
index: self.index,
|
||||||
distribution_shift: self.distribution_shift,
|
distribution_shift: self.distribution_shift,
|
||||||
embedder_name: self.embedder_name.clone(),
|
embedder_name: self.embedder_name.clone(),
|
||||||
time_budget: self.time_budget,
|
time_budget: self.time_budget.clone(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let vector_query = search.vector.take();
|
let vector_query = search.vector.take();
|
||||||
|
@ -195,7 +195,7 @@ impl<'a> Search<'a> {
|
|||||||
self.limit,
|
self.limit,
|
||||||
self.distribution_shift,
|
self.distribution_shift,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
self.time_budget,
|
self.time_budget.clone(),
|
||||||
)?,
|
)?,
|
||||||
None => execute_search(
|
None => execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
@ -211,7 +211,7 @@ impl<'a> Search<'a> {
|
|||||||
Some(self.words_limit),
|
Some(self.words_limit),
|
||||||
&mut DefaultSearchLogger,
|
&mut DefaultSearchLogger,
|
||||||
&mut DefaultSearchLogger,
|
&mut DefaultSearchLogger,
|
||||||
self.time_budget,
|
self.time_budget.clone(),
|
||||||
)?,
|
)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -161,11 +161,21 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
|
|
||||||
while valid_docids.len() < length {
|
while valid_docids.len() < length {
|
||||||
if time_budget.exceeded() {
|
if time_budget.exceeded() {
|
||||||
|
loop {
|
||||||
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
||||||
|
ranking_rule_scores.push(ScoreDetails::Skipped);
|
||||||
maybe_add_to_results!(bucket);
|
maybe_add_to_results!(bucket);
|
||||||
|
ranking_rule_scores.pop();
|
||||||
|
|
||||||
|
if cur_ranking_rule_index == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
back!();
|
||||||
|
}
|
||||||
|
|
||||||
return Ok(BucketSortOutput {
|
return Ok(BucketSortOutput {
|
||||||
scores: vec![Default::default(); valid_docids.len()],
|
scores: valid_scores,
|
||||||
docids: valid_docids,
|
docids: valid_docids,
|
||||||
all_candidates,
|
all_candidates,
|
||||||
degraded: true,
|
degraded: true,
|
||||||
|
419
milli/src/search/new/tests/cutoff.rs
Normal file
419
milli/src/search/new/tests/cutoff.rs
Normal file
@ -0,0 +1,419 @@
|
|||||||
|
//! This module tests the search cutoff and ensures a few things:
|
||||||
|
//! 1. A basic test works and marks the search as degraded
|
||||||
|
//! 2. A test that ensures the filters are effectively applied even with a cutoff of 0
|
||||||
|
//! 3. A test that ensures the cutoff works well with the ranking scores
|
||||||
|
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use big_s::S;
|
||||||
|
use maplit::hashset;
|
||||||
|
use meili_snap::snapshot;
|
||||||
|
|
||||||
|
use crate::index::tests::TempIndex;
|
||||||
|
use crate::{Criterion, Filter, Search, TimeBudget};
|
||||||
|
|
||||||
|
fn create_index() -> TempIndex {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_primary_key("id".to_owned());
|
||||||
|
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||||
|
s.set_filterable_fields(hashset! { S("id") });
|
||||||
|
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// reverse the ID / insertion order so we see better what was sorted from what got the insertion order ordering
|
||||||
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"text": "hella puppo kefir",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"text": "hella puppy kefir",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"text": "hello",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"text": "hello puppy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"text": "hello puppy kefir",
|
||||||
|
},
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
index
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn basic_degraded_search() {
|
||||||
|
let index = create_index();
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut search = Search::new(&rtxn, &index);
|
||||||
|
search.query("hello puppy kefir");
|
||||||
|
search.limit(3);
|
||||||
|
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
assert!(result.degraded);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn degraded_search_cannot_skip_filter() {
|
||||||
|
let index = create_index();
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut search = Search::new(&rtxn, &index);
|
||||||
|
search.query("hello puppy kefir");
|
||||||
|
search.limit(100);
|
||||||
|
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
|
||||||
|
let filter_condition = Filter::from_str("id > 2").unwrap().unwrap();
|
||||||
|
search.filter(filter_condition);
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
assert!(result.degraded);
|
||||||
|
snapshot!(format!("{:?}\n{:?}", result.candidates, result.documents_ids), @r###"
|
||||||
|
RoaringBitmap<[0, 1]>
|
||||||
|
[0, 1]
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn degraded_search_and_score_details() {
|
||||||
|
let index = create_index();
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut search = Search::new(&rtxn, &index);
|
||||||
|
search.query("hello puppy kefir");
|
||||||
|
search.limit(4);
|
||||||
|
search.time_budget(TimeBudget::max());
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||||
|
[
|
||||||
|
4,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
]
|
||||||
|
[
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Typo(
|
||||||
|
Typo {
|
||||||
|
typo_count: 0,
|
||||||
|
max_typo_count: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Typo(
|
||||||
|
Typo {
|
||||||
|
typo_count: 1,
|
||||||
|
max_typo_count: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 2,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Do ONE loop iteration. Not much can be deduced, almost everyone matched the words first bucket.
|
||||||
|
search.time_budget(TimeBudget::max().with_stop_after(1));
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||||
|
[
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
4,
|
||||||
|
2,
|
||||||
|
]
|
||||||
|
[
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Skipped,
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Skipped,
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Skipped,
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Skipped,
|
||||||
|
],
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Do TWO loop iterations. The first document should be entirely sorted
|
||||||
|
search.time_budget(TimeBudget::max().with_stop_after(2));
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||||
|
[
|
||||||
|
4,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
]
|
||||||
|
[
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Typo(
|
||||||
|
Typo {
|
||||||
|
typo_count: 0,
|
||||||
|
max_typo_count: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Skipped,
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Skipped,
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Skipped,
|
||||||
|
],
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Do THREE loop iterations. The second document should be entirely sorted as well
|
||||||
|
search.time_budget(TimeBudget::max().with_stop_after(3));
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||||
|
[
|
||||||
|
4,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
]
|
||||||
|
[
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Typo(
|
||||||
|
Typo {
|
||||||
|
typo_count: 0,
|
||||||
|
max_typo_count: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Typo(
|
||||||
|
Typo {
|
||||||
|
typo_count: 1,
|
||||||
|
max_typo_count: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Skipped,
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Skipped,
|
||||||
|
],
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Do FOUR loop iterations. The third document should be entirely sorted as well
|
||||||
|
// The words bucket has still not progressed, thus the last document doesn't have any info yet.
|
||||||
|
search.time_budget(TimeBudget::max().with_stop_after(4));
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||||
|
[
|
||||||
|
4,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
]
|
||||||
|
[
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Typo(
|
||||||
|
Typo {
|
||||||
|
typo_count: 0,
|
||||||
|
max_typo_count: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Typo(
|
||||||
|
Typo {
|
||||||
|
typo_count: 1,
|
||||||
|
max_typo_count: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Skipped,
|
||||||
|
],
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// After FIVE loop iterations. The words ranking rule gave us a new bucket.
|
||||||
|
// Since we reached the limit we were able to early exit without checking the typo ranking rule.
|
||||||
|
search.time_budget(TimeBudget::max().with_stop_after(5));
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||||
|
[
|
||||||
|
4,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
]
|
||||||
|
[
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Typo(
|
||||||
|
Typo {
|
||||||
|
typo_count: 0,
|
||||||
|
max_typo_count: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
Typo(
|
||||||
|
Typo {
|
||||||
|
typo_count: 1,
|
||||||
|
max_typo_count: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 3,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
Words(
|
||||||
|
Words {
|
||||||
|
matching_words: 2,
|
||||||
|
max_matching_words: 3,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
}
|
@ -1,5 +1,6 @@
|
|||||||
pub mod attribute_fid;
|
pub mod attribute_fid;
|
||||||
pub mod attribute_position;
|
pub mod attribute_position;
|
||||||
|
pub mod cutoff;
|
||||||
pub mod distinct;
|
pub mod distinct;
|
||||||
pub mod exactness;
|
pub mod exactness;
|
||||||
pub mod geo_sort;
|
pub mod geo_sort;
|
||||||
|
@ -1,19 +1,14 @@
|
|||||||
use std::cmp::Reverse;
|
use std::cmp::Reverse;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::io::Cursor;
|
use std::io::Cursor;
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
use either::{Either, Left, Right};
|
use either::{Either, Left, Right};
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use maplit::{btreemap, hashset};
|
use maplit::{btreemap, hashset};
|
||||||
use meili_snap::snapshot;
|
|
||||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{
|
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
|
||||||
AscDesc, Criterion, DocumentId, Filter, Index, Member, Object, Search, TermsMatchingStrategy,
|
|
||||||
TimeBudget,
|
|
||||||
};
|
|
||||||
use serde::{Deserialize, Deserializer};
|
use serde::{Deserialize, Deserializer};
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
@ -354,41 +349,3 @@ where
|
|||||||
let result = serde_json::Value::deserialize(deserializer)?;
|
let result = serde_json::Value::deserialize(deserializer)?;
|
||||||
Ok(Some(result))
|
Ok(Some(result))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn basic_degraded_search() {
|
|
||||||
use Criterion::*;
|
|
||||||
let criteria = vec![Words, Typo, Proximity, Attribute, Exactness];
|
|
||||||
let index = setup_search_index_with_criteria(&criteria);
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
|
||||||
|
|
||||||
let mut search = Search::new(&rtxn, &index);
|
|
||||||
search.query(TEST_QUERY);
|
|
||||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
|
||||||
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
|
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
|
||||||
assert!(result.degraded);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn degraded_search_cannot_skip_filter() {
|
|
||||||
use Criterion::*;
|
|
||||||
let criteria = vec![Words, Typo, Proximity, Attribute, Exactness];
|
|
||||||
let index = setup_search_index_with_criteria(&criteria);
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
|
||||||
|
|
||||||
let mut search = Search::new(&rtxn, &index);
|
|
||||||
search.query(TEST_QUERY);
|
|
||||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
|
||||||
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
|
|
||||||
let filter_condition = Filter::from_str("tag = etiopia").unwrap().unwrap();
|
|
||||||
search.filter(filter_condition);
|
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
|
||||||
assert!(result.degraded);
|
|
||||||
snapshot!(format!("{:?}\n{:?}", result.candidates, result.documents_ids), @r###"
|
|
||||||
RoaringBitmap<[0, 2, 5, 8, 11, 14]>
|
|
||||||
[0, 2, 5, 8, 11, 14]
|
|
||||||
"###);
|
|
||||||
}
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user