diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 1509847b7..3e08498de 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -240,6 +240,8 @@ InvalidSearchOffset , InvalidRequest , BAD_REQUEST ; InvalidSearchPage , InvalidRequest , BAD_REQUEST ; InvalidSearchQ , InvalidRequest , BAD_REQUEST ; InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ; +InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ; +InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSearchSort , InvalidRequest , BAD_REQUEST ; InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index afef95ed7..55e4905bd 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -569,6 +569,10 @@ pub struct SearchAggregator { // facets facets_sum_of_terms: usize, facets_total_number_of_facets: usize, + + // scoring + show_ranking_score: bool, + show_ranking_score_details: bool, } impl SearchAggregator { @@ -632,6 +636,9 @@ impl SearchAggregator { ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH(); ret.show_matches_position = query.show_matches_position; + ret.show_ranking_score = query.show_ranking_score; + ret.show_ranking_score_details = query.show_ranking_score_details; + ret } @@ -706,6 +713,10 @@ impl SearchAggregator { let matching_strategy = self.matching_strategy.entry(key).or_insert(0); *matching_strategy = matching_strategy.saturating_add(value); } + + // scoring + self.show_ranking_score |= other.show_ranking_score; + self.show_ranking_score_details |= other.show_ranking_score_details; } pub fn into_event(self, user: &User, event_name: &str) -> Option { @@ -760,7 +771,11 @@ impl SearchAggregator { }, "matching_strategy": { "most_used_strategy": self.matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - } + }, + "scoring": { + "show_ranking_score": self.show_ranking_score, + "show_ranking_score_details": self.show_ranking_score_details, + }, }); Some(Track { diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index f9242f320..cb70147cd 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -56,6 +56,10 @@ pub struct SearchQueryGet { sort: Option, #[deserr(default, error = DeserrQueryParamError)] show_matches_position: Param, + #[deserr(default, error = DeserrQueryParamError)] + show_ranking_score: Param, + #[deserr(default, error = DeserrQueryParamError)] + show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError)] facets: Option>, #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] @@ -91,6 +95,8 @@ impl From for SearchQuery { filter, sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), show_matches_position: other.show_matches_position.0, + show_ranking_score: other.show_ranking_score.0, + show_ranking_score_details: other.show_ranking_score_details.0, facets: other.facets.map(|o| o.into_iter().collect()), highlight_pre_tag: other.highlight_pre_tag, highlight_post_tag: other.highlight_post_tag, diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 581f4b653..b2858ead3 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -9,6 +9,7 @@ use meilisearch_auth::IndexSearchRules; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; use milli::tokenizer::TokenizerBuilder; @@ -54,6 +55,10 @@ pub struct SearchQuery { pub attributes_to_highlight: Option>, #[deserr(default, error = DeserrJsonError, default)] pub show_matches_position: bool, + #[deserr(default, error = DeserrJsonError, default)] + pub show_ranking_score: bool, + #[deserr(default, error = DeserrJsonError, default)] + pub show_ranking_score_details: bool, #[deserr(default, error = DeserrJsonError)] pub filter: Option, #[deserr(default, error = DeserrJsonError)] @@ -103,6 +108,10 @@ pub struct SearchQueryWithIndex { pub crop_length: usize, #[deserr(default, error = DeserrJsonError)] pub attributes_to_highlight: Option>, + #[deserr(default, error = DeserrJsonError, default)] + pub show_ranking_score: bool, + #[deserr(default, error = DeserrJsonError, default)] + pub show_ranking_score_details: bool, #[deserr(default, error = DeserrJsonError, default)] pub show_matches_position: bool, #[deserr(default, error = DeserrJsonError)] @@ -134,6 +143,8 @@ impl SearchQueryWithIndex { attributes_to_crop, crop_length, attributes_to_highlight, + show_ranking_score, + show_ranking_score_details, show_matches_position, filter, sort, @@ -155,6 +166,8 @@ impl SearchQueryWithIndex { attributes_to_crop, crop_length, attributes_to_highlight, + show_ranking_score, + show_ranking_score_details, show_matches_position, filter, sort, @@ -194,7 +207,7 @@ impl From for TermsMatchingStrategy { } } -#[derive(Debug, Clone, Serialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, PartialEq)] pub struct SearchHit { #[serde(flatten)] pub document: Document, @@ -202,6 +215,10 @@ pub struct SearchHit { pub formatted: Document, #[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")] pub matches_position: Option, + #[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")] + pub ranking_score: Option, + #[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")] + pub ranking_score_details: Option>, } #[derive(Serialize, Debug, Clone, PartialEq)] @@ -283,6 +300,11 @@ pub fn perform_search( .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); search.exhaustive_number_hits(is_finite_pagination); + search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details { + ScoringStrategy::Detailed + } else { + ScoringStrategy::Skip + }); // compute the offset on the limit depending on the pagination mode. let (offset, limit) = if is_finite_pagination { @@ -320,7 +342,8 @@ pub fn perform_search( search.sort_criteria(sort); } - let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?; + let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } = + search.execute()?; let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); @@ -392,7 +415,7 @@ pub fn perform_search( let documents_iter = index.documents(&rtxn, documents_ids)?; - for (_id, obkv) in documents_iter { + for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) { // First generate a document with all the displayed fields let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; @@ -416,7 +439,18 @@ pub fn perform_search( insert_geo_distance(sort, &mut document); } - let hit = SearchHit { document, formatted, matches_position }; + let ranking_score = + query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter())); + let ranking_score_details = + query.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter())); + + let hit = SearchHit { + document, + formatted, + matches_position, + ranking_score_details, + ranking_score, + }; documents.push(hit); } diff --git a/meilisearch/tests/search/formatted.rs b/meilisearch/tests/search/formatted.rs index 8a40616a3..6347a90ca 100644 --- a/meilisearch/tests/search/formatted.rs +++ b/meilisearch/tests/search/formatted.rs @@ -1,3 +1,4 @@ +use insta::{allow_duplicates, assert_json_snapshot}; use serde_json::json; use super::*; @@ -18,30 +19,43 @@ async fn formatted_contain_wildcard() { |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "_formatted": { - "id": "852", - "cattos": "pésti", - }, - "_matchesPosition": {"cattos": [{"start": 0, "length": 5}]}, - }) - ); - } + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "_formatted": { + "id": "852", + "cattos": "pésti" + }, + "_matchesPosition": { + "cattos": [ + { + "start": 0, + "length": 5 + } + ] + } + } + "###); + } + } ) .await; index .search(json!({ "q": "pésti", "attributesToRetrieve": ["*"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "cattos": "pésti", - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852, + "cattos": "pésti" + } + "###) + } }) .await; @@ -50,20 +64,29 @@ async fn formatted_contain_wildcard() { json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "cattos": "pésti", - "_formatted": { - "id": "852", - "cattos": "pésti", - }, - "_matchesPosition": {"cattos": [{"start": 0, "length": 5}]}, - }) - ); - } - ) + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852, + "cattos": "pésti", + "_formatted": { + "id": "852", + "cattos": "pésti" + }, + "_matchesPosition": { + "cattos": [ + { + "start": 0, + "length": 5 + } + ] + } + } + "###) + } + }) .await; index @@ -71,17 +94,20 @@ async fn formatted_contain_wildcard() { json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "cattos": "pésti", - "_formatted": { - "id": "852", - "cattos": "pésti", - } - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852, + "cattos": "pésti", + "_formatted": { + "id": "852", + "cattos": "pésti" + } + } + "###); + } }, ) .await; @@ -89,17 +115,20 @@ async fn formatted_contain_wildcard() { index .search(json!({ "q": "pésti", "attributesToCrop": ["*"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "cattos": "pésti", - "_formatted": { - "id": "852", - "cattos": "pésti", - } - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852, + "cattos": "pésti", + "_formatted": { + "id": "852", + "cattos": "pésti" + } + } + "###) + } }) .await; } @@ -116,21 +145,24 @@ async fn format_nested() { index .search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "doggos": [ - { - "name": "bobby", - "age": 2, - }, - { - "name": "buddy", - "age": 4, - }, - ], - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ] + } + "###) + } }) .await; @@ -139,19 +171,22 @@ async fn format_nested() { json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "doggos": [ - { - "name": "bobby", - }, - { - "name": "buddy", - }, - ], - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "doggos": [ + { + "name": "bobby" + }, + { + "name": "buddy" + } + ] + } + "###) + } }, ) .await; @@ -161,20 +196,30 @@ async fn format_nested() { json!({ "q": "bobby", "attributesToRetrieve": ["doggos.name"], "showMatchesPosition": true }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "doggos": [ - { - "name": "bobby", - }, - { - "name": "buddy", - }, - ], - "_matchesPosition": {"doggos.name": [{"start": 0, "length": 5}]}, - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "doggos": [ + { + "name": "bobby" + }, + { + "name": "buddy" + } + ], + "_matchesPosition": { + "doggos.name": [ + { + "start": 0, + "length": 5 + } + ] + } + } + "###) + } } ) .await; @@ -183,21 +228,24 @@ async fn format_nested() { .search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "_formatted": { - "doggos": [ - { - "name": "bobby", - }, - { - "name": "buddy", - }, - ], - }, - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "_formatted": { + "doggos": [ + { + "name": "bobby" + }, + { + "name": "buddy" + } + ] + } + } + "###) + } }) .await; @@ -205,21 +253,24 @@ async fn format_nested() { .search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "_formatted": { - "doggos": [ - { - "name": "bobby", - }, - { - "name": "buddy", - }, - ], - }, - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "_formatted": { + "doggos": [ + { + "name": "bobby" + }, + { + "name": "buddy" + } + ] + } + } + "###) + } }) .await; @@ -227,55 +278,61 @@ async fn format_nested() { .search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "doggos": [ - { - "name": "bobby", - }, - { - "name": "buddy", - }, - ], - "_formatted": { - "doggos": [ - { - "name": "bobby", - "age": "2", - }, - { - "name": "buddy", - "age": "4", - }, - ], + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "doggos": [ + { + "name": "bobby" }, - }) - ); - }) + { + "name": "buddy" + } + ], + "_formatted": { + "doggos": [ + { + "name": "bobby", + "age": "2" + }, + { + "name": "buddy", + "age": "4" + } + ] + } + } + "###) + } + }) .await; index .search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "_formatted": { - "doggos": [ + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" { - "name": "bobby", - "age": "2", - }, - { - "name": "buddy", - "age": "4", - }, - ], - }, - }) - ); + "_formatted": { + "doggos": [ + { + "name": "bobby", + "age": "2" + }, + { + "name": "buddy", + "age": "4" + } + ] + } + } + "###) + } } ) .await; @@ -297,54 +354,66 @@ async fn displayedattr_2_smol() { .search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852 + } + "###) + } }) .await; index .search(json!({ "attributesToRetrieve": ["id"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852 + } + "###) + } }) .await; index .search(json!({ "attributesToHighlight": ["id"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "_formatted": { - "id": "852", - } - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852, + "_formatted": { + "id": "852" + } + } + "###) + } }) .await; index .search(json!({ "attributesToCrop": ["id"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "_formatted": { - "id": "852", - } - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852, + "_formatted": { + "id": "852" + } + } + "###) + } }) .await; @@ -353,15 +422,18 @@ async fn displayedattr_2_smol() { json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "_formatted": { - "id": "852", - } - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852, + "_formatted": { + "id": "852" + } + } + "###) + } }, ) .await; @@ -369,31 +441,41 @@ async fn displayedattr_2_smol() { index .search(json!({ "attributesToHighlight": ["cattos"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852 + } + "###) + } }) .await; index .search(json!({ "attributesToCrop": ["cattos"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "id": 852 + } + "###) + } }) .await; index .search(json!({ "attributesToRetrieve": ["cattos"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"][0], json!({})); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @"{}") + } }) .await; @@ -402,7 +484,11 @@ async fn displayedattr_2_smol() { json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"][0], json!({})); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @"{}") + } } ) @@ -413,14 +499,17 @@ async fn displayedattr_2_smol() { json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "_formatted": { - "id": "852", - } - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "_formatted": { + "id": "852" + } + } + "###) + } }, ) .await; @@ -430,14 +519,17 @@ async fn displayedattr_2_smol() { json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] }), |response, code| { assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "_formatted": { - "id": "852", - } - }) - ); + allow_duplicates! { + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "_formatted": { + "id": "852" + } + } + "###) + } }, ) .await; diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index b00ddf3de..7cff42f3d 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -65,7 +65,7 @@ async fn simple_search_single_index() { ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###" + insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" [ { "indexUid": "test", @@ -170,7 +170,7 @@ async fn simple_search_two_indexes() { ]})) .await; snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###" + insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" [ { "indexUid": "test", diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 8898e5dac..87c9a004d 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -53,6 +53,7 @@ fn main() -> Result<(), Box> { &mut ctx, &(!query.trim().is_empty()).then(|| query.trim().to_owned()), TermsMatchingStrategy::Last, + milli::score_details::ScoringStrategy::Skip, false, &None, &None, diff --git a/milli/src/index.rs b/milli/src/index.rs index 1ccef13dd..fad3f665c 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2488,8 +2488,12 @@ pub(crate) mod tests { let rtxn = index.read_txn().unwrap(); let search = Search::new(&rtxn, &index); - let SearchResult { matching_words: _, candidates: _, mut documents_ids } = - search.execute().unwrap(); + let SearchResult { + matching_words: _, + candidates: _, + document_scores: _, + mut documents_ids, + } = search.execute().unwrap(); let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap(); documents_ids.sort_unstable(); let docs = index.documents(&rtxn, documents_ids).unwrap(); diff --git a/milli/src/lib.rs b/milli/src/lib.rs index e7acdde2c..d3ee4f08e 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -17,6 +17,7 @@ mod fields_ids_map; pub mod heed_codec; pub mod index; pub mod proximity; +pub mod score_details; mod search; pub mod update; diff --git a/milli/src/score_details.rs b/milli/src/score_details.rs new file mode 100644 index 000000000..23598e7f0 --- /dev/null +++ b/milli/src/score_details.rs @@ -0,0 +1,314 @@ +use serde::Serialize; + +use crate::distance_between_two_points; + +#[derive(Debug, Clone, PartialEq)] +pub enum ScoreDetails { + Words(Words), + Typo(Typo), + Proximity(Rank), + Fid(Rank), + Position(Rank), + ExactAttribute(ExactAttribute), + Exactness(Rank), + Sort(Sort), + GeoSort(GeoSort), +} + +impl ScoreDetails { + pub fn local_score(&self) -> Option { + self.rank().map(Rank::local_score) + } + + pub fn rank(&self) -> Option { + match self { + ScoreDetails::Words(details) => Some(details.rank()), + ScoreDetails::Typo(details) => Some(details.rank()), + ScoreDetails::Proximity(details) => Some(*details), + ScoreDetails::Fid(details) => Some(*details), + ScoreDetails::Position(details) => Some(*details), + ScoreDetails::ExactAttribute(details) => Some(details.rank()), + ScoreDetails::Exactness(details) => Some(*details), + ScoreDetails::Sort(_) => None, + ScoreDetails::GeoSort(_) => None, + } + } + + pub fn global_score<'a>(details: impl Iterator) -> f64 { + Rank::global_score(details.filter_map(Self::rank)) + } + + /// Panics + /// + /// - If Position is not preceded by Fid + /// - If Exactness is not preceded by ExactAttribute + pub fn to_json_map<'a>( + details: impl Iterator, + ) -> serde_json::Map { + let mut order = 0; + let mut fid_details = None; + let mut details_map = serde_json::Map::default(); + for details in details { + match details { + ScoreDetails::Words(words) => { + let words_details = serde_json::json!({ + "order": order, + "matchingWords": words.matching_words, + "maxMatchingWords": words.max_matching_words, + "score": words.rank().local_score(), + }); + details_map.insert("words".into(), words_details); + order += 1; + } + ScoreDetails::Typo(typo) => { + let typo_details = serde_json::json!({ + "order": order, + "typoCount": typo.typo_count, + "maxTypoCount": typo.max_typo_count, + "score": typo.rank().local_score(), + }); + details_map.insert("typo".into(), typo_details); + order += 1; + } + ScoreDetails::Proximity(proximity) => { + let proximity_details = serde_json::json!({ + "order": order, + "score": proximity.local_score(), + }); + details_map.insert("proximity".into(), proximity_details); + order += 1; + } + ScoreDetails::Fid(fid) => { + // copy the rank for future use in Position. + fid_details = Some(*fid); + // For now, fid is a virtual rule always followed by the "position" rule + let fid_details = serde_json::json!({ + "order": order, + "attribute_ranking_order_score": fid.local_score(), + }); + details_map.insert("attribute".into(), fid_details); + order += 1; + } + ScoreDetails::Position(position) => { + // For now, position is a virtual rule always preceded by the "fid" rule + let attribute_details = details_map + .get_mut("attribute") + .expect("position not preceded by attribute"); + let attribute_details = attribute_details + .as_object_mut() + .expect("attribute details was not an object"); + let Some(fid_details) = fid_details + else { + unimplemented!("position not preceded by attribute"); + }; + + attribute_details + .insert("query_word_distance_score".into(), position.local_score().into()); + let score = Rank::global_score([fid_details, *position].iter().copied()); + attribute_details.insert("score".into(), score.into()); + + // do not update the order since this was already done by fid + } + ScoreDetails::ExactAttribute(exact_attribute) => { + let exactness_details = serde_json::json!({ + "order": order, + "matchType": exact_attribute, + "score": exact_attribute.rank().local_score(), + }); + details_map.insert("exactness".into(), exactness_details); + order += 1; + } + ScoreDetails::Exactness(details) => { + // For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule + let exactness_details = details_map + .get_mut("exactness") + .expect("Exactness not preceded by exactAttribute"); + let exactness_details = exactness_details + .as_object_mut() + .expect("exactness details was not an object"); + if exactness_details.get("matchType").expect("missing 'matchType'") + == &serde_json::json!(ExactAttribute::NoExactMatch) + { + let score = Rank::global_score( + [ExactAttribute::NoExactMatch.rank(), *details].iter().copied(), + ); + *exactness_details.get_mut("score").expect("missing score") = score.into(); + } + // do not update the order since this was already done by exactAttribute + } + ScoreDetails::Sort(details) => { + let sort = if details.redacted { + format!("") + } else { + format!( + "{}:{}", + details.field_name, + if details.ascending { "asc" } else { "desc" } + ) + }; + let value = + if details.redacted { "".into() } else { details.value.clone() }; + let sort_details = serde_json::json!({ + "order": order, + "value": value, + }); + details_map.insert(sort, sort_details); + order += 1; + } + ScoreDetails::GeoSort(details) => { + let sort = format!( + "_geoPoint({}, {}):{}", + details.target_point[0], + details.target_point[1], + if details.ascending { "asc" } else { "desc" } + ); + let point = if let Some(value) = details.value { + serde_json::json!({ "lat": value[0], "lng": value[1]}) + } else { + serde_json::Value::Null + }; + let sort_details = serde_json::json!({ + "order": order, + "value": point, + "distance": details.distance(), + }); + details_map.insert(sort, sort_details); + order += 1; + } + } + } + details_map + } +} + +/// The strategy to compute scores. +/// +/// It makes sense to pass down this strategy to the internals of the search, because +/// some optimizations (today, mainly skipping ranking rules for universes of a single document) +/// are not correct to do when computing the scores. +/// +/// This strategy could feasibly be extended to differentiate between the normalized score and the +/// detailed scores, but it is not useful today as the normalized score is *derived from* the +/// detailed scores. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum ScoringStrategy { + /// Don't compute scores + #[default] + Skip, + /// Compute detailed scores + Detailed, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Words { + pub matching_words: u32, + pub max_matching_words: u32, +} + +impl Words { + pub fn rank(&self) -> Rank { + Rank { rank: self.matching_words, max_rank: self.max_matching_words } + } + + pub(crate) fn from_rank(rank: Rank) -> Words { + Words { matching_words: rank.rank, max_matching_words: rank.max_rank } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Typo { + pub typo_count: u32, + pub max_typo_count: u32, +} + +impl Typo { + pub fn rank(&self) -> Rank { + Rank { + rank: self.max_typo_count - self.typo_count + 1, + max_rank: (self.max_typo_count + 1), + } + } + + // max_rank = max_typo + 1 + // max_typo = max_rank - 1 + // + // rank = max_typo - typo + 1 + // rank = max_rank - 1 - typo + 1 + // rank + typo = max_rank + // typo = max_rank - rank + pub fn from_rank(rank: Rank) -> Typo { + Typo { typo_count: rank.max_rank - rank.rank, max_typo_count: rank.max_rank - 1 } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Rank { + /// The ordinal rank, such that `max_rank` is the first rank, and 0 is the last rank. + /// + /// The higher the better. Documents with a rank of 0 have a score of 0 and are typically never returned + /// (they don't match the query). + pub rank: u32, + /// The maximum possible rank. Documents with this rank have a score of 1. + /// + /// The max rank should not be 0. + pub max_rank: u32, +} + +impl Rank { + pub fn local_score(self) -> f64 { + self.rank as f64 / self.max_rank as f64 + } + + pub fn global_score(details: impl Iterator) -> f64 { + let mut rank = Rank { rank: 1, max_rank: 1 }; + for inner_rank in details { + rank.rank -= 1; + + rank.rank *= inner_rank.max_rank; + rank.max_rank *= inner_rank.max_rank; + + rank.rank += inner_rank.rank; + } + rank.local_score() + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] +#[serde(rename_all = "camelCase")] +pub enum ExactAttribute { + ExactMatch, + MatchesStart, + NoExactMatch, +} + +impl ExactAttribute { + pub fn rank(&self) -> Rank { + let rank = match self { + ExactAttribute::ExactMatch => 3, + ExactAttribute::MatchesStart => 2, + ExactAttribute::NoExactMatch => 1, + }; + Rank { rank, max_rank: 3 } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Sort { + pub field_name: String, + pub ascending: bool, + pub redacted: bool, + pub value: serde_json::Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] +pub struct GeoSort { + pub target_point: [f64; 2], + pub ascending: bool, + pub value: Option<[f64; 2]>, +} + +impl GeoSort { + pub fn distance(&self) -> Option { + self.value.map(|value| distance_between_two_points(&self.target_point, &value)) + } +} diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index dcef30920..3c972d9b0 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -7,6 +7,7 @@ use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords}; use self::new::PartialSearchResult; +use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{ execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext, }; @@ -29,6 +30,7 @@ pub struct Search<'a> { sort_criteria: Option>, geo_strategy: new::GeoSortStrategy, terms_matching_strategy: TermsMatchingStrategy, + scoring_strategy: ScoringStrategy, words_limit: usize, exhaustive_number_hits: bool, rtxn: &'a heed::RoTxn<'a>, @@ -45,6 +47,7 @@ impl<'a> Search<'a> { sort_criteria: None, geo_strategy: new::GeoSortStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(), + scoring_strategy: Default::default(), exhaustive_number_hits: false, words_limit: 10, rtxn, @@ -77,6 +80,11 @@ impl<'a> Search<'a> { self } + pub fn scoring_strategy(&mut self, value: ScoringStrategy) -> &mut Search<'a> { + self.scoring_strategy = value; + self + } + pub fn words_limit(&mut self, value: usize) -> &mut Search<'a> { self.words_limit = value; self @@ -93,7 +101,7 @@ impl<'a> Search<'a> { self } - /// Force the search to exhastivelly compute the number of candidates, + /// Forces the search to exhaustively compute the number of candidates, /// this will increase the search time but allows finite pagination. pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> { self.exhaustive_number_hits = exhaustive_number_hits; @@ -102,11 +110,12 @@ impl<'a> Search<'a> { pub fn execute(&self) -> Result { let mut ctx = SearchContext::new(self.index, self.rtxn); - let PartialSearchResult { located_query_terms, candidates, documents_ids } = + let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } = execute_search( &mut ctx, &self.query, self.terms_matching_strategy, + self.scoring_strategy, self.exhaustive_number_hits, &self.filter, &self.sort_criteria, @@ -124,7 +133,7 @@ impl<'a> Search<'a> { None => MatchingWords::default(), }; - Ok(SearchResult { matching_words, candidates, documents_ids }) + Ok(SearchResult { matching_words, candidates, document_scores, documents_ids }) } } @@ -138,6 +147,7 @@ impl fmt::Debug for Search<'_> { sort_criteria, geo_strategy: _, terms_matching_strategy, + scoring_strategy, words_limit, exhaustive_number_hits, rtxn: _, @@ -150,6 +160,7 @@ impl fmt::Debug for Search<'_> { .field("limit", limit) .field("sort_criteria", sort_criteria) .field("terms_matching_strategy", terms_matching_strategy) + .field("scoring_strategy", scoring_strategy) .field("exhaustive_number_hits", exhaustive_number_hits) .field("words_limit", words_limit) .finish() @@ -160,8 +171,8 @@ impl fmt::Debug for Search<'_> { pub struct SearchResult { pub matching_words: MatchingWords, pub candidates: RoaringBitmap, - // TODO those documents ids should be associated with their criteria scores. pub documents_ids: Vec, + pub document_scores: Vec>, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index 5144a0a28..168f8ba89 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -3,14 +3,18 @@ use roaring::RoaringBitmap; use super::logger::SearchLogger; use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait}; use super::SearchContext; +use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput}; use crate::Result; pub struct BucketSortOutput { pub docids: Vec, + pub scores: Vec>, pub all_candidates: RoaringBitmap, } +// TODO: would probably be good to regroup some of these inside of a struct? +#[allow(clippy::too_many_arguments)] pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( ctx: &mut SearchContext<'ctx>, mut ranking_rules: Vec>, @@ -18,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( universe: &RoaringBitmap, from: usize, length: usize, + scoring_strategy: ScoringStrategy, logger: &mut dyn SearchLogger, ) -> Result { logger.initial_query(query); @@ -31,7 +36,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( }; if universe.len() < from as u64 { - return Ok(BucketSortOutput { docids: vec![], all_candidates: universe.clone() }); + return Ok(BucketSortOutput { + docids: vec![], + scores: vec![], + all_candidates: universe.clone(), + }); } if ranking_rules.is_empty() { if let Some(distinct_fid) = distinct_fid { @@ -49,22 +58,32 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( } let mut all_candidates = universe - excluded; all_candidates.extend(results.iter().copied()); - return Ok(BucketSortOutput { docids: results, all_candidates }); + return Ok(BucketSortOutput { + scores: vec![Default::default(); results.len()], + docids: results, + all_candidates, + }); } else { - let docids = universe.iter().skip(from).take(length).collect(); - return Ok(BucketSortOutput { docids, all_candidates: universe.clone() }); + let docids: Vec = universe.iter().skip(from).take(length).collect(); + return Ok(BucketSortOutput { + scores: vec![Default::default(); docids.len()], + docids, + all_candidates: universe.clone(), + }); }; } let ranking_rules_len = ranking_rules.len(); logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe); + ranking_rules[0].start_iteration(ctx, logger, universe, query)?; + let mut ranking_rule_scores: Vec = vec![]; + let mut ranking_rule_universes: Vec = vec![RoaringBitmap::default(); ranking_rules_len]; ranking_rule_universes[0] = universe.clone(); - let mut cur_ranking_rule_index = 0; /// Finish iterating over the current ranking rule, yielding @@ -89,11 +108,15 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( } else { cur_ranking_rule_index -= 1; } + if ranking_rule_scores.len() > cur_ranking_rule_index { + ranking_rule_scores.pop(); + } }; } let mut all_candidates = universe.clone(); let mut valid_docids = vec![]; + let mut valid_scores = vec![]; let mut cur_offset = 0usize; macro_rules! maybe_add_to_results { @@ -104,21 +127,26 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( length, logger, &mut valid_docids, + &mut valid_scores, &mut all_candidates, &mut ranking_rule_universes, &mut ranking_rules, cur_ranking_rule_index, &mut cur_offset, distinct_fid, + &ranking_rule_scores, $candidates, )?; }; } while valid_docids.len() < length { - // The universe for this bucket is zero or one element, so we don't need to sort - // anything, just extend the results and go back to the parent ranking rule. - if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 { + // The universe for this bucket is zero, so we don't need to sort + // anything, just go back to the parent ranking rule. + if ranking_rule_universes[cur_ranking_rule_index].is_empty() + || (scoring_strategy == ScoringStrategy::Skip + && ranking_rule_universes[cur_ranking_rule_index].len() == 1) + { let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); maybe_add_to_results!(bucket); back!(); @@ -130,6 +158,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( continue; }; + ranking_rule_scores.push(next_bucket.score); + logger.next_bucket_ranking_rule( cur_ranking_rule_index, ranking_rules[cur_ranking_rule_index].as_ref(), @@ -143,10 +173,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates; if cur_ranking_rule_index == ranking_rules_len - 1 - || next_bucket.candidates.len() <= 1 + || (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1) || cur_offset + (next_bucket.candidates.len() as usize) < from { maybe_add_to_results!(next_bucket.candidates); + ranking_rule_scores.pop(); continue; } @@ -166,7 +197,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( )?; } - Ok(BucketSortOutput { docids: valid_docids, all_candidates }) + Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates }) } /// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset` @@ -179,14 +210,18 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>( logger: &mut dyn SearchLogger, valid_docids: &mut Vec, + valid_scores: &mut Vec>, all_candidates: &mut RoaringBitmap, ranking_rule_universes: &mut [RoaringBitmap], ranking_rules: &mut [BoxRankingRule<'ctx, Q>], + cur_ranking_rule_index: usize, cur_offset: &mut usize, + distinct_fid: Option, + ranking_rule_scores: &[ScoreDetails], candidates: RoaringBitmap, ) -> Result<()> { // First apply the distinct rule on the candidates, reducing the universes if necessary @@ -231,13 +266,17 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>( let candidates = candidates.iter().take(length - valid_docids.len()).copied().collect::>(); logger.add_to_results(&candidates); - valid_docids.extend(&candidates); + valid_docids.extend_from_slice(&candidates); + valid_scores + .extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len())); } } else { // if we have passed the offset already, add some of the documents (up to the limit) let candidates = candidates.iter().take(length - valid_docids.len()).collect::>(); logger.add_to_results(&candidates); - valid_docids.extend(&candidates); + valid_docids.extend_from_slice(&candidates); + valid_scores + .extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len())); } *cur_offset += candidates.len() as usize; diff --git a/milli/src/search/new/exact_attribute.rs b/milli/src/search/new/exact_attribute.rs index 6e0381295..7932f0c2a 100644 --- a/milli/src/search/new/exact_attribute.rs +++ b/milli/src/search/new/exact_attribute.rs @@ -2,6 +2,7 @@ use roaring::{MultiOps, RoaringBitmap}; use super::query_graph::QueryGraph; use super::ranking_rules::{RankingRule, RankingRuleOutput}; +use crate::score_details::{self, ScoreDetails}; use crate::search::new::query_graph::QueryNodeData; use crate::search::new::query_term::ExactTerm; use crate::{Result, SearchContext, SearchLogger}; @@ -244,7 +245,13 @@ impl State { candidates &= universe; ( State::AttributeStarts(query_graph.clone(), candidates_per_attribute), - Some(RankingRuleOutput { query: query_graph, candidates }), + Some(RankingRuleOutput { + query: query_graph, + candidates, + score: ScoreDetails::ExactAttribute( + score_details::ExactAttribute::ExactMatch, + ), + }), ) } State::AttributeStarts(query_graph, candidates_per_attribute) => { @@ -257,12 +264,24 @@ impl State { candidates &= universe; ( State::Empty(query_graph.clone()), - Some(RankingRuleOutput { query: query_graph, candidates }), + Some(RankingRuleOutput { + query: query_graph, + candidates, + score: ScoreDetails::ExactAttribute( + score_details::ExactAttribute::MatchesStart, + ), + }), ) } State::Empty(query_graph) => ( State::Empty(query_graph.clone()), - Some(RankingRuleOutput { query: query_graph, candidates: universe.clone() }), + Some(RankingRuleOutput { + query: query_graph, + candidates: universe.clone(), + score: ScoreDetails::ExactAttribute( + score_details::ExactAttribute::NoExactMatch, + ), + }), ), }; (state, output) diff --git a/milli/src/search/new/geo_sort.rs b/milli/src/search/new/geo_sort.rs index e94ed33d1..dddb7f426 100644 --- a/milli/src/search/new/geo_sort.rs +++ b/milli/src/search/new/geo_sort.rs @@ -8,6 +8,7 @@ use rstar::RTree; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec}; +use crate::score_details::{self, ScoreDetails}; use crate::{ distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext, SearchLogger, @@ -80,7 +81,7 @@ pub struct GeoSort { field_ids: Option<[u16; 2]>, rtree: Option>, - cached_sorted_docids: VecDeque, + cached_sorted_docids: VecDeque<(u32, [f64; 2])>, geo_candidates: RoaringBitmap, } @@ -130,7 +131,7 @@ impl GeoSort { let point = lat_lng_to_xyz(&self.point); for point in rtree.nearest_neighbor_iter(&point) { if self.geo_candidates.contains(point.data.0) { - self.cached_sorted_docids.push_back(point.data.0); + self.cached_sorted_docids.push_back(point.data); if self.cached_sorted_docids.len() >= cache_size { break; } @@ -142,7 +143,7 @@ impl GeoSort { let point = lat_lng_to_xyz(&opposite_of(self.point)); for point in rtree.nearest_neighbor_iter(&point) { if self.geo_candidates.contains(point.data.0) { - self.cached_sorted_docids.push_front(point.data.0); + self.cached_sorted_docids.push_front(point.data); if self.cached_sorted_docids.len() >= cache_size { break; } @@ -177,7 +178,7 @@ impl GeoSort { // computing the distance between two points is expensive thus we cache the result documents .sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize); - self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id)); + self.cached_sorted_docids.extend(documents.into_iter()); }; Ok(()) @@ -220,12 +221,19 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { logger: &mut dyn SearchLogger, universe: &RoaringBitmap, ) -> Result>> { - assert!(universe.len() > 1); let query = self.query.as_ref().unwrap().clone(); self.geo_candidates &= universe; if self.geo_candidates.is_empty() { - return Ok(Some(RankingRuleOutput { query, candidates: universe.clone() })); + return Ok(Some(RankingRuleOutput { + query, + candidates: universe.clone(), + score: ScoreDetails::GeoSort(score_details::GeoSort { + target_point: self.point, + ascending: self.ascending, + value: None, + }), + })); } let ascending = self.ascending; @@ -236,11 +244,16 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { cache.pop_back() } }; - while let Some(id) = next(&mut self.cached_sorted_docids) { + while let Some((id, point)) = next(&mut self.cached_sorted_docids) { if self.geo_candidates.contains(id) { return Ok(Some(RankingRuleOutput { query, candidates: RoaringBitmap::from_iter([id]), + score: ScoreDetails::GeoSort(score_details::GeoSort { + target_point: self.point, + ascending: self.ascending, + value: Some(point), + }), })); } } diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index dd25ddd4a..fa3c0b3d0 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -50,6 +50,7 @@ use super::ranking_rule_graph::{ }; use super::small_bitmap::SmallBitmap; use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; +use crate::score_details::Rank; use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::ranking_rule_graph::PathVisitor; use crate::{Result, TermsMatchingStrategy}; @@ -118,6 +119,8 @@ pub struct GraphBasedRankingRuleState { all_costs: MappedInterner>, /// An index in the first element of `all_distances`, giving the cost of the next bucket cur_cost: u64, + /// One above the highest possible cost for this rule + next_max_cost: u64, } impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule { @@ -131,7 +134,20 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase _universe: &RoaringBitmap, query_graph: &QueryGraph, ) -> Result<()> { + // the `next_max_cost` is the successor integer to the maximum cost of the paths in the graph. + // + // When there is a matching strategy, it also factors the additional costs of: + // 1. The words that are matched in phrases + // 2. Skipping words (by adding them to the paths with a cost) + let mut next_max_cost = 1; let removal_cost = if let Some(terms_matching_strategy) = self.terms_matching_strategy { + // add the cost of the phrase to the next_max_cost + next_max_cost += query_graph + .words_in_phrases_count(ctx) + // remove 1 from the words in phrases count, because when there is a phrase we can now have a document + // where only the phrase is matching, and none of the non-phrase words. + // With the `1` that `next_max_cost` is initialized with, this gets counted twice. + .saturating_sub(1) as u64; match terms_matching_strategy { TermsMatchingStrategy::Last => { let removal_order = @@ -139,13 +155,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase let mut forbidden_nodes = SmallBitmap::for_interned_values_in(&query_graph.nodes); let mut costs = query_graph.nodes.map(|_| None); - let mut cost = 100; + // FIXME: this works because only words uses termsmatchingstrategy at the moment. for ns in removal_order { for n in ns.iter() { - *costs.get_mut(n) = Some((cost, forbidden_nodes.clone())); + *costs.get_mut(n) = Some((1, forbidden_nodes.clone())); } forbidden_nodes.union(&ns); - cost += 100; } costs } @@ -162,12 +177,16 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase // Then pre-compute the cost of all paths from each node to the end node let all_costs = graph.find_all_costs_to_end(); + next_max_cost += + all_costs.get(graph.query_graph.root_node).iter().copied().max().unwrap_or(0); + let state = GraphBasedRankingRuleState { graph, conditions_cache: condition_docids_cache, dead_ends_cache, all_costs, cur_cost: 0, + next_max_cost, }; self.state = Some(state); @@ -181,17 +200,13 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase logger: &mut dyn SearchLogger, universe: &RoaringBitmap, ) -> Result>> { - // If universe.len() <= 1, the bucket sort algorithm - // should not have called this function. - assert!(universe.len() > 1); // Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`, // should never happen let mut state = self.state.take().unwrap(); + let all_costs = state.all_costs.get(state.graph.query_graph.root_node); // Retrieve the cost of the paths to compute - let Some(&cost) = state - .all_costs - .get(state.graph.query_graph.root_node) + let Some(&cost) = all_costs .iter() .find(|c| **c >= state.cur_cost) else { self.state = None; @@ -207,8 +222,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase dead_ends_cache, all_costs, cur_cost: _, + next_max_cost, } = &mut state; + let rank = *next_max_cost - cost; + let score = G::rank_to_score(Rank { rank: rank as u32, max_rank: *next_max_cost as u32 }); + let mut universe = universe.clone(); let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner); @@ -295,8 +314,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase // We modify the next query graph so that it only contains the subgraph // that was used to compute this bucket - // But we only do it in case the bucket length is >1, because otherwise - // we know the child ranking rule won't be called anyway let paths: Vec, LocatedQueryTermSubset)>> = good_paths .into_iter() @@ -325,7 +342,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase self.state = Some(state); - Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket })) + Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket, score })) } fn end_iteration( diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 677687b32..f33d595e5 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -510,6 +510,7 @@ mod tests { &mut ctx, &Some(query.to_string()), crate::TermsMatchingStrategy::default(), + crate::score_details::ScoringStrategy::Skip, false, &None, &None, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index a28f42f35..8df764f29 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -44,6 +44,7 @@ use self::geo_sort::GeoSort; pub use self::geo_sort::Strategy as GeoSortStrategy; use self::graph_based_ranking_rule::Words; use self::interner::Interned; +use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; @@ -350,6 +351,7 @@ pub fn execute_search( ctx: &mut SearchContext, query: &Option, terms_matching_strategy: TermsMatchingStrategy, + scoring_strategy: ScoringStrategy, exhaustive_number_hits: bool, filters: &Option, sort_criteria: &Option>, @@ -411,7 +413,16 @@ pub fn execute_search( universe = resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?; - bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)? + bucket_sort( + ctx, + ranking_rules, + &graph, + &universe, + from, + length, + scoring_strategy, + query_graph_logger, + )? } else { let ranking_rules = get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?; @@ -422,17 +433,20 @@ pub fn execute_search( &universe, from, length, + scoring_strategy, placeholder_search_logger, )? }; - let BucketSortOutput { docids, mut all_candidates } = bucket_sort_output; + let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output; + + let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; // The candidates is the universe unless the exhaustive number of hits // is requested and a distinct attribute is set. if exhaustive_number_hits { if let Some(f) = ctx.index.distinct_field(ctx.txn)? { - if let Some(distinct_fid) = ctx.index.fields_ids_map(ctx.txn)?.id(f) { + if let Some(distinct_fid) = fields_ids_map.id(f) { all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; } } @@ -440,6 +454,7 @@ pub fn execute_search( Ok(PartialSearchResult { candidates: all_candidates, + document_scores: scores, documents_ids: docids, located_query_terms, }) @@ -491,4 +506,5 @@ pub struct PartialSearchResult { pub located_query_terms: Option>, pub candidates: RoaringBitmap, pub documents_ids: Vec, + pub document_scores: Vec>, } diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs index 114eb8c4e..f1f02b69c 100644 --- a/milli/src/search/new/query_graph.rs +++ b/milli/src/search/new/query_graph.rs @@ -342,6 +342,25 @@ impl QueryGraph { } res } + + /// Number of words in the phrases in this query graph + pub(crate) fn words_in_phrases_count(&self, ctx: &SearchContext) -> usize { + let mut word_count = 0; + for (_, node) in self.nodes.iter() { + match &node.data { + QueryNodeData::Term(term) => { + let Some(phrase) = term.term_subset.original_phrase(ctx) + else { + continue + }; + let phrase = ctx.phrase_interner.get(phrase); + word_count += phrase.words.iter().copied().filter(|a| a.is_some()).count() + } + _ => continue, + } + } + word_count + } } fn add_node(nodes_data: &mut Vec, node_data: QueryNodeData) -> u16 { diff --git a/milli/src/search/new/ranking_rule_graph/build.rs b/milli/src/search/new/ranking_rule_graph/build.rs index 015cd9845..4bacc5e5d 100644 --- a/milli/src/search/new/ranking_rule_graph/build.rs +++ b/milli/src/search/new/ranking_rule_graph/build.rs @@ -49,10 +49,15 @@ impl RankingRuleGraph { if let Some((cost_of_ignoring, forbidden_nodes)) = cost_of_ignoring_node.get(dest_idx) { + let dest = graph_nodes.get(dest_idx); + let dest_size = match &dest.data { + QueryNodeData::Term(term) => term.term_ids.len(), + _ => panic!(), + }; let new_edge_id = edges_store.insert(Some(Edge { source_node: source_id, dest_node: dest_idx, - cost: *cost_of_ignoring, + cost: *cost_of_ignoring * dest_size as u32, condition: None, nodes_to_skip: forbidden_nodes.clone(), })); diff --git a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs index 0842d6d04..0a84bf7cf 100644 --- a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs @@ -1,6 +1,7 @@ use roaring::RoaringBitmap; use super::{ComputedCondition, RankingRuleGraphTrait}; +use crate::score_details::{Rank, ScoreDetails}; use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset}; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids; @@ -84,4 +85,8 @@ impl RankingRuleGraphTrait for ExactnessGraph { Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)]) } + + fn rank_to_score(rank: Rank) -> ScoreDetails { + ScoreDetails::Exactness(rank) + } } diff --git a/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/milli/src/search/new/ranking_rule_graph/fid/mod.rs index e3ccf23fa..8f3e0cc82 100644 --- a/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -2,6 +2,7 @@ use fxhash::FxHashSet; use roaring::RoaringBitmap; use super::{ComputedCondition, RankingRuleGraphTrait}; +use crate::score_details::{Rank, ScoreDetails}; use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id; @@ -68,13 +69,42 @@ impl RankingRuleGraphTrait for FidGraph { } let mut edges = vec![]; - for fid in all_fields { + for fid in all_fields.iter().copied() { edges.push(( fid as u32 * term.term_ids.len() as u32, conditions_interner.insert(FidCondition { term: term.clone(), fid }), )); } + // always lookup the max_fid if we don't already and add an artificial condition for max scoring + let max_fid: Option = { + if let Some(max_fid) = ctx + .index + .searchable_fields_ids(ctx.txn)? + .map(|field_ids| field_ids.into_iter().max()) + { + max_fid + } else { + ctx.index.fields_ids_map(ctx.txn)?.ids().max() + } + }; + + if let Some(max_fid) = max_fid { + if !all_fields.contains(&max_fid) { + edges.push(( + max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. + conditions_interner.insert(FidCondition { + term: term.clone(), // TODO remove this ugly clone + fid: max_fid, + }), + )); + } + } + Ok(edges) } + + fn rank_to_score(rank: Rank) -> ScoreDetails { + ScoreDetails::Fid(rank) + } } diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index 8de455822..209ec91de 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -41,6 +41,7 @@ use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner use super::query_term::LocatedQueryTermSubset; use super::small_bitmap::SmallBitmap; use super::{QueryGraph, QueryNode, SearchContext}; +use crate::score_details::{Rank, ScoreDetails}; use crate::Result; pub struct ComputedCondition { @@ -110,6 +111,9 @@ pub trait RankingRuleGraphTrait: Sized + 'static { source_node: Option<&LocatedQueryTermSubset>, dest_node: &LocatedQueryTermSubset, ) -> Result)>>; + + /// Convert the rank of a path to its corresponding score for the ranking rule + fn rank_to_score(rank: Rank) -> ScoreDetails; } /// The graph used by graph-based ranking rules. diff --git a/milli/src/search/new/ranking_rule_graph/position/mod.rs b/milli/src/search/new/ranking_rule_graph/position/mod.rs index c2e3b9012..646ff954a 100644 --- a/milli/src/search/new/ranking_rule_graph/position/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/position/mod.rs @@ -2,6 +2,7 @@ use fxhash::{FxHashMap, FxHashSet}; use roaring::RoaringBitmap; use super::{ComputedCondition, RankingRuleGraphTrait}; +use crate::score_details::{Rank, ScoreDetails}; use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_position; @@ -77,6 +78,8 @@ impl RankingRuleGraphTrait for PositionGraph { let mut positions_for_costs = FxHashMap::>::default(); for position in all_positions { + // FIXME: bucketed position??? + let distance = position.abs_diff(*term.positions.start()); let cost = { let mut cost = 0; for i in 0..term.term_ids.len() { @@ -84,15 +87,17 @@ impl RankingRuleGraphTrait for PositionGraph { // Because if two words are in the same bucketed position (e.g. 32) and consecutive, // then their position cost will be 32+32=64, but an ngram of these two words at the // same position will have a cost of 32+32+1=65 - cost += cost_from_position(position as u32 + i as u32); + cost += cost_from_distance(distance as u32 + i as u32); } cost }; positions_for_costs.entry(cost).or_default().push(position); } - let mut edges = vec![]; + let max_cost = term.term_ids.len() as u32 * 10; + let max_cost_exists = positions_for_costs.contains_key(&max_cost); + let mut edges = vec![]; for (cost, positions) in positions_for_costs { edges.push(( cost, @@ -100,12 +105,25 @@ impl RankingRuleGraphTrait for PositionGraph { )); } + if !max_cost_exists { + // artificial empty condition for computing max cost + edges.push(( + max_cost, + conditions_interner + .insert(PositionCondition { term: term.clone(), positions: Vec::default() }), + )); + } + Ok(edges) } + + fn rank_to_score(rank: Rank) -> ScoreDetails { + ScoreDetails::Position(rank) + } } -fn cost_from_position(sum_positions: u32) -> u32 { - match sum_positions { +fn cost_from_distance(distance: u32) -> u32 { + match distance { 0 => 0, 1 => 1, 2..=4 => 2, diff --git a/milli/src/search/new/ranking_rule_graph/proximity/build.rs b/milli/src/search/new/ranking_rule_graph/proximity/build.rs index 660d59b3e..4c8fcff51 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/build.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/build.rs @@ -12,11 +12,11 @@ pub fn build_edges( left_term: Option<&LocatedQueryTermSubset>, right_term: &LocatedQueryTermSubset, ) -> Result)>> { - let right_ngram_length = right_term.term_ids.len(); + let right_ngram_max = right_term.term_ids.len().saturating_sub(1); let Some(left_term) = left_term else { return Ok(vec![( - (right_ngram_length - 1) as u32, + right_ngram_max as u32, conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }), )]) }; @@ -29,25 +29,25 @@ pub fn build_edges( // The remaining query graph represents `the sun .. are beautiful` // but `sun` and `are` have no proximity condition between them return Ok(vec![( - (right_ngram_length - 1) as u32, + right_ngram_max as u32, conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }), )]); } let mut conditions = vec![]; - for cost in right_ngram_length..(7 + right_ngram_length) { + for cost in right_ngram_max..(7 + right_ngram_max) { conditions.push(( cost as u32, conditions_interner.insert(ProximityCondition::Uninit { left_term: left_term.clone(), right_term: right_term.clone(), - cost: cost as u8, + cost: (cost + 1) as u8, }), )) } conditions.push(( - (7 + right_ngram_length) as u32, + (7 + right_ngram_max) as u32, conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }), )); diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs index ead717a6f..532ace626 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs @@ -4,6 +4,7 @@ pub mod compute_docids; use roaring::RoaringBitmap; use super::{ComputedCondition, RankingRuleGraphTrait}; +use crate::score_details::{Rank, ScoreDetails}; use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::SearchContext; @@ -36,4 +37,8 @@ impl RankingRuleGraphTrait for ProximityGraph { ) -> Result)>> { build::build_edges(ctx, conditions_interner, source_term, dest_term) } + + fn rank_to_score(rank: Rank) -> ScoreDetails { + ScoreDetails::Proximity(rank) + } } diff --git a/milli/src/search/new/ranking_rule_graph/typo/mod.rs b/milli/src/search/new/ranking_rule_graph/typo/mod.rs index a44be6015..035106ac3 100644 --- a/milli/src/search/new/ranking_rule_graph/typo/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/typo/mod.rs @@ -1,6 +1,7 @@ use roaring::RoaringBitmap; use super::{ComputedCondition, RankingRuleGraphTrait}; +use crate::score_details::{self, Rank, ScoreDetails}; use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids; @@ -75,4 +76,8 @@ impl RankingRuleGraphTrait for TypoGraph { } Ok(edges) } + + fn rank_to_score(rank: Rank) -> ScoreDetails { + ScoreDetails::Typo(score_details::Typo::from_rank(rank)) + } } diff --git a/milli/src/search/new/ranking_rule_graph/words/mod.rs b/milli/src/search/new/ranking_rule_graph/words/mod.rs index 0a0cc112b..45a56829f 100644 --- a/milli/src/search/new/ranking_rule_graph/words/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/words/mod.rs @@ -1,6 +1,7 @@ use roaring::RoaringBitmap; use super::{ComputedCondition, RankingRuleGraphTrait}; +use crate::score_details::{self, Rank, ScoreDetails}; use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids; @@ -41,9 +42,10 @@ impl RankingRuleGraphTrait for WordsGraph { _from: Option<&LocatedQueryTermSubset>, to_term: &LocatedQueryTermSubset, ) -> Result)>> { - Ok(vec![( - to_term.term_ids.len() as u32, - conditions_interner.insert(WordsCondition { term: to_term.clone() }), - )]) + Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))]) + } + + fn rank_to_score(rank: Rank) -> ScoreDetails { + ScoreDetails::Words(score_details::Words::from_rank(rank)) } } diff --git a/milli/src/search/new/ranking_rules.rs b/milli/src/search/new/ranking_rules.rs index a771d3768..f54a1b8db 100644 --- a/milli/src/search/new/ranking_rules.rs +++ b/milli/src/search/new/ranking_rules.rs @@ -2,6 +2,7 @@ use roaring::RoaringBitmap; use super::logger::SearchLogger; use super::{QueryGraph, SearchContext}; +use crate::score_details::ScoreDetails; use crate::Result; /// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`] @@ -66,4 +67,6 @@ pub struct RankingRuleOutput { pub query: Q, /// The allowed candidates for the child ranking rule pub candidates: RoaringBitmap, + /// The score for the candidates of the current bucket + pub score: ScoreDetails, } diff --git a/milli/src/search/new/sort.rs b/milli/src/search/new/sort.rs index 3f57b2aa5..f17aed6ed 100644 --- a/milli/src/search/new/sort.rs +++ b/milli/src/search/new/sort.rs @@ -1,9 +1,11 @@ +use heed::BytesDecode; use roaring::RoaringBitmap; use super::logger::SearchLogger; use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext}; -use crate::heed_codec::facet::FacetGroupKeyCodec; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; +use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; +use crate::score_details::{self, ScoreDetails}; use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; use crate::{FieldId, Index, Result}; @@ -49,6 +51,7 @@ pub struct Sort<'ctx, Query> { is_ascending: bool, original_query: Option, iter: Option>, + must_redact: bool, } impl<'ctx, Query> Sort<'ctx, Query> { pub fn new( @@ -59,15 +62,30 @@ impl<'ctx, Query> Sort<'ctx, Query> { ) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; let field_id = fields_ids_map.id(&field_name); + let must_redact = Self::must_redact(index, rtxn, &field_name)?; - Ok(Self { field_name, field_id, is_ascending, original_query: None, iter: None }) + Ok(Self { + field_name, + field_id, + is_ascending, + original_query: None, + iter: None, + must_redact, + }) + } + + fn must_redact(index: &Index, rtxn: &'ctx heed::RoTxn, field_name: &str) -> Result { + let Some(displayed_fields) = index.displayed_fields(rtxn)? + else { return Ok(false); }; + + Ok(!displayed_fields.iter().any(|&field| field == field_name)) } } impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> { fn id(&self) -> String { let Self { field_name, is_ascending, .. } = self; - format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " }) + format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" }) } fn start_iteration( &mut self, @@ -118,12 +136,45 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter)) }; + let number_iter = number_iter.map(|r| -> Result<_> { + let (docids, bytes) = r?; + Ok(( + docids, + serde_json::Value::Number( + serde_json::Number::from_f64( + OrderedF64Codec::bytes_decode(bytes).expect("some number"), + ) + .expect("too big float"), + ), + )) + }); + let string_iter = string_iter.map(|r| -> Result<_> { + let (docids, bytes) = r?; + Ok(( + docids, + serde_json::Value::String( + StrRefCodec::bytes_decode(bytes).expect("some string").to_owned(), + ), + )) + }); let query_graph = parent_query.clone(); + let ascending = self.is_ascending; + let field_name = self.field_name.clone(); + let must_redact = self.must_redact; RankingRuleOutputIterWrapper::new(Box::new(number_iter.chain(string_iter).map( move |r| { - let (docids, _) = r?; - Ok(RankingRuleOutput { query: query_graph.clone(), candidates: docids }) + let (docids, value) = r?; + Ok(RankingRuleOutput { + query: query_graph.clone(), + candidates: docids, + score: ScoreDetails::Sort(score_details::Sort { + field_name: field_name.clone(), + ascending, + redacted: must_redact, + value, + }), + }) }, ))) } @@ -146,7 +197,16 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Ok(Some(bucket)) } else { let query = self.original_query.as_ref().unwrap().clone(); - Ok(Some(RankingRuleOutput { query, candidates: universe.clone() })) + Ok(Some(RankingRuleOutput { + query, + candidates: universe.clone(), + score: ScoreDetails::Sort(score_details::Sort { + field_name: self.field_name.clone(), + ascending: self.is_ascending, + redacted: self.must_redact, + value: serde_json::Value::Null, + }), + })) } } diff --git a/milli/src/search/new/tests/attribute_fid.rs b/milli/src/search/new/tests/attribute_fid.rs index 177dc393a..09e52a394 100644 --- a/milli/src/search/new/tests/attribute_fid.rs +++ b/milli/src/search/new/tests/attribute_fid.rs @@ -122,8 +122,11 @@ fn test_attribute_fid_simple() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 6, 5, 4, 3, 9, 7, 8, 11, 10, 12, 13, 14, 0]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } #[test] @@ -135,6 +138,11 @@ fn test_attribute_fid_ngrams() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 6, 5, 4, 3, 9, 7, 8, 11, 10, 12, 13, 14, 0]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } diff --git a/milli/src/search/new/tests/attribute_position.rs b/milli/src/search/new/tests/attribute_position.rs index 37f303b10..1513528ec 100644 --- a/milli/src/search/new/tests/attribute_position.rs +++ b/milli/src/search/new/tests/attribute_position.rs @@ -40,68 +40,68 @@ fn create_index() -> TempIndex { }, { "id": 5, - "text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + "text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a the quick brown fox", }, { "id": 6, - "text": "quick a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + "text": "quick a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a brown", }, { "id": 7, - "text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + "text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a quickbrown", }, { "id": 8, - "text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + "text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a quick brown", }, { "id": 9, - "text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a - a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + "text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a quickbrown", }, { @@ -137,8 +137,13 @@ fn test_attribute_position_simple() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("quick brown"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } #[test] fn test_attribute_position_repeated() { @@ -149,8 +154,13 @@ fn test_attribute_position_repeated() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("a a a a a"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 7, 8, 9, 6]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } #[test] @@ -162,8 +172,13 @@ fn test_attribute_position_different_fields() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("quick brown"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } #[test] @@ -175,6 +190,11 @@ fn test_attribute_position_ngrams() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("quick brown"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } diff --git a/milli/src/search/new/tests/exactness.rs b/milli/src/search/new/tests/exactness.rs index c5c963ede..a486342c1 100644 --- a/milli/src/search/new/tests/exactness.rs +++ b/milli/src/search/new/tests/exactness.rs @@ -474,8 +474,14 @@ fn test_exactness_simple_ordered() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 7, 6, 5, 4, 3, 2, 1]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -501,8 +507,14 @@ fn test_exactness_simple_reversed() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 3, 4, 5, 6, 7]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -519,8 +531,14 @@ fn test_exactness_simple_reversed() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 3, 4, 5, 6, 7]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -544,8 +562,14 @@ fn test_exactness_simple_random() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[8, 7, 4, 6, 3, 5]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -568,8 +592,14 @@ fn test_exactness_attribute_starts_with_simple() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("this balcony"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -589,8 +619,14 @@ fn test_exactness_attribute_starts_with_phrase() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("\"overlooking the sea\" is a beautiful balcony"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 1]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -604,8 +640,14 @@ fn test_exactness_attribute_starts_with_phrase() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("overlooking the sea is a beautiful balcony"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 3, 1, 7]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -628,8 +670,14 @@ fn test_exactness_all_candidates_with_typo() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("overlocking the sea is a beautiful balcony"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 6, 1, 7]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); // "overlooking" is returned here because the term matching strategy allows it // but it has the worst exactness score (0 exact words) @@ -659,8 +707,14 @@ fn test_exactness_after_words() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]"); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" @@ -702,7 +756,13 @@ fn test_words_after_exactness() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -745,7 +805,14 @@ fn test_proximity_after_exactness() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 5, 8, 7, 3, 6]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -776,7 +843,13 @@ fn test_proximity_after_exactness() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -804,7 +877,13 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("quick brown fox extra"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 3]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -834,7 +913,13 @@ fn test_typo_followed_by_exactness() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("extraordinarily quick brown fox"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + + let document_ids_scores: Vec<_> = + documents_ids.iter().zip(document_scores.into_iter()).collect(); + insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 4, 3]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); diff --git a/milli/src/search/new/tests/geo_sort.rs b/milli/src/search/new/tests/geo_sort.rs index 1f0003082..0d65b589a 100644 --- a/milli/src/search/new/tests/geo_sort.rs +++ b/milli/src/search/new/tests/geo_sort.rs @@ -7,6 +7,7 @@ use heed::RoTxn; use maplit::hashset; use crate::index::tests::TempIndex; +use crate::score_details::ScoreDetails; use crate::search::new::tests::collect_field_values; use crate::{AscDesc, Criterion, GeoSortStrategy, Member, Search, SearchResult}; @@ -28,30 +29,37 @@ fn execute_iterative_and_rtree_returns_the_same<'a>( rtxn: &RoTxn<'a>, index: &TempIndex, search: &mut Search<'a>, -) -> Vec { +) -> (Vec, Vec>) { search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(2)); - let SearchResult { documents_ids, .. } = search.execute().unwrap(); + let SearchResult { documents_ids, document_scores: iterative_scores_bucketed, .. } = + search.execute().unwrap(); let iterative_ids_bucketed = collect_field_values(index, rtxn, "id", &documents_ids); search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000)); - let SearchResult { documents_ids, .. } = search.execute().unwrap(); + let SearchResult { documents_ids, document_scores: iterative_scores, .. } = + search.execute().unwrap(); let iterative_ids = collect_field_values(index, rtxn, "id", &documents_ids); assert_eq!(iterative_ids_bucketed, iterative_ids, "iterative bucket"); + assert_eq!(iterative_scores_bucketed, iterative_scores, "iterative bucket score"); search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(2)); - let SearchResult { documents_ids, .. } = search.execute().unwrap(); + let SearchResult { documents_ids, document_scores: rtree_scores_bucketed, .. } = + search.execute().unwrap(); let rtree_ids_bucketed = collect_field_values(index, rtxn, "id", &documents_ids); search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000)); - let SearchResult { documents_ids, .. } = search.execute().unwrap(); + let SearchResult { documents_ids, document_scores: rtree_scores, .. } = + search.execute().unwrap(); let rtree_ids = collect_field_values(index, rtxn, "id", &documents_ids); assert_eq!(rtree_ids_bucketed, rtree_ids, "rtree bucket"); + assert_eq!(rtree_scores_bucketed, rtree_scores, "rtree bucket score"); assert_eq!(iterative_ids, rtree_ids, "iterative vs rtree"); + assert_eq!(iterative_scores, rtree_scores, "iterative vs rtree scores"); - iterative_ids.into_iter().map(|id| id.parse().unwrap()).collect() + (iterative_ids.into_iter().map(|id| id.parse().unwrap()).collect(), iterative_scores) } #[test] @@ -73,14 +81,17 @@ fn test_geo_sort() { let rtxn = index.read_txn().unwrap(); let mut s = Search::new(&rtxn, &index); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 3, 4, 5, 6, 8, 7, 10, 9]"); + insta::assert_snapshot!(format!("{scores:#?}")); s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[5, 4, 3, 2, 1, 0, 6, 8, 7, 10, 9]"); + insta::assert_snapshot!(format!("{scores:#?}")); } #[test] @@ -101,52 +112,63 @@ fn test_geo_sort_around_the_edge_of_the_flat_earth() { let rtxn = index.read_txn().unwrap(); let mut s = Search::new(&rtxn, &index); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); // --- asc s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 3, 4]"); + insta::assert_snapshot!(format!("{scores:#?}")); // ensuring the lat doesn't wrap around s.sort_criteria(vec![AscDesc::Asc(Member::Geo([85., 0.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[1, 0, 3, 4, 2]"); + insta::assert_snapshot!(format!("{scores:#?}")); s.sort_criteria(vec![AscDesc::Asc(Member::Geo([-85., 0.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[2, 0, 3, 4, 1]"); + insta::assert_snapshot!(format!("{scores:#?}")); // ensuring the lng does wrap around s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 175.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[3, 4, 2, 1, 0]"); + insta::assert_snapshot!(format!("{scores:#?}")); s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., -175.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[4, 3, 2, 1, 0]"); + insta::assert_snapshot!(format!("{scores:#?}")); // --- desc s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[4, 3, 2, 1, 0]"); + insta::assert_snapshot!(format!("{scores:#?}")); // ensuring the lat doesn't wrap around s.sort_criteria(vec![AscDesc::Desc(Member::Geo([85., 0.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[2, 4, 3, 0, 1]"); + insta::assert_snapshot!(format!("{scores:#?}")); s.sort_criteria(vec![AscDesc::Desc(Member::Geo([-85., 0.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[1, 4, 3, 0, 2]"); + insta::assert_snapshot!(format!("{scores:#?}")); // ensuring the lng does wrap around s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 175.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 4, 3]"); + insta::assert_snapshot!(format!("{scores:#?}")); s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., -175.]))]); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 3, 4]"); + insta::assert_snapshot!(format!("{scores:#?}")); } #[test] @@ -166,19 +188,98 @@ fn geo_sort_mixed_with_words() { let rtxn = index.read_txn().unwrap(); let mut s = Search::new(&rtxn, &index); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]); s.query("jean"); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[0, 2, 3]"); + insta::assert_snapshot!(format!("{scores:#?}")); s.query("bob"); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[2, 4]"); + insta::assert_snapshot!(format!("{scores:#?}"), @r###" + [ + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], + ] + "###); s.query("intel"); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[1]"); + insta::assert_snapshot!(format!("{scores:#?}"), @r###" + [ + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], + ] + "###); } #[test] @@ -198,9 +299,11 @@ fn geo_sort_without_any_geo_faceted_documents() { let rtxn = index.read_txn().unwrap(); let mut s = Search::new(&rtxn, &index); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]); s.query("jean"); - let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[0, 2, 3]"); + insta::assert_snapshot!(format!("{scores:#?}")); } diff --git a/milli/src/search/new/tests/ngram_split_words.rs b/milli/src/search/new/tests/ngram_split_words.rs index fb99b8ba2..8427dd65b 100644 --- a/milli/src/search/new/tests/ngram_split_words.rs +++ b/milli/src/search/new/tests/ngram_split_words.rs @@ -80,10 +80,13 @@ fn test_2gram_simple() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("sun flower"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); // will also match documents with "sunflower" + prefix tolerance insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 5]"); + // scores are empty because the only rule is Words with All matching strategy + insta::assert_snapshot!(format!("{document_scores:?}"), @"[[], [], [], [], []]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ diff --git a/milli/src/search/new/tests/proximity.rs b/milli/src/search/new/tests/proximity.rs index 6e4181a95..b54007c6e 100644 --- a/milli/src/search/new/tests/proximity.rs +++ b/milli/src/search/new/tests/proximity.rs @@ -124,6 +124,8 @@ fn create_edge_cases_index() -> TempIndex { }, // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`. // If the search query is "sunflower", the split word "Sun Flower" will match some documents. + // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`. + // If the search query is "sunflower", the split word "Sun Flower" will match some documents. // If the query is `sunflower wilting`, then we should make sure that // the proximity condition `flower wilting: sprx N` also comes with the condition // `sun wilting: sprx N+1`, but this is not the exact condition we use for now. @@ -140,6 +142,7 @@ fn create_edge_cases_index() -> TempIndex { { "id": 3, // This document matches the query `sunflower wilting`, but the sprximity condition + // This document matches the query `sunflower wilting`, but the sprximity condition // between `sunflower` and `wilting` cannot be through the split-word `Sun Flower` // which would reduce to only `flower` and `wilting` being in sprximity. "text": "A flower wilting under the sun, unlike a sunflower" @@ -270,13 +273,13 @@ fn test_proximity_simple() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quick brown fox jumps over the lazy dog"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 9, 10, 7, 6, 5, 2, 3, 0, 1]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 5, 2, 3, 0, 1]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ - "\"the quickbrown fox jumps over the lazy dog\"", "\"the quack brown fox jumps over the lazy dog\"", "\"the quick brown fox jumps over the lazy dog\"", + "\"the quickbrown fox jumps over the lazy dog\"", "\"the really quick brown fox jumps over the lazy dog\"", "\"the really quick brown fox jumps over the very lazy dog\"", "\"brown quick fox jumps over the lazy dog\"", @@ -295,9 +298,12 @@ fn test_proximity_split_word() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("sunflower wilting"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" @@ -312,9 +318,11 @@ fn test_proximity_split_word() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("\"sun flower\" wilting"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" @@ -337,9 +345,11 @@ fn test_proximity_split_word() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("xyz wilting"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" @@ -358,9 +368,11 @@ fn test_proximity_prefix_db() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("best s"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); // This test illustrates the loss of precision from using the prefix DB @@ -381,9 +393,11 @@ fn test_proximity_prefix_db() { // Difference when using the `su` prefix, which is not in the prefix DB let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("best su"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" @@ -406,9 +420,11 @@ fn test_proximity_prefix_db() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("best win"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[15, 16, 17, 18, 19, 20, 21, 22]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" @@ -428,9 +444,11 @@ fn test_proximity_prefix_db() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("best wint"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" @@ -450,9 +468,11 @@ fn test_proximity_prefix_db() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("best wi"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" diff --git a/milli/src/search/new/tests/proximity_typo.rs b/milli/src/search/new/tests/proximity_typo.rs index b459b178b..9fad21690 100644 --- a/milli/src/search/new/tests/proximity_typo.rs +++ b/milli/src/search/new/tests/proximity_typo.rs @@ -60,8 +60,41 @@ fn test_trap_basic() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("summer holiday"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @r###" + [ + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + ], + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + ], + ] + "###); let texts = collect_field_values(&index, &txn, "text", &documents_ids); // This is incorrect, 1 should come before 0 insta::assert_debug_snapshot!(texts, @r###" diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams.snap new file mode 100644 index 000000000..930a21626 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams.snap @@ -0,0 +1,244 @@ +--- +source: milli/src/search/new/tests/attribute_fid.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 2, + [ + Fid( + Rank { + rank: 19, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 91, + max_rank: 91, + }, + ), + ], + ), + ( + 6, + [ + Fid( + Rank { + rank: 15, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 5, + [ + Fid( + Rank { + rank: 14, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 79, + max_rank: 91, + }, + ), + ], + ), + ( + 4, + [ + Fid( + Rank { + rank: 13, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 77, + max_rank: 91, + }, + ), + ], + ), + ( + 3, + [ + Fid( + Rank { + rank: 12, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 83, + max_rank: 91, + }, + ), + ], + ), + ( + 9, + [ + Fid( + Rank { + rank: 11, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 75, + max_rank: 91, + }, + ), + ], + ), + ( + 8, + [ + Fid( + Rank { + rank: 10, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 79, + max_rank: 91, + }, + ), + ], + ), + ( + 7, + [ + Fid( + Rank { + rank: 10, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 73, + max_rank: 91, + }, + ), + ], + ), + ( + 11, + [ + Fid( + Rank { + rank: 7, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 77, + max_rank: 91, + }, + ), + ], + ), + ( + 10, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 13, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 12, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 78, + max_rank: 91, + }, + ), + ], + ), + ( + 14, + [ + Fid( + Rank { + rank: 5, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 75, + max_rank: 91, + }, + ), + ], + ), + ( + 0, + [ + Fid( + Rank { + rank: 1, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 91, + max_rank: 91, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_simple.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_simple.snap new file mode 100644 index 000000000..930a21626 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_simple.snap @@ -0,0 +1,244 @@ +--- +source: milli/src/search/new/tests/attribute_fid.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 2, + [ + Fid( + Rank { + rank: 19, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 91, + max_rank: 91, + }, + ), + ], + ), + ( + 6, + [ + Fid( + Rank { + rank: 15, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 5, + [ + Fid( + Rank { + rank: 14, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 79, + max_rank: 91, + }, + ), + ], + ), + ( + 4, + [ + Fid( + Rank { + rank: 13, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 77, + max_rank: 91, + }, + ), + ], + ), + ( + 3, + [ + Fid( + Rank { + rank: 12, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 83, + max_rank: 91, + }, + ), + ], + ), + ( + 9, + [ + Fid( + Rank { + rank: 11, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 75, + max_rank: 91, + }, + ), + ], + ), + ( + 8, + [ + Fid( + Rank { + rank: 10, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 79, + max_rank: 91, + }, + ), + ], + ), + ( + 7, + [ + Fid( + Rank { + rank: 10, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 73, + max_rank: 91, + }, + ), + ], + ), + ( + 11, + [ + Fid( + Rank { + rank: 7, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 77, + max_rank: 91, + }, + ), + ], + ), + ( + 10, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 13, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 12, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 78, + max_rank: 91, + }, + ), + ], + ), + ( + 14, + [ + Fid( + Rank { + rank: 5, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 75, + max_rank: 91, + }, + ), + ], + ), + ( + 0, + [ + Fid( + Rank { + rank: 1, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 91, + max_rank: 91, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap new file mode 100644 index 000000000..2626ee7d4 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap @@ -0,0 +1,244 @@ +--- +source: milli/src/search/new/tests/attribute_position.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 10, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 21, + max_rank: 21, + }, + ), + ], + ), + ( + 12, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 21, + max_rank: 21, + }, + ), + ], + ), + ( + 11, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 20, + max_rank: 21, + }, + ), + ], + ), + ( + 13, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 20, + max_rank: 21, + }, + ), + ], + ), + ( + 3, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 19, + max_rank: 21, + }, + ), + ], + ), + ( + 4, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 19, + max_rank: 21, + }, + ), + ], + ), + ( + 2, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 18, + max_rank: 21, + }, + ), + ], + ), + ( + 0, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 15, + max_rank: 21, + }, + ), + ], + ), + ( + 1, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 15, + max_rank: 21, + }, + ), + ], + ), + ( + 6, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 13, + max_rank: 21, + }, + ), + ], + ), + ( + 8, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 5, + max_rank: 21, + }, + ), + ], + ), + ( + 7, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 4, + max_rank: 21, + }, + ), + ], + ), + ( + 9, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 4, + max_rank: 21, + }, + ), + ], + ), + ( + 5, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 1, + max_rank: 21, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap new file mode 100644 index 000000000..2626ee7d4 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap @@ -0,0 +1,244 @@ +--- +source: milli/src/search/new/tests/attribute_position.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 10, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 21, + max_rank: 21, + }, + ), + ], + ), + ( + 12, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 21, + max_rank: 21, + }, + ), + ], + ), + ( + 11, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 20, + max_rank: 21, + }, + ), + ], + ), + ( + 13, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 20, + max_rank: 21, + }, + ), + ], + ), + ( + 3, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 19, + max_rank: 21, + }, + ), + ], + ), + ( + 4, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 19, + max_rank: 21, + }, + ), + ], + ), + ( + 2, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 18, + max_rank: 21, + }, + ), + ], + ), + ( + 0, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 15, + max_rank: 21, + }, + ), + ], + ), + ( + 1, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 15, + max_rank: 21, + }, + ), + ], + ), + ( + 6, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 13, + max_rank: 21, + }, + ), + ], + ), + ( + 8, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 5, + max_rank: 21, + }, + ), + ], + ), + ( + 7, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 4, + max_rank: 21, + }, + ), + ], + ), + ( + 9, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 4, + max_rank: 21, + }, + ), + ], + ), + ( + 5, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 1, + max_rank: 21, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap new file mode 100644 index 000000000..73dec5f8b --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/attribute_position.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 5, + [ + Fid( + Rank { + rank: 11, + max_rank: 11, + }, + ), + Position( + Rank { + rank: 51, + max_rank: 51, + }, + ), + ], + ), + ( + 7, + [ + Fid( + Rank { + rank: 11, + max_rank: 11, + }, + ), + Position( + Rank { + rank: 51, + max_rank: 51, + }, + ), + ], + ), + ( + 8, + [ + Fid( + Rank { + rank: 11, + max_rank: 11, + }, + ), + Position( + Rank { + rank: 51, + max_rank: 51, + }, + ), + ], + ), + ( + 9, + [ + Fid( + Rank { + rank: 11, + max_rank: 11, + }, + ), + Position( + Rank { + rank: 51, + max_rank: 51, + }, + ), + ], + ), + ( + 6, + [ + Fid( + Rank { + rank: 11, + max_rank: 11, + }, + ), + Position( + Rank { + rank: 50, + max_rank: 51, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap new file mode 100644 index 000000000..2626ee7d4 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap @@ -0,0 +1,244 @@ +--- +source: milli/src/search/new/tests/attribute_position.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 10, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 21, + max_rank: 21, + }, + ), + ], + ), + ( + 12, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 21, + max_rank: 21, + }, + ), + ], + ), + ( + 11, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 20, + max_rank: 21, + }, + ), + ], + ), + ( + 13, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 20, + max_rank: 21, + }, + ), + ], + ), + ( + 3, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 19, + max_rank: 21, + }, + ), + ], + ), + ( + 4, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 19, + max_rank: 21, + }, + ), + ], + ), + ( + 2, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 18, + max_rank: 21, + }, + ), + ], + ), + ( + 0, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 15, + max_rank: 21, + }, + ), + ], + ), + ( + 1, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 15, + max_rank: 21, + }, + ), + ], + ), + ( + 6, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 13, + max_rank: 21, + }, + ), + ], + ), + ( + 8, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 5, + max_rank: 21, + }, + ), + ], + ), + ( + 7, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 4, + max_rank: 21, + }, + ), + ], + ), + ( + 9, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 4, + max_rank: 21, + }, + ), + ], + ), + ( + 5, + [ + Fid( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Position( + Rank { + rank: 1, + max_rank: 21, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_after_words.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_after_words.snap new file mode 100644 index 000000000..ef95520bb --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_after_words.snap @@ -0,0 +1,366 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 19, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + ], + ), + ( + 9, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 7, + max_rank: 10, + }, + ), + ], + ), + ( + 18, + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 9, + max_rank: 9, + }, + ), + ], + ), + ( + 8, + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 6, + max_rank: 9, + }, + ), + ], + ), + ( + 17, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + ), + ( + 16, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + ), + ( + 6, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + ), + ( + 7, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + ), + ( + 15, + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 6, + max_rank: 6, + }, + ), + ], + ), + ( + 5, + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 6, + }, + ), + ], + ), + ( + 14, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 5, + }, + ), + ], + ), + ( + 13, + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 4, + }, + ), + ], + ), + ( + 3, + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 4, + }, + ), + ], + ), + ( + 12, + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 3, + }, + ), + ], + ), + ( + 2, + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 3, + }, + ), + ], + ), + ( + 1, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 11, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_all_candidates_with_typo.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_all_candidates_with_typo.snap new file mode 100644 index 000000000..d48bf9933 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_all_candidates_with_typo.snap @@ -0,0 +1,106 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 4, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 7, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 7, + max_rank: 8, + }, + ), + ], + ), + ( + 5, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 7, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 7, + max_rank: 8, + }, + ), + ], + ), + ( + 6, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 7, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 7, + max_rank: 8, + }, + ), + ], + ), + ( + 1, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 7, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 5, + }, + ), + ], + ), + ( + 7, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 7, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 1, + max_rank: 2, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase-3.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase-3.snap new file mode 100644 index 000000000..991ff4cee --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase-3.snap @@ -0,0 +1,126 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 6, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 7, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + ), + ( + 5, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 7, + }, + ), + ExactAttribute( + MatchesStart, + ), + Exactness( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 7, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + ), + ( + 3, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 7, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 7, + max_rank: 8, + }, + ), + ], + ), + ( + 1, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 7, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + ], + ), + ( + 7, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 7, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase.snap new file mode 100644 index 000000000..703f3cb7a --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase.snap @@ -0,0 +1,86 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 6, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 7, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 6, + max_rank: 6, + }, + ), + ], + ), + ( + 5, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 7, + }, + ), + ExactAttribute( + MatchesStart, + ), + Exactness( + Rank { + rank: 6, + max_rank: 6, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 7, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 6, + max_rank: 6, + }, + ), + ], + ), + ( + 1, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 7, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 3, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_simple.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_simple.snap new file mode 100644 index 000000000..eb6141468 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_simple.snap @@ -0,0 +1,66 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 2, + [ + Words( + Words { + matching_words: 2, + max_matching_words: 2, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 3, + }, + ), + ], + ), + ( + 1, + [ + Words( + Words { + matching_words: 2, + max_matching_words: 2, + }, + ), + ExactAttribute( + MatchesStart, + ), + Exactness( + Rank { + rank: 3, + max_rank: 3, + }, + ), + ], + ), + ( + 0, + [ + Words( + Words { + matching_words: 2, + max_matching_words: 2, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 3, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_followed_by_typo_prefer_no_typo_prefix.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_followed_by_typo_prefer_no_typo_prefix.snap new file mode 100644 index 000000000..987e585a8 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_followed_by_typo_prefer_no_typo_prefix.snap @@ -0,0 +1,136 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 2, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + ], + ), + ( + 1, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 5, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + ], + ), + ( + 0, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 5, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 2, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 5, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 2, + }, + ), + ], + ), + ( + 3, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 5, + }, + ), + Typo( + Typo { + typo_count: 2, + max_typo_count: 3, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_ordered.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_ordered.snap new file mode 100644 index 000000000..c5993a09e --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_ordered.snap @@ -0,0 +1,186 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 9, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + ], + ), + ( + 8, + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 9, + max_rank: 9, + }, + ), + ], + ), + ( + 7, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + ), + ( + 6, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + ), + ( + 5, + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 6, + max_rank: 6, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + ], + ), + ( + 3, + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 4, + }, + ), + ], + ), + ( + 2, + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 3, + }, + ), + ], + ), + ( + 1, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_random.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_random.snap new file mode 100644 index 000000000..d920eb4a0 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_random.snap @@ -0,0 +1,126 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 8, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + ], + ), + ( + 7, + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 4, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 3, + }, + ), + ], + ), + ( + 6, + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 3, + }, + ), + ], + ), + ( + 3, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 5, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed-3.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed-3.snap new file mode 100644 index 000000000..d0bc0fb46 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed-3.snap @@ -0,0 +1,146 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 9, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + ], + ), + ( + 8, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + ], + ), + ( + 3, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + MatchesStart, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 5, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 6, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 7, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed.snap new file mode 100644 index 000000000..d0bc0fb46 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed.snap @@ -0,0 +1,146 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 9, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + ], + ), + ( + 8, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + ], + ), + ( + 3, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + MatchesStart, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 5, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 6, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 7, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness-4.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness-4.snap new file mode 100644 index 000000000..21c6da724 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness-4.snap @@ -0,0 +1,84 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 0, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + Proximity( + Rank { + rank: 35, + max_rank: 57, + }, + ), + ], + ), + ( + 1, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + Proximity( + Rank { + rank: 35, + max_rank: 57, + }, + ), + ], + ), + ( + 2, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + Proximity( + Rank { + rank: 35, + max_rank: 57, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness.snap new file mode 100644 index 000000000..7a33134cf --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness.snap @@ -0,0 +1,240 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 2, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + Proximity( + Rank { + rank: 57, + max_rank: 57, + }, + ), + ], + ), + ( + 1, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + Proximity( + Rank { + rank: 56, + max_rank: 57, + }, + ), + ], + ), + ( + 0, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + Proximity( + Rank { + rank: 35, + max_rank: 57, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + MatchesStart, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Proximity( + Rank { + rank: 22, + max_rank: 22, + }, + ), + ], + ), + ( + 5, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + MatchesStart, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Proximity( + Rank { + rank: 22, + max_rank: 22, + }, + ), + ], + ), + ( + 8, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + MatchesStart, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Proximity( + Rank { + rank: 22, + max_rank: 22, + }, + ), + ], + ), + ( + 7, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Proximity( + Rank { + rank: 21, + max_rank: 22, + }, + ), + ], + ), + ( + 3, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Proximity( + Rank { + rank: 17, + max_rank: 22, + }, + ), + ], + ), + ( + 6, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + Proximity( + Rank { + rank: 17, + max_rank: 22, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__typo_followed_by_exactness.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__typo_followed_by_exactness.snap new file mode 100644 index 000000000..6670f3e4f --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__typo_followed_by_exactness.snap @@ -0,0 +1,110 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 1, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 5, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + ], + ), + ( + 0, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 5, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 5, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + Typo( + Typo { + typo_count: 2, + max_typo_count: 5, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 5, + }, + ), + ], + ), + ( + 3, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + Typo( + Typo { + typo_count: 2, + max_typo_count: 5, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 5, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__words_after_exactness.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__words_after_exactness.snap new file mode 100644 index 000000000..ef95520bb --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__words_after_exactness.snap @@ -0,0 +1,366 @@ +--- +source: milli/src/search/new/tests/exactness.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 19, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 10, + max_rank: 10, + }, + ), + ], + ), + ( + 9, + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 7, + max_rank: 10, + }, + ), + ], + ), + ( + 18, + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 9, + max_rank: 9, + }, + ), + ], + ), + ( + 8, + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 6, + max_rank: 9, + }, + ), + ], + ), + ( + 17, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + ), + ( + 16, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + ), + ( + 6, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + ), + ( + 7, + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + ), + ( + 15, + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 6, + max_rank: 6, + }, + ), + ], + ), + ( + 5, + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 6, + }, + ), + ], + ), + ( + 14, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 5, + max_rank: 5, + }, + ), + ], + ), + ( + 4, + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 5, + }, + ), + ], + ), + ( + 13, + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 4, + }, + ), + ], + ), + ( + 3, + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 4, + }, + ), + ], + ), + ( + 12, + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 3, + }, + ), + ], + ), + ( + 2, + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 3, + }, + ), + ], + ), + ( + 1, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), + ( + 11, + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + ExactAttribute( + ExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ), +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-2.snap new file mode 100644 index 000000000..0efa8f3a4 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-2.snap @@ -0,0 +1,168 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 1.0, + 1.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 2.0, + -1.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + -2.0, + -2.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 3.0, + 5.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 6.0, + -5.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-4.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-4.snap new file mode 100644 index 000000000..8f372e254 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-4.snap @@ -0,0 +1,168 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 6.0, + -5.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 3.0, + 5.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + -2.0, + -2.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 2.0, + -1.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 1.0, + 1.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: None, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: None, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: None, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: None, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: None, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-10.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-10.snap new file mode 100644 index 000000000..0f8bfa648 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-10.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + -175.0, + ], + ascending: true, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + -175.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + -175.0, + ], + ascending: true, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + -175.0, + ], + ascending: true, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + -175.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-12.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-12.snap new file mode 100644 index 000000000..fbcdfc508 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-12.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-14.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-14.snap new file mode 100644 index 000000000..e948696c6 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-14.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 85.0, + 0.0, + ], + ascending: false, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 85.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 85.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 85.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 85.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-16.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-16.snap new file mode 100644 index 000000000..5747ecfdd --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-16.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + -85.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + -85.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + -85.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + -85.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + -85.0, + 0.0, + ], + ascending: false, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-18.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-18.snap new file mode 100644 index 000000000..6ed2d8e12 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-18.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 175.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 175.0, + ], + ascending: false, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 175.0, + ], + ascending: false, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 175.0, + ], + ascending: false, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 175.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-2.snap new file mode 100644 index 000000000..45eba0c45 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-2.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-20.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-20.snap new file mode 100644 index 000000000..bb7cc3041 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-20.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + -175.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + -175.0, + ], + ascending: false, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + -175.0, + ], + ascending: false, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + -175.0, + ], + ascending: false, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + -175.0, + ], + ascending: false, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-4.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-4.snap new file mode 100644 index 000000000..ab344c098 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-4.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 85.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 85.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 85.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 85.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 85.0, + 0.0, + ], + ascending: true, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-6.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-6.snap new file mode 100644 index 000000000..2120ab105 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-6.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + -85.0, + 0.0, + ], + ascending: true, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + -85.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + -85.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + -85.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + -85.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-8.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-8.snap new file mode 100644 index 000000000..421e2b73a --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-8.snap @@ -0,0 +1,91 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 175.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 175.0, + ], + ascending: true, + value: Some( + [ + 0.0, + -179.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 175.0, + ], + ascending: true, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 175.0, + ], + ascending: true, + value: Some( + [ + 88.0, + 0.0, + ], + ), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 175.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_mixed_with_words-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_mixed_with_words-2.snap new file mode 100644 index 000000000..92523a9f3 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_mixed_with_words-2.snap @@ -0,0 +1,75 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 0.0, + ], + ), + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + -89.0, + 0.0, + ], + ), + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 0.0, + 178.0, + ], + ), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_without_any_geo_faceted_documents-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_without_any_geo_faceted_documents-2.snap new file mode 100644 index 000000000..7ce63c137 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_without_any_geo_faceted_documents-2.snap @@ -0,0 +1,60 @@ +--- +source: milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-11.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-11.snap new file mode 100644 index 000000000..0860aaf83 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-11.snap @@ -0,0 +1,70 @@ +--- +source: milli/src/search/new/tests/proximity.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 7, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 6, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 6, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 4, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-14.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-14.snap new file mode 100644 index 000000000..ca74b9c58 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-14.snap @@ -0,0 +1,70 @@ +--- +source: milli/src/search/new/tests/proximity.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 7, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 6, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 6, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-2.snap new file mode 100644 index 000000000..a6a01fbba --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-2.snap @@ -0,0 +1,78 @@ +--- +source: milli/src/search/new/tests/proximity.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 7, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 6, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 6, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-5.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-5.snap new file mode 100644 index 000000000..13e49d4bc --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-5.snap @@ -0,0 +1,78 @@ +--- +source: milli/src/search/new/tests/proximity.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 7, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 6, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 6, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 4, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-8.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-8.snap new file mode 100644 index 000000000..a7d18b8fc --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-8.snap @@ -0,0 +1,70 @@ +--- +source: milli/src/search/new/tests/proximity.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-2.snap new file mode 100644 index 000000000..6a43c385e --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-2.snap @@ -0,0 +1,46 @@ +--- +source: milli/src/search/new/tests/proximity.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-5.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-5.snap new file mode 100644 index 000000000..5d8caae94 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-5.snap @@ -0,0 +1,30 @@ +--- +source: milli/src/search/new/tests/proximity.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-8.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-8.snap new file mode 100644 index 000000000..5d8caae94 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-8.snap @@ -0,0 +1,30 @@ +--- +source: milli/src/search/new/tests/proximity.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 8, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__redacted-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__redacted-2.snap new file mode 100644 index 000000000..d94e0252d --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__redacted-2.snap @@ -0,0 +1,206 @@ +--- +source: milli/src/search/new/tests/sort.rs +expression: document_scores_json +--- +[ + { + "vague:asc": { + "order": 0, + "value": 0.0 + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": 1.0 + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": 1.0 + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": 1.0 + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": 1.1367 + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": 1.2367 + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": 1.5673 + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": "0" + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": "1" + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": "false" + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": "false" + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": "true" + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": "true" + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": null + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": null + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": null + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": null + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": null + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": null + }, + "": { + "order": 1, + "value": "" + } + }, + { + "vague:asc": { + "order": 0, + "value": null + }, + "": { + "order": 1, + "value": "" + } + } +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-11.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-11.snap new file mode 100644 index 000000000..76a732618 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-11.snap @@ -0,0 +1,206 @@ +--- +source: milli/src/search/new/tests/sort.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Number(2.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Number(1.5673), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Number(1.2367), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Number(1.1367), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Number(0.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: String("true"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: String("true"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: String("false"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: String("false"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: String("1"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: String("0"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-2.snap new file mode 100644 index 000000000..5e70de729 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-2.snap @@ -0,0 +1,206 @@ +--- +source: milli/src/search/new/tests/sort.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("i"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("i"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("i"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("h"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("g"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("g"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("f"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("f"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("f"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("e"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("e"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("e"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("e"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("e"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("e"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("d"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("c"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("c"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("c"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "letter", + ascending: false, + redacted: false, + value: String("b"), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-5.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-5.snap new file mode 100644 index 000000000..6661bdc15 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-5.snap @@ -0,0 +1,206 @@ +--- +source: milli/src/search/new/tests/sort.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(5.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(4.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(3.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(2.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(2.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(2.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(2.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(2.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(0.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(0.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(0.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(0.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "rank", + ascending: false, + redacted: false, + value: Number(0.0), + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-8.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-8.snap new file mode 100644 index 000000000..9940a22c3 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-8.snap @@ -0,0 +1,206 @@ +--- +source: milli/src/search/new/tests/sort.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Number(0.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Number(1.0), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Number(1.1367), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Number(1.2367), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Number(1.5673), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: String("0"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: String("1"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: String("false"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: String("false"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: String("true"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: String("true"), + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Null, + }, + ), + ], + [ + Sort( + Sort { + field_name: "vague", + ascending: true, + redacted: false, + value: Null, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-6.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-6.snap new file mode 100644 index 000000000..1ca6a33a4 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-6.snap @@ -0,0 +1,129 @@ +--- +source: milli/src/search/new/tests/stop_words.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + Proximity( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Fid( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Position( + Rank { + rank: 31, + max_rank: 31, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 4, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + Proximity( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Fid( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Position( + Rank { + rank: 31, + max_rank: 31, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 4, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + Proximity( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Fid( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Position( + Rank { + rank: 27, + max_rank: 31, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 4, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-8.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-8.snap new file mode 100644 index 000000000..7a059b374 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-8.snap @@ -0,0 +1,13 @@ +--- +source: milli/src/search/new/tests/stop_words.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [], + [], + [], + [], + [], + [], + [], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-2.snap new file mode 100644 index 000000000..71a7e3ac4 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-2.snap @@ -0,0 +1,12 @@ +--- +source: milli/src/search/new/tests/typo.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [], + [], + [], + [], + [], + [], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-5.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-5.snap new file mode 100644 index 000000000..28c700539 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-5.snap @@ -0,0 +1,54 @@ +--- +source: milli/src/search/new/tests/typo.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Typo( + Typo { + typo_count: 0, + max_typo_count: 5, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 0, + max_typo_count: 5, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 1, + max_typo_count: 5, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 1, + max_typo_count: 5, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 2, + max_typo_count: 5, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 5, + max_typo_count: 5, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-8.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-8.snap new file mode 100644 index 000000000..11830a905 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-8.snap @@ -0,0 +1,54 @@ +--- +source: milli/src/search/new/tests/typo.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Typo( + Typo { + typo_count: 0, + max_typo_count: 6, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 0, + max_typo_count: 6, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 2, + max_typo_count: 6, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 2, + max_typo_count: 6, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 3, + max_typo_count: 6, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 4, + max_typo_count: 6, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_attribute-4.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_attribute-4.snap new file mode 100644 index 000000000..cabea4842 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_attribute-4.snap @@ -0,0 +1,9 @@ +--- +source: milli/src/search/new/tests/typo.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [], + [], + [], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_word-12.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_word-12.snap new file mode 100644 index 000000000..cabea4842 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_word-12.snap @@ -0,0 +1,9 @@ +--- +source: milli/src/search/new/tests/typo.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [], + [], + [], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-2.snap new file mode 100644 index 000000000..89f4d94ca --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-2.snap @@ -0,0 +1,244 @@ +--- +source: milli/src/search/new/tests/typo.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 8, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 7, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 7, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 7, + }, + ), + ], + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 5, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 4, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + ], + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 2, + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-5.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-5.snap new file mode 100644 index 000000000..89f4d94ca --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-5.snap @@ -0,0 +1,244 @@ +--- +source: milli/src/search/new/tests/typo.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 8, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 7, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 7, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 7, + }, + ), + ], + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 5, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 4, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + ], + [ + Words( + Words { + matching_words: 2, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 2, + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + ], + [ + Words( + Words { + matching_words: 1, + max_matching_words: 9, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-2.snap new file mode 100644 index 000000000..887f45b11 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-2.snap @@ -0,0 +1,30 @@ +--- +source: milli/src/search/new/tests/typo.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Typo( + Typo { + typo_count: 0, + max_typo_count: 13, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 0, + max_typo_count: 13, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 1, + max_typo_count: 13, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-5.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-5.snap new file mode 100644 index 000000000..18587e269 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-5.snap @@ -0,0 +1,30 @@ +--- +source: milli/src/search/new/tests/typo.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Typo( + Typo { + typo_count: 0, + max_typo_count: 13, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 2, + max_typo_count: 13, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 2, + max_typo_count: 13, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_basic_and_complex1-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_basic_and_complex1-2.snap new file mode 100644 index 000000000..09cb68044 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_basic_and_complex1-2.snap @@ -0,0 +1,62 @@ +--- +source: milli/src/search/new/tests/typo_proximity.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + Proximity( + Rank { + rank: 5, + max_rank: 8, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 1, + max_typo_count: 3, + }, + ), + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 1, + max_typo_count: 3, + }, + ), + Proximity( + Rank { + rank: 7, + max_rank: 8, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_complex2-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_complex2-2.snap new file mode 100644 index 000000000..5b93dfecd --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_complex2-2.snap @@ -0,0 +1,34 @@ +--- +source: milli/src/search/new/tests/typo_proximity.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Typo( + Typo { + typo_count: 1, + max_typo_count: 5, + }, + ), + Proximity( + Rank { + rank: 15, + max_rank: 15, + }, + ), + ], + [ + Typo( + Typo { + typo_count: 1, + max_typo_count: 5, + }, + ), + Proximity( + Rank { + rank: 8, + max_rank: 15, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-2.snap new file mode 100644 index 000000000..61c39153f --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-2.snap @@ -0,0 +1,216 @@ +--- +source: milli/src/search/new/tests/words_tms.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 50, + max_rank: 50, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 50, + max_rank: 50, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 49, + max_rank: 50, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 49, + max_rank: 50, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 48, + max_rank: 50, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 41, + max_rank: 50, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 40, + max_rank: 50, + }, + ), + ], + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 43, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 36, + max_rank: 36, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 31, + max_rank: 36, + }, + ), + ], + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 22, + max_rank: 22, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 15, + max_rank: 15, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 15, + max_rank: 15, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 15, + max_rank: 15, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 8, + max_rank: 8, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-5.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-5.snap new file mode 100644 index 000000000..e43908ee7 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-5.snap @@ -0,0 +1,160 @@ +--- +source: milli/src/search/new/tests/words_tms.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 43, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 43, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 42, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 42, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 41, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 34, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 33, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 36, + max_rank: 36, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 29, + max_rank: 29, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 24, + max_rank: 29, + }, + ), + ], + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 15, + max_rank: 15, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-2.snap new file mode 100644 index 000000000..4fc992b3d --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-2.snap @@ -0,0 +1,286 @@ +--- +source: milli/src/search/new/tests/words_tms.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 57, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 57, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 56, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 56, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 55, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 54, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 53, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 52, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 51, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 48, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 47, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 1, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 50, + max_rank: 50, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 43, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 38, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 29, + max_rank: 29, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 22, + max_rank: 22, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 22, + max_rank: 22, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 22, + max_rank: 22, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 15, + max_rank: 15, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-5.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-5.snap new file mode 100644 index 000000000..1f070e381 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-5.snap @@ -0,0 +1,286 @@ +--- +source: milli/src/search/new/tests/words_tms.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 57, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 56, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 55, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 54, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 54, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 54, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 53, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 53, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 52, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 47, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 45, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 1, + max_rank: 57, + }, + ), + ], + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 47, + max_rank: 50, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 40, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 35, + max_rank: 43, + }, + ), + ], + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 26, + max_rank: 29, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 19, + max_rank: 22, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 19, + max_rank: 22, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 19, + max_rank: 22, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + Proximity( + Rank { + rank: 13, + max_rank: 15, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_all-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_all-2.snap new file mode 100644 index 000000000..ff74473c8 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_all-2.snap @@ -0,0 +1,102 @@ +--- +source: milli/src/search/new/tests/words_tms.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Proximity( + Rank { + rank: 57, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 57, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 56, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 56, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 55, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 54, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 53, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 52, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 51, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 48, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 47, + max_rank: 57, + }, + ), + ], + [ + Proximity( + Rank { + rank: 1, + max_rank: 57, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-2.snap new file mode 100644 index 000000000..9b6b18a77 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-2.snap @@ -0,0 +1,86 @@ +--- +source: milli/src/search/new/tests/words_tms.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-5.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-5.snap new file mode 100644 index 000000000..d43e118bb --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-5.snap @@ -0,0 +1,54 @@ +--- +source: milli/src/search/new/tests/words_tms.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_simple-2.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_simple-2.snap new file mode 100644 index 000000000..eecf623e5 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_simple-2.snap @@ -0,0 +1,166 @@ +--- +source: milli/src/search/new/tests/words_tms.rs +expression: "format!(\"{document_scores:#?}\")" +--- +[ + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 9, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 8, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 7, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 5, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 9, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 9, + }, + ), + ], +] diff --git a/milli/src/search/new/tests/sort.rs b/milli/src/search/new/tests/sort.rs index 52acc646c..aa6aa971f 100644 --- a/milli/src/search/new/tests/sort.rs +++ b/milli/src/search/new/tests/sort.rs @@ -16,7 +16,9 @@ use maplit::hashset; use crate::index::tests::TempIndex; use crate::search::new::tests::collect_field_values; -use crate::{AscDesc, Criterion, Member, Search, SearchResult, TermsMatchingStrategy}; +use crate::{ + score_details, AscDesc, Criterion, Member, Search, SearchResult, TermsMatchingStrategy, +}; fn create_index() -> TempIndex { let index = TempIndex::new(); @@ -183,10 +185,12 @@ fn test_sort() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 22, 23, 20, 18, 19, 15, 16, 17, 9, 10, 11, 12, 13, 14, 8, 5, 6, 7, 2]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let letter_values = collect_field_values(&index, &txn, "letter", &documents_ids); insta::assert_debug_snapshot!(letter_values, @r###" @@ -216,10 +220,12 @@ fn test_sort() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank")))]); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 13, 12, 4, 7, 11, 17, 23, 1, 3, 6, 10, 16, 19, 22, 0, 2, 5, 8, 9]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let rank_values = collect_field_values(&index, &txn, "rank", &documents_ids); insta::assert_debug_snapshot!(rank_values, @r###" @@ -249,10 +255,12 @@ fn test_sort() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.sort_criteria(vec![AscDesc::Asc(Member::Field(S("vague")))]); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 4, 5, 22, 23, 13, 1, 3, 12, 21, 11, 20, 6, 7, 8, 9, 10, 14, 15]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let vague_values = collect_field_values(&index, &txn, "vague", &documents_ids); insta::assert_debug_snapshot!(vague_values, @r###" @@ -282,10 +290,12 @@ fn test_sort() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("vague")))]); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 13, 23, 22, 2, 5, 0, 11, 20, 12, 21, 3, 1, 6, 7, 8, 9, 10, 14, 15]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let vague_values = collect_field_values(&index, &txn, "vague", &documents_ids); insta::assert_debug_snapshot!(vague_values, @r###" @@ -313,3 +323,33 @@ fn test_sort() { ] "###); } + +#[test] +fn test_redacted() { + let index = create_index(); + index + .update_settings(|s| { + s.set_displayed_fields(vec!["text".to_owned(), "vague".to_owned()]); + s.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") }); + s.set_criteria(vec![Criterion::Sort]); + }) + .unwrap(); + + let txn = index.read_txn().unwrap(); + + let mut s = Search::new(&txn, &index); + s.terms_matching_strategy(TermsMatchingStrategy::Last); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + s.sort_criteria(vec![ + AscDesc::Asc(Member::Field(S("vague"))), + AscDesc::Asc(Member::Field(S("letter"))), + ]); + + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); + let document_scores_json: Vec<_> = document_scores + .iter() + .map(|scores| score_details::ScoreDetails::to_json_map(scores.iter())) + .collect(); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 4, 5, 22, 23, 13, 1, 3, 12, 21, 11, 20, 6, 7, 8, 9, 10, 14, 15]"); + insta::assert_json_snapshot!(document_scores_json); +} diff --git a/milli/src/search/new/tests/stop_words.rs b/milli/src/search/new/tests/stop_words.rs index 92168f6d6..4ad587240 100644 --- a/milli/src/search/new/tests/stop_words.rs +++ b/milli/src/search/new/tests/stop_words.rs @@ -81,28 +81,212 @@ fn test_ignore_stop_words() { let mut s = Search::new(&txn, &index); s.query("xyz to the"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @r###" + [ + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + Proximity( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Fid( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Position( + Rank { + rank: 9, + max_rank: 11, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ] + "###); // `xyz` is treated as a prefix here, so it's not ignored let mut s = Search::new(&txn, &index); s.query("to the xyz"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @r###" + [ + [ + Words( + Words { + matching_words: 1, + max_matching_words: 2, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + Proximity( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Fid( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Position( + Rank { + rank: 9, + max_rank: 11, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ] + "###); // `xyz` is not treated as a prefix anymore because of the trailing space, so it's ignored let mut s = Search::new(&txn, &index); s.query("to the xyz "); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @r###" + [ + [ + Words( + Words { + matching_words: 1, + max_matching_words: 1, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 1, + }, + ), + Proximity( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Fid( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Position( + Rank { + rank: 9, + max_rank: 11, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ] + "###); let mut s = Search::new(&txn, &index); s.query("to the dragon xyz"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @r###" + [ + [ + Words( + Words { + matching_words: 2, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + Proximity( + Rank { + rank: 7, + max_rank: 8, + }, + ), + Fid( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Position( + Rank { + rank: 17, + max_rank: 21, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 3, + max_rank: 3, + }, + ), + ], + ] + "###); } #[test] @@ -114,24 +298,163 @@ fn test_stop_words_in_phrase() { let mut s = Search::new(&txn, &index); s.query("\"how to train your dragon\""); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 6]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @r###" + [ + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 0, + }, + ), + Proximity( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Fid( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Position( + Rank { + rank: 11, + max_rank: 11, + }, + ), + ExactAttribute( + MatchesStart, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + [ + Words( + Words { + matching_words: 4, + max_matching_words: 4, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 0, + }, + ), + Proximity( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Fid( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Position( + Rank { + rank: 11, + max_rank: 11, + }, + ), + ExactAttribute( + MatchesStart, + ), + Exactness( + Rank { + rank: 2, + max_rank: 2, + }, + ), + ], + ] + "###); let mut s = Search::new(&txn, &index); s.query("how \"to\" train \"the"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @r###" + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + Proximity( + Rank { + rank: 6, + max_rank: 8, + }, + ), + Fid( + Rank { + rank: 1, + max_rank: 1, + }, + ), + Position( + Rank { + rank: 29, + max_rank: 31, + }, + ), + ExactAttribute( + NoExactMatch, + ), + Exactness( + Rank { + rank: 4, + max_rank: 4, + }, + ), + ], + ] + "###); let mut s = Search::new(&txn, &index); s.query("how \"to\" train \"The dragon"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 6, 5]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let mut s = Search::new(&txn, &index); s.query("\"to\""); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 4, 5, 6]"); + // The search is handled as a placeholder search because it doesn't have any non-stop words in it. + // As a result the scores are empty lists + insta::assert_snapshot!(format!("{document_scores:#?}")); } diff --git a/milli/src/search/new/tests/typo.rs b/milli/src/search/new/tests/typo.rs index 536f6653d..4f5e851f5 100644 --- a/milli/src/search/new/tests/typo.rs +++ b/milli/src/search/new/tests/typo.rs @@ -160,8 +160,9 @@ fn test_no_typo() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -184,8 +185,14 @@ fn test_default_typo() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 23]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @r###" + [ + [], + [], + ] + "###); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -198,8 +205,9 @@ fn test_default_typo() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quack brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -211,8 +219,9 @@ fn test_default_typo() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quicest brownest fox jummps over the laziest dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -229,8 +238,9 @@ fn test_phrase_no_typo_allowed() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the \"quick brewn\" fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @"[]"); } @@ -258,8 +268,9 @@ fn test_typo_exact_word() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -271,15 +282,17 @@ fn test_typo_exact_word() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quack brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[]"); // words not in exact_words (quicest, jummps) have normal typo handling let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quicest brownest fox jummps over the laziest dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -290,9 +303,11 @@ fn test_typo_exact_word() { // exact words do not disable prefix (sunflowering OK, but no sunflowar) let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("network interconnection sunflower"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 17, 18]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -327,9 +342,11 @@ fn test_typo_exact_attribute() { // Exact match returns both exact attributes and tolerant ones. let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 24, 25]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -350,9 +367,16 @@ fn test_typo_exact_attribute() { // 1 typo only returns the tolerant attribute let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("the quidk brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[24, 25]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @r###" + [ + [], + [], + ] + "###); let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -364,9 +388,16 @@ fn test_typo_exact_attribute() { // combine with exact words let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("the quivk brown fox jumps over the lazy dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 25]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @r###" + [ + [], + [], + ] + "###); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -385,9 +416,11 @@ fn test_typo_exact_attribute() { // No result in tolerant attribute let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("the quicest brownest fox jummps over the laziest dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[]"); } #[test] @@ -397,9 +430,11 @@ fn test_ngram_typos() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("the extra lagant fox skyrocketed over the languorous dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[[]]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -409,9 +444,11 @@ fn test_ngram_typos() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("the ex tra lagant fox skyrocketed over the languorous dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); + insta::assert_snapshot!(format!("{document_scores:#?}"), @"[]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @"[]"); } @@ -428,9 +465,11 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids: ids_1, .. } = s.execute().unwrap(); + let SearchResult { documents_ids: ids_1, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &ids_1); insta::assert_debug_snapshot!(texts, @r###" [ @@ -462,9 +501,11 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("the quick brown fox jumps over the lazy dog"); - let SearchResult { documents_ids: ids_2, .. } = s.execute().unwrap(); + let SearchResult { documents_ids: ids_2, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); assert_eq!(ids_1, ids_2); } @@ -478,9 +519,11 @@ fn test_typo_bucketing() { // First do the search with just the Words ranking rule let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("network interconnection sunflower"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 15, 16, 17, 18, 20]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -504,9 +547,11 @@ fn test_typo_bucketing() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("network interconnection sunflower"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 18, 17, 20, 15, 14]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -521,9 +566,11 @@ fn test_typo_bucketing() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("network interconnection sun flower"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[17, 19, 16, 18, 20, 15]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -555,9 +602,11 @@ fn test_typo_synonyms() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("the quick brown fox jumps over the lackadaisical dog"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 22, 23]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -569,12 +618,14 @@ fn test_typo_synonyms() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.query("the fast brownish fox jumps over the lackadaisical dog"); // The interaction of ngrams + synonyms means that the multi-word synonyms end up having a typo cost. // This is probably not what we want. - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0, 22]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ diff --git a/milli/src/search/new/tests/typo_proximity.rs b/milli/src/search/new/tests/typo_proximity.rs index 103cc4717..8dd110704 100644 --- a/milli/src/search/new/tests/typo_proximity.rs +++ b/milli/src/search/new/tests/typo_proximity.rs @@ -90,8 +90,10 @@ fn test_trap_basic_and_complex1() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("beautiful summer"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 3, 2]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -111,8 +113,10 @@ fn test_trap_complex2() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("delicious sweet dessert"); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 4]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ diff --git a/milli/src/search/new/tests/words_tms.rs b/milli/src/search/new/tests/words_tms.rs index 826f9c47d..27213ecff 100644 --- a/milli/src/search/new/tests/words_tms.rs +++ b/milli/src/search/new/tests/words_tms.rs @@ -134,10 +134,12 @@ fn test_words_tms_last_simple() { let mut s = Search::new(&txn, &index); s.query("the quick brown fox jumps over the lazy dog"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); // 6 and 7 have the same score because "the" appears twice insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 8, 6, 7, 5, 4, 11, 12, 3]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -167,8 +169,10 @@ fn test_words_tms_last_simple() { let mut s = Search::new(&txn, &index); s.query("extravagant the quick brown fox jumps over the lazy dog"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[]"); } #[test] @@ -179,10 +183,12 @@ fn test_words_tms_last_phrase() { let mut s = Search::new(&txn, &index); s.query("\"the quick brown fox\" jumps over the lazy dog"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); // "The quick brown fox" is a phrase, not deleted by this term matching strategy insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 17, 21, 8, 6, 7, 5, 4, 11, 12]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -202,11 +208,13 @@ fn test_words_tms_last_phrase() { let mut s = Search::new(&txn, &index); s.query("\"the quick brown fox\" jumps over the \"lazy\" dog"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); // "lazy" is a phrase, not deleted by this term matching strategy // but words before it can be deleted insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 17, 21, 8, 11, 12]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -222,10 +230,12 @@ fn test_words_tms_last_phrase() { let mut s = Search::new(&txn, &index); s.query("\"the quick brown fox jumps over the lazy dog\""); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); // The whole query is a phrase, no terms are removed insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[[Words(Words { matching_words: 9, max_matching_words: 9 })]]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -236,10 +246,12 @@ fn test_words_tms_last_phrase() { let mut s = Search::new(&txn, &index); s.query("\"the quick brown fox jumps over the lazy dog"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); // The whole query is still a phrase, even without closing quotes, so no terms are removed insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[[Words(Words { matching_words: 9, max_matching_words: 9 })]]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -261,10 +273,12 @@ fn test_words_proximity_tms_last_simple() { let mut s = Search::new(&txn, &index); s.query("the quick brown fox jumps over the lazy dog"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); // 7 is better than 6 because of the proximity between "the" and its surrounding terms insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -294,10 +308,12 @@ fn test_words_proximity_tms_last_simple() { let mut s = Search::new(&txn, &index); s.query("the brown quick fox jumps over the lazy dog"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); // 10 is better than 9 because of the proximity between "quick" and "brown" insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -338,11 +354,13 @@ fn test_words_proximity_tms_last_phrase() { let mut s = Search::new(&txn, &index); s.query("the \"quick brown\" fox jumps over the lazy dog"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); // "quick brown" is a phrase. The proximity of its first and last words // to their adjacent query words should be taken into account insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5, 4, 11, 12, 3]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -367,12 +385,14 @@ fn test_words_proximity_tms_last_phrase() { let mut s = Search::new(&txn, &index); s.query("the \"quick brown\" \"fox jumps\" over the lazy dog"); s.terms_matching_strategy(TermsMatchingStrategy::Last); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); // "quick brown" is a phrase. The proximity of its first and last words // to their adjacent query words should be taken into account. // The same applies to `fox jumps`. insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -404,9 +424,11 @@ fn test_words_tms_all() { let mut s = Search::new(&txn, &index); s.query("the quick brown fox jumps over the lazy dog"); s.terms_matching_strategy(TermsMatchingStrategy::All); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]"); + insta::assert_snapshot!(format!("{document_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @r###" [ @@ -428,9 +450,11 @@ fn test_words_tms_all() { let mut s = Search::new(&txn, &index); s.query("extravagant"); s.terms_matching_strategy(TermsMatchingStrategy::All); - let SearchResult { documents_ids, .. } = s.execute().unwrap(); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); + insta::assert_snapshot!(format!("{document_scores:?}"), @"[]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); insta::assert_debug_snapshot!(texts, @"[]"); } diff --git a/milli/tests/assets/test_set.ndjson b/milli/tests/assets/test_set.ndjson index 175d9b3ce..351aac417 100644 --- a/milli/tests/assets/test_set.ndjson +++ b/milli/tests/assets/test_set.ndjson @@ -68,7 +68,7 @@ "word_rank": 0, "typo_rank": 1, "proximity_rank": 16, - "attribute_rank": 208, + "attribute_rank": 209, "exact_rank": 5, "asc_desc_rank": 3, "sort_by_rank": 2, @@ -155,7 +155,7 @@ "word_rank": 1, "typo_rank": 0, "proximity_rank": 1, - "attribute_rank": 1, + "attribute_rank": 2, "exact_rank": 3, "asc_desc_rank": 4, "sort_by_rank": 1, @@ -199,7 +199,7 @@ "word_rank": 1, "typo_rank": 0, "proximity_rank": 1, - "attribute_rank": 2, + "attribute_rank": 1, "exact_rank": 3, "asc_desc_rank": 2, "sort_by_rank": 1, @@ -220,7 +220,7 @@ "word_rank": 0, "typo_rank": 2, "proximity_rank": 10, - "attribute_rank": 209, + "attribute_rank": 208, "exact_rank": 6, "asc_desc_rank": 1, "sort_by_rank": 2,