From 12a7a45930ef4ec59068885bd9eab6077e30dcdd Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:27:50 +0200 Subject: [PATCH 01/14] Add roaring to meilisearch --- Cargo.lock | 1 + meilisearch/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 087b7f87f..7cdf80b8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3394,6 +3394,7 @@ dependencies = [ "rayon", "regex", "reqwest", + "roaring", "rustls 0.21.12", "rustls-pemfile 1.0.4", "segment", diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 9119651db..97a10d3d9 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -102,6 +102,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] } tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-actix-web = "0.7.11" build-info = { version = "1.7.0", path = "../build-info" } +roaring = "0.10.2" [dev-dependencies] actix-rt = "2.10.0" From 7fb3e378ff385b12e1739f6685f8cd6112d82664 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:28:14 +0200 Subject: [PATCH 02/14] Do not fail sort comparisons when the field name or target point are different --- milli/src/score_details.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/milli/src/score_details.rs b/milli/src/score_details.rs index 0a9b77e2b..1efa3b8e6 100644 --- a/milli/src/score_details.rs +++ b/milli/src/score_details.rs @@ -425,9 +425,6 @@ pub struct Sort { impl PartialOrd for Sort { fn partial_cmp(&self, other: &Self) -> Option { - if self.field_name != other.field_name { - return None; - } if self.ascending != other.ascending { return None; } @@ -466,9 +463,6 @@ pub struct GeoSort { impl PartialOrd for GeoSort { fn partial_cmp(&self, other: &Self) -> Option { - if self.target_point != other.target_point { - return None; - } if self.ascending != other.ascending { return None; } From e83da004464f240f20fac815bad56c1aabc86e92 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 
Jul 2024 16:29:35 +0200 Subject: [PATCH 03/14] Milli changes to match to allow for more flexible lifetimes --- milli/src/search/new/matches/mod.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index f387e232b..7bc4d9c5d 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -46,7 +46,7 @@ impl<'m> MatcherBuilder<'m> { self } - pub fn build<'t>(&'m self, text: &'t str) -> Matcher<'t, 'm> { + pub fn build<'t>(&self, text: &'t str) -> Matcher<'t, 'm, '_> { let crop_marker = match &self.crop_marker { Some(marker) => marker.as_str(), None => DEFAULT_CROP_MARKER, @@ -105,19 +105,19 @@ pub struct MatchBounds { pub length: usize, } -/// Structure used to analize a string, compute words that match, +/// Structure used to analyze a string, compute words that match, /// and format the source string, returning a highlighted and cropped sub-string. -pub struct Matcher<'t, 'm> { +pub struct Matcher<'t, 'tokenizer, 'b> { text: &'t str, - matching_words: &'m MatchingWords, - tokenizer: &'m Tokenizer<'m>, - crop_marker: &'m str, - highlight_prefix: &'m str, - highlight_suffix: &'m str, + matching_words: &'b MatchingWords, + tokenizer: &'b Tokenizer<'tokenizer>, + crop_marker: &'b str, + highlight_prefix: &'b str, + highlight_suffix: &'b str, matches: Option<(Vec>, Vec)>, } -impl<'t> Matcher<'t, '_> { +impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> { /// Iterates over tokens and save any of them that matches the query. 
fn compute_matches(&mut self) -> &mut Self { /// some words are counted as matches only if they are close together and in the good order, From b9982587d4db43a1c175353a670ae8f4de73e47d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:31:44 +0200 Subject: [PATCH 04/14] Add new errors to meilisearch --- meilisearch-types/src/deserr/mod.rs | 1 + meilisearch-types/src/error.rs | 11 +++++++++++ meilisearch/src/error.rs | 10 ++++++++++ 3 files changed, 22 insertions(+) diff --git a/meilisearch-types/src/deserr/mod.rs b/meilisearch-types/src/deserr/mod.rs index 87ca342d7..3c5e0fcf8 100644 --- a/meilisearch-types/src/deserr/mod.rs +++ b/meilisearch-types/src/deserr/mod.rs @@ -192,6 +192,7 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError); merge_with_error_impl_take_error_message!(ParseTaskKindError); merge_with_error_impl_take_error_message!(ParseTaskStatusError); merge_with_error_impl_take_error_message!(IndexUidFormatError); +merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight); merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold); merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold); diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 092d852ac..a88ca6059 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -238,6 +238,11 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchWeight , 
InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ; @@ -512,6 +517,12 @@ impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio { } } +impl fmt::Display for deserr_codes::InvalidMultiSearchWeight { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "the value of `weight` is invalid, expected a positive float (>= 0.0).") + } +} + impl fmt::Display for deserr_codes::InvalidSimilarId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 1d2475948..59b86e774 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -25,6 +25,10 @@ pub enum MeilisearchHttpError { DocumentNotFound(String), #[error("Sending an empty filter is forbidden.")] EmptyFilter, + #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")] + FederationOptionsInNonFederatedRequest(usize), + #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")] + PaginationInFederatedQuery(usize, &'static str), #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] InvalidExpression(&'static [&'static str], Value), #[error("A {0} payload is missing.")] @@ -86,6 +90,12 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::DocumentFormat(e) => e.error_code(), MeilisearchHttpError::Join(_) => Code::Internal, MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid, + MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => { + Code::InvalidMultiSearchFederationOptions + } + 
MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { + Code::InvalidMultiSearchQueryPagination + } } } } From edab4e75b007d4e48b0102c0a012031d928ba5d8 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:33:24 +0200 Subject: [PATCH 05/14] Make SearchKind cloneable --- meilisearch/src/search.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 2bc87d2ba..14ff6deff 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -257,11 +257,13 @@ pub struct HybridQuery { pub embedder: Option, } +#[derive(Clone)] pub enum SearchKind { KeywordOnly, SemanticOnly { embedder_name: String, embedder: Arc }, Hybrid { embedder_name: String, embedder: Arc, semantic_ratio: f32 }, } + impl SearchKind { pub(crate) fn semantic( index_scheduler: &index_scheduler::IndexScheduler, From 2123d76089b28a5ffc5f49568c81eff0ffe87fe9 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:35:11 +0200 Subject: [PATCH 06/14] search: introduce "search_from_kind" --- meilisearch/src/search.rs | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 14ff6deff..54dc6b0b7 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -866,15 +866,7 @@ pub fn perform_search( used_negative_operator, }, semantic_hit_count, - ) = match &search_kind { - SearchKind::KeywordOnly => (search.execute()?, None), - SearchKind::SemanticOnly { .. } => { - let results = search.execute()?; - let semantic_hit_count = results.document_scores.len() as u32; - (results, Some(semantic_hit_count)) - } - SearchKind::Hybrid { semantic_ratio, .. 
} => search.execute_hybrid(*semantic_ratio)?, - }; + ) = search_from_kind(search_kind, search)?; let SearchQuery { q, @@ -990,6 +982,22 @@ pub fn perform_search( Ok(result) } +pub fn search_from_kind( + search_kind: SearchKind, + search: milli::Search<'_>, +) -> Result<(milli::SearchResult, Option), MeilisearchHttpError> { + let (milli_result, semantic_hit_count) = match &search_kind { + SearchKind::KeywordOnly => (search.execute()?, None), + SearchKind::SemanticOnly { .. } => { + let results = search.execute()?; + let semantic_hit_count = results.document_scores.len() as u32; + (results, Some(semantic_hit_count)) + } + SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?, + }; + Ok((milli_result, semantic_hit_count)) +} + struct AttributesFormat { attributes_to_retrieve: Option>, retrieve_vectors: RetrieveVectors, From d3a6d2a6faa46fe1d83fa51ec935b7a424988483 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:35:59 +0200 Subject: [PATCH 07/14] search: introduce hitmaker --- meilisearch/src/search.rs | 324 +++++++++++++++++++++++++------------- 1 file changed, 213 insertions(+), 111 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 54dc6b0b7..2a684817a 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -1,6 +1,6 @@ use core::fmt; use std::cmp::min; -use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -913,8 +913,13 @@ pub fn perform_search( show_ranking_score_details, }; - let documents = - make_hits(index, &rtxn, format, matching_words, documents_ids, document_scores)?; + let documents = make_hits( + index, + &rtxn, + format, + matching_words, + documents_ids.iter().copied().zip(document_scores.iter()), + )?; let number_of_hits = min(candidates.len() as usize, max_total_hits); let hits_info = if 
is_finite_pagination { @@ -1043,131 +1048,191 @@ impl RetrieveVectors { } } -fn make_hits( - index: &Index, - rtxn: &RoTxn<'_>, - format: AttributesFormat, - matching_words: milli::MatchingWords, - documents_ids: Vec, - document_scores: Vec>, -) -> Result, MeilisearchHttpError> { - let fields_ids_map = index.fields_ids_map(rtxn).unwrap(); - let displayed_ids = - index.displayed_fields_ids(rtxn)?.map(|fields| fields.into_iter().collect::>()); +struct HitMaker<'a> { + index: &'a Index, + rtxn: &'a RoTxn<'a>, + fields_ids_map: FieldsIdsMap, + displayed_ids: BTreeSet, + vectors_fid: Option, + retrieve_vectors: RetrieveVectors, + to_retrieve_ids: BTreeSet, + embedding_configs: Vec, + formatter_builder: MatcherBuilder<'a>, + formatted_options: BTreeMap, + show_ranking_score: bool, + show_ranking_score_details: bool, + sort: Option>, + show_matches_position: bool, +} - let vectors_fid = fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); +impl<'a> HitMaker<'a> { + pub fn tokenizer<'b>( + script_lang_map: &'b HashMap>, + dictionary: Option<&'b [&'b str]>, + separators: Option<&'b [&'b str]>, + ) -> milli::tokenizer::Tokenizer<'b> { + let mut tokenizer_builder = TokenizerBuilder::default(); + tokenizer_builder.create_char_map(true); + if !script_lang_map.is_empty() { + tokenizer_builder.allow_list(script_lang_map); + } - let vectors_is_hidden = match (&displayed_ids, vectors_fid) { - // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid - (None, _) => false, - // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field - (Some(_), None) => true, - // displayed_ids is a finit list, so hide if `_vectors` is not part of it - (Some(map), Some(vectors_fid)) => map.contains(&vectors_fid), - }; + if let Some(separators) = separators { + tokenizer_builder.separators(separators); + } - let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors { - if 
vectors_is_hidden { - RetrieveVectors::Hide + if let Some(dictionary) = dictionary { + tokenizer_builder.words_dict(dictionary); + } + + tokenizer_builder.into_tokenizer() + } + + pub fn formatter_builder( + matching_words: milli::MatchingWords, + tokenizer: milli::tokenizer::Tokenizer<'_>, + ) -> MatcherBuilder<'_> { + let formatter_builder = MatcherBuilder::new(matching_words, tokenizer); + + formatter_builder + } + + pub fn new( + index: &'a Index, + rtxn: &'a RoTxn<'a>, + format: AttributesFormat, + mut formatter_builder: MatcherBuilder<'a>, + ) -> Result { + formatter_builder.crop_marker(format.crop_marker); + formatter_builder.highlight_prefix(format.highlight_pre_tag); + formatter_builder.highlight_suffix(format.highlight_post_tag); + + let fields_ids_map = index.fields_ids_map(rtxn)?; + let displayed_ids = index + .displayed_fields_ids(rtxn)? + .map(|fields| fields.into_iter().collect::>()); + + let vectors_fid = + fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); + + let vectors_is_hidden = match (&displayed_ids, vectors_fid) { + // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid + (None, _) => false, + // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field + (Some(_), None) => true, + // displayed_ids is a finit list, so hide if `_vectors` is not part of it + (Some(map), Some(vectors_fid)) => map.contains(&vectors_fid), + }; + + let displayed_ids = + displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); + + let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors { + if vectors_is_hidden { + RetrieveVectors::Hide + } else { + RetrieveVectors::Retrieve + } } else { - RetrieveVectors::Retrieve - } - } else { - format.retrieve_vectors - }; + format.retrieve_vectors + }; - let displayed_ids = - displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); - let fids = 
|attrs: &BTreeSet| { - let mut ids = BTreeSet::new(); - for attr in attrs { - if attr == "*" { - ids.clone_from(&displayed_ids); - break; + let fids = |attrs: &BTreeSet| { + let mut ids = BTreeSet::new(); + for attr in attrs { + if attr == "*" { + ids.clone_from(&displayed_ids); + break; + } + + if let Some(id) = fields_ids_map.id(attr) { + ids.insert(id); + } } + ids + }; + let to_retrieve_ids: BTreeSet<_> = format + .attributes_to_retrieve + .as_ref() + .map(fids) + .unwrap_or_else(|| displayed_ids.clone()) + .intersection(&displayed_ids) + .cloned() + .collect(); - if let Some(id) = fields_ids_map.id(attr) { - ids.insert(id); - } - } - ids - }; - let to_retrieve_ids: BTreeSet<_> = format - .attributes_to_retrieve - .as_ref() - .map(fids) - .unwrap_or_else(|| displayed_ids.clone()) - .intersection(&displayed_ids) - .cloned() - .collect(); + let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default(); + let attr_to_crop = format.attributes_to_crop.unwrap_or_default(); + let formatted_options = compute_formatted_options( + &attr_to_highlight, + &attr_to_crop, + format.crop_length, + &to_retrieve_ids, + &fields_ids_map, + &displayed_ids, + ); - let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default(); - let attr_to_crop = format.attributes_to_crop.unwrap_or_default(); - let formatted_options = compute_formatted_options( - &attr_to_highlight, - &attr_to_crop, - format.crop_length, - &to_retrieve_ids, - &fields_ids_map, - &displayed_ids, - ); - let mut tokenizer_builder = TokenizerBuilder::default(); - tokenizer_builder.create_char_map(true); - let script_lang_map = index.script_language(rtxn)?; - if !script_lang_map.is_empty() { - tokenizer_builder.allow_list(&script_lang_map); + let embedding_configs = index.embedding_configs(rtxn)?; + + Ok(Self { + index, + rtxn, + fields_ids_map, + displayed_ids, + vectors_fid, + retrieve_vectors, + to_retrieve_ids, + embedding_configs, + formatter_builder, + formatted_options, + 
show_ranking_score: format.show_ranking_score, + show_ranking_score_details: format.show_ranking_score_details, + show_matches_position: format.show_matches_position, + sort: format.sort, + }) } - let separators = index.allowed_separators(rtxn)?; - let separators: Option> = - separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); - if let Some(ref separators) = separators { - tokenizer_builder.separators(separators); - } - let dictionary = index.dictionary(rtxn)?; - let dictionary: Option> = - dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); - if let Some(ref dictionary) = dictionary { - tokenizer_builder.words_dict(dictionary); - } - let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build()); - formatter_builder.crop_marker(format.crop_marker); - formatter_builder.highlight_prefix(format.highlight_pre_tag); - formatter_builder.highlight_suffix(format.highlight_post_tag); - let mut documents = Vec::new(); - let embedding_configs = index.embedding_configs(rtxn)?; - let documents_iter = index.documents(rtxn, documents_ids)?; - for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) { + + pub fn make_hit( + &self, + id: u32, + score: &[ScoreDetails], + ) -> Result { + let (_, obkv) = + self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?; + // First generate a document with all the displayed fields - let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; + let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?; let add_vectors_fid = - vectors_fid.filter(|_fid| retrieve_vectors == RetrieveVectors::Retrieve); + self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve); // select the attributes to retrieve - let attributes_to_retrieve = to_retrieve_ids + let attributes_to_retrieve = self + .to_retrieve_ids .iter() // skip the vectors_fid if RetrieveVectors::Hide - 
.filter(|fid| match vectors_fid { + .filter(|fid| match self.vectors_fid { Some(vectors_fid) => { - !(retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid) + !(self.retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid) } None => true, }) // need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve` .chain(add_vectors_fid.iter()) - .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); + .map(|&fid| self.fields_ids_map.name(fid).expect("Missing field name")); + let mut document = permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); - if retrieve_vectors == RetrieveVectors::Retrieve { + if self.retrieve_vectors == RetrieveVectors::Retrieve { // Clippy is wrong #[allow(clippy::manual_unwrap_or_default)] let mut vectors = match document.remove("_vectors") { Some(Value::Object(map)) => map, _ => Default::default(), }; - for (name, vector) in index.embeddings(rtxn, id)? { - let user_provided = embedding_configs + for (name, vector) in self.index.embeddings(self.rtxn, id)? 
{ + let user_provided = self + .embedding_configs .iter() .find(|conf| conf.name == name) .is_some_and(|conf| conf.user_provided.contains(id)); @@ -1180,21 +1245,21 @@ fn make_hits( let (matches_position, formatted) = format_fields( &displayed_document, - &fields_ids_map, - &formatter_builder, - &formatted_options, - format.show_matches_position, - &displayed_ids, + &self.fields_ids_map, + &self.formatter_builder, + &self.formatted_options, + self.show_matches_position, + &self.displayed_ids, )?; - if let Some(sort) = format.sort.as_ref() { + if let Some(sort) = self.sort.as_ref() { insert_geo_distance(sort, &mut document); } let ranking_score = - format.show_ranking_score.then(|| ScoreDetails::global_score(score.iter())); + self.show_ranking_score.then(|| ScoreDetails::global_score(score.iter())); let ranking_score_details = - format.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter())); + self.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter())); let hit = SearchHit { document, @@ -1203,7 +1268,38 @@ fn make_hits( ranking_score_details, ranking_score, }; - documents.push(hit); + + Ok(hit) + } +} + +fn make_hits<'a>( + index: &Index, + rtxn: &RoTxn<'_>, + format: AttributesFormat, + matching_words: milli::MatchingWords, + documents_ids_scores: impl Iterator)> + 'a, +) -> Result, MeilisearchHttpError> { + let mut documents = Vec::new(); + + let script_lang_map = index.script_language(rtxn)?; + + let dictionary = index.dictionary(rtxn)?; + let dictionary: Option> = + dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); + let separators = index.allowed_separators(rtxn)?; + let separators: Option> = + separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); + + let tokenizer = + HitMaker::tokenizer(&script_lang_map, dictionary.as_deref(), separators.as_deref()); + + let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); + + let hit_maker = HitMaker::new(index, rtxn, 
format, formatter_builder)?; + + for (id, score) in documents_ids_scores { + documents.push(hit_maker.make_hit(id, score)?); } Ok(documents) } @@ -1319,7 +1415,13 @@ pub fn perform_similar( show_ranking_score_details, }; - let hits = make_hits(index, &rtxn, format, Default::default(), documents_ids, document_scores)?; + let hits = make_hits( + index, + &rtxn, + format, + Default::default(), + documents_ids.iter().copied().zip(document_scores.iter()), + )?; let max_total_hits = index .pagination_max_total_hits(&rtxn) @@ -1492,10 +1594,10 @@ fn make_document( Ok(document) } -fn format_fields<'a>( +fn format_fields( document: &Document, field_ids_map: &FieldsIdsMap, - builder: &'a MatcherBuilder<'a>, + builder: &MatcherBuilder<'_>, formatted_options: &BTreeMap, compute_matches: bool, displayable_ids: &BTreeSet, @@ -1550,9 +1652,9 @@ fn format_fields<'a>( Ok((matches_position, document)) } -fn format_value<'a>( +fn format_value( value: Value, - builder: &'a MatcherBuilder<'a>, + builder: &MatcherBuilder<'_>, format_options: Option, infos: &mut Vec, compute_matches: bool, From 5c323cecc77ed23410543da0b9d62215bd7b5a23 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:36:25 +0200 Subject: [PATCH 08/14] search: introduce federated search --- meilisearch/src/search.rs | 41 +- meilisearch/src/search/federated.rs | 633 ++++++++++++++ .../src/search/federated/ranking_rules.rs | 823 ++++++++++++++++++ 3 files changed, 1489 insertions(+), 8 deletions(-) create mode 100644 meilisearch/src/search/federated.rs create mode 100644 meilisearch/src/search/federated/ranking_rules.rs diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 2a684817a..ea4bbd038 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -31,6 +31,9 @@ use serde_json::{json, Value}; use crate::error::MeilisearchHttpError; +mod federated; +pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions}; + type MatchesPosition = 
BTreeMap>; pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0; @@ -360,7 +363,7 @@ impl SearchQuery { } } -/// A `SearchQuery` + an index UID. +/// A `SearchQuery` + an index UID and optional FederationOptions. // This struct contains the fields of `SearchQuery` inline. // This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields. // The `From` implementation ensures both structs remain up to date. @@ -375,10 +378,10 @@ pub struct SearchQueryWithIndex { pub vector: Option>, #[deserr(default, error = DeserrJsonError)] pub hybrid: Option, - #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] - pub offset: usize, - #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] - pub limit: usize, + #[deserr(default, error = DeserrJsonError)] + pub offset: Option, + #[deserr(default, error = DeserrJsonError)] + pub limit: Option, #[deserr(default, error = DeserrJsonError)] pub page: Option, #[deserr(default, error = DeserrJsonError)] @@ -419,12 +422,33 @@ pub struct SearchQueryWithIndex { pub attributes_to_search_on: Option>, #[deserr(default, error = DeserrJsonError, default)] pub ranking_score_threshold: Option, + + #[deserr(default)] + pub federation_options: Option, } impl SearchQueryWithIndex { - pub fn into_index_query(self) -> (IndexUid, SearchQuery) { + pub fn has_federation_options(&self) -> bool { + self.federation_options.is_some() + } + pub fn has_pagination(&self) -> Option<&'static str> { + if self.offset.is_some() { + Some("offset") + } else if self.limit.is_some() { + Some("limit") + } else if self.page.is_some() { + Some("page") + } else if self.hits_per_page.is_some() { + Some("hitsPerPage") + } else { + None + } + } + + pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option) { let SearchQueryWithIndex { index_uid, + federation_options, q, vector, offset, @@ -456,8 +480,8 @@ impl SearchQueryWithIndex { SearchQuery { q, vector, - offset, - limit, + offset: 
offset.unwrap_or(DEFAULT_SEARCH_OFFSET()), + limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT()), page, hits_per_page, attributes_to_retrieve, @@ -482,6 +506,7 @@ impl SearchQueryWithIndex { // do not use ..Default::default() here, // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` }, + federation_options, ) } } diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs new file mode 100644 index 000000000..ebb1da7b8 --- /dev/null +++ b/meilisearch/src/search/federated.rs @@ -0,0 +1,633 @@ +use std::cmp::Ordering; +use std::collections::BTreeMap; +use std::fmt; +use std::iter::Zip; +use std::rc::Rc; +use std::str::FromStr as _; +use std::time::Duration; +use std::vec::{IntoIter, Vec}; + +use actix_http::StatusCode; +use index_scheduler::{IndexScheduler, RoFeatures}; +use meilisearch_types::deserr::DeserrJsonError; +use meilisearch_types::error::deserr_codes::{ + InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, +}; +use meilisearch_types::error::ResponseError; +use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; +use meilisearch_types::milli::{self, DocumentId, TimeBudget}; +use roaring::RoaringBitmap; +use serde::Serialize; + +use self::ranking_rules::RankingRules; +use super::{ + prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, + SearchQuery, SearchQueryWithIndex, +}; +use crate::error::MeilisearchHttpError; +use crate::routes::indexes::search::search_kind; + +mod ranking_rules; + +pub const DEFAULT_FEDERATED_WEIGHT: fn() -> f64 = || 1.0; + +#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct FederationOptions { + #[deserr(default, error = DeserrJsonError)] + pub weight: Weight, +} + +#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] +#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)] +pub struct 
Weight(f64); + +impl Default for Weight { + fn default() -> Self { + Weight(DEFAULT_FEDERATED_WEIGHT()) + } +} + +impl std::convert::TryFrom for Weight { + type Error = InvalidMultiSearchWeight; + + fn try_from(f: f64) -> Result { + // the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable + #[allow(clippy::manual_range_contains)] + if f < 0.0 { + Err(InvalidMultiSearchWeight) + } else { + Ok(Weight(f)) + } + } +} + +impl std::ops::Deref for Weight { + type Target = f64; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[derive(Debug, deserr::Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct Federation { + #[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] + pub limit: usize, + #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] + pub offset: usize, +} + +#[derive(Debug, deserr::Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct FederatedSearch { + pub queries: Vec, + #[deserr(default)] + pub federation: Option, +} +#[derive(Serialize, Clone, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct FederatedSearchResult { + pub hits: Vec, + pub processing_time_ms: u128, + #[serde(flatten)] + pub hits_info: HitsInfo, + + #[serde(skip_serializing_if = "Option::is_none")] + pub semantic_hit_count: Option, + + // These fields are only used for analytics purposes + #[serde(skip)] + pub degraded: bool, + #[serde(skip)] + pub used_negative_operator: bool, +} + +impl fmt::Debug for FederatedSearchResult { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let FederatedSearchResult { + hits, + processing_time_ms, + hits_info, + semantic_hit_count, + degraded, + used_negative_operator, + } = self; + + let mut debug = f.debug_struct("SearchResult"); + // The most important thing when looking at a search result is the time it took to process + 
debug.field("processing_time_ms", &processing_time_ms); + debug.field("hits", &format!("[{} hits returned]", hits.len())); + debug.field("hits_info", &hits_info); + if *used_negative_operator { + debug.field("used_negative_operator", used_negative_operator); + } + if *degraded { + debug.field("degraded", degraded); + } + if let Some(semantic_hit_count) = semantic_hit_count { + debug.field("semantic_hit_count", &semantic_hit_count); + } + + debug.finish() + } +} + +struct WeightedScore<'a> { + details: &'a [ScoreDetails], + weight: f64, +} + +impl<'a> WeightedScore<'a> { + pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self { + Self { details, weight } + } + + pub fn weighted_global_score(&self) -> f64 { + ScoreDetails::global_score(self.details.iter()) * self.weight + } + + pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering { + self.weighted_global_score() + .partial_cmp(&other.weighted_global_score()) + // both are numbers, possibly infinite + .unwrap() + } + + pub fn compare(&self, other: &Self) -> Ordering { + let mut left_it = ScoreDetails::score_values(self.details.iter()); + let mut right_it = ScoreDetails::score_values(other.details.iter()); + + loop { + let left = left_it.next(); + let right = right_it.next(); + + match (left, right) { + (None, None) => return Ordering::Equal, + (None, Some(_)) => return Ordering::Less, + (Some(_), None) => return Ordering::Greater, + (Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => { + let left = left * self.weight; + let right = right * other.weight; + if (left - right).abs() <= f64::EPSILON { + continue; + } + return left.partial_cmp(&right).unwrap(); + } + (Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => { + match left.partial_cmp(right) { + Some(Ordering::Equal) => continue, + Some(order) => return order, + None => return self.compare_weighted_global_scores(other), + } + } + (Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => { + match 
left.partial_cmp(right) { + Some(Ordering::Equal) => continue, + Some(order) => return order, + None => { + return self.compare_weighted_global_scores(other); + } + } + } + // not comparable details, use global + (Some(ScoreValue::Score(_)), Some(_)) + | (Some(_), Some(ScoreValue::Score(_))) + | (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_))) + | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => { + let left_count = left_it.count(); + let right_count = right_it.count(); + // compare how many remaining groups of rules each side has. + // the side with the most remaining groups wins. + return left_count + .cmp(&right_count) + // breaks ties with the global ranking score + .then_with(|| self.compare_weighted_global_scores(other)); + } + } + } + } +} + +struct QueryByIndex { + query: SearchQuery, + federation_options: FederationOptions, + query_index: usize, +} + +struct SearchResultByQuery<'a> { + documents_ids: Vec, + document_scores: Vec>, + federation_options: FederationOptions, + hit_maker: HitMaker<'a>, + query_index: usize, +} + +struct SearchResultByQueryIter<'a> { + it: Zip, IntoIter>>, + federation_options: FederationOptions, + hit_maker: Rc>, + query_index: usize, +} + +impl<'a> SearchResultByQueryIter<'a> { + fn new( + SearchResultByQuery { + documents_ids, + document_scores, + federation_options, + hit_maker, + query_index, + }: SearchResultByQuery<'a>, + ) -> Self { + let it = documents_ids.into_iter().zip(document_scores); + Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index } + } +} + +struct SearchResultByQueryIterItem<'a> { + docid: DocumentId, + score: Vec, + federation_options: FederationOptions, + hit_maker: Rc>, + query_index: usize, +} + +fn merge_index_local_results( + results_by_query: Vec>, +) -> impl Iterator + '_ { + itertools::kmerge_by( + results_by_query.into_iter().map(SearchResultByQueryIter::new), + |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| { + let left_score
= WeightedScore::new(&left.score, *left.federation_options.weight); + let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); + + match left_score.compare(&right_score) { + // the biggest score goes first + Ordering::Greater => true, + // break ties using query index + Ordering::Equal => left.query_index < right.query_index, + Ordering::Less => false, + } + }, + ) +} + +fn merge_index_global_results( + results_by_index: Vec, +) -> impl Iterator { + itertools::kmerge_by( + results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()), + |left: &SearchHitByIndex, right: &SearchHitByIndex| { + let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); + let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); + + match left_score.compare(&right_score) { + // the biggest score goes first + Ordering::Greater => true, + // break ties using query index + Ordering::Equal => left.query_index < right.query_index, + Ordering::Less => false, + } + }, + ) +} + +impl<'a> Iterator for SearchResultByQueryIter<'a> { + type Item = SearchResultByQueryIterItem<'a>; + + fn next(&mut self) -> Option { + let (docid, score) = self.it.next()?; + Some(SearchResultByQueryIterItem { + docid, + score, + federation_options: self.federation_options, + hit_maker: Rc::clone(&self.hit_maker), + query_index: self.query_index, + }) + } +} + +struct SearchHitByIndex { + hit: SearchHit, + score: Vec, + federation_options: FederationOptions, + query_index: usize, +} + +struct SearchResultByIndex { + hits: Vec, + candidates: RoaringBitmap, + degraded: bool, + used_negative_operator: bool, +} + +pub fn perform_federated_search( + index_scheduler: &IndexScheduler, + queries: Vec, + federation: Federation, + features: RoFeatures, +) -> Result { + let before_search = std::time::Instant::now(); + + // this implementation partitions the queries by index to guarantee an important property: + // - all the
queries to a particular index use the same read transaction. + // This is an important property, otherwise we cannot guarantee the self-consistency of the results. + + // 1. partition queries by index + let mut queries_by_index: BTreeMap> = Default::default(); + for (query_index, federated_query) in queries.into_iter().enumerate() { + if let Some(pagination_field) = federated_query.has_pagination() { + return Err(MeilisearchHttpError::PaginationInFederatedQuery( + query_index, + pagination_field, + ) + .into()); + } + + let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); + + queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { + query, + federation_options: federation_options.unwrap_or_default(), + query_index, + }) + } + + // 2. perform queries, merge and make hits index by index + let required_hit_count = federation.limit + federation.offset; + // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic + // Then in step (3), we'll update its value if there is any semantic search + let mut semantic_hit_count = None; + let mut results_by_index = Vec::with_capacity(queries_by_index.len()); + let mut previous_query_data: Option<(RankingRules, usize, String)> = None; + + for (index_uid, queries) in queries_by_index { + let index = match index_scheduler.index(&index_uid) { + Ok(index) => index, + Err(err) => { + let mut err = ResponseError::from(err); + // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but + // here the resource not found is not part of the URL. 
+ err.code = StatusCode::BAD_REQUEST; + if let Some(query) = queries.first() { + err.message = + format!("Inside `.queries[{}]`: {}", query.query_index, err.message); + } + return Err(err); + } + }; + + // Important: this is the only transaction we'll use for this index during this federated search + let rtxn = index.read_txn()?; + + let criteria = index.criteria(&rtxn)?; + + // stuff we need for the hitmaker + let script_lang_map = index.script_language(&rtxn)?; + + let dictionary = index.dictionary(&rtxn)?; + let dictionary: Option> = + dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); + let separators = index.allowed_separators(&rtxn)?; + let separators: Option> = + separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); + + // each query gets its individual cutoff + let cutoff = index.search_cutoff(&rtxn)?; + + let mut degraded = false; + let mut used_negative_operator = false; + let mut candidates = RoaringBitmap::new(); + + // 2.1. Compute all candidates for each query in the index + let mut results_by_query = Vec::with_capacity(queries.len()); + + for QueryByIndex { query, federation_options, query_index } in queries { + // use an immediately invoked lambda to capture the result without returning from the function + + let res: Result<(), ResponseError> = (|| { + let search_kind = search_kind(&query, index_scheduler, &index, features)?; + + let canonicalization_kind = match (&search_kind, &query.q) { + (SearchKind::SemanticOnly { .. 
}, _) => { + ranking_rules::CanonicalizationKind::Vector + } + (_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword, + _ => ranking_rules::CanonicalizationKind::Placeholder, + }; + + let sort = if let Some(sort) = &query.sort { + let sorts: Vec<_> = + match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { + Ok(sorts) => sorts, + Err(asc_desc_error) => { + return Err(milli::Error::from(milli::SortError::from( + asc_desc_error, + )) + .into()) + } + }; + Some(sorts) + } else { + None + }; + + let ranking_rules = ranking_rules::RankingRules::new( + criteria.clone(), + sort, + query.matching_strategy.into(), + canonicalization_kind, + ); + + if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) = + previous_query_data.take() + { + if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) { + return Err(error.to_response_error( + &ranking_rules, + &previous_ranking_rules, + query_index, + previous_query_index, + &index_uid, + &previous_index_uid, + )); + } + previous_query_data = if previous_ranking_rules.constraint_count() + > ranking_rules.constraint_count() + { + Some((previous_ranking_rules, previous_query_index, previous_index_uid)) + } else { + Some((ranking_rules, query_index, index_uid.clone())) + }; + } else { + previous_query_data = Some((ranking_rules, query_index, index_uid.clone())); + } + + match search_kind { + SearchKind::KeywordOnly => {} + _ => semantic_hit_count = Some(0), + } + + let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?; + + let time_budget = match cutoff { + Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), + None => TimeBudget::default(), + }; + + let (mut search, _is_finite_pagination, _max_total_hits, _offset) = + prepare_search(&index, &rtxn, &query, &search_kind, time_budget)?; + + search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed); + search.offset(0); + search.limit(required_hit_count); + 
+ let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?; + let format = AttributesFormat { + attributes_to_retrieve: query.attributes_to_retrieve, + retrieve_vectors, + attributes_to_highlight: query.attributes_to_highlight, + attributes_to_crop: query.attributes_to_crop, + crop_length: query.crop_length, + crop_marker: query.crop_marker, + highlight_pre_tag: query.highlight_pre_tag, + highlight_post_tag: query.highlight_post_tag, + show_matches_position: query.show_matches_position, + sort: query.sort, + show_ranking_score: query.show_ranking_score, + show_ranking_score_details: query.show_ranking_score_details, + }; + + let milli::SearchResult { + matching_words, + candidates: query_candidates, + documents_ids, + document_scores, + degraded: query_degraded, + used_negative_operator: query_used_negative_operator, + } = result; + + candidates |= query_candidates; + degraded |= query_degraded; + used_negative_operator |= query_used_negative_operator; + + let tokenizer = HitMaker::tokenizer( + &script_lang_map, + dictionary.as_deref(), + separators.as_deref(), + ); + + let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); + + let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?; + + results_by_query.push(SearchResultByQuery { + federation_options, + hit_maker, + query_index, + documents_ids, + document_scores, + }); + Ok(()) + })(); + + if let Err(mut error) = res { + error.message = format!("Inside `.queries[{query_index}]`: {}", error.message); + return Err(error); + } + } + // 2.2. merge inside index + let mut documents_seen = RoaringBitmap::new(); + let merged_result: Result, ResponseError> = + merge_index_local_results(results_by_query) + // skip documents we've already seen & mark that we saw the current document + .filter(|SearchResultByQueryIterItem { docid, .. 
}| documents_seen.insert(*docid)) + .take(required_hit_count) + // 2.3 make hits + .map( + |SearchResultByQueryIterItem { + docid, + score, + federation_options, + hit_maker, + query_index, + }| { + let mut hit = hit_maker.make_hit(docid, &score)?; + let weighted_score = + ScoreDetails::global_score(score.iter()) * (*federation_options.weight); + + let _federation = serde_json::json!( + { + "indexUid": index_uid, + "queriesPosition": query_index, + "weightedRankingScore": weighted_score, + } + ); + hit.document.insert("_federation".to_string(), _federation); + Ok(SearchHitByIndex { hit, score, federation_options, query_index }) + }, + ) + .collect(); + + let merged_result = merged_result?; + results_by_index.push(SearchResultByIndex { + hits: merged_result, + candidates, + degraded, + used_negative_operator, + }); + } + + // 3. merge hits and metadata across indexes + // 3.1 merge metadata + let (estimated_total_hits, degraded, used_negative_operator) = { + let mut estimated_total_hits = 0; + let mut degraded = false; + let mut used_negative_operator = false; + + for SearchResultByIndex { + hits: _, + candidates, + degraded: degraded_by_index, + used_negative_operator: used_negative_operator_by_index, + } in &results_by_index + { + estimated_total_hits += candidates.len() as usize; + degraded |= *degraded_by_index; + used_negative_operator |= *used_negative_operator_by_index; + } + + (estimated_total_hits, degraded, used_negative_operator) + }; + + // 3.2 merge hits + let merged_hits: Vec<_> = merge_index_global_results(results_by_index) + .skip(federation.offset) + .take(federation.limit) + .inspect(|hit| { + if let Some(semantic_hit_count) = &mut semantic_hit_count { + if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) { + *semantic_hit_count += 1; + } + } + }) + .map(|hit| hit.hit) + .collect(); + + let search_result = FederatedSearchResult { + hits: merged_hits, + processing_time_ms: before_search.elapsed().as_millis(), + hits_info: 
HitsInfo::OffsetLimit { + limit: federation.limit, + offset: federation.offset, + estimated_total_hits, + }, + semantic_hit_count, + degraded, + used_negative_operator, + }; + + Ok(search_result) +} diff --git a/meilisearch/src/search/federated/ranking_rules.rs b/meilisearch/src/search/federated/ranking_rules.rs new file mode 100644 index 000000000..d31c0ed35 --- /dev/null +++ b/meilisearch/src/search/federated/ranking_rules.rs @@ -0,0 +1,823 @@ +use std::collections::HashMap; +use std::fmt::Write; + +use itertools::Itertools as _; +use meilisearch_types::error::{Code, ResponseError}; +use meilisearch_types::milli::{AscDesc, Criterion, Member, TermsMatchingStrategy}; + +pub struct RankingRules { + canonical_criteria: Vec, + canonical_sort: Option>, + canonicalization_actions: Vec, + source_criteria: Vec, + source_sort: Option>, +} + +pub enum CanonicalizationAction { + PrependedWords { + prepended_index: RankingRuleSource, + }, + RemovedDuplicate { + earlier_occurrence: RankingRuleSource, + removed_occurrence: RankingRuleSource, + }, + RemovedWords { + reason: RemoveWords, + removed_occurrence: RankingRuleSource, + }, + RemovedPlaceholder { + removed_occurrence: RankingRuleSource, + }, + TruncatedVector { + vector_rule: RankingRuleSource, + truncated_from: RankingRuleSource, + }, + RemovedVector { + vector_rule: RankingRuleSource, + removed_occurrence: RankingRuleSource, + }, + RemovedSort { + removed_occurrence: RankingRuleSource, + }, +} + +pub enum RemoveWords { + WasPrepended, + MatchingStrategyAll, +} + +impl std::fmt::Display for RemoveWords { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let reason = match self { + RemoveWords::WasPrepended => "it was previously prepended", + RemoveWords::MatchingStrategyAll => "`query.matchingWords` is set to `all`", + }; + f.write_str(reason) + } +} + +pub enum CanonicalizationKind { + Placeholder, + Keyword, + Vector, +} + +pub struct CompatibilityError { + previous: RankingRule, + current: 
RankingRule, +} +impl CompatibilityError { + pub(crate) fn to_response_error( + &self, + ranking_rules: &RankingRules, + previous_ranking_rules: &RankingRules, + query_index: usize, + previous_query_index: usize, + index_uid: &str, + previous_index_uid: &str, + ) -> meilisearch_types::error::ResponseError { + let rule = self.current.as_string( + &ranking_rules.canonical_criteria, + &ranking_rules.canonical_sort, + query_index, + index_uid, + ); + let previous_rule = self.previous.as_string( + &previous_ranking_rules.canonical_criteria, + &previous_ranking_rules.canonical_sort, + previous_query_index, + previous_index_uid, + ); + + let canonicalization_actions = ranking_rules.canonicalization_notes(); + let previous_canonicalization_actions = previous_ranking_rules.canonicalization_notes(); + + let mut msg = String::new(); + let reason = self.reason(); + let _ = writeln!( + &mut msg, + "The results of queries #{previous_query_index} and #{query_index} are incompatible: " + ); + let _ = writeln!(&mut msg, " 1. {previous_rule}"); + let _ = writeln!(&mut msg, " 2. 
{rule}"); + let _ = writeln!(&mut msg, " - {reason}"); + + if !previous_canonicalization_actions.is_empty() { + let _ = write!(&mut msg, " - note: The ranking rules of query #{previous_query_index} were modified during canonicalization:\n{previous_canonicalization_actions}"); + } + + if !canonicalization_actions.is_empty() { + let _ = write!(&mut msg, " - note: The ranking rules of query #{query_index} were modified during canonicalization:\n{canonicalization_actions}"); + } + + ResponseError::from_msg(msg, Code::InvalidMultiSearchQueryRankingRules) + } + pub fn reason(&self) -> &'static str { + match (self.previous.kind, self.current.kind) { + (RankingRuleKind::Relevancy, RankingRuleKind::AscendingSort) + | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingSort) + | (RankingRuleKind::AscendingSort, RankingRuleKind::Relevancy) + | (RankingRuleKind::DescendingSort, RankingRuleKind::Relevancy) => { + "cannot compare a relevancy rule with a sort rule" + } + + (RankingRuleKind::Relevancy, RankingRuleKind::AscendingGeoSort) + | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort) + | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy) + | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => { + "cannot compare a relevancy rule with a sort rule" + } + + (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort) + | (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingSort) => { + "cannot compare two sort rules in opposite directions" + } + + (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingGeoSort) + | (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingGeoSort) + | (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingGeoSort) + | (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingGeoSort) + | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingSort) + | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingSort) + | (RankingRuleKind::DescendingGeoSort, 
RankingRuleKind::AscendingSort) + | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingSort) => { + "cannot compare a sort rule with a geosort rule" + } + + (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingGeoSort) + | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingGeoSort) => { + "cannot compare two geosort rules in opposite directions" + } + (RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) + | (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingSort) + | (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingSort) + | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingGeoSort) + | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingGeoSort) => { + "internal error, comparison should be possible" + } + } + } +} + +impl RankingRules { + pub fn new( + criteria: Vec, + sort: Option>, + terms_matching_strategy: TermsMatchingStrategy, + canonicalization_kind: CanonicalizationKind, + ) -> Self { + let (canonical_criteria, canonical_sort, canonicalization_actions) = + Self::canonicalize(&criteria, &sort, terms_matching_strategy, canonicalization_kind); + Self { + canonical_criteria, + canonical_sort, + canonicalization_actions, + source_criteria: criteria, + source_sort: sort, + } + } + + fn canonicalize( + criteria: &[Criterion], + sort: &Option>, + terms_matching_strategy: TermsMatchingStrategy, + canonicalization_kind: CanonicalizationKind, + ) -> (Vec, Option>, Vec) { + match canonicalization_kind { + CanonicalizationKind::Placeholder => Self::canonicalize_placeholder(criteria, sort), + CanonicalizationKind::Keyword => { + Self::canonicalize_keyword(criteria, sort, terms_matching_strategy) + } + CanonicalizationKind::Vector => Self::canonicalize_vector(criteria, sort), + } + } + + fn canonicalize_placeholder( + criteria: &[Criterion], + sort_query: &Option>, + ) -> (Vec, Option>, Vec) { + let mut sort = None; + + let mut sorted_fields = HashMap::new(); + let mut 
canonicalization_actions = Vec::new(); + let mut canonical_criteria = Vec::new(); + let mut canonical_sort = None; + + for (criterion_index, criterion) in criteria.iter().enumerate() { + match criterion.clone() { + Criterion::Words + | Criterion::Typo + | Criterion::Proximity + | Criterion::Attribute + | Criterion::Exactness => { + canonicalization_actions.push(CanonicalizationAction::RemovedPlaceholder { + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }) + } + + Criterion::Sort => { + if let Some(previous_index) = sort { + canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Criterion(previous_index), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + } else if let Some(sort_query) = sort_query { + sort = Some(criterion_index); + canonical_criteria.push(criterion.clone()); + canonical_sort = Some(canonicalize_sort( + &mut sorted_fields, + sort_query.as_slice(), + criterion_index, + &mut canonicalization_actions, + )); + } else { + canonicalization_actions.push(CanonicalizationAction::RemovedSort { + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }) + } + } + Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) { + std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions + .push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: *entry.get(), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }), + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(RankingRuleSource::Criterion(criterion_index)); + canonical_criteria.push(criterion.clone()) + } + }, + } + } + + (canonical_criteria, canonical_sort, canonicalization_actions) + } + + fn canonicalize_vector( + criteria: &[Criterion], + sort_query: &Option>, + ) -> (Vec, Option>, Vec) { + let mut sort = None; + + let mut sorted_fields = HashMap::new(); + let mut canonicalization_actions = Vec::new(); 
+ let mut canonical_criteria = Vec::new(); + let mut canonical_sort = None; + + let mut vector = None; + + 'criteria: for (criterion_index, criterion) in criteria.iter().enumerate() { + match criterion.clone() { + Criterion::Words + | Criterion::Typo + | Criterion::Proximity + | Criterion::Attribute + | Criterion::Exactness => match vector { + Some(previous_occurrence) => { + if sorted_fields.is_empty() { + canonicalization_actions.push(CanonicalizationAction::RemovedVector { + vector_rule: RankingRuleSource::Criterion(previous_occurrence), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + } else { + canonicalization_actions.push( + CanonicalizationAction::TruncatedVector { + vector_rule: RankingRuleSource::Criterion(previous_occurrence), + truncated_from: RankingRuleSource::Criterion(criterion_index), + }, + ); + break 'criteria; + } + } + None => { + canonical_criteria.push(criterion.clone()); + vector = Some(criterion_index); + } + }, + + Criterion::Sort => { + if let Some(previous_index) = sort { + canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Criterion(previous_index), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + } else if let Some(sort_query) = sort_query { + sort = Some(criterion_index); + canonical_criteria.push(criterion.clone()); + canonical_sort = Some(canonicalize_sort( + &mut sorted_fields, + sort_query.as_slice(), + criterion_index, + &mut canonicalization_actions, + )); + } else { + canonicalization_actions.push(CanonicalizationAction::RemovedSort { + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }) + } + } + Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) { + std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions + .push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: *entry.get(), + removed_occurrence: 
RankingRuleSource::Criterion(criterion_index), + }), + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(RankingRuleSource::Criterion(criterion_index)); + canonical_criteria.push(criterion.clone()) + } + }, + } + } + + (canonical_criteria, canonical_sort, canonicalization_actions) + } + + fn canonicalize_keyword( + criteria: &[Criterion], + sort_query: &Option>, + terms_matching_strategy: TermsMatchingStrategy, + ) -> (Vec, Option>, Vec) { + let mut words = None; + let mut typo = None; + let mut proximity = None; + let mut sort = None; + let mut attribute = None; + let mut exactness = None; + let mut sorted_fields = HashMap::new(); + + let mut canonical_criteria = Vec::new(); + let mut canonical_sort = None; + + let mut canonicalization_actions = Vec::new(); + + for (criterion_index, criterion) in criteria.iter().enumerate() { + let criterion = criterion.clone(); + match criterion.clone() { + Criterion::Words => { + if let TermsMatchingStrategy::All = terms_matching_strategy { + canonicalization_actions.push(CanonicalizationAction::RemovedWords { + reason: RemoveWords::MatchingStrategyAll, + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + continue; + } + if let Some(maybe_previous_index) = words { + if let Some(previous_index) = maybe_previous_index { + canonicalization_actions.push( + CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Criterion( + previous_index, + ), + removed_occurrence: RankingRuleSource::Criterion( + criterion_index, + ), + }, + ); + continue; + } + canonicalization_actions.push(CanonicalizationAction::RemovedWords { + reason: RemoveWords::WasPrepended, + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }) + } + words = Some(Some(criterion_index)); + canonical_criteria.push(criterion); + } + Criterion::Typo => { + canonicalize_criterion( + criterion, + criterion_index, + terms_matching_strategy, + &mut words, + &mut canonicalization_actions, + 
&mut canonical_criteria, + &mut typo, + ); + } + Criterion::Proximity => { + canonicalize_criterion( + criterion, + criterion_index, + terms_matching_strategy, + &mut words, + &mut canonicalization_actions, + &mut canonical_criteria, + &mut proximity, + ); + } + Criterion::Attribute => { + canonicalize_criterion( + criterion, + criterion_index, + terms_matching_strategy, + &mut words, + &mut canonicalization_actions, + &mut canonical_criteria, + &mut attribute, + ); + } + Criterion::Exactness => { + canonicalize_criterion( + criterion, + criterion_index, + terms_matching_strategy, + &mut words, + &mut canonicalization_actions, + &mut canonical_criteria, + &mut exactness, + ); + } + + Criterion::Sort => { + if let Some(previous_index) = sort { + canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Criterion(previous_index), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + } else if let Some(sort_query) = sort_query { + sort = Some(criterion_index); + canonical_criteria.push(criterion); + canonical_sort = Some(canonicalize_sort( + &mut sorted_fields, + sort_query.as_slice(), + criterion_index, + &mut canonicalization_actions, + )); + } else { + canonicalization_actions.push(CanonicalizationAction::RemovedSort { + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }) + } + } + Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) { + std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions + .push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: *entry.get(), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }), + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(RankingRuleSource::Criterion(criterion_index)); + canonical_criteria.push(criterion) + } + }, + } + } + + (canonical_criteria, canonical_sort, canonicalization_actions) + } + + pub fn is_compatible_with(&self, 
previous: &Self) -> Result<(), CompatibilityError> { + for (current, previous) in self.coalesce_iterator().zip(previous.coalesce_iterator()) { + if current.kind != previous.kind { + return Err(CompatibilityError { current, previous }); + } + } + Ok(()) + } + + pub fn constraint_count(&self) -> usize { + self.coalesce_iterator().count() + } + + fn coalesce_iterator(&self) -> impl Iterator + '_ { + self.canonical_criteria + .iter() + .enumerate() + .flat_map(|(criterion_index, criterion)| { + RankingRule::from_criterion(criterion_index, criterion, &self.canonical_sort) + }) + .coalesce( + |previous @ RankingRule { source: previous_source, kind: previous_kind }, + current @ RankingRule { source, kind }| { + match (previous_kind, kind) { + (RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) => { + let merged_source = match (previous_source, source) { + ( + RankingRuleSource::Criterion(previous), + RankingRuleSource::Criterion(current), + ) => RankingRuleSource::CoalescedCriteria(previous, current), + ( + RankingRuleSource::CoalescedCriteria(begin, _end), + RankingRuleSource::Criterion(current), + ) => RankingRuleSource::CoalescedCriteria(begin, current), + (_previous, current) => current, + }; + Ok(RankingRule { source: merged_source, kind }) + } + _ => Err((previous, current)), + } + }, + ) + } + + fn canonicalization_notes(&self) -> String { + use CanonicalizationAction::*; + let mut notes = String::new(); + for (index, action) in self.canonicalization_actions.iter().enumerate() { + let index = index + 1; + let _ = match action { + PrependedWords { prepended_index } => writeln!( + &mut notes, + " {index}. Prepended rule `words` before first relevancy rule `{}` at position {}", + prepended_index.rule_name(&self.source_criteria, &self.source_sort), + prepended_index.rule_position() + ), + RemovedDuplicate { earlier_occurrence, removed_occurrence } => writeln!( + &mut notes, + " {index}. 
Removed duplicate rule `{}` at position {} as it already appears at position {}", + earlier_occurrence.rule_name(&self.source_criteria, &self.source_sort), + removed_occurrence.rule_position(), + earlier_occurrence.rule_position(), + ), + RemovedWords { reason, removed_occurrence } => writeln!( + &mut notes, + " {index}. Removed rule `words` at position {} because {reason}", + removed_occurrence.rule_position() + ), + RemovedPlaceholder { removed_occurrence } => writeln!( + &mut notes, + " {index}. Removed relevancy rule `{}` at position {} because the query is a placeholder search (`q`: \"\")", + removed_occurrence.rule_name(&self.source_criteria, &self.source_sort), + removed_occurrence.rule_position() + ), + TruncatedVector { vector_rule, truncated_from } => writeln!( + &mut notes, + " {index}. Truncated relevancy rule `{}` at position {} and later rules because the query is a vector search and `vector` was inserted at position {}", + truncated_from.rule_name(&self.source_criteria, &self.source_sort), + truncated_from.rule_position(), + vector_rule.rule_position(), + ), + RemovedVector { vector_rule, removed_occurrence } => writeln!( + &mut notes, + " {index}. Removed relevancy rule `{}` at position {} because the query is a vector search and `vector` was already inserted at position {}", + removed_occurrence.rule_name(&self.source_criteria, &self.source_sort), + removed_occurrence.rule_position(), + vector_rule.rule_position(), + ), + RemovedSort { removed_occurrence } => writeln!( + &mut notes, + " {index}. 
Removed rule `sort` at position {} because `query.sort` is empty", +removed_occurrence.rule_position() + ), + }; + } + notes + } +} + +fn canonicalize_sort( + sorted_fields: &mut HashMap, + sort_query: &[AscDesc], + criterion_index: usize, + canonicalization_actions: &mut Vec, +) -> Vec { + let mut geo_sorted = None; + let mut canonical_sort = Vec::new(); + for (sort_index, asc_desc) in sort_query.iter().enumerate() { + let source = RankingRuleSource::Sort { criterion_index, sort_index }; + let asc_desc = asc_desc.clone(); + match asc_desc.clone() { + AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => { + match sorted_fields.entry(s) { + std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions + .push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: *entry.get(), + removed_occurrence: source, + }), + std::collections::hash_map::Entry::Vacant(entry) => { + entry.insert(source); + canonical_sort.push(asc_desc); + } + } + } + AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => match geo_sorted { + Some(earlier_sort_index) => { + canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Sort { + criterion_index, + sort_index: earlier_sort_index, + }, + removed_occurrence: source, + }) + } + None => { + geo_sorted = Some(sort_index); + canonical_sort.push(asc_desc); + } + }, + } + } + canonical_sort +} + +fn canonicalize_criterion( + criterion: Criterion, + criterion_index: usize, + terms_matching_strategy: TermsMatchingStrategy, + words: &mut Option>, + canonicalization_actions: &mut Vec, + canonical_criteria: &mut Vec, + rule: &mut Option, +) { + *words = match (terms_matching_strategy, words.take()) { + (TermsMatchingStrategy::All, words) => words, + (_, None) => { + // inject words + canonicalization_actions.push(CanonicalizationAction::PrependedWords { + prepended_index: RankingRuleSource::Criterion(criterion_index), + }); + 
canonical_criteria.push(Criterion::Words); + Some(None) + } + (_, words) => words, + }; + if let Some(previous_index) = *rule { + canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { + earlier_occurrence: RankingRuleSource::Criterion(previous_index), + removed_occurrence: RankingRuleSource::Criterion(criterion_index), + }); + } else { + *rule = Some(criterion_index); + canonical_criteria.push(criterion) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum RankingRuleKind { + Relevancy, + AscendingSort, + DescendingSort, + AscendingGeoSort, + DescendingGeoSort, +} + +#[derive(Debug, Clone, Copy)] +pub struct RankingRule { + source: RankingRuleSource, + kind: RankingRuleKind, +} + +#[derive(Debug, Clone, Copy)] +pub enum RankingRuleSource { + Criterion(usize), + CoalescedCriteria(usize, usize), + Sort { criterion_index: usize, sort_index: usize }, +} + +impl RankingRuleSource { + fn rule_name(&self, criteria: &[Criterion], sort: &Option>) -> String { + match self { + RankingRuleSource::Criterion(criterion_index) => criteria + .get(*criterion_index) + .map(|c| c.to_string()) + .unwrap_or_else(|| "unknown".into()), + RankingRuleSource::CoalescedCriteria(begin, end) => { + let rules: Vec<_> = criteria + .get(*begin..=*end) + .iter() + .flat_map(|c| c.iter()) + .map(|c| c.to_string()) + .collect(); + rules.join(", ") + } + RankingRuleSource::Sort { criterion_index: _, sort_index } => { + match sort.as_deref().and_then(|sort| sort.get(*sort_index)) { + Some(sort) => match sort { + AscDesc::Asc(Member::Field(field_name)) => format!("{field_name}:asc"), + AscDesc::Desc(Member::Field(field_name)) => { + format!("{field_name}:desc") + } + AscDesc::Asc(Member::Geo(_)) => "_geo(..):asc".to_string(), + AscDesc::Desc(Member::Geo(_)) => "_geo(..):desc".to_string(), + }, + None => "unknown".into(), + } + } + } + } + + fn rule_position(&self) -> String { + match self { + RankingRuleSource::Criterion(criterion_index) => { + format!("#{criterion_index} in 
ranking rules") + } + RankingRuleSource::CoalescedCriteria(begin, end) => { + format!("#{begin} to #{end} in ranking rules") + } + RankingRuleSource::Sort { criterion_index, sort_index } => format!( + "#{sort_index} in `query.sort` (as `sort` is #{criterion_index} in ranking rules)" + ), + } + } +} + +impl RankingRule { + fn from_criterion<'a>( + criterion_index: usize, + criterion: &'a Criterion, + sort: &'a Option>, + ) -> impl Iterator + 'a { + let kind = match criterion { + Criterion::Words + | Criterion::Typo + | Criterion::Proximity + | Criterion::Attribute + | Criterion::Exactness => RankingRuleKind::Relevancy, + Criterion::Asc(s) if s == "_geo" => RankingRuleKind::AscendingGeoSort, + + Criterion::Asc(_) => RankingRuleKind::AscendingSort, + Criterion::Desc(s) if s == "_geo" => RankingRuleKind::DescendingGeoSort, + + Criterion::Desc(_) => RankingRuleKind::DescendingSort, + Criterion::Sort => { + return either::Right(sort.iter().flatten().enumerate().map( + move |(rule_index, asc_desc)| { + Self::from_asc_desc(asc_desc, criterion_index, rule_index) + }, + )) + } + }; + + either::Left(std::iter::once(Self { + source: RankingRuleSource::Criterion(criterion_index), + kind, + })) + } + + fn from_asc_desc(asc_desc: &AscDesc, sort_index: usize, rule_index_in_sort: usize) -> Self { + let kind = match asc_desc { + AscDesc::Asc(Member::Field(_)) => RankingRuleKind::AscendingSort, + AscDesc::Desc(Member::Field(_)) => RankingRuleKind::DescendingSort, + AscDesc::Asc(Member::Geo(_)) => RankingRuleKind::AscendingGeoSort, + AscDesc::Desc(Member::Geo(_)) => RankingRuleKind::DescendingGeoSort, + }; + Self { + source: RankingRuleSource::Sort { + criterion_index: sort_index, + sort_index: rule_index_in_sort, + }, + kind, + } + } + + fn as_string( + &self, + canonical_criteria: &[Criterion], + canonical_sort: &Option>, + query_index: usize, + index_uid: &str, + ) -> String { + let kind = match self.kind { + RankingRuleKind::Relevancy => "relevancy", + 
RankingRuleKind::AscendingSort => "ascending sort", + RankingRuleKind::DescendingSort => "descending sort", + RankingRuleKind::AscendingGeoSort => "ascending geo sort", + RankingRuleKind::DescendingGeoSort => "descending geo sort", + }; + let rules = self.fetch_from_source(canonical_criteria, canonical_sort); + + let source = match self.source { + RankingRuleSource::Criterion(criterion_index) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"), + RankingRuleSource::CoalescedCriteria(begin, end) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{begin}..={end}]`"), + RankingRuleSource::Sort { criterion_index, sort_index } => format!("`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"), + }; + + format!("{source}: {kind} {rules}") + } + + fn fetch_from_source( + &self, + canonical_criteria: &[Criterion], + canonical_sort: &Option>, + ) -> String { + let rule_name = match self.source { + RankingRuleSource::Criterion(index) => { + canonical_criteria.get(index).map(|criterion| criterion.to_string()) + } + RankingRuleSource::CoalescedCriteria(begin, end) => { + let rules: Vec = canonical_criteria + .get(begin..=end) + .into_iter() + .flat_map(|criteria| criteria.iter()) + .map(|criterion| criterion.to_string()) + .collect(); + + (!rules.is_empty()).then_some(rules.join(", ")) + } + RankingRuleSource::Sort { criterion_index: _, sort_index } => canonical_sort + .as_deref() + .and_then(|canonical_sort| canonical_sort.get(sort_index)) + .and_then(|asc_desc: &AscDesc| match asc_desc { + AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => { + Some(format!("on field `{s}`")) + } + _ => None, + }), + }; + + let rule_name = rule_name.unwrap_or_else(|| "default".into()); + + format!("rule(s) {rule_name}") + } +} From 83d71662aa976e152b4b1312ca18b5271441a536 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:37:01 +0200 Subject: [PATCH 09/14] Changes to 
multi_search route --- meilisearch/src/routes/multi_search.rs | 175 ++++++++++++++++--------- 1 file changed, 111 insertions(+), 64 deletions(-) diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 1d697dac6..ae626888d 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -10,12 +10,14 @@ use serde::Serialize; use tracing::debug; use crate::analytics::{Analytics, MultiSearchAggregator}; +use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::indexes::search::search_kind; use crate::search::{ - add_search_rules, perform_search, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, + add_search_rules, perform_federated_search, perform_search, FederatedSearch, RetrieveVectors, + SearchQueryWithIndex, SearchResultWithIndex, }; use crate::search_queue::SearchQueue; @@ -28,85 +30,44 @@ struct SearchResults { results: Vec, } -#[derive(Debug, deserr::Deserr)] -#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] -pub struct SearchQueries { - queries: Vec, -} - pub async fn multi_search_with_post( index_scheduler: GuardedData, Data>, search_queue: Data, - params: AwebJson, + params: AwebJson, req: HttpRequest, analytics: web::Data, ) -> Result { - let queries = params.into_inner().queries; - - let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req); - let features = index_scheduler.features(); - // Since we don't want to process half of the search requests and then get a permit refused // we're going to get one permit for the whole duration of the multi-search request. 
let _permit = search_queue.try_get_search_permit().await?; - // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, - // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code - // changes. - let search_results: Result<_, (ResponseError, usize)> = async { - let mut search_results = Vec::with_capacity(queries.len()); - for (query_index, (index_uid, mut query)) in - queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate() - { - debug!(on_index = query_index, parameters = ?query, "Multi-search"); + let federated_search = params.into_inner(); + let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req); + + let FederatedSearch { mut queries, federation } = federated_search; + + let features = index_scheduler.features(); + + // regardless of federation, check authorization and apply search rules + let auth = 'check_authorization: { + for (query_index, federated_query) in queries.iter_mut().enumerate() { + let index_uid = federated_query.index_uid.as_str(); // Check index from API key - if !index_scheduler.filters().is_index_authorized(&index_uid) { - return Err(AuthenticationError::InvalidToken).with_index(query_index); + if !index_scheduler.filters().is_index_authorized(index_uid) { + break 'check_authorization Err(AuthenticationError::InvalidToken) + .with_index(query_index); } // Apply search rules from tenant token - if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) + if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(index_uid) { - add_search_rules(&mut query.filter, search_rules); + add_search_rules(&mut federated_query.filter, search_rules); } - - let index = index_scheduler - .index(&index_uid) - .map_err(|err| { - let mut err = ResponseError::from(err); - // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but - // 
here the resource not found is not part of the URL. - err.code = StatusCode::BAD_REQUEST; - err - }) - .with_index(query_index)?; - - let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features) - .with_index(query_index)?; - let retrieve_vector = - RetrieveVectors::new(query.retrieve_vectors, features).with_index(query_index)?; - - let search_result = tokio::task::spawn_blocking(move || { - perform_search(&index, query, search_kind, retrieve_vector) - }) - .await - .with_index(query_index)?; - - search_results.push(SearchResultWithIndex { - index_uid: index_uid.into_inner(), - result: search_result.with_index(query_index)?, - }); } - Ok(search_results) - } - .await; + Ok(()) + }; - if search_results.is_ok() { - multi_aggregate.succeed(); - } - analytics.post_multi_search(multi_aggregate); - - let search_results = search_results.map_err(|(mut err, query_index)| { + auth.map_err(|(mut err, query_index)| { // Add the query index that failed as context for the error message. // We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type // of result and we can benefit from static typing. @@ -114,9 +75,95 @@ pub async fn multi_search_with_post( err })?; - debug!(returns = ?search_results, "Multi-search"); + let response = match federation { + Some(federation) => { + let search_result = tokio::task::spawn_blocking(move || { + perform_federated_search(&index_scheduler, queries, federation, features) + }) + .await; - Ok(HttpResponse::Ok().json(SearchResults { results: search_results })) + if let Ok(Ok(_)) = search_result { + multi_aggregate.succeed(); + } + + analytics.post_multi_search(multi_aggregate); + HttpResponse::Ok().json(search_result??) 
+ } + None => { + // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, + // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code + // changes. + let search_results: Result<_, (ResponseError, usize)> = async { + let mut search_results = Vec::with_capacity(queries.len()); + for (query_index, (index_uid, query, federation_options)) in queries + .into_iter() + .map(SearchQueryWithIndex::into_index_query_federation) + .enumerate() + { + debug!(on_index = query_index, parameters = ?query, "Multi-search"); + + if federation_options.is_some() { + return Err(( + MeilisearchHttpError::FederationOptionsInNonFederatedRequest( + query_index, + ) + .into(), + query_index, + )); + } + + let index = index_scheduler + .index(&index_uid) + .map_err(|err| { + let mut err = ResponseError::from(err); + // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but + // here the resource not found is not part of the URL. + err.code = StatusCode::BAD_REQUEST; + err + }) + .with_index(query_index)?; + + let search_kind = + search_kind(&query, index_scheduler.get_ref(), &index, features) + .with_index(query_index)?; + let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features) + .with_index(query_index)?; + + let search_result = tokio::task::spawn_blocking(move || { + perform_search(&index, query, search_kind, retrieve_vector) + }) + .await + .with_index(query_index)?; + + search_results.push(SearchResultWithIndex { + index_uid: index_uid.into_inner(), + result: search_result.with_index(query_index)?, + }); + } + Ok(search_results) + } + .await; + + if search_results.is_ok() { + multi_aggregate.succeed(); + } + analytics.post_multi_search(multi_aggregate); + + let search_results = search_results.map_err(|(mut err, query_index)| { + // Add the query index that failed as context for the error message. 
+ // We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type + // of result and we can benefit from static typing. + err.message = format!("Inside `.queries[{query_index}]`: {}", err.message); + err + })?; + + debug!(returns = ?search_results, "Multi-search"); + + HttpResponse::Ok().json(SearchResults { results: search_results }) + } + }; + + Ok(response) } /// Local `Result` extension trait to avoid `map_err` boilerplate. From 3167411e98a1b7d49498bfc28b7fa97eba74eff8 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:37:16 +0200 Subject: [PATCH 10/14] Analytics --- meilisearch/src/analytics/mock_analytics.rs | 2 +- .../src/analytics/segment_analytics.rs | 34 +++++++++++++++---- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs index e212775a2..54b8d4f1b 100644 --- a/meilisearch/src/analytics/mock_analytics.rs +++ b/meilisearch/src/analytics/mock_analytics.rs @@ -42,7 +42,7 @@ pub struct MultiSearchAggregator; #[allow(dead_code)] impl MultiSearchAggregator { - pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self { + pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self { Self } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 405baa057..81354a139 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -34,8 +34,8 @@ use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumen use crate::routes::indexes::facet_search::FacetSearchQuery; use crate::routes::{create_all_stats, Stats}; use crate::search::{ - FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult, - SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + FacetSearchResult, FederatedSearch, MatchingStrategy, 
SearchQuery, SearchQueryWithIndex, + SearchResult, SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO, }; @@ -1095,22 +1095,33 @@ pub struct MultiSearchAggregator { show_ranking_score: bool, show_ranking_score_details: bool, + // federation + use_federation: bool, + // context user_agents: HashSet, } impl MultiSearchAggregator { - pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self { + pub fn from_federated_search( + federated_search: &FederatedSearch, + request: &HttpRequest, + ) -> Self { let timestamp = Some(OffsetDateTime::now_utc()); let user_agents = extract_user_agents(request).into_iter().collect(); - let distinct_indexes: HashSet<_> = query + let use_federation = federated_search.federation.is_some(); + + let distinct_indexes: HashSet<_> = federated_search + .queries .iter() .map(|query| { + let query = &query; // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex let SearchQueryWithIndex { index_uid, + federation_options: _, q: _, vector: _, offset: _, @@ -1142,8 +1153,10 @@ impl MultiSearchAggregator { }) .collect(); - let show_ranking_score = query.iter().any(|query| query.show_ranking_score); - let show_ranking_score_details = query.iter().any(|query| query.show_ranking_score_details); + let show_ranking_score = + federated_search.queries.iter().any(|query| query.show_ranking_score); + let show_ranking_score_details = + federated_search.queries.iter().any(|query| query.show_ranking_score_details); Self { timestamp, @@ -1151,10 +1164,11 @@ impl MultiSearchAggregator { total_succeeded: 0, total_distinct_index_count: distinct_indexes.len(), total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, - total_search_count: query.len(), + total_search_count: federated_search.queries.len(), show_ranking_score, show_ranking_score_details, user_agents, + 
use_federation, } } @@ -1180,6 +1194,7 @@ impl MultiSearchAggregator { let show_ranking_score_details = this.show_ranking_score_details || other.show_ranking_score_details; let mut user_agents = this.user_agents; + let use_federation = this.use_federation || other.use_federation; for user_agent in other.user_agents.into_iter() { user_agents.insert(user_agent); @@ -1196,6 +1211,7 @@ impl MultiSearchAggregator { user_agents, show_ranking_score, show_ranking_score_details, + use_federation, // do not add _ or ..Default::default() here }; @@ -1214,6 +1230,7 @@ impl MultiSearchAggregator { user_agents, show_ranking_score, show_ranking_score_details, + use_federation, } = self; if total_received == 0 { @@ -1238,6 +1255,9 @@ impl MultiSearchAggregator { "scoring": { "show_ranking_score": show_ranking_score, "show_ranking_score_details": show_ranking_score_details, + }, + "federation": { + "use_federation": use_federation, } }); From a61b852695d835cc08ce6dea02613797464a2b02 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 4 Jul 2024 12:40:02 +0200 Subject: [PATCH 11/14] Add tests --- .../tests/auth/tenant_token_multi_search.rs | 75 + meilisearch/tests/search/mod.rs | 73 + meilisearch/tests/search/multi.rs | 3947 ++++++++++++++++- 3 files changed, 4094 insertions(+), 1 deletion(-) diff --git a/meilisearch/tests/auth/tenant_token_multi_search.rs b/meilisearch/tests/auth/tenant_token_multi_search.rs index 81146d14e..e994aa3bc 100644 --- a/meilisearch/tests/auth/tenant_token_multi_search.rs +++ b/meilisearch/tests/auth/tenant_token_multi_search.rs @@ -310,6 +310,23 @@ macro_rules! 
compute_authorized_single_search { tenant_token, key_content ); + + // federated + let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales", "filter": $filter}]})).await; + assert_eq!( + 200, code, + "{} using tenant_token: {:?} generated with parent_key: {:?}", + response, tenant_token, key_content + ); + assert_eq!( + // same count as the search is federated over a single query + $expected_count, + response["hits"].as_array().unwrap().len(), + "{} using tenant_token: {:?} generated with parent_key: {:?}", + response, + tenant_token, + key_content + ); } } }; @@ -375,6 +392,25 @@ macro_rules! compute_authorized_multiple_search { tenant_token, key_content ); + + let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [ + {"indexUid": "sales", "filter": $filter1}, + {"indexUid": "products", "filter": $filter2}, + ]})).await; + assert_eq!( + code, 200, + "{} using tenant_token: {:?} generated with parent_key: {:?}", + response, tenant_token, key_content + ); + assert_eq!( + response["hits"].as_array().unwrap().len(), + // sum of counts as the search is federated across two queries in different indexes + $expected_count1 + $expected_count2, + "{} using tenant_token: {:?} generated with parent_key: {:?}", + response, + tenant_token, + key_content + ); } } }; @@ -433,6 +469,24 @@ macro_rules! 
compute_forbidden_single_search { "{} using tenant_token: {:?} generated with parent_key: {:?}", response, tenant_token, key_content ); + + let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales"}]})).await; + if failed_query_index.is_none() && !response["message"].is_null() { + response["message"] = serde_json::json!(null); + } + assert_eq!( + response, + invalid_response(failed_query_index), + "{} using tenant_token: {:?} generated with parent_key: {:?}", + response, + tenant_token, + key_content + ); + assert_eq!( + code, 403, + "{} using tenant_token: {:?} generated with parent_key: {:?}", + response, tenant_token, key_content + ); } } }; @@ -494,6 +548,27 @@ macro_rules! compute_forbidden_multiple_search { "{} using tenant_token: {:?} generated with parent_key: {:?}", response, tenant_token, key_content ); + + let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [ + {"indexUid": "sales"}, + {"indexUid": "products"}, + ]})).await; + if failed_query_index.is_none() && !response["message"].is_null() { + response["message"] = serde_json::json!(null); + } + assert_eq!( + response, + invalid_response(failed_query_index), + "{} using tenant_token: {:?} generated with parent_key: {:?}", + response, + tenant_token, + key_content + ); + assert_eq!( + code, 403, + "{} using tenant_token: {:?} generated with parent_key: {:?}", + response, tenant_token, key_content + ); } } }; diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index e239ff767..181e0937f 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -132,6 +132,79 @@ static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { ]) }); +static FRUITS_DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "name": "Exclusive sale: green apple", + "id": "green-apple-boosted", + "BOOST": true + }, + { + "name": "Pear", + "id": "pear", + }, + { + "name": "Red apple gala", + "id": "red-apple-gala", + }, + 
{ + "name": "Exclusive sale: Red Tomato", + "id": "red-tomatoes-boosted", + "BOOST": true + }, + { + "name": "Exclusive sale: Red delicious apple", + "id": "red-delicious-boosted", + "BOOST": true, + } + ]) +}); + +static VECTOR_DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "id": "A", + "description": "the dog barks at the cat", + "_vectors": { + // dimensions [canine, feline, young] + "animal": [0.9, 0.8, 0.05], + // dimensions [negative/positive, energy] + "sentiment": [-0.1, 0.55] + } + }, + { + "id": "B", + "description": "the kitten scratched the beagle", + "_vectors": { + // dimensions [canine, feline, young] + "animal": [0.8, 0.9, 0.5], + // dimensions [negative/positive, energy] + "sentiment": [-0.2, 0.65] + } + }, + { + "id": "C", + "description": "the dog had to stay alone today", + "_vectors": { + // dimensions [canine, feline, young] + "animal": [0.85, 0.02, 0.1], + // dimensions [negative/positive, energy] + "sentiment": [-1.0, 0.1] + } + }, + { + "id": "D", + "description": "the little boy pets the puppy", + "_vectors": { + // dimensions [canine, feline, young] + "animal": [0.8, 0.09, 0.8], + // dimensions [negative/positive, energy] + "sentiment": [0.8, 0.3] + } + }, + ]) +}); + #[actix_rt::test] async fn simple_placeholder_search() { let server = Server::new().await; diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index b5cf8f476..e0158b7c3 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -1,8 +1,9 @@ use meili_snap::{json_string, snapshot}; -use super::{DOCUMENTS, NESTED_DOCUMENTS}; +use super::{DOCUMENTS, FRUITS_DOCUMENTS, NESTED_DOCUMENTS}; use crate::common::Server; use crate::json; +use crate::search::{SCORE_DOCUMENTS, VECTOR_DOCUMENTS}; #[actix_rt::test] async fn search_empty_list() { @@ -17,6 +18,23 @@ async fn search_empty_list() { "###); } +#[actix_rt::test] +async fn federation_empty_list() { + let server = Server::new().await; + + let (response, code) = 
server.multi_search(json!({"federation": {}, "queries": []})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, {".processingTimeMs" => "[time]"}), @r###" + { + "hits": [], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "###); +} + #[actix_rt::test] async fn search_json_object() { let server = Server::new().await; @@ -33,6 +51,22 @@ async fn search_json_object() { "###); } +#[actix_rt::test] +async fn federation_no_queries() { + let server = Server::new().await; + + let (response, code) = server.multi_search(json!({"federation": {}})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Missing field `queries`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} + #[actix_rt::test] async fn search_json_array() { let server = Server::new().await; @@ -113,6 +147,184 @@ async fn simple_search_single_index() { "###); } +#[actix_rt::test] +async fn federation_single_search_single_index() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "test", "q": "glass"}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "_vectors": { + "manual": [ + -100.0, + 340.0, + 90.0 + ] + }, + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); +} + +#[actix_rt::test] +async fn federation_multiple_search_single_index() { + let 
server = Server::new().await; + let index = server.index("test"); + + let documents = SCORE_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid": "test", "q": "the bat"}, + {"indexUid": "test", "q": "badman returns"}, + {"indexUid" : "test", "q": "batman"}, + {"indexUid": "test", "q": "batman returns"}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "title": "Batman", + "id": "D", + "_federation": { + "indexUid": "test", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 3, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 2, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 2, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Badman", + "id": "E", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.5 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 5 + } + "###); +} + +#[actix_rt::test] +async fn federation_two_search_single_index() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "test", "q": "glass"}, + {"indexUid": "test", "q": "captain"}, + ]})) + .await; + 
snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "_vectors": { + "manual": [ + -100.0, + 340.0, + 90.0 + ] + }, + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "id": "299537", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.9848484848484848 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); +} + #[actix_rt::test] async fn simple_search_missing_index_uid() { let server = Server::new().await; @@ -138,6 +350,31 @@ async fn simple_search_missing_index_uid() { "###); } +#[actix_rt::test] +async fn federation_simple_search_missing_index_uid() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"q": "glass"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Missing field `indexUid` inside `.queries[0]`", + "code": "missing_index_uid", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_index_uid" + } + "###); +} + #[actix_rt::test] async fn simple_search_illegal_index_uid() { let server = Server::new().await; @@ -163,6 +400,31 @@ async fn simple_search_illegal_index_uid() { "###); } +#[actix_rt::test] +async fn federation_search_illegal_index_uid() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + 
let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid": "hé", "q": "glass"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "code": "invalid_index_uid", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_index_uid" + } + "###); +} + #[actix_rt::test] async fn simple_search_two_indexes() { let server = Server::new().await; @@ -266,6 +528,352 @@ async fn simple_search_two_indexes() { "###); } +#[actix_rt::test] +async fn federation_two_search_two_indexes() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "test", "q": "glass"}, + {"indexUid": "nested", "q": "pésti"}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "_vectors": { + "manual": [ + -100.0, + 340.0, + 90.0 + ] + }, + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_federation": { + "indexUid": "nested", + 
"queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros bill", + "age": 8 + } + ], + "cattos": [ + "simba", + "pestiféré" + ], + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 1, + "weightedRankingScore": 0.7803030303030303 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); +} + +#[actix_rt::test] +async fn federation_multiple_search_multiple_indexes() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let index = server.index("score"); + let documents = SCORE_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(2).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "test", "q": "glass"}, + {"indexUid" : "test", "q": "captain"}, + {"indexUid": "nested", "q": "pésti"}, + {"indexUid" : "test", "q": "Escape"}, + {"indexUid": "nested", "q": "jean"}, + {"indexUid": "score", "q": "jean"}, + {"indexUid": "test", "q": "the bat"}, + {"indexUid": "score", "q": "the bat"}, + {"indexUid": "score", "q": "badman returns"}, + {"indexUid" : "score", "q": "batman"}, + {"indexUid": "score", "q": "batman returns"}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "title": "Gläss", + "id": "450465", + "_vectors": { + "manual": [ + -100.0, + 340.0, + 90.0 + ] + }, + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + 
"weightedRankingScore": 1.0 + } + }, + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "id": "D", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "score", + "queriesPosition": 10, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "id": "299537", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Escape Room", + "id": "522681", + "_vectors": { + "manual": [ + 10.0, + -23.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "test", + "queriesPosition": 3, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "id": 951, + "father": "jean-baptiste", + "mother": "sophie", + "doggos": [ + { + "name": "turbo", + "age": 5 + }, + { + "name": "fast", + "age": 6 + } + ], + "cattos": [ + "moumoute", + "gomez" + ], + "_vectors": { + "manual": [ + 10.0, + 23.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 4, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros 
bill", + "age": 8 + } + ], + "cattos": [ + "simba", + "pestiféré" + ], + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 2, + "weightedRankingScore": 0.7803030303030303 + } + }, + { + "title": "Badman", + "id": "E", + "_federation": { + "indexUid": "score", + "queriesPosition": 8, + "weightedRankingScore": 0.5 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "id": "166428", + "_vectors": { + "manual": [ + -100.0, + 231.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "test", + "queriesPosition": 6, + "weightedRankingScore": 0.4166666666666667 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 12 + } + "###); +} + #[actix_rt::test] async fn search_one_index_doesnt_exist() { let server = Server::new().await; @@ -292,6 +900,32 @@ async fn search_one_index_doesnt_exist() { "###); } +#[actix_rt::test] +async fn federation_one_index_doesnt_exist() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "test", "q": "glass"}, + {"indexUid": "nested", "q": "pésti"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Inside `.queries[1]`: Index `nested` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + "###); +} + #[actix_rt::test] async fn search_multiple_indexes_dont_exist() { let server = Server::new().await; @@ -313,6 +947,29 @@ async fn search_multiple_indexes_dont_exist() { "###); } +#[actix_rt::test] +async fn federation_multiple_indexes_dont_exist() { + let server = Server::new().await; + + let (response, code) = server + 
.multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "test", "q": "glass"}, + {"indexUid": "nested", "q": "pésti"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + // order of indexes that are not found depends on the alphabetical order of index names + // the query index is the lowest index with that index + snapshot!(json_string!(response), @r###" + { + "message": "Inside `.queries[1]`: Index `nested` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + "###); +} + #[actix_rt::test] async fn search_one_query_error() { let server = Server::new().await; @@ -345,6 +1002,70 @@ async fn search_one_query_error() { "###); } +#[actix_rt::test] +async fn federation_one_query_error() { + let server = Server::new().await; + + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "test", "q": "glass"}, + {"indexUid": "nested", "q": "pésti", "filter": ["title = toto"]}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Inside `.queries[1]`: Attribute `title` is not filterable. 
This index does not have configured filterable attributes.\n1:6 title = toto", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); +} + +#[actix_rt::test] +async fn federation_one_query_sort_error() { + let server = Server::new().await; + + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "test", "q": "glass"}, + {"indexUid": "nested", "q": "pésti", "sort": ["doggos:desc"]}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Inside `.queries[1]`: Attribute `doggos` is not sortable. 
This index does not have configured sortable attributes.", + "code": "invalid_search_sort", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_sort" + } + "###); +} + #[actix_rt::test] async fn search_multiple_query_errors() { let server = Server::new().await; @@ -376,3 +1097,3227 @@ async fn search_multiple_query_errors() { } "###); } + +#[actix_rt::test] +async fn federation_multiple_query_errors() { + let server = Server::new().await; + + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = server + .multi_search(json!({"queries": [ + {"indexUid" : "test", "q": "glass", "filter": ["title = toto"]}, + {"indexUid": "nested", "q": "pésti", "filter": ["doggos IN [intel, kefir]"]}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Inside `.queries[0]`: Attribute `title` is not filterable. 
This index does not have configured filterable attributes.\n1:6 title = toto", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); +} + +#[actix_rt::test] +async fn federation_multiple_query_sort_errors() { + let server = Server::new().await; + + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = server + .multi_search(json!({"queries": [ + {"indexUid" : "test", "q": "glass", "sort": ["title:desc"]}, + {"indexUid": "nested", "q": "pésti", "sort": ["doggos:desc"]}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Inside `.queries[0]`: Attribute `title` is not sortable. 
This index does not have configured sortable attributes.", + "code": "invalid_search_sort", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_sort" + } + "###); +} + +#[actix_rt::test] +async fn federation_multiple_query_errors_interleaved() { + let server = Server::new().await; + + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = server + .multi_search(json!({"queries": [ + {"indexUid" : "test", "q": "glass"}, + {"indexUid": "nested", "q": "pésti", "filter": ["doggos IN [intel, kefir]"]}, + {"indexUid" : "test", "q": "glass", "filter": ["title = toto"]}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Inside `.queries[1]`: Attribute `doggos` is not filterable. 
This index does not have configured filterable attributes.\n1:7 doggos IN [intel, kefir]", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); +} + +#[actix_rt::test] +async fn federation_multiple_query_sort_errors_interleaved() { + let server = Server::new().await; + + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = server + .multi_search(json!({"queries": [ + {"indexUid" : "test", "q": "glass"}, + {"indexUid": "nested", "q": "pésti", "sort": ["doggos:desc"]}, + {"indexUid" : "test", "q": "glass", "sort": ["title:desc"]}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Inside `.queries[1]`: Attribute `doggos` is not sortable. 
This index does not have configured sortable attributes.", + "code": "invalid_search_sort", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_sort" + } + "###); +} + +#[actix_rt::test] +async fn federation_filter() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), + ) + .await; + index.wait_task(value.uid()).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red", "filter": "BOOST = true", "showRankingScore": true, "federationOptions": {"weight": 3.0}}, + {"indexUid": "fruits", "q": "apple red", "showRankingScore": true}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "name": "Exclusive sale: Red delicious apple", + "id": "red-delicious-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 2.7281746031746033 + }, + "_rankingScore": 0.9093915343915344 + }, + { + "name": "Exclusive sale: green apple", + "id": "green-apple-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 1.318181818181818 + }, + "_rankingScore": 0.4393939393939394 + }, + { + "name": "Red apple gala", + "id": "red-apple-gala", + "_federation": { + "indexUid": "fruits", + "queriesPosition": 1, + "weightedRankingScore": 0.953042328042328 + }, + "_rankingScore": 0.953042328042328 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); +} + +#[actix_rt::test] +async fn 
federation_sort_same_indexes_same_criterion_same_direction() { + let server = Server::new().await; + + let index = server.index("nested"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["mother"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // two identical placeholder search should have all results from first query + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "nested", "q": "", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : "nested", "q": "", "sort": ["mother:asc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "id": 750, + "father": "romain", + "mother": "michelle", + "cattos": [ + "enigma" + ], + "_vectors": { + "manual": [ + 10.0, + 23.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros bill", + "age": 8 + } + ], + "cattos": [ + "simba", + "pestiféré" + ], + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + 
"_rankingScore": 1.0 + }, + { + "id": 951, + "father": "jean-baptiste", + "mother": "sophie", + "doggos": [ + { + "name": "turbo", + "age": 5 + }, + { + "name": "fast", + "age": 6 + } + ], + "cattos": [ + "moumoute", + "gomez" + ], + "_vectors": { + "manual": [ + 10.0, + 23.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4 + } + "###); + + // mix and match query + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "nested", "q": "pésti", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : "nested", "q": "jean", "sort": ["mother:asc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros bill", + "age": 8 + } + ], + "cattos": [ + "simba", + "pestiféré" + ], + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 0, + "weightedRankingScore": 0.7803030303030303 + }, + "_rankingScore": 0.7803030303030303 + }, + { + "id": 951, + "father": "jean-baptiste", + "mother": "sophie", + "doggos": [ + { + "name": "turbo", + "age": 5 + }, + { + "name": "fast", + "age": 6 + } + ], + "cattos": [ + "moumoute", + "gomez" + ], + "_vectors": { + "manual": [ + 10.0, + 23.0, + 32.0 + ] + }, + 
"_federation": { + "indexUid": "nested", + "queriesPosition": 1, + "weightedRankingScore": 0.9848484848484848 + }, + "_rankingScore": 0.9848484848484848 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); +} + +#[actix_rt::test] +async fn federation_sort_same_indexes_same_criterion_opposite_direction() { + let server = Server::new().await; + + let index = server.index("nested"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["mother"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // two identical placeholder search should have all results from first query + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "nested", "q": "", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : "nested", "q": "", "sort": ["mother:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `nested.rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. `queries[1].sort[0]`, `nested.rankingRules[0]`: descending sort rule(s) on field `mother`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. 
Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", + "code": "invalid_multi_search_query_ranking_rules", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" + } + "###); + + // mix and match query: should be ranked by ranking score + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "nested", "q": "pésti", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : "nested", "q": "jean", "sort": ["mother:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `nested.rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. 
`queries[1].sort[0]`, `nested.rankingRules[0]`: descending sort rule(s) on field `mother`\n - cannot compare two sort rules in opposite directions\n", + "code": "invalid_multi_search_query_ranking_rules", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" + } + "###); +} + +#[actix_rt::test] +async fn federation_sort_same_indexes_different_criterion_same_direction() { + let server = Server::new().await; + + let index = server.index("nested"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["mother", "father"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // return mothers and fathers ordered accross fields. + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "nested", "q": "", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : "nested", "q": "", "sort": ["father:asc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "id": 951, + "father": "jean-baptiste", + "mother": "sophie", + "doggos": [ + { + "name": "turbo", + "age": 5 + }, + { + "name": "fast", + "age": 6 + } + ], + "cattos": [ + "moumoute", + "gomez" + ], + "_vectors": { + "manual": [ + 10.0, + 23.0, 
+ 32.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "id": 750, + "father": "romain", + "mother": "michelle", + "cattos": [ + "enigma" + ], + "_vectors": { + "manual": [ + 10.0, + 23.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros bill", + "age": 8 + } + ], + "cattos": [ + "simba", + "pestiféré" + ], + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4 + } + "###); + + // mix and match query: will be sorted across mother and father names + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "nested", "q": "pésti", "sort": ["mother:desc"], "showRankingScore": true }, + {"indexUid" : "nested", "q": "jean-bap", "sort": ["father:desc"], "showRankingScore": true }, + {"indexUid" : "nested", "q": "jea", "sort": ["father:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros bill", + "age": 8 + } + ], + "cattos": [ + "simba", + "pestiféré" + ], + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 0, + "weightedRankingScore": 0.7803030303030303 + }, + "_rankingScore": 0.7803030303030303 + }, + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": 
"buddy", + "age": 4 + } + ], + "cattos": "pésti", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "id": 951, + "father": "jean-baptiste", + "mother": "sophie", + "doggos": [ + { + "name": "turbo", + "age": 5 + }, + { + "name": "fast", + "age": 6 + } + ], + "cattos": [ + "moumoute", + "gomez" + ], + "_vectors": { + "manual": [ + 10.0, + 23.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "nested", + "queriesPosition": 1, + "weightedRankingScore": 0.9991181657848324 + }, + "_rankingScore": 0.9991181657848324 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); +} + +#[actix_rt::test] +async fn federation_sort_same_indexes_different_criterion_opposite_direction() { + let server = Server::new().await; + + let index = server.index("nested"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["mother", "father"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // two identical placeholder search should have all results from first query + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "nested", "q": "", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : "nested", "q": "", "sort": ["father:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. 
`queries[0].sort[0]`, `nested.rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. `queries[1].sort[0]`, `nested.rankingRules[0]`: descending sort rule(s) on field `father`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. 
Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", + "code": "invalid_multi_search_query_ranking_rules", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" + } + "###); + + // mix and match query: should be ranked by ranking score + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "nested", "q": "pésti", "sort": ["mother:asc"], "showRankingScore": true }, + {"indexUid" : "nested", "q": "jean", "sort": ["father:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: The results of queries #0 and #1 are incompatible: \n 1. `queries[0].sort[0]`, `nested.rankingRules[0]`: ascending sort rule(s) on field `mother`\n 2. `queries[1].sort[0]`, `nested.rankingRules[0]`: descending sort rule(s) on field `father`\n - cannot compare two sort rules in opposite directions\n", + "code": "invalid_multi_search_query_ranking_rules", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" + } + "###); +} + +#[actix_rt::test] +async fn federation_sort_different_indexes_same_criterion_same_direction() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = 
index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // return titles ordered across indexes + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "id": "E", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman", + "id": "D", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Captain Marvel", + "id": "299537", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Escape Room", + "id": "522681", + "_vectors": { +
"manual": [ + 10.0, + -23.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Gläss", + "id": "450465", + "_vectors": { + "manual": [ + -100.0, + 340.0, + 90.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "id": "166428", + "_vectors": { + "manual": [ + -100.0, + 231.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Shazam!", + "id": "287947", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 10 + } + "###); + + // mix and match query: will be sorted across indexes + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "batman", "q": "badman returns", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : "batman", "q": "the bat", "sort": ["title:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Captain Marvel", + "id": "299537", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 1, + "weightedRankingScore": 0.9848484848484848 + }, + "_rankingScore": 0.9848484848484848 + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "batman", + 
"queriesPosition": 2, + "weightedRankingScore": 0.9528218694885362 + }, + "_rankingScore": 0.9528218694885362 + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "batman", + "queriesPosition": 2, + "weightedRankingScore": 0.9528218694885362 + }, + "_rankingScore": 0.9528218694885362 + }, + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.8317901234567902 + }, + "_rankingScore": 0.8317901234567902 + }, + { + "title": "Batman", + "id": "D", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.23106060606060605 + }, + "_rankingScore": 0.23106060606060605 + }, + { + "title": "Badman", + "id": "E", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.5 + }, + "_rankingScore": 0.5 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 6 + } + "###); +} + +#[actix_rt::test] +async fn federation_sort_different_ranking_rules() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "sort", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // 
return titles ordered across indexes + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "id": "E", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman", + "id": "D", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Captain Marvel", + "id": "299537", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Escape Room", + "id": "522681", + "_vectors": { + "manual": [ + 10.0, + -23.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Gläss", + "id": "450465", + "_vectors": { + "manual": [ + -100.0, + 340.0, + 90.0 + ] + }, + "_federation": { + "indexUid": "movies", +
"queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "id": "166428", + "_vectors": { + "manual": [ + -100.0, + 231.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Shazam!", + "id": "287947", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 10 + } + "###); + + // mix and match query: order difficult to understand + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "batman", "q": "badman returns", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : "batman", "q": "the bat", "sort": ["title:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: The results of queries #2 and #1 are incompatible: \n 1. `queries[2]`, `batman.rankingRules[0..=3]`: relevancy rule(s) words, typo, proximity, attribute\n 2. 
`queries[1].sort[0]`, `movies.rankingRules[0]`: descending sort rule(s) on field `title`\n - cannot compare a relevancy rule with a sort rule\n", + "code": "invalid_multi_search_query_ranking_rules", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" + } + "###); +} + +#[actix_rt::test] +async fn federation_sort_different_indexes_same_criterion_opposite_direction() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // placeholder queries sorted in opposite directions are incompatible: the request is rejected + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : "batman", "q": "", "sort": ["title:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[0]`: The results of queries #1 and #0 are incompatible: \n 1. `queries[1].sort[0]`, `batman.rankingRules[0]`: descending sort rule(s) on field `title`\n 2.
`queries[0].sort[0]`, `movies.rankingRules[0]`: ascending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. 
Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", + "code": "invalid_multi_search_query_ranking_rules", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" + } + "###); + + // mix and match query: rejected because the sort rules go in opposite directions + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "batman", "q": "badman returns", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : "batman", "q": "the bat", "sort": ["title:asc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: The results of queries #2 and #1 are incompatible: \n 1. `queries[2].sort[0]`, `batman.rankingRules[0]`: ascending sort rule(s) on field `title`\n 2.
`queries[1].sort[0]`, `movies.rankingRules[0]`: descending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n", + "code": "invalid_multi_search_query_ranking_rules", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" + } + "###); +} + +#[actix_rt::test] +async fn federation_sort_different_indexes_different_criterion_same_direction() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["id"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // return documents ordered across indexes, comparing `title` (movies) with `id` (batman) + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : "batman", "q": "", "sort": ["id:asc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman the
dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Captain Marvel", + "id": "299537", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Batman", + "id": "D", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Badman", + "id": "E", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Escape Room", + "id": "522681", + "_vectors": { + "manual": [ + 10.0, + -23.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Gläss", + "id": "450465", + "_vectors": { + "manual": [ + -100.0, + 340.0, + 90.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "id": "166428", + "_vectors": { + "manual": [ + -100.0, + 231.0, + 32.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + }, + { + "title": "Shazam!", + "id": "287947", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_rankingScore": 1.0 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + 
"estimatedTotalHits": 10 + } + "###); + + // mix and match query: will be sorted across indexes and criterion + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "batman", "q": "badman returns", "sort": ["id:desc"], "showRankingScore": true }, + {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "showRankingScore": true }, + {"indexUid" : "batman", "q": "the bat", "sort": ["id:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "id": "E", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.5 + }, + "_rankingScore": 0.5 + }, + { + "title": "Batman", + "id": "D", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.23106060606060605 + }, + "_rankingScore": 0.23106060606060605 + }, + { + "title": "Captain Marvel", + "id": "299537", + "_vectors": { + "manual": [ + 1.0, + 2.0, + 54.0 + ] + }, + "_federation": { + "indexUid": "movies", + "queriesPosition": 1, + "weightedRankingScore": 0.9848484848484848 + }, + "_rankingScore": 0.9848484848484848 + }, + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.8317901234567902 + }, + "_rankingScore": 0.8317901234567902 + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "batman", + "queriesPosition": 2, + "weightedRankingScore": 0.9528218694885362 + }, + "_rankingScore": 0.9528218694885362 + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "batman", + "queriesPosition": 2, + "weightedRankingScore": 0.9528218694885362 + }, + "_rankingScore": 0.9528218694885362 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + 
"estimatedTotalHits": 6 + } + "###); +} + +#[actix_rt::test] +async fn federation_sort_different_indexes_different_criterion_opposite_direction() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["id"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // all results from query 0 first + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : "batman", "q": "", "sort": ["id:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[0]`: The results of queries #1 and #0 are incompatible: \n 1. `queries[1].sort[0]`, `batman.rankingRules[0]`: descending sort rule(s) on field `id`\n 2. `queries[0].sort[0]`, `movies.rankingRules[0]`: ascending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n - note: The ranking rules of query #1 were modified during canonicalization:\n 1. 
Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n - note: The ranking rules of query #0 were modified during canonicalization:\n 1. Removed relevancy rule `words` at position #1 in ranking rules because the query is a placeholder search (`q`: \"\")\n 2. Removed relevancy rule `typo` at position #2 in ranking rules because the query is a placeholder search (`q`: \"\")\n 3. Removed relevancy rule `proximity` at position #3 in ranking rules because the query is a placeholder search (`q`: \"\")\n 4. Removed relevancy rule `attribute` at position #4 in ranking rules because the query is a placeholder search (`q`: \"\")\n 5. 
Removed relevancy rule `exactness` at position #5 in ranking rules because the query is a placeholder search (`q`: \"\")\n", + "code": "invalid_multi_search_query_ranking_rules", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" + } + "###); + + // mix and match query: rejected because the sort rules go in opposite directions + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "batman", "q": "badman returns", "sort": ["id:desc"], "showRankingScore": true }, + {"indexUid" : "movies", "q": "captain", "sort": ["title:asc"], "showRankingScore": true }, + {"indexUid" : "batman", "q": "the bat", "sort": ["id:desc"], "showRankingScore": true }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: The results of queries #2 and #1 are incompatible: \n 1. `queries[2].sort[0]`, `batman.rankingRules[0]`: descending sort rule(s) on field `id`\n 2.
`queries[1].sort[0]`, `movies.rankingRules[0]`: ascending sort rule(s) on field `title`\n - cannot compare two sort rules in opposite directions\n", + "code": "invalid_multi_search_query_ranking_rules", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_ranking_rules" + } + "###); +} + +#[actix_rt::test] +async fn federation_limit_offset() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let index = server.index("score"); + let documents = SCORE_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(2).await; + { + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": 
[ + { + "title": "Gläss", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "id": 852, + "_federation": { + "indexUid": "nested", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "score", + "queriesPosition": 10, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "test", + "queriesPosition": 3, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "nested", + "queriesPosition": 4, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "id": 654, + "_federation": { + "indexUid": "nested", + "queriesPosition": 2, + "weightedRankingScore": 0.7803030303030303 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "score", + "queriesPosition": 8, + "weightedRankingScore": 0.5 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "test", + "queriesPosition": 6, + "weightedRankingScore": 0.4166666666666667 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 12 + } + "###); + } + + { + let (response, code) = server + .multi_search(json!({"federation": {"limit": 1}, "queries": [ + {"indexUid" : "test", 
"q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "title": "Gläss", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 1, + "offset": 0, + "estimatedTotalHits": 12 + } + "###); + } + + { + let (response, code) = server + .multi_search(json!({"federation": {"offset": 2}, "queries": [ + {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": 
["title"]}, + {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "title": "Batman", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "score", + "queriesPosition": 10, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "test", + "queriesPosition": 3, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "nested", + "queriesPosition": 4, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "id": 654, + "_federation": { + "indexUid": "nested", + "queriesPosition": 2, + "weightedRankingScore": 0.7803030303030303 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "score", + "queriesPosition": 8, + "weightedRankingScore": 0.5 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "test", + "queriesPosition": 6, + "weightedRankingScore": 0.4166666666666667 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 2, + "estimatedTotalHits": 12 + } + "###); + } + + { + let (response, 
code) = server + .multi_search(json!({"federation": {"offset": 12}, "queries": [ + {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 12, + "estimatedTotalHits": 12 + } + "###); + } +} + +#[actix_rt::test] +async fn federation_formatting() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let index = server.index("score"); + let documents = SCORE_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(2).await; + { + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : "test", "q": 
"captain", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"], "attributesToHighlight": ["title"]}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "title": "Gläss", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + }, + "_formatted": { + "title": "Gläss" + } + }, + { + "id": 852, + "_federation": { + "indexUid": "nested", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 1.0 + }, + "_formatted": { + "title": "Batman" + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "score", + "queriesPosition": 10, + "weightedRankingScore": 1.0 + }, + "_formatted": { + "title": "Batman Returns" + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.9848484848484848 
+ }, + "_formatted": { + "title": "Captain Marvel" + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "test", + "queriesPosition": 3, + "weightedRankingScore": 0.9848484848484848 + }, + "_formatted": { + "title": "Escape Room" + } + }, + { + "id": 951, + "_federation": { + "indexUid": "nested", + "queriesPosition": 4, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 0.9848484848484848 + }, + "_formatted": { + "title": "Batman the dark knight returns: Part 1" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 0.9848484848484848 + }, + "_formatted": { + "title": "Batman the dark knight returns: Part 2" + } + }, + { + "id": 654, + "_federation": { + "indexUid": "nested", + "queriesPosition": 2, + "weightedRankingScore": 0.7803030303030303 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "score", + "queriesPosition": 8, + "weightedRankingScore": 0.5 + }, + "_formatted": { + "title": "Badman" + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "test", + "queriesPosition": 6, + "weightedRankingScore": 0.4166666666666667 + }, + "_formatted": { + "title": "How to Train Your Dragon: The Hidden World" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 12 + } + "###); + } + + { + let (response, code) = server + .multi_search(json!({"federation": {"limit": 1}, "queries": [ + {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid": 
"nested", "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "title": "Gläss", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 1, + "offset": 0, + "estimatedTotalHits": 12 + } + "###); + } + + { + let (response, code) = server + .multi_search(json!({"federation": {"offset": 2}, "queries": [ + {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "pésti", "attributesToRetrieve": ["id"]}, + {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => 
"[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "title": "Batman", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "score", + "queriesPosition": 10, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "test", + "queriesPosition": 3, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "nested", + "queriesPosition": 4, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "score", + "queriesPosition": 9, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "id": 654, + "_federation": { + "indexUid": "nested", + "queriesPosition": 2, + "weightedRankingScore": 0.7803030303030303 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "score", + "queriesPosition": 8, + "weightedRankingScore": 0.5 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "test", + "queriesPosition": 6, + "weightedRankingScore": 0.4166666666666667 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 2, + "estimatedTotalHits": 12 + } + "###); + } + + { + let (response, code) = server + .multi_search(json!({"federation": {"offset": 12}, "queries": [ + {"indexUid" : "test", "q": "glass", "attributesToRetrieve": ["title"]}, + {"indexUid" : "test", "q": "captain", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "pésti", 
"attributesToRetrieve": ["id"]}, + {"indexUid" : "test", "q": "Escape", "attributesToRetrieve": ["title"]}, + {"indexUid": "nested", "q": "jean", "attributesToRetrieve": ["id"]}, + {"indexUid": "score", "q": "jean", "attributesToRetrieve": ["title"]}, + {"indexUid": "test", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "the bat", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "badman returns", "attributesToRetrieve": ["title"]}, + {"indexUid" : "score", "q": "batman", "attributesToRetrieve": ["title"]}, + {"indexUid": "score", "q": "batman returns", "attributesToRetrieve": ["title"]}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 12, + "estimatedTotalHits": 12 + } + "###); + } +} + +#[actix_rt::test] +async fn federation_invalid_weight() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), + ) + .await; + index.wait_task(value.uid()).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red", "filter": "BOOST = true", "showRankingScore": true, "federationOptions": {"weight": 3.0}}, + {"indexUid": "fruits", "q": "apple red", "showRankingScore": true, "federationOptions": {"weight": -12}}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Invalid value at `.queries[1].federationOptions.weight`: the value of `weight` is invalid, expected a 
positive float (>= 0.0).", + "code": "invalid_multi_search_weight", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_weight" + } + "###); +} + +#[actix_rt::test] +async fn federation_null_weight() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), + ) + .await; + index.wait_task(value.uid()).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red", "filter": "BOOST = true", "showRankingScore": true, "federationOptions": {"weight": 3.0}}, + {"indexUid": "fruits", "q": "apple red", "showRankingScore": true, "federationOptions": {"weight": 0.0} }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "name": "Exclusive sale: Red delicious apple", + "id": "red-delicious-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 2.7281746031746033 + }, + "_rankingScore": 0.9093915343915344 + }, + { + "name": "Exclusive sale: green apple", + "id": "green-apple-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 1.318181818181818 + }, + "_rankingScore": 0.4393939393939394 + }, + { + "name": "Red apple gala", + "id": "red-apple-gala", + "_federation": { + "indexUid": "fruits", + "queriesPosition": 1, + "weightedRankingScore": 0.0 + }, + "_rankingScore": 0.953042328042328 + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); +} + +#[actix_rt::test] +async fn 
federation_federated_contains_pagination() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + // fail when a federated query contains "limit" + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "limit": 5}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `limit` from query #1 or remove `federation: {}` from the request", + "code": "invalid_multi_search_query_pagination", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" + } + "###); + // fail when a federated query contains "offset" + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "offset": 5}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `offset` from query #1 or remove `federation: {}` from the request", + "code": "invalid_multi_search_query_pagination", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" + } + "###); + // fail when a federated query contains "page" + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "page": 2}, + ]})) 
+ .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `page` from query #1 or remove `federation: {}` from the request", + "code": "invalid_multi_search_query_pagination", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" + } + "###); + // fail when a federated query contains "hitsPerPage" + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "hitsPerPage": 5}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `hitsPerPage` from query #1 or remove `federation: {}` from the request", + "code": "invalid_multi_search_query_pagination", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" + } + "###); +} + +#[actix_rt::test] +async fn federation_non_federated_contains_federation_option() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + // fail when a non-federated query contains "federationOptions" + let (response, code) = server + .multi_search(json!({"queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "federationOptions": {}}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside 
`.queries[1]`: Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #1 or add `federation: {}` to the request.", + "code": "invalid_multi_search_federation_options", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_federation_options" + } + "###); +} + +#[actix_rt::test] +async fn federation_vector_single_index() { + let server = Server::new().await; + let (_, code) = server + .set_features(json!({ + "vectorStore": true + })) + .await; + + snapshot!(code, @"200 OK"); + + let index = server.index("vectors"); + + let (value, _) = index + .update_settings(json!({"embedders": { + "animal": { + "source": "userProvided", + "dimensions": 3 + }, + "sentiment": { + "source": "userProvided", + "dimensions": 2 + } + }})) + .await; + index.wait_task(value.uid()).await; + + let documents = VECTOR_DOCUMENTS.clone(); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await; + + // same embedder + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "vectors", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}}, + {"indexUid": "vectors", "vector": [0.5, 0.5, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "id": "B", + "description": "the kitten scratched the beagle", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 1, + "weightedRankingScore": 0.9870882034301758 + } + }, + { + "id": "D", + "description": "the little boy pets the puppy", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 0, + "weightedRankingScore": 0.9728479385375975 + } + }, + { + "id": "C", + "description": "the dog had 
to stay alone today", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 0, + "weightedRankingScore": 0.9701486229896544 + } + }, + { + "id": "A", + "description": "the dog barks at the cat", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 1, + "weightedRankingScore": 0.9191691875457764 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4, + "semanticHitCount": 4 + } + "###); + + // distinct embedder + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "vectors", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}}, + // joyful and energetic first + {"indexUid": "vectors", "vector": [0.8, 0.6], "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "id": "D", + "description": "the little boy pets the puppy", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 1, + "weightedRankingScore": 0.979868710041046 + } + }, + { + "id": "C", + "description": "the dog had to stay alone today", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 0, + "weightedRankingScore": 0.9701486229896544 + } + }, + { + "id": "B", + "description": "the kitten scratched the beagle", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 0, + "weightedRankingScore": 0.8601469993591309 + } + }, + { + "id": "A", + "description": "the dog barks at the cat", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 0, + "weightedRankingScore": 0.8432406187057495 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4, + "semanticHitCount": 4 + } + "###); + + // hybrid search, distinct embedder + let (response, code) = server + .multi_search(json!({"federation": {}, 
"queries": [ + {"indexUid" : "vectors", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "showRankingScore": true}, + // joyful and energetic first + {"indexUid": "vectors", "vector": [0.8, 0.6], "q": "beagle", "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"},"showRankingScore": true}, + {"indexUid": "vectors", "q": "dog", "showRankingScore": true}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "id": "D", + "description": "the little boy pets the puppy", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 1, + "weightedRankingScore": 0.979868710041046 + }, + "_rankingScore": "[score]" + }, + { + "id": "C", + "description": "the dog had to stay alone today", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 0, + "weightedRankingScore": 0.9701486229896544 + }, + "_rankingScore": "[score]" + }, + { + "id": "A", + "description": "the dog barks at the cat", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 2, + "weightedRankingScore": 0.9242424242424242 + }, + "_rankingScore": "[score]" + }, + { + "id": "B", + "description": "the kitten scratched the beagle", + "_federation": { + "indexUid": "vectors", + "queriesPosition": 0, + "weightedRankingScore": 0.8601469993591309 + }, + "_rankingScore": "[score]" + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4, + "semanticHitCount": 3 + } + "###); +} + +#[actix_rt::test] +async fn federation_vector_two_indexes() { + let server = Server::new().await; + let (_, code) = server + .set_features(json!({ + "vectorStore": true + })) + .await; + + snapshot!(code, @"200 OK"); + + let index = server.index("vectors-animal"); + + let (value, _) = index + .update_settings(json!({"embedders": { + "animal": { + "source": "userProvided", + "dimensions": 3 + }, + }})) + 
.await; + index.wait_task(value.uid()).await; + + let documents = VECTOR_DOCUMENTS.clone(); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await; + + let index = server.index("vectors-sentiment"); + + let (value, _) = index + .update_settings(json!({"embedders": { + "sentiment": { + "source": "userProvided", + "dimensions": 2 + } + }})) + .await; + index.wait_task(value.uid()).await; + + let documents = VECTOR_DOCUMENTS.clone(); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await; + + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}}, + // joyful and energetic first + {"indexUid": "vectors-sentiment", "vector": [0.8, 0.6], "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}}, + {"indexUid": "vectors-sentiment", "q": "dog"}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###" + { + "hits": [ + { + "id": "D", + "description": "the little boy pets the puppy", + "_vectors": { + "animal": [ + 0.8, + 0.09, + 0.8 + ] + }, + "_federation": { + "indexUid": "vectors-sentiment", + "queriesPosition": 1, + "weightedRankingScore": 0.979868710041046 + } + }, + { + "id": "D", + "description": "the little boy pets the puppy", + "_vectors": { + "sentiment": [ + 0.8, + 0.3 + ] + }, + "_federation": { + "indexUid": "vectors-animal", + "queriesPosition": 0, + "weightedRankingScore": 0.9728479385375975 + } + }, + { + "id": "C", + "description": "the dog had to stay alone today", + "_vectors": { + "sentiment": [ + -1.0, + 0.1 + ] + }, + "_federation": { + "indexUid": "vectors-animal", + "queriesPosition": 0, + "weightedRankingScore": 0.9701486229896544 
+ } + }, + { + "id": "A", + "description": "the dog barks at the cat", + "_vectors": { + "animal": [ + 0.9, + 0.8, + 0.05 + ] + }, + "_federation": { + "indexUid": "vectors-sentiment", + "queriesPosition": 2, + "weightedRankingScore": 0.9242424242424242 + } + }, + { + "id": "C", + "description": "the dog had to stay alone today", + "_vectors": { + "animal": [ + 0.85, + 0.02, + 0.1 + ] + }, + "_federation": { + "indexUid": "vectors-sentiment", + "queriesPosition": 2, + "weightedRankingScore": 0.9242424242424242 + } + }, + { + "id": "B", + "description": "the kitten scratched the beagle", + "_vectors": { + "sentiment": [ + -0.2, + 0.65 + ] + }, + "_federation": { + "indexUid": "vectors-animal", + "queriesPosition": 0, + "weightedRankingScore": 0.8601469993591309 + } + }, + { + "id": "A", + "description": "the dog barks at the cat", + "_vectors": { + "sentiment": [ + -0.1, + 0.55 + ] + }, + "_federation": { + "indexUid": "vectors-animal", + "queriesPosition": 0, + "weightedRankingScore": 0.8432406187057495 + } + }, + { + "id": "B", + "description": "the kitten scratched the beagle", + "_vectors": { + "animal": [ + 0.8, + 0.9, + 0.5 + ] + }, + "_federation": { + "indexUid": "vectors-sentiment", + "queriesPosition": 1, + "weightedRankingScore": 0.6690993905067444 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 8, + "semanticHitCount": 6 + } + "###); + + // hybrid search, distinct embedder + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "showRankingScore": true}, + {"indexUid": "vectors-sentiment", "vector": [-1, 0.6], "q": "beagle", "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "showRankingScore": true}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]", ".**._rankingScore" => 
"[score]" }, @r###" + { + "hits": [ + { + "id": "D", + "description": "the little boy pets the puppy", + "_vectors": { + "sentiment": [ + 0.8, + 0.3 + ] + }, + "_federation": { + "indexUid": "vectors-animal", + "queriesPosition": 0, + "weightedRankingScore": 0.9728479385375975 + }, + "_rankingScore": "[score]" + }, + { + "id": "C", + "description": "the dog had to stay alone today", + "_vectors": { + "sentiment": [ + -1.0, + 0.1 + ] + }, + "_federation": { + "indexUid": "vectors-animal", + "queriesPosition": 0, + "weightedRankingScore": 0.9701486229896544 + }, + "_rankingScore": "[score]" + }, + { + "id": "C", + "description": "the dog had to stay alone today", + "_vectors": { + "animal": [ + 0.85, + 0.02, + 0.1 + ] + }, + "_federation": { + "indexUid": "vectors-sentiment", + "queriesPosition": 1, + "weightedRankingScore": 0.9522157907485962 + }, + "_rankingScore": "[score]" + }, + { + "id": "B", + "description": "the kitten scratched the beagle", + "_vectors": { + "animal": [ + 0.8, + 0.9, + 0.5 + ] + }, + "_federation": { + "indexUid": "vectors-sentiment", + "queriesPosition": 1, + "weightedRankingScore": 0.8719604015350342 + }, + "_rankingScore": "[score]" + }, + { + "id": "B", + "description": "the kitten scratched the beagle", + "_vectors": { + "sentiment": [ + -0.2, + 0.65 + ] + }, + "_federation": { + "indexUid": "vectors-animal", + "queriesPosition": 0, + "weightedRankingScore": 0.8601469993591309 + }, + "_rankingScore": "[score]" + }, + { + "id": "A", + "description": "the dog barks at the cat", + "_vectors": { + "sentiment": [ + -0.1, + 0.55 + ] + }, + "_federation": { + "indexUid": "vectors-animal", + "queriesPosition": 0, + "weightedRankingScore": 0.8432406187057495 + }, + "_rankingScore": "[score]" + }, + { + "id": "A", + "description": "the dog barks at the cat", + "_vectors": { + "animal": [ + 0.9, + 0.8, + 0.05 + ] + }, + "_federation": { + "indexUid": "vectors-sentiment", + "queriesPosition": 1, + "weightedRankingScore": 0.8297949433326721 + }, + 
"_rankingScore": "[score]" + }, + { + "id": "D", + "description": "the little boy pets the puppy", + "_vectors": { + "animal": [ + 0.8, + 0.09, + 0.8 + ] + }, + "_federation": { + "indexUid": "vectors-sentiment", + "queriesPosition": 1, + "weightedRankingScore": 0.18887794017791748 + }, + "_rankingScore": "[score]" + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 8, + "semanticHitCount": 8 + } + "###); +} From 2cacc448b6ca2080694f763b300a5d0120c30ec4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:40:39 +0200 Subject: [PATCH 12/14] Rename src/search.rs -> src/search/mod.rs --- meilisearch/src/{search.rs => search/mod.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename meilisearch/src/{search.rs => search/mod.rs} (100%) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search/mod.rs similarity index 100% rename from meilisearch/src/search.rs rename to meilisearch/src/search/mod.rs From c35904d6e8e1fc0d4a79e51e32f7fcabec287c82 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Jul 2024 16:52:56 +0200 Subject: [PATCH 13/14] search::federated::ranking_rules -> search::ranking_rules --- meilisearch/src/search/federated.rs | 4 +--- meilisearch/src/search/mod.rs | 2 ++ meilisearch/src/search/{federated => }/ranking_rules.rs | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename meilisearch/src/search/{federated => }/ranking_rules.rs (100%) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index ebb1da7b8..a656f5538 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -19,7 +19,7 @@ use meilisearch_types::milli::{self, DocumentId, TimeBudget}; use roaring::RoaringBitmap; use serde::Serialize; -use self::ranking_rules::RankingRules; +use super::ranking_rules::{self, RankingRules}; use super::{ prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, 
SearchQueryWithIndex, @@ -27,8 +27,6 @@ use super::{ use crate::error::MeilisearchHttpError; use crate::routes::indexes::search::search_kind; -mod ranking_rules; - pub const DEFAULT_FEDERATED_WEIGHT: fn() -> f64 = || 1.0; #[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)] diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index ea4bbd038..2bb1b5774 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -34,6 +34,8 @@ use crate::error::MeilisearchHttpError; mod federated; pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions}; +mod ranking_rules; + type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>; pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0; diff --git a/meilisearch/src/search/federated/ranking_rules.rs b/meilisearch/src/search/ranking_rules.rs similarity index 100% rename from meilisearch/src/search/federated/ranking_rules.rs rename to meilisearch/src/search/ranking_rules.rs From 20094eba06d349f6867247e6c73cb4a4b56bb9c7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 15 Jul 2024 12:43:29 +0200 Subject: [PATCH 14/14] Apply review comments --- meilisearch/src/search/federated.rs | 6 ++---- meilisearch/src/search/ranking_rules.rs | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index a656f5538..6d445eb67 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -27,7 +27,7 @@ use super::{ use crate::error::MeilisearchHttpError; use crate::routes::indexes::search::search_kind; -pub const DEFAULT_FEDERATED_WEIGHT: fn() -> f64 = || 1.0; +pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0; #[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] @@ -42,7 +42,7 @@ pub struct Weight(f64); impl Default for Weight { fn default() -> Self { -
Weight(DEFAULT_FEDERATED_WEIGHT()) + Weight(DEFAULT_FEDERATED_WEIGHT) } } @@ -50,8 +50,6 @@ impl std::convert::TryFrom<f64> for Weight { type Error = InvalidMultiSearchWeight; fn try_from(f: f64) -> Result<Self, Self::Error> { - // the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable - #[allow(clippy::manual_range_contains)] if f < 0.0 { Err(InvalidMultiSearchWeight) } else { diff --git a/meilisearch/src/search/ranking_rules.rs b/meilisearch/src/search/ranking_rules.rs index d31c0ed35..7d7d3a1f7 100644 --- a/meilisearch/src/search/ranking_rules.rs +++ b/meilisearch/src/search/ranking_rules.rs @@ -125,7 +125,7 @@ impl CompatibilityError { | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort) | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy) | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => { - "cannot compare a relevancy rule with a sort rule" + "cannot compare a relevancy rule with a geosort rule" } (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort)