diff --git a/crates/dump/src/lib.rs b/crates/dump/src/lib.rs index 0fb5570b0..ad2d96e1c 100644 --- a/crates/dump/src/lib.rs +++ b/crates/dump/src/lib.rs @@ -229,7 +229,7 @@ pub(crate) mod test { use big_s::S; use maplit::{btreemap, btreeset}; use meilisearch_types::facet_values_sort::FacetValuesSort; - use meilisearch_types::features::RuntimeTogglableFeatures; + use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures}; use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::keys::{Action, Key}; use meilisearch_types::milli; @@ -455,6 +455,10 @@ pub(crate) mod test { dump.create_experimental_features(features).unwrap(); + // ========== network + let network = create_test_network(); + dump.create_network(network).unwrap(); + // create the dump let mut file = tempfile::tempfile().unwrap(); dump.persist_to(&mut file).unwrap(); @@ -467,6 +471,13 @@ pub(crate) mod test { RuntimeTogglableFeatures::default() } + fn create_test_network() -> Network { + Network { + local: Some("myself".to_string()), + remotes: maplit::btreemap! 
{"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }}, + } + } + #[test] fn test_creating_and_read_dump() { let mut file = create_test_dump(); @@ -515,5 +526,9 @@ pub(crate) mod test { // ==== checking the features let expected = create_test_features(); assert_eq!(dump.features().unwrap().unwrap(), expected); + + // ==== checking the network + let expected = create_test_network(); + assert_eq!(&expected, dump.network().unwrap().unwrap()); } } diff --git a/crates/dump/src/reader/compat/v5_to_v6.rs b/crates/dump/src/reader/compat/v5_to_v6.rs index 6b2655bdf..2dd4ed761 100644 --- a/crates/dump/src/reader/compat/v5_to_v6.rs +++ b/crates/dump/src/reader/compat/v5_to_v6.rs @@ -196,6 +196,10 @@ impl CompatV5ToV6 { pub fn features(&self) -> Result> { Ok(None) } + + pub fn network(&self) -> Result> { + Ok(None) + } } pub enum CompatIndexV5ToV6 { diff --git a/crates/dump/src/reader/mod.rs b/crates/dump/src/reader/mod.rs index 151267378..ec74fa4fd 100644 --- a/crates/dump/src/reader/mod.rs +++ b/crates/dump/src/reader/mod.rs @@ -23,6 +23,7 @@ mod v6; pub type Document = serde_json::Map; pub type UpdateFile = dyn Iterator>; +#[allow(clippy::large_enum_variant)] pub enum DumpReader { Current(V6Reader), Compat(CompatV5ToV6), @@ -114,6 +115,13 @@ impl DumpReader { DumpReader::Compat(compat) => compat.features(), } } + + pub fn network(&self) -> Result> { + match self { + DumpReader::Current(current) => Ok(current.network()), + DumpReader::Compat(compat) => compat.network(), + } + } } impl From for DumpReader { @@ -328,6 +336,7 @@ pub(crate) mod test { } assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default()); + assert_eq!(dump.network().unwrap(), None); } #[test] @@ -373,6 +382,27 @@ pub(crate) mod test { assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default()); } + #[test] + fn import_dump_v6_network() { + let dump = 
File::open("tests/assets/v6-with-network.dump").unwrap(); + let dump = DumpReader::open(dump).unwrap(); + + // top level infos + insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00"); + insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); + + // network + + let network = dump.network().unwrap().unwrap(); + insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0"); + insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700"); + insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true"); + insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701"); + insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().search_api_key.is_none(), @"true"); + insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().url, @"http://ms-5679.example.meilisearch.io"); + insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo"); + } + #[test] fn import_dump_v5() { let dump = File::open("tests/assets/v5.dump").unwrap(); diff --git a/crates/dump/src/reader/v6/mod.rs b/crates/dump/src/reader/v6/mod.rs index 50b9751a2..4c05f16bf 100644 --- a/crates/dump/src/reader/v6/mod.rs +++ b/crates/dump/src/reader/v6/mod.rs @@ -20,6 +20,7 @@ pub type Unchecked = meilisearch_types::settings::Unchecked; pub type Task = crate::TaskDump; pub type Key = meilisearch_types::keys::Key; pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures; +pub type Network = meilisearch_types::features::Network; // ===== Other types to clarify the code of the compat module // everything related to the tasks @@ -50,6 +51,7 @@ pub struct V6Reader { tasks: BufReader, keys: BufReader, features: Option, + network: Option, } impl V6Reader { @@ -78,12 +80,30 @@ impl V6Reader { None }; + let network_file = match 
fs::read(dump.path().join("network.json")) { + Ok(network_file) => Some(network_file), + Err(error) => match error.kind() { + // Allows the file to be missing, this will only result in no network configuration being imported. + ErrorKind::NotFound => { + debug!("`network.json` not found in dump"); + None + } + _ => return Err(error.into()), + }, + }; + let network = if let Some(network_file) = network_file { + Some(serde_json::from_reader(&*network_file)?) + } else { + None + }; + Ok(V6Reader { metadata: serde_json::from_reader(&*meta_file)?, instance_uid, tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), features, + network, dump, }) } @@ -154,6 +174,10 @@ impl V6Reader { pub fn features(&self) -> Option { self.features } + + pub fn network(&self) -> Option<&Network> { + self.network.as_ref() + } } pub struct UpdateFile { diff --git a/crates/dump/src/writer.rs b/crates/dump/src/writer.rs index 3ee51cabf..923147c63 100644 --- a/crates/dump/src/writer.rs +++ b/crates/dump/src/writer.rs @@ -4,7 +4,7 @@ use std::path::PathBuf; use flate2::write::GzEncoder; use flate2::Compression; -use meilisearch_types::features::RuntimeTogglableFeatures; +use meilisearch_types::features::{Network, RuntimeTogglableFeatures}; use meilisearch_types::keys::Key; use meilisearch_types::settings::{Checked, Settings}; use serde_json::{Map, Value}; @@ -61,6 +61,10 @@ impl DumpWriter { )?) } + pub fn create_network(&self, network: Network) -> Result<()> { + Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?) 
+ } + pub fn persist_to(self, mut writer: impl Write) -> Result<()> { let gz_encoder = GzEncoder::new(&mut writer, Compression::default()); let mut tar_encoder = tar::Builder::new(gz_encoder); @@ -295,7 +299,8 @@ pub(crate) mod test { ├---- experimental-features.json ├---- instance_uid.uuid ├---- keys.jsonl - └---- metadata.json + ├---- metadata.json + └---- network.json "###); // ==== checking the top level infos diff --git a/crates/dump/tests/assets/v6-with-network.dump b/crates/dump/tests/assets/v6-with-network.dump new file mode 100644 index 000000000..4d0d9ddc9 Binary files /dev/null and b/crates/dump/tests/assets/v6-with-network.dump differ diff --git a/crates/index-scheduler/src/features.rs b/crates/index-scheduler/src/features.rs index c6c17b2d5..5dbe70444 100644 --- a/crates/index-scheduler/src/features.rs +++ b/crates/index-scheduler/src/features.rs @@ -1,6 +1,6 @@ use std::sync::{Arc, RwLock}; -use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; +use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::{Database, Env, RwTxn}; @@ -14,10 +14,16 @@ mod db_name { pub const EXPERIMENTAL_FEATURES: &str = "experimental-features"; } +mod db_keys { + pub const EXPERIMENTAL_FEATURES: &str = "experimental-features"; + pub const NETWORK: &str = "network"; +} + #[derive(Clone)] pub(crate) struct FeatureData { persisted: Database>, runtime: Arc>, + network: Arc>, } #[derive(Debug, Clone, Copy)] @@ -86,6 +92,19 @@ impl RoFeatures { .into()) } } + + pub fn check_network(&self, disabled_action: &'static str) -> Result<()> { + if self.runtime.network { + Ok(()) + } else { + Err(FeatureNotEnabledError { + disabled_action, + feature: "network", + issue_link: "https://github.com/orgs/meilisearch/discussions/805", + } + .into()) + } + } } impl FeatureData { @@ -102,7 +121,7 @@ impl FeatureData { 
env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?; let persisted_features: RuntimeTogglableFeatures = - runtime_features_db.get(wtxn, db_name::EXPERIMENTAL_FEATURES)?.unwrap_or_default(); + runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default(); let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features; let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures { metrics: metrics || persisted_features.metrics, @@ -111,7 +130,14 @@ impl FeatureData { ..persisted_features })); - Ok(Self { persisted: runtime_features_db, runtime }) + let network_db = runtime_features_db.remap_data_type::>(); + let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default(); + + Ok(Self { + persisted: runtime_features_db, + runtime, + network: Arc::new(RwLock::new(network)), + }) } pub fn put_runtime_features( @@ -119,7 +145,7 @@ impl FeatureData { mut wtxn: RwTxn, features: RuntimeTogglableFeatures, ) -> Result<()> { - self.persisted.put(&mut wtxn, db_name::EXPERIMENTAL_FEATURES, &features)?; + self.persisted.put(&mut wtxn, db_keys::EXPERIMENTAL_FEATURES, &features)?; wtxn.commit()?; // safe to unwrap, the lock will only fail if: @@ -140,4 +166,21 @@ impl FeatureData { pub fn features(&self) -> RoFeatures { RoFeatures::new(self) } + + pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> { + self.persisted.remap_data_type::>().put( + &mut wtxn, + db_keys::NETWORK, + &new_network, + )?; + wtxn.commit()?; + + let mut network = self.network.write().unwrap(); + *network = new_network; + Ok(()) + } + + pub fn network(&self) -> Network { + Network::clone(&*self.network.read().unwrap()) + } } diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index 530b7bedc..0f8212470 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -51,7 +51,7 @@ pub use features::RoFeatures; use flate2::bufread::GzEncoder; use 
flate2::Compression; use meilisearch_types::batches::Batch; -use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; +use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; use meilisearch_types::heed::byteorder::BE; use meilisearch_types::heed::types::I128; use meilisearch_types::heed::{self, Env, RoTxn}; @@ -770,7 +770,16 @@ impl IndexScheduler { Ok(()) } - // TODO: consider using a type alias or a struct embedder/template + pub fn put_network(&self, network: Network) -> Result<()> { + let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; + self.features.put_network(wtxn, network)?; + Ok(()) + } + + pub fn network(&self) -> Network { + self.features.network() + } + pub fn embedders( &self, index_uid: String, diff --git a/crates/index-scheduler/src/queue/test.rs b/crates/index-scheduler/src/queue/test.rs index eb3314496..3dbdd2db3 100644 --- a/crates/index-scheduler/src/queue/test.rs +++ b/crates/index-scheduler/src/queue/test.rs @@ -326,7 +326,7 @@ fn test_auto_deletion_of_tasks() { fn test_task_queue_is_full() { let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { // that's the minimum map size possible - config.task_db_size = 1048576; + config.task_db_size = 1048576 * 3; None }); diff --git a/crates/index-scheduler/src/scheduler/process_dump_creation.rs b/crates/index-scheduler/src/scheduler/process_dump_creation.rs index 09c1020ac..adf5a5b61 100644 --- a/crates/index-scheduler/src/scheduler/process_dump_creation.rs +++ b/crates/index-scheduler/src/scheduler/process_dump_creation.rs @@ -219,6 +219,8 @@ impl IndexScheduler { progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); let features = self.features().runtime_features(); dump.create_experimental_features(features)?; + let network = self.network(); + dump.create_network(network)?; let dump_uid = started_at.format(format_description!( "[year repr:full][month 
repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" diff --git a/crates/meilisearch-types/src/deserr/mod.rs b/crates/meilisearch-types/src/deserr/mod.rs index 3c5e0fcf8..f5ad18d5c 100644 --- a/crates/meilisearch-types/src/deserr/mod.rs +++ b/crates/meilisearch-types/src/deserr/mod.rs @@ -193,6 +193,8 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError); merge_with_error_impl_take_error_message!(ParseTaskStatusError); merge_with_error_impl_take_error_message!(IndexUidFormatError); merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight); +merge_with_error_impl_take_error_message!(InvalidNetworkUrl); +merge_with_error_impl_take_error_message!(InvalidNetworkSearchApiKey); merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold); merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold); diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index 8caeb70c2..5acc8aa27 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -260,7 +260,13 @@ InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; +InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ; +InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ; +InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ; +InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , 
InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ; @@ -351,12 +357,19 @@ MissingDocumentId , InvalidRequest , BAD_REQUEST ; MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; MissingIndexUid , InvalidRequest , BAD_REQUEST ; MissingMasterKey , Auth , UNAUTHORIZED ; +MissingNetworkUrl , InvalidRequest , BAD_REQUEST ; MissingPayload , InvalidRequest , BAD_REQUEST ; MissingSearchHybrid , InvalidRequest , BAD_REQUEST ; MissingSwapIndexes , InvalidRequest , BAD_REQUEST ; MissingTaskFilters , InvalidRequest , BAD_REQUEST ; NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY; PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ; +RemoteBadResponse , System , BAD_GATEWAY ; +RemoteBadRequest , InvalidRequest , BAD_REQUEST ; +RemoteCouldNotSendRequest , System , BAD_GATEWAY ; +RemoteInvalidApiKey , Auth , FORBIDDEN ; +RemoteRemoteError , System , BAD_GATEWAY ; +RemoteTimeout , System , BAD_GATEWAY ; TooManySearchRequests , System , SERVICE_UNAVAILABLE ; TaskNotFound , InvalidRequest , NOT_FOUND ; BatchNotFound , InvalidRequest , NOT_FOUND ; @@ -583,6 +596,18 @@ impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold { } } +impl fmt::Display for deserr_codes::InvalidNetworkUrl { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "the value of `url` is invalid, expected a string.") + } +} + +impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "the value of `searchApiKey` is invalid, expected a string.") + } +} + #[macro_export] macro_rules! 
internal_error { ($target:ty : $($other:path), *) => { diff --git a/crates/meilisearch-types/src/features.rs b/crates/meilisearch-types/src/features.rs index ba67f996b..a11e39aa6 100644 --- a/crates/meilisearch-types/src/features.rs +++ b/crates/meilisearch-types/src/features.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] @@ -7,6 +9,7 @@ pub struct RuntimeTogglableFeatures { pub logs_route: bool, pub edit_documents_by_function: bool, pub contains_filter: bool, + pub network: bool, } #[derive(Default, Debug, Clone, Copy)] @@ -15,3 +18,20 @@ pub struct InstanceTogglableFeatures { pub logs_route: bool, pub contains_filter: bool, } + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct Remote { + pub url: String, + #[serde(default)] + pub search_api_key: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)] +#[serde(rename_all = "camelCase")] +pub struct Network { + #[serde(default, rename = "self")] + pub local: Option, + #[serde(default)] + pub remotes: BTreeMap, +} diff --git a/crates/meilisearch-types/src/index_uid.rs b/crates/meilisearch-types/src/index_uid.rs index 4bf126794..87efd261c 100644 --- a/crates/meilisearch-types/src/index_uid.rs +++ b/crates/meilisearch-types/src/index_uid.rs @@ -4,13 +4,14 @@ use std::fmt; use std::str::FromStr; use deserr::Deserr; +use serde::Serialize; use utoipa::ToSchema; use crate::error::{Code, ErrorCode}; /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// bytes long -#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, ToSchema)] +#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, Serialize, ToSchema)] #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] #[schema(value_type = String, example = "movies")] pub struct 
IndexUid(String); diff --git a/crates/meilisearch-types/src/keys.rs b/crates/meilisearch-types/src/keys.rs index 8fcbab14d..27f2047ee 100644 --- a/crates/meilisearch-types/src/keys.rs +++ b/crates/meilisearch-types/src/keys.rs @@ -302,6 +302,12 @@ pub enum Action { #[serde(rename = "experimental.update")] #[deserr(rename = "experimental.update")] ExperimentalFeaturesUpdate, + #[serde(rename = "network.get")] + #[deserr(rename = "network.get")] + NetworkGet, + #[serde(rename = "network.update")] + #[deserr(rename = "network.update")] + NetworkUpdate, } impl Action { @@ -341,6 +347,8 @@ impl Action { KEYS_DELETE => Some(Self::KeysDelete), EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet), EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate), + NETWORK_GET => Some(Self::NetworkGet), + NETWORK_UPDATE => Some(Self::NetworkUpdate), _otherwise => None, } } @@ -386,4 +394,7 @@ pub mod actions { pub const KEYS_DELETE: u8 = KeysDelete.repr(); pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr(); pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr(); + + pub const NETWORK_GET: u8 = NetworkGet.repr(); + pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr(); } diff --git a/crates/meilisearch/src/analytics/segment_analytics.rs b/crates/meilisearch/src/analytics/segment_analytics.rs index a09d2ead3..388644884 100644 --- a/crates/meilisearch/src/analytics/segment_analytics.rs +++ b/crates/meilisearch/src/analytics/segment_analytics.rs @@ -196,6 +196,7 @@ struct Infos { experimental_reduce_indexing_memory_usage: bool, experimental_max_number_of_batched_tasks: usize, experimental_limit_batched_tasks_total_size: u64, + experimental_network: bool, gpu_enabled: bool, db_path: bool, import_dump: bool, @@ -286,6 +287,7 @@ impl Infos { logs_route, edit_documents_by_function, contains_filter, + network, } = features; // We're going to override every sensible information. 
@@ -303,6 +305,7 @@ impl Infos { experimental_replication_parameters, experimental_enable_logs_route: experimental_enable_logs_route | logs_route, experimental_reduce_indexing_memory_usage, + experimental_network: network, gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), db_path: db_path != PathBuf::from("./data.ms"), import_dump: import_dump.is_some(), diff --git a/crates/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs index 4d41c63ea..cbd299f26 100644 --- a/crates/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -431,10 +431,13 @@ fn import_dump( keys.push(key); } - // 3. Import the runtime features. + // 3. Import the runtime features and network let features = dump_reader.features()?.unwrap_or_default(); index_scheduler.put_runtime_features(features)?; + let network = dump_reader.network()?.cloned().unwrap_or_default(); + index_scheduler.put_network(network)?; + let indexer_config = index_scheduler.indexer_config(); // /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might diff --git a/crates/meilisearch/src/routes/features.rs b/crates/meilisearch/src/routes/features.rs index f46bda5a0..e30bc8e8e 100644 --- a/crates/meilisearch/src/routes/features.rs +++ b/crates/meilisearch/src/routes/features.rs @@ -50,6 +50,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { logs_route: Some(false), edit_documents_by_function: Some(false), contains_filter: Some(false), + network: Some(false), })), (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( { @@ -88,6 +89,8 @@ pub struct RuntimeTogglableFeatures { pub edit_documents_by_function: Option, #[deserr(default)] pub contains_filter: Option, + #[deserr(default)] + pub network: Option, } impl From for RuntimeTogglableFeatures { @@ -97,6 +100,7 @@ impl From for RuntimeTogg logs_route, edit_documents_by_function, contains_filter, + network, } = value; Self { @@ -104,6 
+108,7 @@ impl From for RuntimeTogg logs_route: Some(logs_route), edit_documents_by_function: Some(edit_documents_by_function), contains_filter: Some(contains_filter), + network: Some(network), } } } @@ -114,6 +119,7 @@ pub struct PatchExperimentalFeatureAnalytics { logs_route: bool, edit_documents_by_function: bool, contains_filter: bool, + network: bool, } impl Aggregate for PatchExperimentalFeatureAnalytics { @@ -127,6 +133,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { logs_route: new.logs_route, edit_documents_by_function: new.edit_documents_by_function, contains_filter: new.contains_filter, + network: new.network, }) } @@ -149,6 +156,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { logs_route: Some(false), edit_documents_by_function: Some(false), contains_filter: Some(false), + network: Some(false), })), (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( { @@ -181,16 +189,18 @@ async fn patch_features( .edit_documents_by_function .unwrap_or(old_features.edit_documents_by_function), contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter), + network: new_features.0.network.unwrap_or(old_features.network), }; // explicitly destructure for analytics rather than using the `Serialize` implementation, because - // the it renames to camelCase, which we don't want for analytics. + // it renames to camelCase, which we don't want for analytics. // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future. 
let meilisearch_types::features::RuntimeTogglableFeatures { metrics, logs_route, edit_documents_by_function, contains_filter, + network, } = new_features; analytics.publish( @@ -199,6 +209,7 @@ async fn patch_features( logs_route, edit_documents_by_function, contains_filter, + network, }, &req, ); diff --git a/crates/meilisearch/src/routes/mod.rs b/crates/meilisearch/src/routes/mod.rs index bd7c6d981..65a12b692 100644 --- a/crates/meilisearch/src/routes/mod.rs +++ b/crates/meilisearch/src/routes/mod.rs @@ -34,6 +34,7 @@ use crate::routes::features::RuntimeTogglableFeatures; use crate::routes::indexes::documents::{DocumentDeletionByFilter, DocumentEditionByFunction}; use crate::routes::indexes::IndexView; use crate::routes::multi_search::SearchResults; +use crate::routes::network::{Network, Remote}; use crate::routes::swap_indexes::SwapIndexesPayload; use crate::search::{ FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, @@ -54,6 +55,7 @@ mod logs; mod metrics; mod multi_search; mod multi_search_analytics; +pub mod network; mod open_api_utils; mod snapshot; mod swap_indexes; @@ -75,6 +77,7 @@ pub mod tasks; (path = "/multi-search", api = multi_search::MultiSearchApi), (path = "/swap-indexes", api = swap_indexes::SwapIndexesApi), (path = "/experimental-features", api = features::ExperimentalFeaturesApi), + (path = "/network", api = network::NetworkApi), ), paths(get_health, get_version, get_stats), tags( @@ -85,7 +88,7 @@ pub mod tasks; url = "/", description = "Local server", )), - components(schemas(PaginationView, PaginationView, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView, BrowseQuery, UpdateIndexRequest, IndexUid, 
IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings, Settings, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind)) + components(schemas(PaginationView, PaginationView, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings, Settings, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote)) )] pub struct MeilisearchApi; @@ -103,7 +106,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::scope("/multi-search").configure(multi_search::configure)) .service(web::scope("/swap-indexes").configure(swap_indexes::configure)) .service(web::scope("/metrics").configure(metrics::configure)) - .service(web::scope("/experimental-features").configure(features::configure)); + .service(web::scope("/experimental-features").configure(features::configure)) + .service(web::scope("/network").configure(network::configure)); #[cfg(feature = "swagger")] { diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index fcc3cd700..b3af98fd5 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -20,6 
+20,7 @@ use crate::routes::indexes::search::search_kind; use crate::search::{ add_search_rules, perform_federated_search, perform_search, FederatedSearch, FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, + PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, }; use crate::search_queue::SearchQueue; @@ -48,6 +49,7 @@ pub struct SearchResults { /// Bundle multiple search queries in a single API request. Use this endpoint to search through multiple indexes at once. #[utoipa::path( post, + request_body = FederatedSearch, path = "", tag = "Multi-search", security(("Bearer" = ["search", "*"])), @@ -186,18 +188,22 @@ pub async fn multi_search_with_post( let response = match federation { Some(federation) => { - let search_result = tokio::task::spawn_blocking(move || { - perform_federated_search(&index_scheduler, queries, federation, features) - }) - .await; + // check remote header + let is_proxy = req + .headers() + .get(PROXY_SEARCH_HEADER) + .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes()); + let search_result = + perform_federated_search(&index_scheduler, queries, federation, features, is_proxy) + .await; permit.drop().await; - if let Ok(Ok(_)) = search_result { + if search_result.is_ok() { multi_aggregate.succeed(); } analytics.publish(multi_aggregate, &req); - HttpResponse::Ok().json(search_result??) + HttpResponse::Ok().json(search_result?) 
} None => { // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, diff --git a/crates/meilisearch/src/routes/multi_search_analytics.rs b/crates/meilisearch/src/routes/multi_search_analytics.rs index 3d07f471c..3fa23f630 100644 --- a/crates/meilisearch/src/routes/multi_search_analytics.rs +++ b/crates/meilisearch/src/routes/multi_search_analytics.rs @@ -13,6 +13,8 @@ pub struct MultiSearchAggregator { // sum of the number of distinct indexes in each single request, use with total_received to compute an avg total_distinct_index_count: usize, + // sum of the number of distinct remotes in each single request, use with total_received to compute an avg + total_distinct_remote_count: usize, // number of queries with a single index, use with total_received to compute a proportion total_single_index: usize, @@ -31,46 +33,49 @@ impl MultiSearchAggregator { pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { let use_federation = federated_search.federation.is_some(); - let distinct_indexes: HashSet<_> = federated_search - .queries - .iter() - .map(|query| { - let query = &query; - // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex - let SearchQueryWithIndex { - index_uid, - federation_options: _, - q: _, - vector: _, - offset: _, - limit: _, - page: _, - hits_per_page: _, - attributes_to_retrieve: _, - retrieve_vectors: _, - attributes_to_crop: _, - crop_length: _, - attributes_to_highlight: _, - show_ranking_score: _, - show_ranking_score_details: _, - show_matches_position: _, - filter: _, - sort: _, - distinct: _, - facets: _, - highlight_pre_tag: _, - highlight_post_tag: _, - crop_marker: _, - matching_strategy: _, - attributes_to_search_on: _, - hybrid: _, - ranking_score_threshold: _, - locales: _, - } = query; + let mut distinct_indexes = HashSet::with_capacity(federated_search.queries.len()); + let mut distinct_remotes = 
HashSet::with_capacity(federated_search.queries.len()); - index_uid.as_str() - }) - .collect(); + // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex + for SearchQueryWithIndex { + index_uid, + federation_options, + q: _, + vector: _, + offset: _, + limit: _, + page: _, + hits_per_page: _, + attributes_to_retrieve: _, + retrieve_vectors: _, + attributes_to_crop: _, + crop_length: _, + attributes_to_highlight: _, + show_ranking_score: _, + show_ranking_score_details: _, + show_matches_position: _, + filter: _, + sort: _, + distinct: _, + facets: _, + highlight_pre_tag: _, + highlight_post_tag: _, + crop_marker: _, + matching_strategy: _, + attributes_to_search_on: _, + hybrid: _, + ranking_score_threshold: _, + locales: _, + } in &federated_search.queries + { + if let Some(federation_options) = federation_options { + if let Some(remote) = &federation_options.remote { + distinct_remotes.insert(remote.as_str()); + } + } + + distinct_indexes.insert(index_uid.as_str()); + } let show_ranking_score = federated_search.queries.iter().any(|query| query.show_ranking_score); @@ -81,6 +86,7 @@ impl MultiSearchAggregator { total_received: 1, total_succeeded: 0, total_distinct_index_count: distinct_indexes.len(), + total_distinct_remote_count: distinct_remotes.len(), total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, total_search_count: federated_search.queries.len(), show_ranking_score, @@ -110,6 +116,8 @@ impl Aggregate for MultiSearchAggregator { let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded); let total_distinct_index_count = this.total_distinct_index_count.saturating_add(new.total_distinct_index_count); + let total_distinct_remote_count = + this.total_distinct_remote_count.saturating_add(new.total_distinct_remote_count); let total_single_index = this.total_single_index.saturating_add(new.total_single_index); let total_search_count = 
this.total_search_count.saturating_add(new.total_search_count); let show_ranking_score = this.show_ranking_score || new.show_ranking_score; @@ -121,6 +129,7 @@ impl Aggregate for MultiSearchAggregator { total_received, total_succeeded, total_distinct_index_count, + total_distinct_remote_count, total_single_index, total_search_count, show_ranking_score, @@ -134,6 +143,7 @@ impl Aggregate for MultiSearchAggregator { total_received, total_succeeded, total_distinct_index_count, + total_distinct_remote_count, total_single_index, total_search_count, show_ranking_score, @@ -152,6 +162,10 @@ impl Aggregate for MultiSearchAggregator { "total_distinct_index_count": total_distinct_index_count, "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early }, + "remotes": { + "total_distinct_remote_count": total_distinct_remote_count, + "avg_distinct_remote_count": (total_distinct_remote_count as f64) / (total_received as f64), // not 0 else returned early + }, "searches": { "total_search_count": total_search_count, "avg_search_count": (total_search_count as f64) / (total_received as f64), diff --git a/crates/meilisearch/src/routes/network.rs b/crates/meilisearch/src/routes/network.rs new file mode 100644 index 000000000..458ae8cbf --- /dev/null +++ b/crates/meilisearch/src/routes/network.rs @@ -0,0 +1,261 @@ +use std::collections::BTreeMap; + +use actix_web::web::{self, Data}; +use actix_web::{HttpRequest, HttpResponse}; +use deserr::actix_web::AwebJson; +use deserr::Deserr; +use index_scheduler::IndexScheduler; +use itertools::{EitherOrBoth, Itertools}; +use meilisearch_types::deserr::DeserrJsonError; +use meilisearch_types::error::deserr_codes::{ + InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkUrl, +}; +use meilisearch_types::error::ResponseError; +use meilisearch_types::features::{Network as DbNetwork, Remote as DbRemote}; +use meilisearch_types::keys::actions; +use 
meilisearch_types::milli::update::Setting; +use serde::Serialize; +use tracing::debug; +use utoipa::{OpenApi, ToSchema}; + +use crate::analytics::{Aggregate, Analytics}; +use crate::extractors::authentication::policies::ActionPolicy; +use crate::extractors::authentication::GuardedData; +use crate::extractors::sequential_extractor::SeqHandler; + +#[derive(OpenApi)] +#[openapi( + paths(get_network, patch_network), + tags(( + name = "Network", + description = "The `/network` route allows you to describe the topology of a network of Meilisearch instances. + +This route is **synchronous**. This means that no task object will be returned, and any change to the network will be made available immediately.", + external_docs(url = "https://www.meilisearch.com/docs/reference/api/network"), + )), +)] +pub struct NetworkApi; + +pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service( + web::resource("") + .route(web::get().to(get_network)) + .route(web::patch().to(SeqHandler(patch_network))), + ); +} + +/// Get network topology +/// +/// Get a list of all Meilisearch instances currently known to this instance. +#[utoipa::path( + get, + path = "", + tag = "Network", + security(("Bearer" = ["network.get", "network.*", "*"])), + responses( + (status = OK, description = "Known nodes are returned", body = Network, content_type = "application/json", example = json!( + { + "self": "ms-0", + "remotes": { + "ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset }, + "ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) }, + "ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) }, + } + })), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] +async fn get_network( + index_scheduler: GuardedData, Data>, +) -> Result { + index_scheduler.features().check_network("Using the /network route")?; + + let network = index_scheduler.network(); + debug!(returns = ?network, "Get network"); + Ok(HttpResponse::Ok().json(network)) +} + +#[derive(Debug, Deserr, ToSchema, Serialize)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] +pub struct Remote { + #[schema(value_type = Option, example = json!({ + "ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset }, + "ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) }, + "ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) }, + }))] + #[deserr(default, error = DeserrJsonError)] + #[serde(default)] + pub url: Setting, + #[schema(value_type = Option, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))] + #[deserr(default, error = DeserrJsonError)] + #[serde(default)] + pub search_api_key: Setting, +} + +#[derive(Debug, Deserr, ToSchema, Serialize)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] +pub struct Network { + #[schema(value_type = Option>, example = json!("http://localhost:7700"))] + #[deserr(default, error = DeserrJsonError)] + #[serde(default)] + pub remotes: Setting>>, + #[schema(value_type = Option, example = json!("ms-00"), rename = "self")] + #[serde(default, rename = "self")] + #[deserr(default, rename = "self", error = DeserrJsonError)] + pub local: Setting, +} + +impl Remote { + 
pub fn try_into_db_node(self, name: &str) -> Result { + Ok(DbRemote { + url: self.url.set().ok_or(ResponseError::from_msg( + format!("Missing field `.remotes.{name}.url`"), + meilisearch_types::error::Code::MissingNetworkUrl, + ))?, + search_api_key: self.search_api_key.set(), + }) + } +} + +#[derive(Serialize)] +pub struct PatchNetworkAnalytics { + network_size: usize, + network_has_self: bool, +} + +impl Aggregate for PatchNetworkAnalytics { + fn event_name(&self) -> &'static str { + "Network Updated" + } + + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { network_size: new.network_size, network_has_self: new.network_has_self }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + +/// Configure Network +/// +/// Add or remove nodes from network. +#[utoipa::path( + patch, + path = "", + tag = "Network", + request_body = Network, + security(("Bearer" = ["network.update", "network.*", "*"])), + responses( + (status = OK, description = "New network state is returned", body = Network, content_type = "application/json", example = json!( + { + "self": "ms-0", + "remotes": { + "ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset }, + "ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) }, + "ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) }, + } + })), + (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + )), + ) +)] +async fn patch_network( + index_scheduler: GuardedData, Data>, + new_network: AwebJson, + req: HttpRequest, + analytics: Data, +) -> Result { + index_scheduler.features().check_network("Using the /network route")?; + + let new_network = new_network.0; + let old_network = index_scheduler.network(); + debug!(parameters = ?new_network, "Patch network"); + + let merged_self = match new_network.local { + Setting::Set(new_self) => Some(new_self), + Setting::Reset => None, + Setting::NotSet => old_network.local, + }; + + let merged_remotes = match new_network.remotes { + Setting::Set(new_remotes) => { + let mut merged_remotes = BTreeMap::new(); + for either_or_both in old_network + .remotes + .into_iter() + .merge_join_by(new_remotes.into_iter(), |left, right| left.0.cmp(&right.0)) + { + match either_or_both { + EitherOrBoth::Both((key, old), (_, Some(new))) => { + let DbRemote { url: old_url, search_api_key: old_search_api_key } = old; + + let Remote { url: new_url, search_api_key: new_search_api_key } = new; + + let merged = DbRemote { + url: match new_url { + Setting::Set(new_url) => new_url, + Setting::Reset => { + return Err(ResponseError::from_msg( + format!( + "Field `.remotes.{key}.url` cannot be set to `null`" + ), + meilisearch_types::error::Code::InvalidNetworkUrl, + )) + } + Setting::NotSet => old_url, + }, + search_api_key: match new_search_api_key { + Setting::Set(new_search_api_key) => Some(new_search_api_key), + Setting::Reset => None, + Setting::NotSet => old_search_api_key, + }, + }; + merged_remotes.insert(key, merged); + } + EitherOrBoth::Both((_, _), (_, None)) | EitherOrBoth::Right((_, None)) => {} + EitherOrBoth::Left((key, node)) => { + merged_remotes.insert(key, node); + } + EitherOrBoth::Right((key, Some(node))) => { + let node = 
node.try_into_db_node(&key)?; + merged_remotes.insert(key, node); + } + } + } + merged_remotes + } + Setting::Reset => BTreeMap::new(), + Setting::NotSet => old_network.remotes, + }; + + analytics.publish( + PatchNetworkAnalytics { + network_size: merged_remotes.len(), + network_has_self: merged_self.is_some(), + }, + &req, + ); + + let merged_network = DbNetwork { local: merged_self, remotes: merged_remotes }; + index_scheduler.put_network(merged_network.clone())?; + debug!(returns = ?merged_network, "Patch network"); + Ok(HttpResponse::Ok().json(merged_network)) +} diff --git a/crates/meilisearch/src/search/federated.rs b/crates/meilisearch/src/search/federated.rs deleted file mode 100644 index 1b3fa3b26..000000000 --- a/crates/meilisearch/src/search/federated.rs +++ /dev/null @@ -1,923 +0,0 @@ -use std::cmp::Ordering; -use std::collections::BTreeMap; -use std::fmt; -use std::iter::Zip; -use std::rc::Rc; -use std::str::FromStr as _; -use std::time::Duration; -use std::vec::{IntoIter, Vec}; - -use actix_http::StatusCode; -use index_scheduler::{IndexScheduler, RoFeatures}; -use indexmap::IndexMap; -use meilisearch_types::deserr::DeserrJsonError; -use meilisearch_types::error::deserr_codes::{ - InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, - InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit, - InvalidSearchOffset, -}; -use meilisearch_types::error::ResponseError; -use meilisearch_types::index_uid::IndexUid; -use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; -use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget}; -use roaring::RoaringBitmap; -use serde::Serialize; -use utoipa::ToSchema; - -use super::ranking_rules::{self, RankingRules}; -use super::{ - compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats, - HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, -}; -use 
crate::error::MeilisearchHttpError; -use crate::routes::indexes::search::search_kind; - -pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0; - -#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr, ToSchema)] -#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] -pub struct FederationOptions { - #[deserr(default, error = DeserrJsonError)] - #[schema(value_type = f64)] - pub weight: Weight, -} - -#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] -#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)] -pub struct Weight(f64); - -impl Default for Weight { - fn default() -> Self { - Weight(DEFAULT_FEDERATED_WEIGHT) - } -} - -impl std::convert::TryFrom for Weight { - type Error = InvalidMultiSearchWeight; - - fn try_from(f: f64) -> Result { - if f < 0.0 { - Err(InvalidMultiSearchWeight) - } else { - Ok(Weight(f)) - } - } -} - -impl std::ops::Deref for Weight { - type Target = f64; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -#[derive(Debug, deserr::Deserr, ToSchema)] -#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] -#[schema(rename_all = "camelCase")] -pub struct Federation { - #[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] - pub limit: usize, - #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] - pub offset: usize, - #[deserr(default, error = DeserrJsonError)] - pub facets_by_index: BTreeMap>>, - #[deserr(default, error = DeserrJsonError)] - pub merge_facets: Option, -} - -#[derive(Copy, Clone, Debug, deserr::Deserr, Default, ToSchema)] -#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] -#[schema(rename_all = "camelCase")] -pub struct MergeFacets { - #[deserr(default, error = DeserrJsonError)] - pub max_values_per_facet: Option, -} - -#[derive(Debug, deserr::Deserr, ToSchema)] -#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] -#[schema(rename_all = 
"camelCase")] -pub struct FederatedSearch { - pub queries: Vec, - #[deserr(default)] - pub federation: Option, -} - -#[derive(Serialize, Clone, ToSchema)] -#[serde(rename_all = "camelCase")] -#[schema(rename_all = "camelCase")] -pub struct FederatedSearchResult { - pub hits: Vec, - pub processing_time_ms: u128, - #[serde(flatten)] - pub hits_info: HitsInfo, - - #[serde(skip_serializing_if = "Option::is_none")] - pub semantic_hit_count: Option, - - #[serde(skip_serializing_if = "Option::is_none")] - #[schema(value_type = Option>>)] - pub facet_distribution: Option>>, - #[serde(skip_serializing_if = "Option::is_none")] - pub facet_stats: Option>, - #[serde(skip_serializing_if = "FederatedFacets::is_empty")] - pub facets_by_index: FederatedFacets, - - // These fields are only used for analytics purposes - #[serde(skip)] - pub degraded: bool, - #[serde(skip)] - pub used_negative_operator: bool, -} - -impl fmt::Debug for FederatedSearchResult { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let FederatedSearchResult { - hits, - processing_time_ms, - hits_info, - semantic_hit_count, - degraded, - used_negative_operator, - facet_distribution, - facet_stats, - facets_by_index, - } = self; - - let mut debug = f.debug_struct("SearchResult"); - // The most important thing when looking at a search result is the time it took to process - debug.field("processing_time_ms", &processing_time_ms); - debug.field("hits", &format!("[{} hits returned]", hits.len())); - debug.field("hits_info", &hits_info); - if *used_negative_operator { - debug.field("used_negative_operator", used_negative_operator); - } - if *degraded { - debug.field("degraded", degraded); - } - if let Some(facet_distribution) = facet_distribution { - debug.field("facet_distribution", &facet_distribution); - } - if let Some(facet_stats) = facet_stats { - debug.field("facet_stats", &facet_stats); - } - if let Some(semantic_hit_count) = semantic_hit_count { - debug.field("semantic_hit_count", 
&semantic_hit_count); - } - if !facets_by_index.is_empty() { - debug.field("facets_by_index", &facets_by_index); - } - - debug.finish() - } -} - -struct WeightedScore<'a> { - details: &'a [ScoreDetails], - weight: f64, -} - -impl<'a> WeightedScore<'a> { - pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self { - Self { details, weight } - } - - pub fn weighted_global_score(&self) -> f64 { - ScoreDetails::global_score(self.details.iter()) * self.weight - } - - pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering { - self.weighted_global_score() - .partial_cmp(&other.weighted_global_score()) - // both are numbers, possibly infinite - .unwrap() - } - - pub fn compare(&self, other: &Self) -> Ordering { - let mut left_it = ScoreDetails::score_values(self.details.iter()); - let mut right_it = ScoreDetails::score_values(other.details.iter()); - - loop { - let left = left_it.next(); - let right = right_it.next(); - - match (left, right) { - (None, None) => return Ordering::Equal, - (None, Some(_)) => return Ordering::Less, - (Some(_), None) => return Ordering::Greater, - (Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => { - let left = left * self.weight; - let right = right * other.weight; - if (left - right).abs() <= f64::EPSILON { - continue; - } - return left.partial_cmp(&right).unwrap(); - } - (Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => { - match left.partial_cmp(right) { - Some(Ordering::Equal) => continue, - Some(order) => return order, - None => return self.compare_weighted_global_scores(other), - } - } - (Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => { - match left.partial_cmp(right) { - Some(Ordering::Equal) => continue, - Some(order) => return order, - None => { - return self.compare_weighted_global_scores(other); - } - } - } - // not comparable details, use global - (Some(ScoreValue::Score(_)), Some(_)) - | (Some(_), Some(ScoreValue::Score(_))) - | 
(Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_))) - | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => { - let left_count = left_it.count(); - let right_count = right_it.count(); - // compare how many remaining groups of rules each side has. - // the group with the most remaining groups wins. - return left_count - .cmp(&right_count) - // breaks ties with the global ranking score - .then_with(|| self.compare_weighted_global_scores(other)); - } - } - } - } -} - -struct QueryByIndex { - query: SearchQuery, - federation_options: FederationOptions, - query_index: usize, -} - -struct SearchResultByQuery<'a> { - documents_ids: Vec, - document_scores: Vec>, - federation_options: FederationOptions, - hit_maker: HitMaker<'a>, - query_index: usize, -} - -struct SearchResultByQueryIter<'a> { - it: Zip, IntoIter>>, - federation_options: FederationOptions, - hit_maker: Rc>, - query_index: usize, -} - -impl<'a> SearchResultByQueryIter<'a> { - fn new( - SearchResultByQuery { - documents_ids, - document_scores, - federation_options, - hit_maker, - query_index, - }: SearchResultByQuery<'a>, - ) -> Self { - let it = documents_ids.into_iter().zip(document_scores); - Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index } - } -} - -struct SearchResultByQueryIterItem<'a> { - docid: DocumentId, - score: Vec, - federation_options: FederationOptions, - hit_maker: Rc>, - query_index: usize, -} - -fn merge_index_local_results( - results_by_query: Vec>, -) -> impl Iterator + '_ { - itertools::kmerge_by( - results_by_query.into_iter().map(SearchResultByQueryIter::new), - |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| { - let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); - let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); - - match left_score.compare(&right_score) { - // the biggest score goes first - Ordering::Greater => true, - // break ties using query index 
- Ordering::Equal => left.query_index < right.query_index, - Ordering::Less => false, - } - }, - ) -} - -fn merge_index_global_results( - results_by_index: Vec, -) -> impl Iterator { - itertools::kmerge_by( - results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()), - |left: &SearchHitByIndex, right: &SearchHitByIndex| { - let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); - let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); - - match left_score.compare(&right_score) { - // the biggest score goes first - Ordering::Greater => true, - // break ties using query index - Ordering::Equal => left.query_index < right.query_index, - Ordering::Less => false, - } - }, - ) -} - -impl<'a> Iterator for SearchResultByQueryIter<'a> { - type Item = SearchResultByQueryIterItem<'a>; - - fn next(&mut self) -> Option { - let (docid, score) = self.it.next()?; - Some(SearchResultByQueryIterItem { - docid, - score, - federation_options: self.federation_options, - hit_maker: Rc::clone(&self.hit_maker), - query_index: self.query_index, - }) - } -} - -struct SearchHitByIndex { - hit: SearchHit, - score: Vec, - federation_options: FederationOptions, - query_index: usize, -} - -struct SearchResultByIndex { - index: String, - hits: Vec, - estimated_total_hits: usize, - degraded: bool, - used_negative_operator: bool, - facets: Option, -} - -#[derive(Debug, Clone, Default, Serialize, ToSchema)] -pub struct FederatedFacets(pub BTreeMap); - -impl FederatedFacets { - pub fn insert(&mut self, index: String, facets: Option) { - if let Some(facets) = facets { - self.0.insert(index, facets); - } - } - - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } - - pub fn merge( - self, - MergeFacets { max_values_per_facet }: MergeFacets, - facet_order: BTreeMap, - ) -> Option { - if self.is_empty() { - return None; - } - - let mut distribution: BTreeMap = Default::default(); - let mut stats: BTreeMap = 
Default::default(); - - for facets_by_index in self.0.into_values() { - for (facet, index_distribution) in facets_by_index.distribution { - match distribution.entry(facet) { - std::collections::btree_map::Entry::Vacant(entry) => { - entry.insert(index_distribution); - } - std::collections::btree_map::Entry::Occupied(mut entry) => { - let distribution = entry.get_mut(); - - for (value, index_count) in index_distribution { - distribution - .entry(value) - .and_modify(|count| *count += index_count) - .or_insert(index_count); - } - } - } - } - - for (facet, index_stats) in facets_by_index.stats { - match stats.entry(facet) { - std::collections::btree_map::Entry::Vacant(entry) => { - entry.insert(index_stats); - } - std::collections::btree_map::Entry::Occupied(mut entry) => { - let stats = entry.get_mut(); - - stats.min = f64::min(stats.min, index_stats.min); - stats.max = f64::max(stats.max, index_stats.max); - } - } - } - } - - // fixup order - for (facet, values) in &mut distribution { - let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default(); - - match order_by { - OrderBy::Lexicographic => { - values.sort_unstable_by(|left, _, right, _| left.cmp(right)) - } - OrderBy::Count => { - values.sort_unstable_by(|_, left, _, right| { - left.cmp(right) - // biggest first - .reverse() - }) - } - } - - if let Some(max_values_per_facet) = max_values_per_facet { - values.truncate(max_values_per_facet) - }; - } - - Some(ComputedFacets { distribution, stats }) - } -} - -pub fn perform_federated_search( - index_scheduler: &IndexScheduler, - queries: Vec, - mut federation: Federation, - features: RoFeatures, -) -> Result { - let before_search = std::time::Instant::now(); - - // this implementation partition the queries by index to guarantee an important property: - // - all the queries to a particular index use the same read transaction. - // This is an important property, otherwise we cannot guarantee the self-consistency of the results. - - // 1. 
partition queries by index - let mut queries_by_index: BTreeMap> = Default::default(); - for (query_index, federated_query) in queries.into_iter().enumerate() { - if let Some(pagination_field) = federated_query.has_pagination() { - return Err(MeilisearchHttpError::PaginationInFederatedQuery( - query_index, - pagination_field, - ) - .into()); - } - - if let Some(facets) = federated_query.has_facets() { - let facets = facets.to_owned(); - return Err(MeilisearchHttpError::FacetsInFederatedQuery( - query_index, - federated_query.index_uid.into_inner(), - facets, - ) - .into()); - } - - let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); - - queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { - query, - federation_options: federation_options.unwrap_or_default(), - query_index, - }) - } - - // 2. perform queries, merge and make hits index by index - let required_hit_count = federation.limit + federation.offset; - - // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic - // Then in step (3), we'll update its value if there is any semantic search - let mut semantic_hit_count = None; - let mut results_by_index = Vec::with_capacity(queries_by_index.len()); - let mut previous_query_data: Option<(RankingRules, usize, String)> = None; - - // remember the order and name of first index for each facet when merging with index settings - // to detect if the order is inconsistent for a facet. - let mut facet_order: Option> = match federation.merge_facets - { - Some(MergeFacets { .. 
}) => Some(Default::default()), - _ => None, - }; - - for (index_uid, queries) in queries_by_index { - let first_query_index = queries.first().map(|query| query.query_index); - - let index = match index_scheduler.index(&index_uid) { - Ok(index) => index, - Err(err) => { - let mut err = ResponseError::from(err); - // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but - // here the resource not found is not part of the URL. - err.code = StatusCode::BAD_REQUEST; - if let Some(query_index) = first_query_index { - err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message); - } - return Err(err); - } - }; - - // Important: this is the only transaction we'll use for this index during this federated search - let rtxn = index.read_txn()?; - - let criteria = index.criteria(&rtxn)?; - - let dictionary = index.dictionary(&rtxn)?; - let dictionary: Option> = - dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); - let separators = index.allowed_separators(&rtxn)?; - let separators: Option> = - separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); - - // each query gets its individual cutoff - let cutoff = index.search_cutoff(&rtxn)?; - - let mut degraded = false; - let mut used_negative_operator = false; - let mut candidates = RoaringBitmap::new(); - - let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten(); - - // TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries - if let Err(mut error) = - check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn) - { - error.message = format!( - "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", - if let Some(query_index) = first_query_index { - format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") - } else { - Default::default() - } - ); - return Err(error); - } - - // 2.1. 
Compute all candidates for each query in the index - let mut results_by_query = Vec::with_capacity(queries.len()); - - for QueryByIndex { query, federation_options, query_index } in queries { - // use an immediately invoked lambda to capture the result without returning from the function - - let res: Result<(), ResponseError> = (|| { - let search_kind = - search_kind(&query, index_scheduler, index_uid.to_string(), &index)?; - - let canonicalization_kind = match (&search_kind, &query.q) { - (SearchKind::SemanticOnly { .. }, _) => { - ranking_rules::CanonicalizationKind::Vector - } - (_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword, - _ => ranking_rules::CanonicalizationKind::Placeholder, - }; - - let sort = if let Some(sort) = &query.sort { - let sorts: Vec<_> = - match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { - Ok(sorts) => sorts, - Err(asc_desc_error) => { - return Err(milli::Error::from(milli::SortError::from( - asc_desc_error, - )) - .into()) - } - }; - Some(sorts) - } else { - None - }; - - let ranking_rules = ranking_rules::RankingRules::new( - criteria.clone(), - sort, - query.matching_strategy.into(), - canonicalization_kind, - ); - - if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) = - previous_query_data.take() - { - if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) { - return Err(error.to_response_error( - &ranking_rules, - &previous_ranking_rules, - query_index, - previous_query_index, - &index_uid, - &previous_index_uid, - )); - } - previous_query_data = if previous_ranking_rules.constraint_count() - > ranking_rules.constraint_count() - { - Some((previous_ranking_rules, previous_query_index, previous_index_uid)) - } else { - Some((ranking_rules, query_index, index_uid.clone())) - }; - } else { - previous_query_data = Some((ranking_rules, query_index, index_uid.clone())); - } - - match search_kind { - SearchKind::KeywordOnly => {} - _ => 
semantic_hit_count = Some(0), - } - - let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); - - let time_budget = match cutoff { - Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), - None => TimeBudget::default(), - }; - - let (mut search, _is_finite_pagination, _max_total_hits, _offset) = - prepare_search(&index, &rtxn, &query, &search_kind, time_budget, features)?; - - search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed); - search.offset(0); - search.limit(required_hit_count); - - let (result, _semantic_hit_count) = - super::search_from_kind(index_uid.to_string(), search_kind, search)?; - let format = AttributesFormat { - attributes_to_retrieve: query.attributes_to_retrieve, - retrieve_vectors, - attributes_to_highlight: query.attributes_to_highlight, - attributes_to_crop: query.attributes_to_crop, - crop_length: query.crop_length, - crop_marker: query.crop_marker, - highlight_pre_tag: query.highlight_pre_tag, - highlight_post_tag: query.highlight_post_tag, - show_matches_position: query.show_matches_position, - sort: query.sort, - show_ranking_score: query.show_ranking_score, - show_ranking_score_details: query.show_ranking_score_details, - locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()), - }; - - let milli::SearchResult { - matching_words, - candidates: query_candidates, - documents_ids, - document_scores, - degraded: query_degraded, - used_negative_operator: query_used_negative_operator, - } = result; - - candidates |= query_candidates; - degraded |= query_degraded; - used_negative_operator |= query_used_negative_operator; - - let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref()); - - let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); - - let hit_maker = - HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| { - MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())) - })?; - - 
results_by_query.push(SearchResultByQuery { - federation_options, - hit_maker, - query_index, - documents_ids, - document_scores, - }); - Ok(()) - })(); - - if let Err(mut error) = res { - error.message = format!("Inside `.queries[{query_index}]`: {}", error.message); - return Err(error); - } - } - // 2.2. merge inside index - let mut documents_seen = RoaringBitmap::new(); - let merged_result: Result, ResponseError> = - merge_index_local_results(results_by_query) - // skip documents we've already seen & mark that we saw the current document - .filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid)) - .take(required_hit_count) - // 2.3 make hits - .map( - |SearchResultByQueryIterItem { - docid, - score, - federation_options, - hit_maker, - query_index, - }| { - let mut hit = hit_maker.make_hit(docid, &score)?; - let weighted_score = - ScoreDetails::global_score(score.iter()) * (*federation_options.weight); - - let _federation = serde_json::json!( - { - "indexUid": index_uid, - "queriesPosition": query_index, - "weightedRankingScore": weighted_score, - } - ); - hit.document.insert("_federation".to_string(), _federation); - Ok(SearchHitByIndex { hit, score, federation_options, query_index }) - }, - ) - .collect(); - - let merged_result = merged_result?; - - let estimated_total_hits = candidates.len() as usize; - - let facets = facets_by_index - .map(|facets_by_index| { - compute_facet_distribution_stats( - &facets_by_index, - &index, - &rtxn, - candidates, - super::Route::MultiSearch, - ) - }) - .transpose() - .map_err(|mut error| { - error.message = format!( - "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", - error.message, - if let Some(query_index) = first_query_index { - format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") - } else { - Default::default() - } - ); - error - })?; - - results_by_index.push(SearchResultByIndex { - index: index_uid, - hits: merged_result, - estimated_total_hits, - degraded, - 
used_negative_operator, - facets, - }); - } - - // bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index. - for (index_uid, facets) in federation.facets_by_index { - let index = match index_scheduler.index(&index_uid) { - Ok(index) => index, - Err(err) => { - let mut err = ResponseError::from(err); - // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but - // here the resource not found is not part of the URL. - err.code = StatusCode::BAD_REQUEST; - err.message = format!( - "Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", - err.message - ); - return Err(err); - } - }; - - // Important: this is the only transaction we'll use for this index during this federated search - let rtxn = index.read_txn()?; - - if let Err(mut error) = - check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn) - { - error.message = format!( - "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries", - ); - return Err(error); - } - - if let Some(facets) = facets { - if let Err(mut error) = compute_facet_distribution_stats( - &facets, - &index, - &rtxn, - Default::default(), - super::Route::MultiSearch, - ) { - error.message = - format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message); - return Err(error); - } - } - } - - // 3. 
merge hits and metadata across indexes - // 3.1 merge metadata - let (estimated_total_hits, degraded, used_negative_operator, facets) = { - let mut estimated_total_hits = 0; - let mut degraded = false; - let mut used_negative_operator = false; - - let mut facets: FederatedFacets = FederatedFacets::default(); - - for SearchResultByIndex { - index, - hits: _, - estimated_total_hits: estimated_total_hits_by_index, - facets: facets_by_index, - degraded: degraded_by_index, - used_negative_operator: used_negative_operator_by_index, - } in &mut results_by_index - { - estimated_total_hits += *estimated_total_hits_by_index; - degraded |= *degraded_by_index; - used_negative_operator |= *used_negative_operator_by_index; - - let facets_by_index = std::mem::take(facets_by_index); - let index = std::mem::take(index); - - facets.insert(index, facets_by_index); - } - - (estimated_total_hits, degraded, used_negative_operator, facets) - }; - - // 3.2 merge hits - let merged_hits: Vec<_> = merge_index_global_results(results_by_index) - .skip(federation.offset) - .take(federation.limit) - .inspect(|hit| { - if let Some(semantic_hit_count) = &mut semantic_hit_count { - if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) { - *semantic_hit_count += 1; - } - } - }) - .map(|hit| hit.hit) - .collect(); - - let (facet_distribution, facet_stats, facets_by_index) = - match federation.merge_facets.zip(facet_order) { - Some((merge_facets, facet_order)) => { - let facets = facets.merge(merge_facets, facet_order); - - let (facet_distribution, facet_stats) = facets - .map(|ComputedFacets { distribution, stats }| (distribution, stats)) - .unzip(); - - (facet_distribution, facet_stats, FederatedFacets::default()) - } - None => (None, None, facets), - }; - - let search_result = FederatedSearchResult { - hits: merged_hits, - processing_time_ms: before_search.elapsed().as_millis(), - hits_info: HitsInfo::OffsetLimit { - limit: federation.limit, - offset: federation.offset, - 
estimated_total_hits, - }, - semantic_hit_count, - degraded, - used_negative_operator, - facet_distribution, - facet_stats, - facets_by_index, - }; - - Ok(search_result) -} - -fn check_facet_order( - facet_order: &mut Option>, - current_index: &str, - facets_by_index: &Option>, - index: &milli::Index, - rtxn: &milli::heed::RoTxn<'_>, -) -> Result<(), ResponseError> { - if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) { - let index_facet_order = index.sort_facet_values_by(rtxn)?; - for facet in facets_by_index { - let index_facet_order = index_facet_order.get(facet); - let (previous_index, previous_facet_order) = facet_order - .entry(facet.to_owned()) - .or_insert_with(|| (current_index.to_owned(), index_facet_order)); - if previous_facet_order != &index_facet_order { - return Err(MeilisearchHttpError::InconsistentFacetOrder { - facet: facet.clone(), - previous_facet_order: *previous_facet_order, - previous_uid: previous_index.clone(), - current_uid: current_index.to_owned(), - index_facet_order, - } - .into()); - } - } - }; - Ok(()) -} diff --git a/crates/meilisearch/src/search/federated/mod.rs b/crates/meilisearch/src/search/federated/mod.rs new file mode 100644 index 000000000..40204c591 --- /dev/null +++ b/crates/meilisearch/src/search/federated/mod.rs @@ -0,0 +1,10 @@ +mod perform; +mod proxy; +mod types; +mod weighted_scores; + +pub use perform::perform_federated_search; +pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE}; +pub use types::{ + FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, +}; diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs new file mode 100644 index 000000000..67ca0b845 --- /dev/null +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -0,0 +1,1099 @@ +use std::cmp::Ordering; +use std::collections::BTreeMap; +use std::iter::Zip; +use std::rc::Rc; +use std::str::FromStr as _; +use 
std::time::{Duration, Instant}; +use std::vec::{IntoIter, Vec}; + +use actix_http::StatusCode; +use index_scheduler::{IndexScheduler, RoFeatures}; +use itertools::Itertools; +use meilisearch_types::error::ResponseError; +use meilisearch_types::features::{Network, Remote}; +use meilisearch_types::milli::order_by_map::OrderByMap; +use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue}; +use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET}; +use roaring::RoaringBitmap; +use tokio::task::JoinHandle; + +use super::super::ranking_rules::{self, RankingRules}; +use super::super::{ + compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, HitMaker, + HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, +}; +use super::proxy::{proxy_search, ProxySearchError, ProxySearchParams}; +use super::types::{ + FederatedFacets, FederatedSearchResult, Federation, FederationOptions, MergeFacets, Weight, + FEDERATION_HIT, FEDERATION_REMOTE, WEIGHTED_SCORE_VALUES, +}; +use super::weighted_scores; +use crate::error::MeilisearchHttpError; +use crate::routes::indexes::search::search_kind; +use crate::search::federated::types::{INDEX_UID, QUERIES_POSITION, WEIGHTED_RANKING_SCORE}; + +pub async fn perform_federated_search( + index_scheduler: &IndexScheduler, + queries: Vec, + federation: Federation, + features: RoFeatures, + is_proxy: bool, +) -> Result { + if is_proxy { + features.check_network("Performing a remote federated search")?; + } + let before_search = std::time::Instant::now(); + let deadline = before_search + std::time::Duration::from_secs(9); + + let required_hit_count = federation.limit + federation.offset; + + let network = index_scheduler.network(); + + // this implementation partitions the queries by index to guarantee an important property: + // - all the queries to a particular index use the same read transaction.
+ // This is an important property, otherwise we cannot guarantee the self-consistency of the results. + + // 1. partition queries by host and index + let mut partitioned_queries = PartitionedQueries::new(); + for (query_index, federated_query) in queries.into_iter().enumerate() { + partitioned_queries.partition(federated_query, query_index, &network, features)? + } + + // 2. perform queries, merge and make hits index by index + // 2.1. start remote queries + let remote_search = + RemoteSearch::start(partitioned_queries.remote_queries_by_host, &federation, deadline); + + // 2.2. concurrently execute local queries + let params = SearchByIndexParams { + index_scheduler, + features, + is_proxy, + network: &network, + has_remote: partitioned_queries.has_remote, + required_hit_count, + }; + let mut search_by_index = SearchByIndex::new( + federation, + partitioned_queries.local_queries_by_index.len(), + params.has_remote, + ); + + for (index_uid, queries) in partitioned_queries.local_queries_by_index { + // note: this is the only place we open `index_uid` + search_by_index.execute(index_uid, queries, ¶ms)?; + } + + // bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index. + search_by_index.check_unused_facets(index_scheduler)?; + + let SearchByIndex { + federation, + mut semantic_hit_count, + mut results_by_index, + previous_query_data: _, + facet_order, + } = search_by_index; + + // 2.3. Wait for proxy search requests to complete + let (mut remote_results, remote_errors) = remote_search.finish().await; + + // 3. merge hits and metadata across indexes and hosts + // 3.1. merge metadata + let (estimated_total_hits, degraded, used_negative_operator, facets) = + merge_metadata(&mut results_by_index, &remote_results); + + // 3.2. 
merge hits + let merged_hits: Vec<_> = merge_index_global_results(results_by_index, &mut remote_results) + .skip(federation.offset) + .take(federation.limit) + .inspect(|hit| { + if let Some(semantic_hit_count) = &mut semantic_hit_count { + if hit.to_score().0.any(|score| matches!(&score, WeightedScoreValue::VectorSort(_))) + { + *semantic_hit_count += 1; + } + } + }) + .map(|hit| hit.hit()) + .collect(); + + // 3.3. merge facets + let (facet_distribution, facet_stats, facets_by_index) = + facet_order.merge(federation.merge_facets, remote_results, facets); + + Ok(FederatedSearchResult { + hits: merged_hits, + processing_time_ms: before_search.elapsed().as_millis(), + hits_info: HitsInfo::OffsetLimit { + limit: federation.limit, + offset: federation.offset, + estimated_total_hits, + }, + semantic_hit_count, + degraded, + used_negative_operator, + facet_distribution, + facet_stats, + facets_by_index, + remote_errors: partitioned_queries.has_remote.then_some(remote_errors), + }) +} + +struct QueryByIndex { + query: SearchQuery, + weight: Weight, + query_index: usize, +} + +struct SearchResultByQuery<'a> { + documents_ids: Vec, + document_scores: Vec>, + weight: Weight, + hit_maker: HitMaker<'a>, + query_index: usize, +} + +struct SearchResultByQueryIter<'a> { + it: Zip, IntoIter>>, + weight: Weight, + hit_maker: Rc>, + query_index: usize, +} + +impl<'a> SearchResultByQueryIter<'a> { + fn new( + SearchResultByQuery { + documents_ids, + document_scores, + weight, + hit_maker, + query_index, + }: SearchResultByQuery<'a>, + ) -> Self { + let it = documents_ids.into_iter().zip(document_scores); + Self { it, weight, hit_maker: Rc::new(hit_maker), query_index } + } +} + +struct SearchResultByQueryIterItem<'a> { + docid: DocumentId, + score: Vec, + weight: Weight, + hit_maker: Rc>, + query_index: usize, +} + +fn merge_index_local_results( + results_by_query: Vec>, +) -> impl Iterator + '_ { + itertools::kmerge_by( + 
results_by_query.into_iter().map(SearchResultByQueryIter::new), + |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| { + match weighted_scores::compare( + ScoreDetails::weighted_score_values(left.score.iter(), *left.weight), + ScoreDetails::global_score(left.score.iter()) * *left.weight, + ScoreDetails::weighted_score_values(right.score.iter(), *right.weight), + ScoreDetails::global_score(right.score.iter()) * *right.weight, + ) { + // the biggest score goes first + Ordering::Greater => true, + // break ties using query index + Ordering::Equal => left.query_index < right.query_index, + Ordering::Less => false, + } + }, + ) +} + +fn merge_index_global_results( + results_by_index: Vec, + remote_results: &mut [FederatedSearchResult], +) -> impl Iterator + '_ { + itertools::kmerge_by( + // local results + results_by_index + .into_iter() + .map(|result_by_index| { + either::Either::Left(result_by_index.hits.into_iter().map(MergedSearchHit::Local)) + }) + // remote results + .chain(remote_results.iter_mut().map(|x| either::Either::Right(iter_remote_hits(x)))), + |left: &MergedSearchHit, right: &MergedSearchHit| { + let (left_it, left_weighted_global_score, left_query_index) = left.to_score(); + let (right_it, right_weighted_global_score, right_query_index) = right.to_score(); + + match weighted_scores::compare( + left_it, + left_weighted_global_score, + right_it, + right_weighted_global_score, + ) { + // the biggest score goes first + Ordering::Greater => true, + // break ties using query index + Ordering::Equal => left_query_index < right_query_index, + Ordering::Less => false, + } + }, + ) +} + +enum MergedSearchHit { + Local(SearchHitByIndex), + Remote { + hit: SearchHit, + score: Vec, + global_weighted_score: f64, + query_index: usize, + }, +} + +impl MergedSearchHit { + fn remote(mut hit: SearchHit) -> Result { + let federation = hit + .document + .get_mut(FEDERATION_HIT) + .ok_or(ProxySearchError::MissingPathInResponse("._federation"))?; + 
let federation = match federation.as_object_mut() { + Some(federation) => federation, + None => { + return Err(ProxySearchError::UnexpectedValueInPath { + path: "._federation", + expected_type: "map", + received_value: federation.to_string(), + }); + } + }; + + let global_weighted_score = federation + .get(WEIGHTED_RANKING_SCORE) + .ok_or(ProxySearchError::MissingPathInResponse("._federation.weightedRankingScore"))?; + let global_weighted_score = global_weighted_score.as_f64().ok_or_else(|| { + ProxySearchError::UnexpectedValueInPath { + path: "._federation.weightedRankingScore", + expected_type: "number", + received_value: global_weighted_score.to_string(), + } + })?; + + let score: Vec = + serde_json::from_value(federation.remove(WEIGHTED_SCORE_VALUES).ok_or( + ProxySearchError::MissingPathInResponse("._federation.weightedScoreValues"), + )?) + .map_err(ProxySearchError::CouldNotParseWeightedScoreValues)?; + + let query_index = federation + .get(QUERIES_POSITION) + .ok_or(ProxySearchError::MissingPathInResponse("._federation.queriesPosition"))?; + let query_index = + query_index.as_u64().ok_or_else(|| ProxySearchError::UnexpectedValueInPath { + path: "._federation.queriesPosition", + expected_type: "integer", + received_value: query_index.to_string(), + })? as usize; + + Ok(Self::Remote { hit, score, global_weighted_score, query_index }) + } + + fn hit(self) -> SearchHit { + match self { + MergedSearchHit::Local(search_hit_by_index) => search_hit_by_index.hit, + MergedSearchHit::Remote { hit, .. 
} => hit, + } + } + + fn to_score(&self) -> (impl Iterator + '_, f64, usize) { + match self { + MergedSearchHit::Local(search_hit_by_index) => ( + either::Left(ScoreDetails::weighted_score_values( + search_hit_by_index.score.iter(), + *search_hit_by_index.weight, + )), + ScoreDetails::global_score(search_hit_by_index.score.iter()) + * *search_hit_by_index.weight, + search_hit_by_index.query_index, + ), + MergedSearchHit::Remote { hit: _, score, global_weighted_score, query_index } => { + let global_weighted_score = *global_weighted_score; + let query_index = *query_index; + (either::Right(score.iter().cloned()), global_weighted_score, query_index) + } + } + } +} + +fn iter_remote_hits( + results_by_host: &mut FederatedSearchResult, +) -> impl Iterator + '_ { + // have a per node registry of failed hits + results_by_host.hits.drain(..).filter_map(|hit| match MergedSearchHit::remote(hit) { + Ok(hit) => Some(hit), + Err(err) => { + tracing::warn!("skipping remote hit due to error: {err}"); + None + } + }) +} + +impl<'a> Iterator for SearchResultByQueryIter<'a> { + type Item = SearchResultByQueryIterItem<'a>; + + fn next(&mut self) -> Option { + let (docid, score) = self.it.next()?; + Some(SearchResultByQueryIterItem { + docid, + score, + weight: self.weight, + hit_maker: Rc::clone(&self.hit_maker), + query_index: self.query_index, + }) + } +} + +struct SearchHitByIndex { + hit: SearchHit, + score: Vec, + weight: Weight, + query_index: usize, +} + +struct SearchResultByIndex { + index: String, + hits: Vec, + estimated_total_hits: usize, + degraded: bool, + used_negative_operator: bool, + facets: Option, +} + +fn merge_metadata( + results_by_index: &mut Vec, + remote_results: &Vec, +) -> (usize, bool, bool, FederatedFacets) { + let mut estimated_total_hits = 0; + let mut degraded = false; + let mut used_negative_operator = false; + let mut facets: FederatedFacets = FederatedFacets::default(); + for SearchResultByIndex { + index, + hits: _, + estimated_total_hits: 
estimated_total_hits_by_index, + facets: facets_by_index, + degraded: degraded_by_index, + used_negative_operator: used_negative_operator_by_index, + } in results_by_index + { + estimated_total_hits += *estimated_total_hits_by_index; + degraded |= *degraded_by_index; + used_negative_operator |= *used_negative_operator_by_index; + + let facets_by_index = std::mem::take(facets_by_index); + let index = std::mem::take(index); + + facets.insert(index, facets_by_index); + } + for FederatedSearchResult { + hits: _, + processing_time_ms: _, + hits_info, + semantic_hit_count: _, + facet_distribution: _, + facet_stats: _, + facets_by_index: _, + degraded: degraded_for_host, + used_negative_operator: host_used_negative_operator, + remote_errors: _, + } in remote_results + { + estimated_total_hits += match hits_info { + HitsInfo::Pagination { total_hits: estimated_total_hits, .. } + | HitsInfo::OffsetLimit { estimated_total_hits, .. } => estimated_total_hits, + }; + // note that because `degraded` and `used_negative_operator` are #[serde(skip)], + // `degraded_for_host` and `host_used_negative_operator` will always be false. 
+ degraded |= degraded_for_host; + used_negative_operator |= host_used_negative_operator; + } + (estimated_total_hits, degraded, used_negative_operator, facets) +} + +type LocalQueriesByIndex = BTreeMap>; +type RemoteQueriesByHost = BTreeMap)>; + +struct PartitionedQueries { + local_queries_by_index: LocalQueriesByIndex, + remote_queries_by_host: RemoteQueriesByHost, + has_remote: bool, +} + +impl PartitionedQueries { + fn new() -> PartitionedQueries { + PartitionedQueries { + local_queries_by_index: Default::default(), + remote_queries_by_host: Default::default(), + has_remote: false, + } + } + + fn partition( + &mut self, + federated_query: SearchQueryWithIndex, + query_index: usize, + network: &Network, + features: RoFeatures, + ) -> Result<(), ResponseError> { + if let Some(pagination_field) = federated_query.has_pagination() { + return Err(MeilisearchHttpError::PaginationInFederatedQuery( + query_index, + pagination_field, + ) + .into()); + } + + if let Some(facets) = federated_query.has_facets() { + let facets = facets.to_owned(); + return Err(MeilisearchHttpError::FacetsInFederatedQuery( + query_index, + federated_query.index_uid.into_inner(), + facets, + ) + .into()); + } + + let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); + + let federation_options = federation_options.unwrap_or_default(); + + // local or remote node? 
+ 'local_query: { + let queries_by_index = match federation_options.remote { + None => self.local_queries_by_index.entry(index_uid.into_inner()).or_default(), + Some(remote_name) => { + self.has_remote = true; + features.check_network("Performing a remote federated search")?; + + match &network.local { + Some(local) if local == &remote_name => { + self.local_queries_by_index.entry(index_uid.into_inner()).or_default() + } + _ => { + // node from the network + let Some(remote) = network.remotes.get(&remote_name) else { + return Err(ResponseError::from_msg(format!("Invalid `queries[{query_index}].federation_options.remote`: remote `{remote_name}` is not registered"), + meilisearch_types::error::Code::InvalidMultiSearchRemote)); + }; + let query = SearchQueryWithIndex::from_index_query_federation( + index_uid, + query, + Some(FederationOptions { + weight: federation_options.weight, + // do not pass the `remote` to not require the remote instance to have itself as a local node + remote: None, + // pass an explicit query index + query_position: Some(query_index), + }), + ); + + self.remote_queries_by_host + .entry(remote_name) + .or_insert_with(|| (remote.clone(), Default::default())) + .1 + .push(query); + break 'local_query; + } + } + } + }; + + queries_by_index.push(QueryByIndex { + query, + weight: federation_options.weight, + // override query index here with the one in federation. + // this will fix-up error messages to refer to the global query index of the original request.
+ query_index: if let Some(query_index) = federation_options.query_position { + features.check_network("Using `federationOptions.queryPosition`")?; + query_index + } else { + query_index + }, + }) + } + Ok(()) + } +} + +struct RemoteSearch { + in_flight_remote_queries: + BTreeMap>>, +} + +impl RemoteSearch { + fn start(queries: RemoteQueriesByHost, federation: &Federation, deadline: Instant) -> Self { + let mut in_flight_remote_queries = BTreeMap::new(); + let client = reqwest::ClientBuilder::new() + .connect_timeout(std::time::Duration::from_millis(200)) + .build() + .unwrap(); + let params = + ProxySearchParams { deadline: Some(deadline), try_count: 3, client: client.clone() }; + for (node_name, (node, queries)) in queries { + // spawn one task per host + in_flight_remote_queries.insert( + node_name, + tokio::spawn({ + let mut proxy_federation = federation.clone(); + // fixup limit and offset to not apply them twice + proxy_federation.limit = federation.limit + federation.offset; + proxy_federation.offset = 0; + // never merge distant facets + proxy_federation.merge_facets = None; + let params = params.clone(); + async move { proxy_search(&node, queries, proxy_federation, ¶ms).await } + }), + ); + } + Self { in_flight_remote_queries } + } + + async fn finish(self) -> (Vec, BTreeMap) { + let mut remote_results = Vec::with_capacity(self.in_flight_remote_queries.len()); + let mut remote_errors: BTreeMap = BTreeMap::new(); + 'remote_queries: for (node_name, handle) in self.in_flight_remote_queries { + match handle.await { + Ok(Ok(mut res)) => { + for hit in &mut res.hits { + let Some(federation) = hit.document.get_mut(FEDERATION_HIT) else { + let error = ProxySearchError::MissingPathInResponse("._federation"); + remote_errors.insert(node_name, error.as_response_error()); + continue 'remote_queries; + }; + let Some(federation) = federation.as_object_mut() else { + let error = ProxySearchError::UnexpectedValueInPath { + path: "._federation", + expected_type: "map", + 
received_value: federation.to_string(), + }; + remote_errors.insert(node_name, error.as_response_error()); + continue 'remote_queries; + }; + if !federation.contains_key(WEIGHTED_SCORE_VALUES) { + let error = ProxySearchError::MissingPathInResponse( + "._federation.weightedScoreValues", + ); + remote_errors.insert(node_name, error.as_response_error()); + continue 'remote_queries; + } + + if !federation.contains_key(WEIGHTED_RANKING_SCORE) { + let error = ProxySearchError::MissingPathInResponse( + "._federation.weightedRankingScore", + ); + remote_errors.insert(node_name, error.as_response_error()); + continue 'remote_queries; + } + + federation.insert( + FEDERATION_REMOTE.to_string(), + serde_json::Value::String(node_name.clone()), + ); + } + + remote_results.push(res); + } + Ok(Err(error)) => { + remote_errors.insert(node_name, error.as_response_error()); + } + Err(panic) => match panic.try_into_panic() { + Ok(panic) => { + let msg = match panic.downcast_ref::<&'static str>() { + Some(s) => *s, + None => match panic.downcast_ref::() { + Some(s) => &s[..], + None => "Box", + }, + }; + remote_errors.insert( + node_name, + ResponseError::from_msg( + msg.to_string(), + meilisearch_types::error::Code::Internal, + ), + ); + } + Err(_) => tracing::error!("proxy search task was unexpectedly cancelled"), + }, + } + } + (remote_results, remote_errors) + } +} + +struct SearchByIndexParams<'a> { + index_scheduler: &'a IndexScheduler, + required_hit_count: usize, + features: RoFeatures, + is_proxy: bool, + has_remote: bool, + network: &'a Network, +} + +struct SearchByIndex { + federation: Federation, + // During search by index, semantic_hit_count will be set to Some(0) if any search kind uses semantic + // Then when merging, we'll update its value if there is any semantic hit + semantic_hit_count: Option, + results_by_index: Vec, + previous_query_data: Option<(RankingRules, usize, String)>, + // remember the order and name of first index for each facet when merging with 
index settings + // to detect if the order is inconsistent for a facet. + facet_order: FacetOrder, +} + +impl SearchByIndex { + fn new(federation: Federation, index_count: usize, has_remote: bool) -> Self { + SearchByIndex { + facet_order: match (federation.merge_facets, has_remote) { + (None, true) => FacetOrder::ByIndex(Default::default()), + (None, false) => FacetOrder::None, + (Some(_), _) => FacetOrder::ByFacet(Default::default()), + }, + federation, + semantic_hit_count: None, + results_by_index: Vec::with_capacity(index_count), + previous_query_data: None, + } + } + + fn execute( + &mut self, + index_uid: String, + queries: Vec, + params: &SearchByIndexParams<'_>, + ) -> Result<(), ResponseError> { + let first_query_index = queries.first().map(|query| query.query_index); + let index = match params.index_scheduler.index(&index_uid) { + Ok(index) => index, + Err(err) => { + let mut err = ResponseError::from(err); + // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but + // here the resource not found is not part of the URL. 
+ err.code = StatusCode::BAD_REQUEST; + if let Some(query_index) = first_query_index { + err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message); + } + return Err(err); + } + }; + let rtxn = index.read_txn()?; + let criteria = index.criteria(&rtxn)?; + let dictionary = index.dictionary(&rtxn)?; + let dictionary: Option> = + dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); + let separators = index.allowed_separators(&rtxn)?; + let separators: Option> = + separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); + let cutoff = index.search_cutoff(&rtxn)?; + let mut degraded = false; + let mut used_negative_operator = false; + let mut candidates = RoaringBitmap::new(); + let facets_by_index = self.federation.facets_by_index.remove(&index_uid).flatten(); + if let Err(mut error) = + self.facet_order.check_facet_order(&index_uid, &facets_by_index, &index, &rtxn) + { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", + if let Some(query_index) = first_query_index { + format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") + } else { + Default::default() + } + ); + return Err(error); + } + let mut results_by_query = Vec::with_capacity(queries.len()); + for QueryByIndex { query, weight, query_index } in queries { + // use an immediately invoked lambda to capture the result without returning from the function + + let res: Result<(), ResponseError> = (|| { + let search_kind = + search_kind(&query, params.index_scheduler, index_uid.to_string(), &index)?; + + let canonicalization_kind = match (&search_kind, &query.q) { + (SearchKind::SemanticOnly { .. 
}, _) => { + ranking_rules::CanonicalizationKind::Vector + } + (_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword, + _ => ranking_rules::CanonicalizationKind::Placeholder, + }; + + let sort = if let Some(sort) = &query.sort { + let sorts: Vec<_> = + match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { + Ok(sorts) => sorts, + Err(asc_desc_error) => { + return Err(milli::Error::from(milli::SortError::from( + asc_desc_error, + )) + .into()) + } + }; + Some(sorts) + } else { + None + }; + + let ranking_rules = ranking_rules::RankingRules::new( + criteria.clone(), + sort, + query.matching_strategy.into(), + canonicalization_kind, + ); + + if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) = + self.previous_query_data.take() + { + if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) { + return Err(error.to_response_error( + &ranking_rules, + &previous_ranking_rules, + query_index, + previous_query_index, + &index_uid, + &previous_index_uid, + )); + } + self.previous_query_data = if previous_ranking_rules.constraint_count() + > ranking_rules.constraint_count() + { + Some((previous_ranking_rules, previous_query_index, previous_index_uid)) + } else { + Some((ranking_rules, query_index, index_uid.clone())) + }; + } else { + self.previous_query_data = + Some((ranking_rules, query_index, index_uid.clone())); + } + + match search_kind { + SearchKind::KeywordOnly => {} + _ => self.semantic_hit_count = Some(0), + } + + let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); + + let time_budget = match cutoff { + Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), + None => TimeBudget::default(), + }; + + let (mut search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search( + &index, + &rtxn, + &query, + &search_kind, + time_budget, + params.features, + )?; + + search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed); + 
search.offset(0); + search.limit(params.required_hit_count); + + let (result, _semantic_hit_count) = + super::super::search_from_kind(index_uid.to_string(), search_kind, search)?; + let format = AttributesFormat { + attributes_to_retrieve: query.attributes_to_retrieve, + retrieve_vectors, + attributes_to_highlight: query.attributes_to_highlight, + attributes_to_crop: query.attributes_to_crop, + crop_length: query.crop_length, + crop_marker: query.crop_marker, + highlight_pre_tag: query.highlight_pre_tag, + highlight_post_tag: query.highlight_post_tag, + show_matches_position: query.show_matches_position, + sort: query.sort, + show_ranking_score: query.show_ranking_score, + show_ranking_score_details: query.show_ranking_score_details, + locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()), + }; + + let milli::SearchResult { + matching_words, + candidates: query_candidates, + documents_ids, + document_scores, + degraded: query_degraded, + used_negative_operator: query_used_negative_operator, + } = result; + + candidates |= query_candidates; + degraded |= query_degraded; + used_negative_operator |= query_used_negative_operator; + + let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref()); + + let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); + + let hit_maker = + HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| { + MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())) + })?; + + results_by_query.push(SearchResultByQuery { + weight, + hit_maker, + query_index, + documents_ids, + document_scores, + }); + Ok(()) + })(); + + if let Err(mut error) = res { + error.message = format!("Inside `.queries[{query_index}]`: {}", error.message); + return Err(error); + } + } + let mut documents_seen = RoaringBitmap::new(); + let merged_result: Result, ResponseError> = + merge_index_local_results(results_by_query) + // skip documents we've already seen & mark that we saw the 
current document + .filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid)) + .take(params.required_hit_count) + // 2.3 make hits + .map( + |SearchResultByQueryIterItem { + docid, + score, + weight, + hit_maker, + query_index, + }| { + let mut hit = hit_maker.make_hit(docid, &score)?; + let weighted_score = ScoreDetails::global_score(score.iter()) * (*weight); + + let mut _federation = serde_json::json!( + { + INDEX_UID: index_uid, + QUERIES_POSITION: query_index, + WEIGHTED_RANKING_SCORE: weighted_score, + } + ); + if params.has_remote && !params.is_proxy { + _federation.as_object_mut().unwrap().insert( + FEDERATION_REMOTE.to_string(), + params.network.local.clone().into(), + ); + } + if params.is_proxy { + _federation.as_object_mut().unwrap().insert( + WEIGHTED_SCORE_VALUES.to_string(), + serde_json::json!(ScoreDetails::weighted_score_values( + score.iter(), + *weight + ) + .collect_vec()), + ); + } + hit.document.insert(FEDERATION_HIT.to_string(), _federation); + Ok(SearchHitByIndex { hit, score, weight, query_index }) + }, + ) + .collect(); + let merged_result = merged_result?; + let estimated_total_hits = candidates.len() as usize; + let facets = facets_by_index + .map(|facets_by_index| { + compute_facet_distribution_stats( + &facets_by_index, + &index, + &rtxn, + candidates, + super::super::Route::MultiSearch, + ) + }) + .transpose() + .map_err(|mut error| { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", + error.message, + if let Some(query_index) = first_query_index { + format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") + } else { + Default::default() + } + ); + error + })?; + self.results_by_index.push(SearchResultByIndex { + index: index_uid, + hits: merged_result, + estimated_total_hits, + degraded, + used_negative_operator, + facets, + }); + Ok(()) + } + + fn check_unused_facets( + &mut self, + index_scheduler: &IndexScheduler, + ) -> Result<(), ResponseError> { + 
for (index_uid, facets) in std::mem::take(&mut self.federation.facets_by_index) { + let index = match index_scheduler.index(&index_uid) { + Ok(index) => index, + Err(err) => { + let mut err = ResponseError::from(err); + // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but + // here the resource not found is not part of the URL. + err.code = StatusCode::BAD_REQUEST; + err.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", + err.message + ); + return Err(err); + } + }; + + // Important: this is the only transaction we'll use for this index during this federated search + let rtxn = index.read_txn()?; + + if let Err(mut error) = + self.facet_order.check_facet_order(&index_uid, &facets, &index, &rtxn) + { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries", + ); + return Err(error); + } + + if let Some(facets) = facets { + if let Err(mut error) = compute_facet_distribution_stats( + &facets, + &index, + &rtxn, + Default::default(), + super::super::Route::MultiSearch, + ) { + error.message = + format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message); + return Err(error); + } + } + } + Ok(()) + } +} + +enum FacetOrder { + /// The order is stored by facet to be able to merge facets regardless of index of origin + /// + /// - key: facet name + /// - value: (first_index_name, first_index_order) + /// + /// We store the name of the first index where the facet is present as well as its order, + /// so that if encountering the same facet in a different index we can compare the order and send + /// a readable error. + ByFacet(BTreeMap), + /// The order is stored by index to be able to merge facets regardless of the remote of origin. 
+ /// + /// This variant is only used when `is_remote = true`, and always used in that case. + /// + /// - key: index name + /// - value: (order_by_map, max_values_per_facet) + /// + /// We store a map of the order per facet for that index, as well as the max values per facet. + /// Both are retrieved from the settings of the local version of the index. + /// + /// It is not possible to have an index only existing in the remotes, because as of now all indexes that appear + /// in `federation.facetsByIndex` must exist on all hosts. + ByIndex(BTreeMap), + /// Do not merge facets. Used when `federation.mergeFacets = null` and `!has_remote` + None, +} + +type FacetDistributions = BTreeMap>; +type FacetStats = BTreeMap; + +impl FacetOrder { + fn check_facet_order( + &mut self, + current_index: &str, + facets_by_index: &Option>, + index: &milli::Index, + rtxn: &milli::heed::RoTxn<'_>, + ) -> Result<(), ResponseError> { + match self { + FacetOrder::ByFacet(facet_order) => { + if let Some(facets_by_index) = facets_by_index { + let index_facet_order = index.sort_facet_values_by(rtxn)?; + for facet in facets_by_index { + let index_facet_order = index_facet_order.get(facet); + let (previous_index, previous_facet_order) = facet_order + .entry(facet.to_owned()) + .or_insert_with(|| (current_index.to_owned(), index_facet_order)); + if previous_facet_order != &index_facet_order { + return Err(MeilisearchHttpError::InconsistentFacetOrder { + facet: facet.clone(), + previous_facet_order: *previous_facet_order, + previous_uid: previous_index.clone(), + current_uid: current_index.to_owned(), + index_facet_order, + } + .into()); + } + } + } + } + FacetOrder::ByIndex(order_by_index) => { + let max_values_per_facet = index + .max_values_per_facet(rtxn)? 
+ .map(|x| x as usize) + .unwrap_or(DEFAULT_VALUES_PER_FACET); + order_by_index.insert( + current_index.to_owned(), + (index.sort_facet_values_by(rtxn)?, max_values_per_facet), + ); + } + FacetOrder::None => {} + } + Ok(()) + } + + fn merge( + self, + merge_facets: Option, + remote_results: Vec, + mut facets: FederatedFacets, + ) -> (Option, Option, FederatedFacets) { + let (facet_distribution, facet_stats, facets_by_index) = match (self, merge_facets) { + (FacetOrder::ByFacet(facet_order), Some(merge_facets)) => { + for remote_facets_by_index in + remote_results.into_iter().map(|result| result.facets_by_index) + { + facets.append(remote_facets_by_index); + } + let facets = facets.merge(merge_facets, facet_order); + + let (facet_distribution, facet_stats) = facets + .map(|ComputedFacets { distribution, stats }| (distribution, stats)) + .unzip(); + + (facet_distribution, facet_stats, FederatedFacets::default()) + } + (FacetOrder::ByIndex(facet_order), _) => { + for remote_facets_by_index in + remote_results.into_iter().map(|result| result.facets_by_index) + { + facets.append(remote_facets_by_index); + } + facets.sort_and_truncate(facet_order); + (None, None, facets) + } + _ => (None, None, facets), + }; + (facet_distribution, facet_stats, facets_by_index) + } +} diff --git a/crates/meilisearch/src/search/federated/proxy.rs b/crates/meilisearch/src/search/federated/proxy.rs new file mode 100644 index 000000000..bf954693c --- /dev/null +++ b/crates/meilisearch/src/search/federated/proxy.rs @@ -0,0 +1,267 @@ +pub use error::ProxySearchError; +use error::ReqwestErrorWithoutUrl; +use meilisearch_types::features::Remote; +use rand::Rng as _; +use reqwest::{Client, Response, StatusCode}; +use serde::de::DeserializeOwned; +use serde_json::Value; + +use super::types::{FederatedSearch, FederatedSearchResult, Federation}; +use crate::search::SearchQueryWithIndex; + +pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search"; +pub const PROXY_SEARCH_HEADER_VALUE: &str = "true"; 
+ +mod error { + use meilisearch_types::error::ResponseError; + use reqwest::StatusCode; + + #[derive(Debug, thiserror::Error)] + pub enum ProxySearchError { + #[error("{0}")] + CouldNotSendRequest(ReqwestErrorWithoutUrl), + #[error("could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `search` action")] + AuthenticationError, + #[error( + "could not parse response from the remote host as a federated search response{}\n - hint: check that the remote instance is a Meilisearch instance running the same version", + response_from_remote(response) + )] + CouldNotParseResponse { response: Result }, + #[error("remote host responded with code {}{}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))] + BadRequest { status_code: StatusCode, response: Result }, + #[error("remote host did not answer before the deadline")] + Timeout, + #[error("remote hit does not contain `{0}`\n - hint: check that the remote instance is a Meilisearch instance running the same version")] + MissingPathInResponse(&'static str), + #[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))] + RemoteError { status_code: StatusCode, response: Result }, + #[error("remote hit contains an unexpected value at path `{path}`: expected {expected_type}, received `{received_value}`\n - hint: check that the remote instance is a Meilisearch instance running the same version")] + UnexpectedValueInPath { + path: &'static str, + expected_type: &'static str, + received_value: String, + }, + #[error("could not parse weighted score values in the remote hit: {0}")] + CouldNotParseWeightedScoreValues(serde_json::Error), + } + + impl ProxySearchError { + pub fn as_response_error(&self) -> ResponseError { + use 
meilisearch_types::error::Code; + let message = self.to_string(); + let code = match self { + ProxySearchError::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest, + ProxySearchError::AuthenticationError => Code::RemoteInvalidApiKey, + ProxySearchError::BadRequest { .. } => Code::RemoteBadRequest, + ProxySearchError::Timeout => Code::RemoteTimeout, + ProxySearchError::RemoteError { .. } => Code::RemoteRemoteError, + ProxySearchError::CouldNotParseResponse { .. } + | ProxySearchError::MissingPathInResponse(_) + | ProxySearchError::UnexpectedValueInPath { .. } + | ProxySearchError::CouldNotParseWeightedScoreValues(_) => Code::RemoteBadResponse, + }; + ResponseError::from_msg(message, code) + } + } + + #[derive(Debug, thiserror::Error)] + #[error(transparent)] + pub struct ReqwestErrorWithoutUrl(reqwest::Error); + impl ReqwestErrorWithoutUrl { + pub fn new(inner: reqwest::Error) -> Self { + Self(inner.without_url()) + } + } + + fn response_from_remote(response: &Result) -> String { + match response { + Ok(response) => { + format!(":\n - response from remote: {}", response) + } + Err(error) => { + format!(":\n - additionally, could not retrieve response from remote: {error}") + } + } + } +} + +#[derive(Clone)] +pub struct ProxySearchParams { + pub deadline: Option, + pub try_count: u32, + pub client: reqwest::Client, +} + +/// Performs a federated search on a remote host and returns the results +pub async fn proxy_search( + node: &Remote, + queries: Vec, + federation: Federation, + params: &ProxySearchParams, +) -> Result { + let url = format!("{}/multi-search", node.url); + + let federated = FederatedSearch { queries, federation: Some(federation) }; + + let search_api_key = node.search_api_key.as_deref(); + + let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(5); + + let deadline = if let Some(deadline) = params.deadline { + std::time::Instant::min(deadline, max_deadline) + } else { + max_deadline + }; + + for i in 0..params.try_count 
{ + match try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline).await { + Ok(response) => return Ok(response), + Err(retry) => { + let duration = retry.into_duration(i)?; + tokio::time::sleep(duration).await; + } + } + } + try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline) + .await + .map_err(Retry::into_error) +} + +async fn try_proxy_search( + url: &str, + search_api_key: Option<&str>, + federated: &FederatedSearch, + client: &Client, + deadline: std::time::Instant, +) -> Result { + let timeout = deadline.saturating_duration_since(std::time::Instant::now()); + + let request = client.post(url).json(&federated).timeout(timeout); + let request = if let Some(search_api_key) = search_api_key { + request.bearer_auth(search_api_key) + } else { + request + }; + let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE); + + let response = request.send().await; + let response = match response { + Ok(response) => response, + Err(error) if error.is_timeout() => return Err(Retry::give_up(ProxySearchError::Timeout)), + Err(error) => { + return Err(Retry::retry_later(ProxySearchError::CouldNotSendRequest( + ReqwestErrorWithoutUrl::new(error), + ))) + } + }; + + match response.status() { + status_code if status_code.is_success() => (), + StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => { + return Err(Retry::give_up(ProxySearchError::AuthenticationError)) + } + status_code if status_code.is_client_error() => { + let response = parse_error(response).await; + return Err(Retry::give_up(ProxySearchError::BadRequest { status_code, response })); + } + status_code if status_code.is_server_error() => { + let response = parse_error(response).await; + return Err(Retry::retry_later(ProxySearchError::RemoteError { + status_code, + response, + })); + } + status_code => { + tracing::warn!( + status_code = status_code.as_u16(), + "remote replied with unexpected status code" + ); + } + } + + let response = match 
parse_response(response).await { + Ok(response) => response, + Err(response) => { + return Err(Retry::retry_later(ProxySearchError::CouldNotParseResponse { response })) + } + }; + + Ok(response) +} + +/// Always parse the body of the response of a failed request as JSON. +async fn parse_error(response: Response) -> Result { + let bytes = match response.bytes().await { + Ok(bytes) => bytes, + Err(error) => return Err(ReqwestErrorWithoutUrl::new(error)), + }; + + Ok(parse_bytes_as_error(&bytes)) +} + +fn parse_bytes_as_error(bytes: &[u8]) -> String { + match serde_json::from_slice::(bytes) { + Ok(value) => value.to_string(), + Err(_) => String::from_utf8_lossy(bytes).into_owned(), + } +} + +async fn parse_response( + response: Response, +) -> Result> { + let bytes = match response.bytes().await { + Ok(bytes) => bytes, + Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))), + }; + + match serde_json::from_slice::(&bytes) { + Ok(value) => Ok(value), + Err(_) => Err(Ok(parse_bytes_as_error(&bytes))), + } +} + +pub struct Retry { + error: ProxySearchError, + strategy: RetryStrategy, +} + +pub enum RetryStrategy { + GiveUp, + Retry, +} + +impl Retry { + pub fn give_up(error: ProxySearchError) -> Self { + Self { error, strategy: RetryStrategy::GiveUp } + } + + pub fn retry_later(error: ProxySearchError) -> Self { + Self { error, strategy: RetryStrategy::Retry } + } + + pub fn into_duration(self, attempt: u32) -> Result { + match self.strategy { + RetryStrategy::GiveUp => Err(self.error), + RetryStrategy::Retry => { + let retry_duration = std::time::Duration::from_nanos((10u64).pow(attempt)); + let retry_duration = retry_duration.min(std::time::Duration::from_millis(100)); // don't wait more than 100ms + + // randomly up to double the retry duration + let retry_duration = retry_duration + + rand::thread_rng().gen_range(std::time::Duration::ZERO..retry_duration); + + tracing::warn!( + "Attempt #{}, failed with {}, retrying after {}ms.", + attempt, + self.error, 
+ retry_duration.as_millis() + ); + Ok(retry_duration) + } + } + } + + pub fn into_error(self) -> ProxySearchError { + self.error + } +} diff --git a/crates/meilisearch/src/search/federated/types.rs b/crates/meilisearch/src/search/federated/types.rs new file mode 100644 index 000000000..804df8d31 --- /dev/null +++ b/crates/meilisearch/src/search/federated/types.rs @@ -0,0 +1,322 @@ +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::fmt; +use std::vec::Vec; + +use indexmap::IndexMap; +use meilisearch_types::deserr::DeserrJsonError; +use meilisearch_types::error::deserr_codes::{ + InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, + InvalidMultiSearchMergeFacets, InvalidMultiSearchQueryPosition, InvalidMultiSearchRemote, + InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, +}; +use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::milli::order_by_map::OrderByMap; +use meilisearch_types::milli::OrderBy; +use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; + +use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex}; + +pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0; + +// fields in the response +pub const FEDERATION_HIT: &str = "_federation"; +pub const INDEX_UID: &str = "indexUid"; +pub const QUERIES_POSITION: &str = "queriesPosition"; +pub const WEIGHTED_RANKING_SCORE: &str = "weightedRankingScore"; +pub const WEIGHTED_SCORE_VALUES: &str = "weightedScoreValues"; +pub const FEDERATION_REMOTE: &str = "remote"; + +#[derive(Debug, Default, Clone, PartialEq, Serialize, deserr::Deserr, ToSchema)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[serde(rename_all = "camelCase")] + +pub struct FederationOptions { + #[deserr(default, error = DeserrJsonError)] + #[schema(value_type = f64)] + pub weight: Weight, + + #[deserr(default, error = DeserrJsonError)] + pub remote: 
Option, + + #[deserr(default, error = DeserrJsonError)] + pub query_position: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Serialize, deserr::Deserr)] +#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)] +pub struct Weight(f64); + +impl Default for Weight { + fn default() -> Self { + Weight(DEFAULT_FEDERATED_WEIGHT) + } +} + +impl std::convert::TryFrom for Weight { + type Error = InvalidMultiSearchWeight; + + fn try_from(f: f64) -> Result { + if f < 0.0 { + Err(InvalidMultiSearchWeight) + } else { + Ok(Weight(f)) + } + } +} + +impl std::ops::Deref for Weight { + type Target = f64; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[derive(Debug, Clone, deserr::Deserr, Serialize, ToSchema)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] +#[serde(rename_all = "camelCase")] +pub struct Federation { + #[deserr(default = super::super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError)] + pub limit: usize, + #[deserr(default = super::super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] + pub offset: usize, + #[deserr(default, error = DeserrJsonError)] + pub facets_by_index: BTreeMap>>, + #[deserr(default, error = DeserrJsonError)] + pub merge_facets: Option, +} + +#[derive(Copy, Clone, Debug, deserr::Deserr, Serialize, Default, ToSchema)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] +#[serde(rename_all = "camelCase")] +pub struct MergeFacets { + #[deserr(default, error = DeserrJsonError)] + pub max_values_per_facet: Option, +} + +#[derive(Debug, deserr::Deserr, Serialize, ToSchema)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[schema(rename_all = "camelCase")] +#[serde(rename_all = "camelCase")] +pub struct FederatedSearch { + pub queries: Vec, + #[deserr(default)] + pub federation: Option, +} + +#[derive(Serialize, Deserialize, Clone, ToSchema)] 
+#[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] +pub struct FederatedSearchResult { + pub hits: Vec, + pub processing_time_ms: u128, + #[serde(flatten)] + pub hits_info: HitsInfo, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub semantic_hit_count: Option, + + #[serde(default, skip_serializing_if = "Option::is_none")] + #[schema(value_type = Option>>)] + pub facet_distribution: Option>>, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub facet_stats: Option>, + #[serde(default, skip_serializing_if = "FederatedFacets::is_empty")] + pub facets_by_index: FederatedFacets, + + #[serde(default, skip_serializing_if = "Option::is_none")] + pub remote_errors: Option>, + + // These fields are only used for analytics purposes + #[serde(skip)] + pub degraded: bool, + #[serde(skip)] + pub used_negative_operator: bool, +} + +impl fmt::Debug for FederatedSearchResult { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let FederatedSearchResult { + hits, + processing_time_ms, + hits_info, + semantic_hit_count, + degraded, + used_negative_operator, + facet_distribution, + facet_stats, + facets_by_index, + remote_errors, + } = self; + + let mut debug = f.debug_struct("SearchResult"); + // The most important thing when looking at a search result is the time it took to process + debug.field("processing_time_ms", &processing_time_ms); + debug.field("hits", &format!("[{} hits returned]", hits.len())); + debug.field("hits_info", &hits_info); + if *used_negative_operator { + debug.field("used_negative_operator", used_negative_operator); + } + if *degraded { + debug.field("degraded", degraded); + } + if let Some(facet_distribution) = facet_distribution { + debug.field("facet_distribution", &facet_distribution); + } + if let Some(facet_stats) = facet_stats { + debug.field("facet_stats", &facet_stats); + } + if let Some(semantic_hit_count) = semantic_hit_count { + debug.field("semantic_hit_count", &semantic_hit_count); 
+ } + if !facets_by_index.is_empty() { + debug.field("facets_by_index", &facets_by_index); + } + if let Some(remote_errors) = remote_errors { + debug.field("remote_errors", &remote_errors); + } + + debug.finish() + } +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] +pub struct FederatedFacets(pub BTreeMap); + +impl FederatedFacets { + pub fn insert(&mut self, index: String, facets: Option) { + if let Some(facets) = facets { + self.0.insert(index, facets); + } + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn merge( + self, + MergeFacets { max_values_per_facet }: MergeFacets, + facet_order: BTreeMap, + ) -> Option { + if self.is_empty() { + return None; + } + + let mut distribution: BTreeMap = Default::default(); + let mut stats: BTreeMap = Default::default(); + + for facets_by_index in self.0.into_values() { + for (facet, index_distribution) in facets_by_index.distribution { + match distribution.entry(facet) { + Entry::Vacant(entry) => { + entry.insert(index_distribution); + } + Entry::Occupied(mut entry) => { + let distribution = entry.get_mut(); + + for (value, index_count) in index_distribution { + distribution + .entry(value) + .and_modify(|count| *count += index_count) + .or_insert(index_count); + } + } + } + } + + for (facet, index_stats) in facets_by_index.stats { + match stats.entry(facet) { + Entry::Vacant(entry) => { + entry.insert(index_stats); + } + Entry::Occupied(mut entry) => { + let stats = entry.get_mut(); + + stats.min = f64::min(stats.min, index_stats.min); + stats.max = f64::max(stats.max, index_stats.max); + } + } + } + } + + // fixup order + for (facet, values) in &mut distribution { + let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default(); + + match order_by { + OrderBy::Lexicographic => { + values.sort_unstable_by(|left, _, right, _| left.cmp(right)) + } + OrderBy::Count => { + values.sort_unstable_by(|_, left, _, right| { + left.cmp(right) + // biggest first + 
.reverse() + }) + } + } + + if let Some(max_values_per_facet) = max_values_per_facet { + values.truncate(max_values_per_facet) + }; + } + + Some(ComputedFacets { distribution, stats }) + } + + pub(crate) fn append(&mut self, FederatedFacets(remote_facets_by_index): FederatedFacets) { + for (index, remote_facets) in remote_facets_by_index { + let merged_facets = self.0.entry(index).or_default(); + + for (remote_facet, remote_stats) in remote_facets.stats { + match merged_facets.stats.entry(remote_facet) { + Entry::Vacant(vacant_entry) => { + vacant_entry.insert(remote_stats); + } + Entry::Occupied(mut occupied_entry) => { + let stats = occupied_entry.get_mut(); + stats.min = f64::min(stats.min, remote_stats.min); + stats.max = f64::max(stats.max, remote_stats.max); + } + } + } + + for (remote_facet, remote_values) in remote_facets.distribution { + let merged_facet = merged_facets.distribution.entry(remote_facet).or_default(); + for (remote_value, remote_count) in remote_values { + let count = merged_facet.entry(remote_value).or_default(); + *count += remote_count; + } + } + } + } + + pub fn sort_and_truncate(&mut self, facet_order: BTreeMap) { + for (index, facets) in &mut self.0 { + let Some((order_by, max_values_per_facet)) = facet_order.get(index) else { + continue; + }; + for (facet, values) in &mut facets.distribution { + match order_by.get(facet) { + OrderBy::Lexicographic => { + values.sort_unstable_by(|left, _, right, _| left.cmp(right)) + } + OrderBy::Count => { + values.sort_unstable_by(|_, left, _, right| { + left.cmp(right) + // biggest first + .reverse() + }) + } + } + values.truncate(*max_values_per_facet); + } + } + } +} diff --git a/crates/meilisearch/src/search/federated/weighted_scores.rs b/crates/meilisearch/src/search/federated/weighted_scores.rs new file mode 100644 index 000000000..899940a31 --- /dev/null +++ b/crates/meilisearch/src/search/federated/weighted_scores.rs @@ -0,0 +1,88 @@ +use std::cmp::Ordering; + +use 
meilisearch_types::milli::score_details::{self, WeightedScoreValue}; + +pub fn compare( + mut left_it: impl Iterator, + left_weighted_global_score: f64, + mut right_it: impl Iterator, + right_weighted_global_score: f64, +) -> Ordering { + loop { + let left = left_it.next(); + let right = right_it.next(); + + match (left, right) { + (None, None) => return Ordering::Equal, + (None, Some(_)) => return Ordering::Less, + (Some(_), None) => return Ordering::Greater, + ( + Some( + WeightedScoreValue::WeightedScore(left) | WeightedScoreValue::VectorSort(left), + ), + Some( + WeightedScoreValue::WeightedScore(right) + | WeightedScoreValue::VectorSort(right), + ), + ) => { + if (left - right).abs() <= f64::EPSILON { + continue; + } + return left.partial_cmp(&right).unwrap(); + } + ( + Some(WeightedScoreValue::Sort { asc: left_asc, value: left }), + Some(WeightedScoreValue::Sort { asc: right_asc, value: right }), + ) => { + if left_asc != right_asc { + return left_weighted_global_score + .partial_cmp(&right_weighted_global_score) + .unwrap(); + } + match score_details::compare_sort_values(left_asc, &left, &right) { + Ordering::Equal => continue, + order => return order, + } + } + ( + Some(WeightedScoreValue::GeoSort { asc: left_asc, distance: left }), + Some(WeightedScoreValue::GeoSort { asc: right_asc, distance: right }), + ) => { + if left_asc != right_asc { + continue; + } + match (left, right) { + (None, None) => continue, + (None, Some(_)) => return Ordering::Less, + (Some(_), None) => return Ordering::Greater, + (Some(left), Some(right)) => { + if (left - right).abs() <= f64::EPSILON { + continue; + } + return left.partial_cmp(&right).unwrap(); + } + } + } + // not comparable details, use global + (Some(WeightedScoreValue::WeightedScore(_)), Some(_)) + | (Some(_), Some(WeightedScoreValue::WeightedScore(_))) + | (Some(WeightedScoreValue::VectorSort(_)), Some(_)) + | (Some(_), Some(WeightedScoreValue::VectorSort(_))) + | (Some(WeightedScoreValue::GeoSort { .. 
}), Some(WeightedScoreValue::Sort { .. })) + | (Some(WeightedScoreValue::Sort { .. }), Some(WeightedScoreValue::GeoSort { .. })) => { + let left_count = left_it.count(); + let right_count = right_it.count(); + // compare how many remaining groups of rules each side has. + // the group with the most remaining groups wins. + return left_count + .cmp(&right_count) + // breaks ties with the global ranking score + .then_with(|| { + left_weighted_global_score + .partial_cmp(&right_weighted_global_score) + .unwrap() + }); + } + } + } +} diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index aab8ae919..2091047fc 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -30,7 +30,7 @@ use milli::{ MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, }; use regex::Regex; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; #[cfg(test)] mod mod_test; @@ -41,7 +41,7 @@ use crate::error::MeilisearchHttpError; mod federated; pub use federated::{ perform_federated_search, FederatedSearch, FederatedSearchResult, Federation, - FederationOptions, MergeFacets, + FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, }; mod ranking_rules; @@ -119,7 +119,7 @@ pub struct SearchQuery { pub locales: Option>, } -#[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema)] +#[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema, Serialize)] #[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)] pub struct RankingScoreThreshold(f64); impl std::convert::TryFrom for RankingScoreThreshold { @@ -275,11 +275,13 @@ impl fmt::Debug for SearchQuery { } } -#[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema)] +#[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema, Serialize)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[serde(rename_all = 
"camelCase")] pub struct HybridQuery { #[deserr(default, error = DeserrJsonError, default)] #[schema(value_type = f32, default)] + #[serde(default)] pub semantic_ratio: SemanticRatio, #[deserr(error = DeserrJsonError)] pub embedder: String, @@ -369,7 +371,7 @@ impl SearchKind { } } -#[derive(Debug, Clone, Copy, PartialEq, Deserr)] +#[derive(Debug, Clone, Copy, PartialEq, Deserr, Serialize)] #[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)] pub struct SemanticRatio(f32); @@ -411,8 +413,9 @@ impl SearchQuery { // This struct contains the fields of `SearchQuery` inline. // This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields. // The `From` implementation ensures both structs remain up to date. -#[derive(Debug, Clone, PartialEq, Deserr, ToSchema)] +#[derive(Debug, Clone, Serialize, PartialEq, Deserr, ToSchema)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[serde(rename_all = "camelCase")] #[schema(rename_all = "camelCase")] pub struct SearchQueryWithIndex { #[deserr(error = DeserrJsonError, missing_field_error = DeserrJsonError::missing_index_uid)] @@ -493,6 +496,72 @@ impl SearchQueryWithIndex { self.facets.as_deref().filter(|v| !v.is_empty()) } + pub fn from_index_query_federation( + index_uid: IndexUid, + query: SearchQuery, + federation_options: Option, + ) -> Self { + let SearchQuery { + q, + vector, + hybrid, + offset, + limit, + page, + hits_per_page, + attributes_to_retrieve, + retrieve_vectors, + attributes_to_crop, + crop_length, + attributes_to_highlight, + show_matches_position, + show_ranking_score, + show_ranking_score_details, + filter, + sort, + distinct, + facets, + highlight_pre_tag, + highlight_post_tag, + crop_marker, + matching_strategy, + attributes_to_search_on, + ranking_score_threshold, + locales, + } = query; + + SearchQueryWithIndex { + index_uid, + q, + vector, + hybrid, + offset: if offset == DEFAULT_SEARCH_OFFSET() { None } else { 
Some(offset) }, + limit: if limit == DEFAULT_SEARCH_LIMIT() { None } else { Some(limit) }, + page, + hits_per_page, + attributes_to_retrieve, + retrieve_vectors, + attributes_to_crop, + crop_length, + attributes_to_highlight, + show_ranking_score, + show_ranking_score_details, + show_matches_position, + filter, + sort, + distinct, + facets, + highlight_pre_tag, + highlight_post_tag, + crop_marker, + matching_strategy, + attributes_to_search_on, + ranking_score_threshold, + locales, + federation_options, + } + } + pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option) { let SearchQueryWithIndex { index_uid, @@ -620,8 +689,9 @@ impl TryFrom for ExternalDocumentId { } } -#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema, Serialize)] #[deserr(rename_all = camelCase)] +#[serde(rename_all = "camelCase")] pub enum MatchingStrategy { /// Remove query words from last to first Last, @@ -667,19 +737,19 @@ impl From for OrderBy { } } -#[derive(Debug, Clone, Serialize, PartialEq, ToSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)] pub struct SearchHit { #[serde(flatten)] #[schema(additional_properties, inline, value_type = HashMap)] pub document: Document, - #[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")] + #[serde(default, rename = "_formatted", skip_serializing_if = "Document::is_empty")] #[schema(additional_properties, value_type = HashMap)] pub formatted: Document, - #[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")] + #[serde(default, rename = "_matchesPosition", skip_serializing_if = "Option::is_none")] pub matches_position: Option, - #[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")] + #[serde(default, rename = "_rankingScore", skip_serializing_if = "Option::is_none")] pub ranking_score: Option, - #[serde(rename = "_rankingScoreDetails", skip_serializing_if = 
"Option::is_none")] + #[serde(default, rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")] pub ranking_score_details: Option>, } @@ -767,7 +837,7 @@ pub struct SearchResultWithIndex { pub result: SearchResult, } -#[derive(Serialize, Debug, Clone, PartialEq, Eq, ToSchema)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)] #[serde(untagged)] pub enum HitsInfo { #[serde(rename_all = "camelCase")] @@ -778,7 +848,7 @@ pub enum HitsInfo { OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, } -#[derive(Serialize, Debug, Clone, PartialEq, ToSchema)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)] pub struct FacetStats { pub min: f64, pub max: f64, @@ -1061,7 +1131,7 @@ pub fn perform_search( Ok(result) } -#[derive(Debug, Clone, Default, Serialize, ToSchema)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] pub struct ComputedFacets { #[schema(value_type = BTreeMap>)] pub distribution: BTreeMap>, diff --git a/crates/meilisearch/tests/auth/api_keys.rs b/crates/meilisearch/tests/auth/api_keys.rs index 253929428..0aea7d722 100644 --- a/crates/meilisearch/tests/auth/api_keys.rs +++ b/crates/meilisearch/tests/auth/api_keys.rs @@ -421,7 +421,7 @@ async fn error_add_api_key_invalid_parameters_actions() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###" { - "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, 
`keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`", + "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`", "code": "invalid_api_key_actions", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions" diff --git a/crates/meilisearch/tests/auth/authorization.rs b/crates/meilisearch/tests/auth/authorization.rs index 609b7d01b..277911fb8 100644 --- a/crates/meilisearch/tests/auth/authorization.rs +++ b/crates/meilisearch/tests/auth/authorization.rs @@ -68,6 +68,8 @@ pub static AUTHORIZATIONS: Lazy hashset!{"keys.get", "*"}, ("GET", "/experimental-features") => hashset!{"experimental.get", "*"}, ("PATCH", "/experimental-features") => hashset!{"experimental.update", "*"}, + ("GET", "/network") => hashset!{"network.get", "*"}, + ("PATCH", "/network") => hashset!{"network.update", "*"}, }; authorizations diff --git a/crates/meilisearch/tests/auth/errors.rs b/crates/meilisearch/tests/auth/errors.rs index c063b2aac..0e8968ef0 100644 --- a/crates/meilisearch/tests/auth/errors.rs +++ b/crates/meilisearch/tests/auth/errors.rs @@ -93,7 +93,7 @@ async fn create_api_key_bad_actions() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, 
`indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`", + "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`", "code": "invalid_api_key_actions", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions" diff --git a/crates/meilisearch/tests/common/server.rs b/crates/meilisearch/tests/common/server.rs index 49214d646..c017a060c 100644 --- a/crates/meilisearch/tests/common/server.rs +++ b/crates/meilisearch/tests/common/server.rs @@ -88,6 +88,10 @@ impl Server { self.service.api_key = Some(api_key.as_ref().to_string()); } + pub fn clear_api_key(&mut self) { + self.service.api_key = None; + } + /// Fetch and use the default admin key for nexts http requests. 
pub async fn use_admin_key(&mut self, master_key: impl AsRef) { self.use_api_key(master_key); @@ -163,6 +167,10 @@ impl Server { self.service.patch("/experimental-features", value).await } + pub async fn set_network(&self, value: Value) -> (Value, StatusCode) { + self.service.patch("/network", value).await + } + pub async fn get_metrics(&self) -> (Value, StatusCode) { self.service.get("/metrics").await } @@ -388,6 +396,10 @@ impl Server { pub async fn get_features(&self) -> (Value, StatusCode) { self.service.get("/experimental-features").await } + + pub async fn get_network(&self) -> (Value, StatusCode) { + self.service.get("/network").await + } } pub fn default_settings(dir: impl AsRef) -> Opt { diff --git a/crates/meilisearch/tests/dumps/mod.rs b/crates/meilisearch/tests/dumps/mod.rs index a2b008fe3..2b4c32cc7 100644 --- a/crates/meilisearch/tests/dumps/mod.rs +++ b/crates/meilisearch/tests/dumps/mod.rs @@ -1908,7 +1908,8 @@ async fn import_dump_v6_containing_experimental_features() { "metrics": false, "logsRoute": false, "editDocumentsByFunction": false, - "containsFilter": false + "containsFilter": false, + "network": false } "###); @@ -2069,7 +2070,8 @@ async fn generate_and_import_dump_containing_vectors() { "metrics": false, "logsRoute": false, "editDocumentsByFunction": false, - "containsFilter": false + "containsFilter": false, + "network": false } "###); diff --git a/crates/meilisearch/tests/features/mod.rs b/crates/meilisearch/tests/features/mod.rs index 8e1ac921d..36559daf6 100644 --- a/crates/meilisearch/tests/features/mod.rs +++ b/crates/meilisearch/tests/features/mod.rs @@ -21,7 +21,8 @@ async fn experimental_features() { "metrics": false, "logsRoute": false, "editDocumentsByFunction": false, - "containsFilter": false + "containsFilter": false, + "network": false } "###); @@ -33,7 +34,8 @@ async fn experimental_features() { "metrics": true, "logsRoute": false, "editDocumentsByFunction": false, - "containsFilter": false + "containsFilter": false, + 
"network": false } "###); @@ -45,7 +47,8 @@ async fn experimental_features() { "metrics": true, "logsRoute": false, "editDocumentsByFunction": false, - "containsFilter": false + "containsFilter": false, + "network": false } "###); @@ -58,7 +61,8 @@ async fn experimental_features() { "metrics": true, "logsRoute": false, "editDocumentsByFunction": false, - "containsFilter": false + "containsFilter": false, + "network": false } "###); @@ -71,7 +75,8 @@ async fn experimental_features() { "metrics": true, "logsRoute": false, "editDocumentsByFunction": false, - "containsFilter": false + "containsFilter": false, + "network": false } "###); } @@ -91,7 +96,8 @@ async fn experimental_feature_metrics() { "metrics": true, "logsRoute": false, "editDocumentsByFunction": false, - "containsFilter": false + "containsFilter": false, + "network": false } "###); @@ -146,7 +152,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`", + "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" diff --git a/crates/meilisearch/tests/integration.rs b/crates/meilisearch/tests/integration.rs index 7c3b8affe..927eb4617 100644 --- a/crates/meilisearch/tests/integration.rs +++ b/crates/meilisearch/tests/integration.rs @@ -7,6 +7,7 @@ mod dumps; mod features; mod index; mod logs; +mod network; mod search; mod settings; mod similar; diff --git a/crates/meilisearch/tests/network/mod.rs b/crates/meilisearch/tests/network/mod.rs new file mode 100644 index 000000000..1c3661a06 --- /dev/null +++ b/crates/meilisearch/tests/network/mod.rs @@ -0,0 +1,606 @@ +use serde_json::Value::Null; + +use 
crate::common::Server; +use crate::json; + +#[actix_rt::test] +async fn error_network_not_enabled() { + let server = Server::new().await; + + let (response, code) = server.get_network().await; + + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Using the /network route requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + + let (response, code) = server.set_network(json!({"self": "myself"})).await; + + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Using the /network route requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); +} + +#[actix_rt::test] +async fn errors_on_param() { + let server = Server::new().await; + + let (response, code) = server.set_features(json!({"network": true})).await; + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#); + + // non-existing param + let (response, code) = server.set_network(json!({"selfie": "myself"})).await; + + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Unknown field `selfie`: expected one of `remotes`, `self`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // self not a string + let (response, code) = server.set_network(json!({"self": 42})).await; + + meili_snap::snapshot!(code, @"400 Bad Request"); + 
meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Invalid value type at `.self`: expected a string, but found a positive integer: `42`", + "code": "invalid_network_self", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_network_self" + } + "###); + + // remotes not an object + let (response, code) = server.set_network(json!({"remotes": 42})).await; + + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Invalid value type at `.remotes`: expected an object, but found a positive integer: `42`", + "code": "invalid_network_remotes", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_network_remotes" + } + "###); + + // new remote without url + let (response, code) = server + .set_network(json!({"remotes": { + "new": { + "searchApiKey": "http://localhost:7700" + } + }})) + .await; + + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Missing field `.remotes.new.url`", + "code": "missing_network_url", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_network_url" + } + "###); + + // remote with url not a string + let (response, code) = server + .set_network(json!({"remotes": { + "new": { + "url": 7700 + } + }})) + .await; + + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Invalid value type at `.remotes.new.url`: expected a string, but found a positive integer: `7700`", + "code": "invalid_network_url", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_network_url" + } + "###); + + // remote with non-existing param + let (response, code) = server + .set_network(json!({"remotes": { + "new": { + "url": "http://localhost:7700", + "doggo": "Intel the Beagle" + } 
+ }})) + .await; + + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`", + "code": "invalid_network_remotes", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_network_remotes" + } + "###); + + // remote with non-string searchApiKey + let (response, code) = server + .set_network(json!({"remotes": { + "new": { + "url": "http://localhost:7700", + "searchApiKey": 1204664602099962445u64, + } + }})) + .await; + + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Invalid value type at `.remotes.new.searchApiKey`: expected a string, but found a positive integer: `1204664602099962445`", + "code": "invalid_network_search_api_key", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_network_search_api_key" + } + "###); + + // setting `null` on URL a posteriori + let (response, code) = server + .set_network(json!({"remotes": { + "kefir": { + "url": "http://localhost:7700", + } + }})) + .await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": null, + "remotes": { + "kefir": { + "url": "http://localhost:7700", + "searchApiKey": null + } + } + } + "###); + let (response, code) = server + .set_network(json!({"remotes": { + "kefir": { + "url": Null, + } + }})) + .await; + + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Field `.remotes.kefir.url` cannot be set to `null`", + "code": "invalid_network_url", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_network_url" + } + "###); +} + +#[actix_rt::test] +async fn auth() { + let mut server = Server::new_auth().await; + 
server.use_api_key("MASTER_KEY"); + + let (response, code) = server.set_features(json!({"network": true})).await; + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#); + + let (get_network_key, code) = server + .add_api_key(json!({ + "actions": ["network.get"], + "indexes": ["*"], + "expiresAt": serde_json::Value::Null + })) + .await; + meili_snap::snapshot!(code, @"201 Created"); + let get_network_key = get_network_key["key"].clone(); + + let (update_network_key, code) = server + .add_api_key(json!({ + "actions": ["network.update"], + "indexes": ["*"], + "expiresAt": serde_json::Value::Null + })) + .await; + meili_snap::snapshot!(code, @"201 Created"); + let update_network_key = update_network_key["key"].clone(); + + let (search_api_key, code) = server + .add_api_key(json!({ + "actions": ["search"], + "indexes": ["*"], + "expiresAt": serde_json::Value::Null + })) + .await; + meili_snap::snapshot!(code, @"201 Created"); + let search_api_key = search_api_key["key"].clone(); + + // try with master key + let (response, code) = server + .set_network(json!({ + "self": "master" + })) + .await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": "master", + "remotes": {} + } + "###); + + let (response, code) = server.get_network().await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" +{ + "self": "master", + "remotes": {} +} +"###); + + // try get with get permission + server.use_api_key(get_network_key.as_str().unwrap()); + let (response, code) = server.get_network().await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" +{ + "self": "master", + "remotes": {} +} +"###); + + // try update with update permission + server.use_api_key(update_network_key.as_str().unwrap()); + + let (response, code) = server + 
.set_network(json!({ + "self": "api_key" + })) + .await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" +{ + "self": "api_key", + "remotes": {} +} +"###); + + // try with the other's permission + let (response, code) = server.get_network().await; + + meili_snap::snapshot!(code, @"403 Forbidden"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "The provided API key is invalid.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + server.use_api_key(get_network_key.as_str().unwrap()); + let (response, code) = server + .set_network(json!({ + "self": "get_api_key" + })) + .await; + + meili_snap::snapshot!(code, @"403 Forbidden"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "The provided API key is invalid.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + // try either with bad permission + server.use_api_key(search_api_key.as_str().unwrap()); + let (response, code) = server.get_network().await; + + meili_snap::snapshot!(code, @"403 Forbidden"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "The provided API key is invalid.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + let (response, code) = server + .set_network(json!({ + "self": "get_api_key" + })) + .await; + + meili_snap::snapshot!(code, @"403 Forbidden"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "The provided API key is invalid.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); +} + +#[actix_rt::test] +async fn get_and_set_network() { + let server = Server::new().await; + + let (response, 
code) = server.set_features(json!({"network": true})).await; + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#); + + let (response, code) = server.get_network().await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": null, + "remotes": {} + } + "###); + + // adding self + let (response, code) = server.set_network(json!({"self": "myself"})).await; + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": "myself", + "remotes": {} + } + "###); + + // adding remotes + let (response, code) = server + .set_network(json!({"remotes": { + "myself": { + "url": "http://localhost:7700" + }, + "thy": { + "url": "http://localhost:7701", + "searchApiKey": "foo" + } + }})) + .await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": "myself", + "remotes": { + "myself": { + "url": "http://localhost:7700", + "searchApiKey": null + }, + "thy": { + "url": "http://localhost:7701", + "searchApiKey": "foo" + } + } + } + "###); + + // partially updating one remote + let (response, code) = server + .set_network(json!({"remotes": { + "thy": { + "searchApiKey": "bar" + } + }})) + .await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": "myself", + "remotes": { + "myself": { + "url": "http://localhost:7700", + "searchApiKey": null + }, + "thy": { + "url": "http://localhost:7701", + "searchApiKey": "bar" + } + } + } + "###); + + // adding one remote + let (response, code) = server + .set_network(json!({"remotes": { + "them": { + "url": "http://localhost:7702", + "searchApiKey": "baz" + } + }})) + .await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": 
"myself", + "remotes": { + "myself": { + "url": "http://localhost:7700", + "searchApiKey": null + }, + "them": { + "url": "http://localhost:7702", + "searchApiKey": "baz" + }, + "thy": { + "url": "http://localhost:7701", + "searchApiKey": "bar" + } + } + } + "###); + + // deleting one remote + let (response, code) = server + .set_network(json!({"remotes": { + "myself": Null, + }})) + .await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": "myself", + "remotes": { + "them": { + "url": "http://localhost:7702", + "searchApiKey": "baz" + }, + "thy": { + "url": "http://localhost:7701", + "searchApiKey": "bar" + } + } + } + "###); + + // removing self + let (response, code) = server.set_network(json!({"self": Null})).await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": null, + "remotes": { + "them": { + "url": "http://localhost:7702", + "searchApiKey": "baz" + }, + "thy": { + "url": "http://localhost:7701", + "searchApiKey": "bar" + } + } + } + "###); + + // setting self again + let (response, code) = server.set_network(json!({"self": "thy"})).await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": "thy", + "remotes": { + "them": { + "url": "http://localhost:7702", + "searchApiKey": "baz" + }, + "thy": { + "url": "http://localhost:7701", + "searchApiKey": "bar" + } + } + } + "###); + + // doing nothing + let (response, code) = server.set_network(json!({})).await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": "thy", + "remotes": { + "them": { + "url": "http://localhost:7702", + "searchApiKey": "baz" + }, + "thy": { + "url": "http://localhost:7701", + "searchApiKey": "bar" + } + } + } + "###); + + // still doing nothing + let (response, code) = 
server.set_network(json!({"remotes": {}})).await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": "thy", + "remotes": { + "them": { + "url": "http://localhost:7702", + "searchApiKey": "baz" + }, + "thy": { + "url": "http://localhost:7701", + "searchApiKey": "bar" + } + } + } + "###); + + // good time to check GET + let (response, code) = server.get_network().await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": "thy", + "remotes": { + "them": { + "url": "http://localhost:7702", + "searchApiKey": "baz" + }, + "thy": { + "url": "http://localhost:7701", + "searchApiKey": "bar" + } + } + } + "###); + + // deleting everything + let (response, code) = server + .set_network(json!({ + "remotes": Null, + })) + .await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "self": "thy", + "remotes": {} + } + "###); +} diff --git a/crates/meilisearch/tests/search/multi.rs b/crates/meilisearch/tests/search/multi/mod.rs similarity index 99% rename from crates/meilisearch/tests/search/multi.rs rename to crates/meilisearch/tests/search/multi/mod.rs index 4fc0aed7f..2a95a5dd2 100644 --- a/crates/meilisearch/tests/search/multi.rs +++ b/crates/meilisearch/tests/search/multi/mod.rs @@ -5,6 +5,8 @@ use crate::common::Server; use crate::json; use crate::search::{SCORE_DOCUMENTS, VECTOR_DOCUMENTS}; +mod proxy; + #[actix_rt::test] async fn search_empty_list() { let server = Server::new().await; diff --git a/crates/meilisearch/tests/search/multi/proxy.rs b/crates/meilisearch/tests/search/multi/proxy.rs new file mode 100644 index 000000000..2c3b31bf1 --- /dev/null +++ b/crates/meilisearch/tests/search/multi/proxy.rs @@ -0,0 +1,2591 @@ +use std::sync::Arc; + +use actix_http::StatusCode; +use meili_snap::{json_string, snapshot}; +use wiremock::matchers::AnyMatcher; +use 
wiremock::{Mock, MockServer, ResponseTemplate}; + +use crate::common::{Server, Value, SCORE_DOCUMENTS}; +use crate::json; + +#[actix_rt::test] +async fn error_feature() { + let server = Server::new().await; + + let (response, code) = server + .multi_search(json!({ + "federation": {}, + "queries": [ + { + "indexUid": "test", + "federationOptions": { + "remote": "toto" + } + } + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Performing a remote federated search requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + + let (response, code) = server + .multi_search(json!({ + "federation": {}, + "queries": [ + { + "indexUid": "test", + "federationOptions": { + "queryPosition": 42, + } + } + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Using `federationOptions.queryPosition` requires enabling the `network` experimental feature. 
See https://github.com/orgs/meilisearch/discussions/805", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); +} + +#[actix_rt::test] +async fn error_params() { + let server = Server::new().await; + + let (response, code) = server + .multi_search(json!({ + "federation": {}, + "queries": [ + { + "indexUid": "test", + "federationOptions": { + "remote": 42 + } + } + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value type at `.queries[0].federationOptions.remote`: expected a string, but found a positive integer: `42`", + "code": "invalid_multi_search_remote", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_remote" + } + "###); + + let (response, code) = server + .multi_search(json!({ + "federation": {}, + "queries": [ + { + "indexUid": "test", + "federationOptions": { + "queryPosition": "toto", + } + } + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value type at `.queries[0].federationOptions.queryPosition`: expected a positive integer, but found a string: `\"toto\"`", + "code": "invalid_multi_search_query_position", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_position" + } + "###); +} + +#[actix_rt::test] +async fn remote_sharding() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + let ms2 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = 
ms2.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + let (response, code) = ms2.set_network(json!({"self": "ms2"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms2", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let index2 = ms2.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index2.add_documents(json!(documents[3..5]), None).await; + index2.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + let ms2 = Arc::new(ms2); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::new(ms1.clone()).await; + let rms2 = LocalMeili::new(ms2.clone()).await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url() + }, + "ms2": { + "url": rms2.url() + } + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + let (_response, status_code) = 
ms1.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + let (_response, status_code) = ms2.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms2" + } + }, + ] + }); + + let (response, _status_code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Badman", + "id": "E", + "_federation": { + "indexUid": "test", + "queriesPosition": 2, + "weightedRankingScore": 0.5, + "remote": "ms2" + } + }, + { + "title": "Batman", + "id": "D", + "_federation": { + "indexUid": "test", + "queriesPosition": 2, + "weightedRankingScore": 0.23106060606060605, + "remote": "ms2" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 5, + "remoteErrors": {} + } + "###); + let (response, _status_code) = ms1.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { 
".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Badman", + "id": "E", + "_federation": { + "indexUid": "test", + "queriesPosition": 2, + "weightedRankingScore": 0.5, + "remote": "ms2" + } + }, + { + "title": "Batman", + "id": "D", + "_federation": { + "indexUid": "test", + "queriesPosition": 2, + "weightedRankingScore": 0.23106060606060605, + "remote": "ms2" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 5, + "remoteErrors": {} + } + "###); + let (response, _status_code) = ms2.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Badman", + "id": "E", + "_federation": { + "indexUid": 
"test", + "queriesPosition": 2, + "weightedRankingScore": 0.5, + "remote": "ms2" + } + }, + { + "title": "Batman", + "id": "D", + "_federation": { + "indexUid": "test", + "queriesPosition": 2, + "weightedRankingScore": 0.23106060606060605, + "remote": "ms2" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 5, + "remoteErrors": {} + } + "###); +} + +#[actix_rt::test] +async fn error_unregistered_remote() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::new(ms1.clone()).await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, 
+ "ms1": { + "url": rms1.url() + }, + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + let (_response, status_code) = ms1.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms2" + } + }, + ] + }); + + // bind the status of each search itself (the request is rejected, hence 400); `code` previously held the status of an earlier setup call + let (response, code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "message": "Invalid `queries[2].federation_options.remote`: remote `ms2` is not registered", + "code": "invalid_multi_search_remote", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_remote" + } + "###); + let (response, code) = ms1.multi_search(request.clone()).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "message": "Invalid `queries[2].federation_options.remote`: remote `ms2` is not registered", + "code": "invalid_multi_search_remote", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_remote" + } + "###); +} + +#[actix_rt::test] +async fn error_no_weighted_score() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network":
true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::with_params( + ms1.clone(), + LocalMeiliParams { gobble_headers: true, ..Default::default() }, + ) + .await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url() + }, + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + }, + ] + }); + + // bind the status of the search itself; `code` previously held the status of an earlier setup call + let (response, code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, {
".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + "remoteErrors": { + "ms1": { + "message": "remote hit does not contain `._federation.weightedScoreValues`\n - hint: check that the remote instance is a Meilisearch instance running the same version", + "code": "remote_bad_response", + "type": "system", + "link": "https://docs.meilisearch.com/errors#remote_bad_response" + } + } + } + "###); +} + +#[actix_rt::test] +async fn error_bad_response() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) 
= index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::with_params( + ms1.clone(), + LocalMeiliParams { + override_response_body: Some("Returning an HTML page".into()), + ..Default::default() + }, + ) + .await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url() + }, + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + }, + ] + }); + + // bind the status of the search itself; `code` previously held the status of an earlier setup call + let (response, code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + "remoteErrors": { + "ms1": { + "message": "could not parse response from the remote host as a federated
search response:\n - response from remote: Returning an HTML page\n - hint: check that the remote instance is a Meilisearch instance running the same version", + "code": "remote_bad_response", + "type": "system", + "link": "https://docs.meilisearch.com/errors#remote_bad_response" + } + } + } + "###); +} + +#[actix_rt::test] +async fn error_bad_request() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::new(ms1.clone()).await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url() + }, + }}); + + println!("{}", 
serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "nottest", + "federationOptions": { + "remote": "ms1" + } + }, + ] + }); + + // bind the status of the search itself; `code` previously held the status of an earlier setup call + let (response, code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + "remoteErrors": { + "ms1": { + "message": "remote host responded with code 400:\n - response from remote: {\"message\":\"Inside `.queries[1]`: Index `nottest` not found.\",\"code\":\"index_not_found\",\"type\":\"invalid_request\",\"link\":\"https://docs.meilisearch.com/errors#index_not_found\"}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", + "code": "remote_bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#remote_bad_request" + } + } + } + "###); +} + +#[actix_rt::test] +async fn error_bad_request_facets_by_index() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let
(response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test0"); + let index1 = ms1.index("test1"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::new(ms1.clone()).await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url() + }, + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": { + "facetsByIndex": { + "test0": [] + } + }, + "queries": [ + { + "q": query, + "indexUid": "test0", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test1", + "federationOptions": { + 
"remote": "ms1" + } + }, + ] + }); + + // bind the status of the search itself; `code` previously held the status of an earlier setup call + let (response, code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test0", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test0", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + "facetsByIndex": { + "test0": { + "distribution": {}, + "stats": {} + } + }, + "remoteErrors": { + "ms1": { + "message": "remote host responded with code 400:\n - response from remote: {\"message\":\"Inside `.federation.facetsByIndex.test0`: Index `test0` not found.\\n - Note: index `test0` is not used in queries\",\"code\":\"index_not_found\",\"type\":\"invalid_request\",\"link\":\"https://docs.meilisearch.com/errors#index_not_found\"}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", + "code": "remote_bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#remote_bad_request" + } + } + } + "###); +} + +#[actix_rt::test] +async fn error_bad_request_facets_by_index_facet() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); +
snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + + let (task, _status_code) = index0.update_settings_filterable_attributes(json!(["id"])).await; + index0.wait_task(task.uid()).await.succeeded(); + + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::new(ms1.clone()).await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url() + }, + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": { + "facetsByIndex": { + "test": ["id"] + } + }, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + }, + ] + }); + + // bind the status of the search itself; `code` previously held the status of an earlier setup call + let (response, code) = ms0.multi_search(request.clone()).await; +
snapshot!(code, @"200 OK"); + + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + "facetsByIndex": { + "test": { + "distribution": { + "id": { + "A": 1, + "B": 1 + } + }, + "stats": {} + } + }, + "remoteErrors": { + "ms1": { + "message": "remote host responded with code 400:\n - response from remote: {\"message\":\"Inside `.federation.facetsByIndex.test`: Invalid facet distribution, this index does not have configured filterable attributes.\\n - Note: index `test` used in `.queries[1]`\",\"code\":\"invalid_multi_search_facets\",\"type\":\"invalid_request\",\"link\":\"https://docs.meilisearch.com/errors#invalid_multi_search_facets\"}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", + "code": "remote_bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#remote_bad_request" + } + } + } + "###); +} + +#[actix_rt::test] +async fn error_remote_does_not_answer() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + 
snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::new(ms1.clone()).await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url() + }, + "ms2": { + "url": "https://thiswebsitedoesnotexist.example" + } + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + let (_response, status_code) = ms1.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms2" + } + }, + ] + }); + + let 
(response, _status_code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3, + "remoteErrors": { + "ms2": { + "message": "error sending request", + "code": "remote_could_not_send_request", + "type": "system", + "link": "https://docs.meilisearch.com/errors#remote_could_not_send_request" + } + } + } + "###); + let (response, _status_code) = ms1.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + 
"estimatedTotalHits": 3, + "remoteErrors": { + "ms2": { + "message": "error sending request", + "code": "remote_could_not_send_request", + "type": "system", + "link": "https://docs.meilisearch.com/errors#remote_could_not_send_request" + } + } + } + "###); +} + +#[actix_rt::test] +async fn error_remote_404() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::new(ms1.clone()).await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": format!("{}/this-route-does-not-exists/", rms1.url()) + }, + }}); + + println!("{}", 
serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + let (_response, status_code) = ms1.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + }, + ] + }); + + // bind the status of each search itself; `code` previously held the status of an earlier setup call + let (response, code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + "remoteErrors": { + "ms1": { + "message": "remote host responded with code 404:\n - response from remote: null\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", + "code": "remote_bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#remote_bad_request" + } + } + } + "###); + let (response, code) = ms1.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman Returns", + "id": "C", +
"_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3, + "remoteErrors": {} + } + "###); +} + +#[actix_rt::test] +async fn error_remote_sharding_auth() { + let ms0 = Server::new().await; + let mut ms1 = Server::new_auth().await; + ms1.use_api_key("MASTER_KEY"); + + let (search_api_key_not_enough_indexes, code) = ms1 + .add_api_key(json!({ + "actions": ["search"], + "indexes": ["nottest"], + "expiresAt": serde_json::Value::Null + })) + .await; + meili_snap::snapshot!(code, @"201 Created"); + let search_api_key_not_enough_indexes = search_api_key_not_enough_indexes["key"].clone(); + + let (api_key_not_search, code) = ms1 + .add_api_key(json!({ + "actions": ["documents.*"], + "indexes": ["*"], + "expiresAt": serde_json::Value::Null + })) + .await; + meili_snap::snapshot!(code, @"201 Created"); + let api_key_not_search = api_key_not_search["key"].clone(); + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": 
{} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + ms1.clear_api_key(); + + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::new(ms1.clone()).await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1-nottest": { + "url": rms1.url(), + "searchApiKey": search_api_key_not_enough_indexes + }, + "ms1-notsearch": { + "url": rms1.url(), + "searchApiKey": api_key_not_search + } + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1-nottest" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1-notsearch" + } + }, + ] + }); + + // bind the status of the search itself; `code` previously held the status of an earlier setup call + let (response, code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman the dark knight returns:
Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + "remoteErrors": { + "ms1-notsearch": { + "message": "could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `search` action", + "code": "remote_invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#remote_invalid_api_key" + }, + "ms1-nottest": { + "message": "could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `search` action", + "code": "remote_invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#remote_invalid_api_key" + } + } + } + "###); +} + +#[actix_rt::test] +async fn remote_sharding_auth() { + let ms0 = Server::new().await; + let mut ms1 = Server::new_auth().await; + ms1.use_api_key("MASTER_KEY"); + + let (search_api_key, code) = ms1 + .add_api_key(json!({ + "actions": ["search"], + "indexes": ["*"], + "expiresAt": serde_json::Value::Null + })) + .await; + meili_snap::snapshot!(code, @"201 Created"); + let search_api_key = search_api_key["key"].clone(); + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + 
snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + ms1.clear_api_key(); + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::new(ms1.clone()).await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url(), + "searchApiKey": "MASTER_KEY" + }, + "ms1-alias": { + "url": rms1.url(), + "searchApiKey": search_api_key + } + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1-alias" + } + }, + ] + }); + + let (response, _status_code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + 
"hits": [ + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 2, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1-alias" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4, + "remoteErrors": {} + } + "###); +} + +#[actix_rt::test] +async fn error_remote_500() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, 
_status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::with_params( + ms1.clone(), + LocalMeiliParams { fails: FailurePolicy::Always, ..Default::default() }, + ) + .await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url() + }, + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + let (_response, status_code) = ms1.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + } + ] + }); + + let (response, _status_code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + "remoteErrors": { + 
"ms1": { + "message": "remote host responded with code 500:\n - response from remote: {\"error\":\"provoked error\",\"code\":\"test_error\",\"link\":\"https://docs.meilisearch.com/errors#test_error\"}", + "code": "remote_remote_error", + "type": "system", + "link": "https://docs.meilisearch.com/errors#remote_remote_error" + } + } + } + "###); + let (response, _status_code) = ms1.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + // the response if full because we queried the instance that works + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3, + "remoteErrors": {} + } + "###); +} + +#[actix_rt::test] +async fn error_remote_500_once() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", 
+ "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::with_params( + ms1.clone(), + LocalMeiliParams { fails: FailurePolicy::Once, ..Default::default() }, + ) + .await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url() + }, + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + let (_response, status_code) = ms1.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + } + ] + }); + + // Meilisearch is tolerant to a single failure + let (response, _status_code) = ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman Returns", + "id": "C", + "_federation": { + 
"indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3, + "remoteErrors": {} + } + "###); + let (response, _status_code) = ms1.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3, + "remoteErrors": {} + } + "###); +} + +#[actix_rt::test] +async fn error_remote_timeout() { + let ms0 = Server::new().await; + let ms1 = Server::new().await; + + // enable feature + + let (response, code) = ms0.set_features(json!({"network": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["network"]), @"true"); + let (response, code) = ms1.set_features(json!({"network": true})).await; + snapshot!(code, @"200 
OK"); + snapshot!(json_string!(response["network"]), @"true"); + + // set self + + let (response, code) = ms0.set_network(json!({"self": "ms0"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms0", + "remotes": {} + } + "###); + let (response, code) = ms1.set_network(json!({"self": "ms1"})).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), @r###" + { + "self": "ms1", + "remotes": {} + } + "###); + + // add documents + let documents = SCORE_DOCUMENTS.clone(); + let documents = documents.as_array().unwrap(); + let index0 = ms0.index("test"); + let index1 = ms1.index("test"); + let (task, _status_code) = index0.add_documents(json!(documents[0..2]), None).await; + index0.wait_task(task.uid()).await.succeeded(); + let (task, _status_code) = index1.add_documents(json!(documents[2..3]), None).await; + index1.wait_task(task.uid()).await.succeeded(); + + // wrap servers + let ms0 = Arc::new(ms0); + let ms1 = Arc::new(ms1); + + let rms0 = LocalMeili::new(ms0.clone()).await; + let rms1 = LocalMeili::with_params( + ms1.clone(), + LocalMeiliParams { delay: Some(std::time::Duration::from_secs(6)), ..Default::default() }, + ) + .await; + + // set network + let network = json!({"remotes": { + "ms0": { + "url": rms0.url() + }, + "ms1": { + "url": rms1.url() + }, + }}); + + println!("{}", serde_json::to_string_pretty(&network).unwrap()); + + let (_response, status_code) = ms0.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + let (_response, status_code) = ms1.set_network(network.clone()).await; + snapshot!(status_code, @"200 OK"); + + // perform multi-search + let query = "badman returns"; + let request = json!({ + "federation": {}, + "queries": [ + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms0" + } + }, + { + "q": query, + "indexUid": "test", + "federationOptions": { + "remote": "ms1" + } + } + ] + }); + + let (response, _status_code) = 
ms0.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2, + "remoteErrors": { + "ms1": { + "message": "remote host did not answer before the deadline", + "code": "remote_timeout", + "type": "system", + "link": "https://docs.meilisearch.com/errors#remote_timeout" + } + } + } + "###); + let (response, _status_code) = ms1.multi_search(request.clone()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "title": "Batman Returns", + "id": "C", + "_federation": { + "indexUid": "test", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902, + "remote": "ms1" + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "id": "A", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "id": "B", + "_federation": { + "indexUid": "test", + "queriesPosition": 0, + "weightedRankingScore": 0.7028218694885362, + "remote": "ms0" + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3, + "remoteErrors": {} + } + "###); +} + +// test: try all the flattened structs in queries + +// working facet tests with and without merge + +#[derive(Default)] +pub enum FailurePolicy { + #[default] + 
Never, + Once, + Always, +} + +/// Parameters to change the behavior of the [`LocalMeili`] server. +#[derive(Default)] +pub struct LocalMeiliParams { + /// delay the response by the specified duration + pub delay: Option, + pub fails: FailurePolicy, + /// replace the response body with the provided String + pub override_response_body: Option, + pub gobble_headers: bool, +} + +/// A server that exploits [`MockServer`] to provide a URL for testing network and the network. +pub struct LocalMeili { + mock_server: MockServer, +} + +impl LocalMeili { + pub async fn new(server: Arc) -> Self { + Self::with_params(server, Default::default()).await + } + + pub async fn with_params(server: Arc, params: LocalMeiliParams) -> Self { + let mock_server = MockServer::start().await; + + // tokio won't let us execute asynchronous code from a sync function inside of an async test, + // so instead we spawn another thread that will call the service on a brand new tokio runtime + // and communicate via channels... 
+ let (request_sender, request_receiver) = crossbeam_channel::bounded::(0); + let (response_sender, response_receiver) = + crossbeam_channel::bounded::<(Value, StatusCode)>(0); + std::thread::spawn(move || { + let rt = tokio::runtime::Builder::new_current_thread().build().unwrap(); + while let Ok(req) = request_receiver.recv() { + let body = std::str::from_utf8(&req.body).unwrap(); + let headers: Vec<(&str, &str)> = if params.gobble_headers { + vec![("Content-Type", "application/json")] + } else { + req.headers + .iter() + .map(|(name, value)| (name.as_str(), value.to_str().unwrap())) + .collect() + }; + let (value, code) = rt.block_on(async { + match req.method.as_str() { + "POST" => server.service.post_str(&req.url, body, headers.clone()).await, + "PUT" => server.service.put_str(&req.url, body, headers).await, + "PATCH" => server.service.patch(&req.url, req.body_json().unwrap()).await, + "GET" => server.service.get(&req.url).await, + "DELETE" => server.service.delete(&req.url).await, + _ => unimplemented!(), + } + }); + if response_sender.send((value, code)).is_err() { + break; + } + } + println!("exiting mock thread") + }); + + let failed_already = std::sync::atomic::AtomicBool::new(false); + + Mock::given(AnyMatcher) + .respond_with(move |req: &wiremock::Request| { + if let Some(delay) = params.delay { + std::thread::sleep(delay); + } + match params.fails { + FailurePolicy::Never => {} + FailurePolicy::Once => { + let failed_already = + failed_already.fetch_or(true, std::sync::atomic::Ordering::AcqRel); + if !failed_already { + return fail(params.override_response_body.as_deref()); + } + } + FailurePolicy::Always => return fail(params.override_response_body.as_deref()), + } + request_sender.send(req.clone()).unwrap(); + let (value, code) = response_receiver.recv().unwrap(); + let response = ResponseTemplate::new(code.as_u16()); + if let Some(override_response_body) = params.override_response_body.as_deref() { + response.set_body_string(override_response_body) + 
} else { + response.set_body_json(value) + } + }) + .mount(&mock_server) + .await; + Self { mock_server } + } + + pub fn url(&self) -> String { + self.mock_server.uri() + } +} + +fn fail(override_response_body: Option<&str>) -> ResponseTemplate { + let response = ResponseTemplate::new(500); + if let Some(override_response_body) = override_response_body { + response.set_body_string(override_response_body) + } else { + response.set_body_json(json!({"error": "provoked error", "code": "test_error", "link": "https://docs.meilisearch.com/errors#test_error"})) + } +} diff --git a/crates/milli/src/score_details.rs b/crates/milli/src/score_details.rs index 1efa3b8e6..940e5f395 100644 --- a/crates/milli/src/score_details.rs +++ b/crates/milli/src/score_details.rs @@ -1,7 +1,7 @@ use std::cmp::Ordering; use itertools::Itertools; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use crate::distance_between_two_points; @@ -36,6 +36,15 @@ enum RankOrValue<'a> { Score(f64), } +#[derive(Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub enum WeightedScoreValue { + WeightedScore(f64), + Sort { asc: bool, value: serde_json::Value }, + GeoSort { asc: bool, distance: Option }, + VectorSort(f64), +} + impl ScoreDetails { pub fn local_score(&self) -> Option { self.rank().map(Rank::local_score) @@ -87,6 +96,30 @@ impl ScoreDetails { }) } + pub fn weighted_score_values<'a>( + details: impl Iterator + 'a, + weight: f64, + ) -> impl Iterator + 'a { + details + .map(ScoreDetails::rank_or_value) + .coalesce(|left, right| match (left, right) { + (RankOrValue::Rank(left), RankOrValue::Rank(right)) => { + Ok(RankOrValue::Rank(Rank::merge(left, right))) + } + (left, right) => Err((left, right)), + }) + .map(move |rank_or_value| match rank_or_value { + RankOrValue::Rank(r) => WeightedScoreValue::WeightedScore(r.local_score() * weight), + RankOrValue::Sort(s) => { + WeightedScoreValue::Sort { asc: s.ascending, value: s.value.clone() } + } + RankOrValue::GeoSort(g) 
=> { + WeightedScoreValue::GeoSort { asc: g.ascending, distance: g.distance() } + } + RankOrValue::Score(s) => WeightedScoreValue::VectorSort(s * weight), + }) + } + fn rank_or_value(&self) -> RankOrValue<'_> { match self { ScoreDetails::Words(w) => RankOrValue::Rank(w.rank()), @@ -423,34 +456,58 @@ pub struct Sort { pub value: serde_json::Value, } +pub fn compare_sort_values( + ascending: bool, + left: &serde_json::Value, + right: &serde_json::Value, +) -> Ordering { + use serde_json::Value::*; + match (left, right) { + (Null, Null) => Ordering::Equal, + (Null, _) => Ordering::Less, + (_, Null) => Ordering::Greater, + // numbers are always before strings + (Number(_), String(_)) => Ordering::Greater, + (String(_), Number(_)) => Ordering::Less, + (Number(left), Number(right)) => { + // FIXME: unwrap permitted here? + let order = left + .as_f64() + .unwrap() + .partial_cmp(&right.as_f64().unwrap()) + .unwrap_or(Ordering::Equal); + // 12 < 42, and when ascending, we want to see 12 first, so the smallest. + // Hence, when ascending, smaller is better + if ascending { + order.reverse() + } else { + order + } + } + (String(left), String(right)) => { + let order = left.cmp(right); + // Taking e.g. "a" and "z" + // "a" < "z", and when ascending, we want to see "a" first, so the smallest. 
+ // Hence, when ascending, smaller is better + if ascending { + order.reverse() + } else { + order + } + } + (left, right) => { + tracing::warn!(%left, %right, "sort values that are neither numbers, strings or null, handling as equal"); + Ordering::Equal + } + } +} + impl PartialOrd for Sort { fn partial_cmp(&self, other: &Self) -> Option { if self.ascending != other.ascending { return None; } - match (&self.value, &other.value) { - (serde_json::Value::Null, serde_json::Value::Null) => Some(Ordering::Equal), - (serde_json::Value::Null, _) => Some(Ordering::Less), - (_, serde_json::Value::Null) => Some(Ordering::Greater), - // numbers are always before strings - (serde_json::Value::Number(_), serde_json::Value::String(_)) => Some(Ordering::Greater), - (serde_json::Value::String(_), serde_json::Value::Number(_)) => Some(Ordering::Less), - (serde_json::Value::Number(left), serde_json::Value::Number(right)) => { - // FIXME: unwrap permitted here? - let order = left.as_f64().unwrap().partial_cmp(&right.as_f64().unwrap())?; - // 12 < 42, and when ascending, we want to see 12 first, so the smallest. - // Hence, when ascending, smaller is better - Some(if self.ascending { order.reverse() } else { order }) - } - (serde_json::Value::String(left), serde_json::Value::String(right)) => { - let order = left.cmp(right); - // Taking e.g. "a" and "z" - // "a" < "z", and when ascending, we want to see "a" first, so the smallest. 
- // Hence, when ascending, smaller is better - Some(if self.ascending { order.reverse() } else { order }) - } - _ => None, - } + Some(compare_sort_values(self.ascending, &self.value, &other.value)) } } diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs index 83d00caf0..7f333d548 100644 --- a/crates/milli/src/search/new/matches/mod.rs +++ b/crates/milli/src/search/new/matches/mod.rs @@ -11,7 +11,7 @@ use either::Either; pub use matching_words::MatchingWords; use matching_words::{MatchType, PartialMatch}; use r#match::{Match, MatchPosition}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use simple_token_kind::SimpleTokenKind; use utoipa::ToSchema; @@ -101,11 +101,11 @@ impl FormatOptions { } } -#[derive(Serialize, Debug, Clone, PartialEq, Eq, ToSchema)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)] pub struct MatchBounds { pub start: usize, pub length: usize, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(skip_serializing_if = "Option::is_none", default)] pub indices: Option>, }