From a23fbf6c7b4ce345584b0469f01438a3ff731bb4 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Tue, 24 Jan 2023 15:18:00 +0100
Subject: [PATCH] multi-search: Add search with an array of indexes

---
 meilisearch/src/routes/indexes/search.rs |  27 +----
 meilisearch/src/routes/mod.rs            |   2 +
 meilisearch/src/routes/multi_search.rs   | 122 ++++++++++++++++++++++
 meilisearch/src/search.rs                | 124 +++++++++++++++++++++
 4 files changed, 251 insertions(+), 24 deletions(-)
 create mode 100644 meilisearch/src/routes/multi_search.rs

diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs
index eef092d02..f9242f320 100644
--- a/meilisearch/src/routes/indexes/search.rs
+++ b/meilisearch/src/routes/indexes/search.rs
@@ -3,7 +3,6 @@ use actix_web::{web, HttpRequest, HttpResponse};
 use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use index_scheduler::IndexScheduler;
 use log::debug;
-use meilisearch_auth::IndexSearchRules;
 use meilisearch_types::deserr::query_params::Param;
 use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
 use meilisearch_types::error::deserr_codes::*;
@@ -17,9 +16,9 @@ use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::search::{
-    perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
-    DEFAULT_SEARCH_OFFSET,
+    add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
+    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
 };
 
 pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -101,26 +100,6 @@ impl From<SearchQueryGet> for SearchQuery {
     }
 }
 
-/// Incorporate search rules in search query
-fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
-    query.filter = match (query.filter.take(), rules.filter) {
-        (None, rules_filter) => rules_filter,
-        (filter, None) => filter,
-        (Some(filter), Some(rules_filter)) => {
-            let filter = match filter {
-                Value::Array(filter) => filter,
-                filter => vec![filter],
-            };
-
-            let rules_filter = match rules_filter {
-                Value::Array(rules_filter) => rules_filter,
-                rules_filter => vec![rules_filter],
-            };
-
-            Some(Value::Array([filter, rules_filter].concat()))
-        }
-    }
-}
-
 // TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
 /// Transform the sort query parameter into something that matches the post expected format.
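Note on the hunk above: `add_search_rules` is not deleted but moved to `meilisearch/src/search.rs` (the last file in this patch), so the new `/multi-search` route can reuse it alongside the existing single-index route. For reference, here is a minimal standalone sketch of the merge rule it implements, written against `serde_json` with made-up filter values; the `merge_filters` name is ours, not part of the patch. When both the query and the tenant-token search rules carry a filter, each side is coerced to an array and the arrays are concatenated, which Meilisearch evaluates as a logical AND.

```rust
// Standalone sketch of the filter-merge rule applied by `add_search_rules`
// (illustrative only; names and inputs here are hypothetical).
use serde_json::{json, Value};

/// Merge an optional query filter with an optional tenant-token filter.
/// When both are present, each side is coerced to an array and the arrays
/// are concatenated, i.e. both filters must hold.
fn merge_filters(query_filter: Option<Value>, rules_filter: Option<Value>) -> Option<Value> {
    match (query_filter, rules_filter) {
        (None, rules_filter) => rules_filter,
        (filter, None) => filter,
        (Some(filter), Some(rules_filter)) => {
            let filter = match filter {
                Value::Array(filter) => filter,
                filter => vec![filter],
            };
            let rules_filter = match rules_filter {
                Value::Array(rules_filter) => rules_filter,
                rules_filter => vec![rules_filter],
            };
            Some(Value::Array([filter, rules_filter].concat()))
        }
    }
}

fn main() {
    let merged = merge_filters(
        Some(json!("genre = horror")),      // filter sent by the client
        Some(json!(["user_id = 42"])),      // filter imposed by the tenant token
    );
    // => Some(json!(["genre = horror", "user_id = 42"]))
    println!("{merged:?}");
}
```

Moving the helper rather than duplicating it keeps the tenant-token semantics identical for the single-index and multi-search routes.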
diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs
index 27eecebbc..622e26c75 100644
--- a/meilisearch/src/routes/mod.rs
+++ b/meilisearch/src/routes/mod.rs
@@ -22,6 +22,7 @@ const PAGINATION_DEFAULT_LIMIT: usize = 20;
 mod api_key;
 mod dump;
 pub mod indexes;
+mod multi_search;
 mod swap_indexes;
 pub mod tasks;
 
@@ -33,6 +34,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
         .service(web::resource("/stats").route(web::get().to(get_stats)))
         .service(web::resource("/version").route(web::get().to(get_version)))
         .service(web::scope("/indexes").configure(indexes::configure))
+        .service(web::scope("/multi-search").configure(multi_search::configure))
         .service(web::scope("/swap-indexes").configure(swap_indexes::configure));
 }
 
diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs
new file mode 100644
index 000000000..fd78df5e5
--- /dev/null
+++ b/meilisearch/src/routes/multi_search.rs
@@ -0,0 +1,122 @@
+use actix_http::StatusCode;
+use actix_web::web::{self, Data};
+use actix_web::{HttpRequest, HttpResponse};
+use deserr::actix_web::AwebJson;
+use index_scheduler::IndexScheduler;
+use log::debug;
+use meilisearch_types::deserr::DeserrJsonError;
+use meilisearch_types::error::ResponseError;
+use meilisearch_types::keys::actions;
+use serde::Serialize;
+
+use crate::analytics::{Analytics, MultiSearchAggregator};
+use crate::extractors::authentication::policies::ActionPolicy;
+use crate::extractors::authentication::{AuthenticationError, GuardedData};
+use crate::extractors::sequential_extractor::SeqHandler;
+use crate::search::{
+    add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
+};
+
+pub fn configure(cfg: &mut web::ServiceConfig) {
+    cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post))));
+}
+
+#[derive(Serialize)]
+struct SearchResults {
+    results: Vec<SearchResultWithIndex>,
+}
+
+#[derive(Debug, deserr::Deserr)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+pub struct SearchQueries {
+    queries: Vec<SearchQueryWithIndex>,
+}
+
+pub async fn multi_search_with_post(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
+    params: AwebJson<SearchQueries, DeserrJsonError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    let queries = params.into_inner().queries;
+
+    let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
+
+    // Explicitly expect a `(ResponseError, usize)` error type rather than `ResponseError` alone,
+    // so that `?` does not compile unless `with_index` is called, ensuring the query index cannot
+    // be forgotten when the code changes.
+    let search_results: Result<_, (ResponseError, usize)> = (|| {
+        async {
+            let mut search_results = Vec::with_capacity(queries.len());
+            for (query_index, (index_uid, mut query)) in
+                queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
+            {
+                debug!("multi-search #{query_index}: called with params: {:?}", query);
+
+                // Check that the API key authorizes this index
+                if !index_scheduler.filters().is_index_authorized(&index_uid) {
+                    return Err(AuthenticationError::InvalidToken).with_index(query_index);
+                }
+                // Apply the search rules from the tenant token
+                if let Some(search_rules) =
+                    index_scheduler.filters().get_index_search_rules(&index_uid)
+                {
+                    add_search_rules(&mut query, search_rules);
+                }
+
+                let index = index_scheduler
+                    .index(&index_uid)
+                    .map_err(|err| {
+                        let mut err = ResponseError::from(err);
+                        // Patch the HTTP status code to 400, as it defaults to 404 for `index_not_found`,
+                        // but here the missing index is not part of the request URL.
+                        err.code = StatusCode::BAD_REQUEST;
+                        err
+                    })
+                    .with_index(query_index)?;
+                let search_result =
+                    tokio::task::spawn_blocking(move || perform_search(&index, query))
+                        .await
+                        .with_index(query_index)?;
+
+                search_results.push(SearchResultWithIndex {
+                    index_uid: index_uid.into_inner(),
+                    result: search_result.with_index(query_index)?,
+                });
+            }
+            Ok(search_results)
+        }
+    })()
+    .await;
+
+    if search_results.is_ok() {
+        multi_aggregate.succeed();
+    }
+    analytics.post_multi_search(multi_aggregate);
+
+    let search_results = search_results.map_err(|(mut err, query_index)| {
+        // Add the index of the failing query as context to the error message.
+        // This is done here rather than in the `WithIndex` trait itself, so that `with_index`
+        // returns a distinct result type and the compiler enforces its use.
+        err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
+        err
+    })?;
+
+    debug!("returns: {:?}", search_results);
+
+    Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
+}
+
+/// Local `Result` extension trait to avoid `map_err` boilerplate.
+trait WithIndex {
+    type T;
+    /// Convert the error type inside the `Result` to a `ResponseError`, and return it in a tuple with the index of the failing query.
+    fn with_index(self, index: usize) -> Result<Self::T, (ResponseError, usize)>;
+}
+
+impl<T, E: Into<ResponseError>> WithIndex for Result<T, E> {
+    type T = T;
+    fn with_index(self, index: usize) -> Result<T, (ResponseError, usize)> {
+        self.map_err(|err| (err.into(), index))
+    }
+}
diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs
index 6bf23cfc4..b7d5b9114 100644
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -5,8 +5,10 @@ use std::time::Instant;
 use deserr::Deserr;
 use either::Either;
+use meilisearch_auth::IndexSearchRules;
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::deserr_codes::*;
+use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
 use milli::tokenizer::TokenizerBuilder;
@@ -74,6 +76,100 @@ impl SearchQuery {
     }
 }
 
+/// A `SearchQuery` + an index UID.
+// This struct contains the fields of `SearchQuery` inline.
+// This is because neither deserr nor serde supports `flatten` when `deny_unknown_fields` is used.
+// The exhaustive destructuring in `into_index_query` ensures both structs remain up to date.
+#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+pub struct SearchQueryWithIndex {
+    #[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
+    pub index_uid: IndexUid,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
+    pub q: Option<String>,
+    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
+    pub offset: usize,
+    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
+    pub limit: usize,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
+    pub page: Option<usize>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
+    pub hits_per_page: Option<usize>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
+    pub attributes_to_retrieve: Option<BTreeSet<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
+    pub attributes_to_crop: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
+    pub crop_length: usize,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
+    pub attributes_to_highlight: Option<HashSet<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
+    pub show_matches_position: bool,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
+    pub filter: Option<Value>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
+    pub sort: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
+    pub facets: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
+    pub highlight_pre_tag: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
+    pub highlight_post_tag: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
+    pub crop_marker: String,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
+    pub matching_strategy: MatchingStrategy,
+}
+
+impl SearchQueryWithIndex {
+    pub fn into_index_query(self) -> (IndexUid, SearchQuery) {
+        let SearchQueryWithIndex {
+            index_uid,
+            q,
+            offset,
+            limit,
+            page,
+            hits_per_page,
+            attributes_to_retrieve,
+            attributes_to_crop,
+            crop_length,
+            attributes_to_highlight,
+            show_matches_position,
+            filter,
+            sort,
+            facets,
+            highlight_pre_tag,
+            highlight_post_tag,
+            crop_marker,
+            matching_strategy,
+        } = self;
+        (
+            index_uid,
+            SearchQuery {
+                q,
+                offset,
+                limit,
+                page,
+                hits_per_page,
+                attributes_to_retrieve,
+                attributes_to_crop,
+                crop_length,
+                attributes_to_highlight,
+                show_matches_position,
+                filter,
+                sort,
+                facets,
+                highlight_pre_tag,
+                highlight_post_tag,
+                crop_marker,
+                matching_strategy,
+                // Do not use `..Default::default()` here;
+                // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`.
+            },
+        )
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Deserr)]
 #[deserr(rename_all = camelCase)]
 pub enum MatchingStrategy {
@@ -122,6 +218,14 @@ pub struct SearchResult {
     pub facet_stats: Option<BTreeMap<String, FacetStats>>,
 }
 
+#[derive(Serialize, Debug, Clone, PartialEq)]
+#[serde(rename_all = "camelCase")]
+pub struct SearchResultWithIndex {
+    pub index_uid: String,
+    #[serde(flatten)]
+    pub result: SearchResult,
+}
+
 #[derive(Serialize, Debug, Clone, PartialEq, Eq)]
 #[serde(untagged)]
 pub enum HitsInfo {
@@ -137,6 +241,26 @@ pub struct FacetStats {
     pub max: f64,
 }
 
+/// Incorporate the search rules from the tenant token into the search query.
+pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
+    query.filter = match (query.filter.take(), rules.filter) {
+        (None, rules_filter) => rules_filter,
+        (filter, None) => filter,
+        (Some(filter), Some(rules_filter)) => {
+            let filter = match filter {
+                Value::Array(filter) => filter,
+                filter => vec![filter],
+            };
+
+            let rules_filter = match rules_filter {
+                Value::Array(rules_filter) => rules_filter,
+                rules_filter => vec![rules_filter],
+            };
+
+            Some(Value::Array([filter, rules_filter].concat()))
+        }
+    }
+}
+
 pub fn perform_search(
     index: &Index,
     query: SearchQuery,
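For illustration, here is roughly what the new `POST /multi-search` endpoint accepts and returns, as implied by `SearchQueries`, `SearchQueryWithIndex`, and `SearchResultWithIndex` above. This is a sketch only: the index names, queries, and hit counts are invented, only a few of the optional per-query fields are shown, and the per-result fields (`hits`, `query`, `processingTimeMs`, `limit`, `offset`, `estimatedTotalHits`) come from the existing `SearchResult` serialization.

```rust
// Sketch of the request/response shape for `POST /multi-search` (illustrative values only).
use serde_json::json;

fn main() {
    // Request body: one entry per query, each carrying its own `indexUid`.
    let request_body = json!({
        "queries": [
            { "indexUid": "movies", "q": "batman", "limit": 5 },
            { "indexUid": "comics", "q": "batman", "filter": "year > 2000" }
        ]
    });

    // Successful response: one object per query, in the same order. `indexUid` sits next to
    // the usual search-result fields because `SearchResultWithIndex` flattens `result`.
    let expected_response = json!({
        "results": [
            {
                "indexUid": "movies",
                "hits": [],
                "query": "batman",
                "processingTimeMs": 1,
                "limit": 5,
                "offset": 0,
                "estimatedTotalHits": 0
            },
            {
                "indexUid": "comics",
                "hits": [],
                "query": "batman",
                "processingTimeMs": 1,
                "limit": 20,
                "offset": 0,
                "estimatedTotalHits": 0
            }
        ]
    });

    println!("{request_body:#}");
    println!("{expected_response:#}");
}
```

If any query fails, the whole request fails with a single error whose message is prefixed with `Inside \`.queries[{query_index}]\`:` by `multi_search_with_post`, identifying the offending query.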