Add the multi-search route to the OpenAPI documentation

This commit is contained in:
Tamo 2024-12-26 15:56:44 +01:00
parent 11ce3b9636
commit 9473a2a6ca
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
5 changed files with 153 additions and 20 deletions

View File

@ -190,7 +190,6 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
}
}
/// Get one document
///
/// Get one document from its primary key.
@ -303,7 +302,6 @@ impl Aggregate for DocumentsDeletionAggregator {
}
}
/// Delete a document
///
/// Delete a single document by id.
@ -1197,13 +1195,16 @@ pub async fn delete_documents_by_filter(
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Debug, Deserr, IntoParams)]
#[derive(Debug, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
/// Request body of the edit-documents-by-function route.
pub struct DocumentEditionByFunction {
/// A string containing a filter expression.
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
pub filter: Option<Value>,
/// An object with data Meilisearch should make available for the editing function.
#[deserr(default, error = DeserrJsonError<InvalidDocumentEditionContext>)]
pub context: Option<Value>,
/// A string containing a RHAI function.
// The only field without a deserr `default`: omitting it triggers the dedicated
// `missing_document_edition_function` error.
#[deserr(error = DeserrJsonError<InvalidDocumentEditionFunctionFilter>, missing_field_error = DeserrJsonError::missing_document_edition_function)]
pub function: String,
}
@ -1246,8 +1247,8 @@ impl Aggregate for EditDocumentsByFunctionAggregator {
security(("Bearer" = ["documents.*", "*"])),
params(
("indexUid", example = "movies", description = "Index Unique Identifier", nullable = false),
DocumentEditionByFunction,
),
request_body = DocumentEditionByFunction,
responses(
(status = 202, description = "Task successfully enqueued", body = SummarizedTaskView, content_type = "application/json", example = json!(
{

View File

@ -2,7 +2,12 @@ use std::collections::BTreeMap;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search::{SimilarQuery, SimilarResult};
use crate::routes::indexes::documents::DocumentEditionByFunction;
use crate::routes::multi_search::SearchResults;
use crate::search::{
FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
SearchQueryWithIndex, SearchResultWithIndex, SimilarQuery, SimilarResult,
};
use crate::search_queue::SearchQueue;
use crate::Opt;
use actix_web::web::Data;
@ -64,13 +69,14 @@ pub mod tasks;
(path = "/keys", api = api_key::ApiKeyApi),
(path = "/metrics", api = metrics::MetricApi),
(path = "/logs", api = logs::LogsApi),
(path = "/multi-search", api = multi_search::MultiSearchApi),
),
paths(get_health, get_version, get_stats),
tags(
(name = "Stats", description = "Stats gives extended information and metrics about indexes and the Meilisearch database."),
),
modifiers(&OpenApiAuth),
components(schemas(SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind))
components(schemas(DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind))
)]
pub struct MeilisearchApi;
@ -89,7 +95,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/snapshots").configure(snapshot::configure)) // done
.service(web::resource("/stats").route(web::get().to(get_stats))) // done
.service(web::resource("/version").route(web::get().to(get_version))) // done
.service(web::scope("/indexes").configure(indexes::configure)) // WIP
.service(web::scope("/indexes").configure(indexes::configure)) // done
.service(web::scope("/multi-search").configure(multi_search::configure)) // TODO
.service(web::scope("/swap-indexes").configure(swap_indexes::configure)) // TODO
.service(web::scope("/metrics").configure(metrics::configure)) // done

View File

@ -8,6 +8,7 @@ use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use tracing::debug;
use utoipa::{OpenApi, ToSchema};
use super::multi_search_analytics::MultiSearchAggregator;
use crate::analytics::Analytics;
@ -17,20 +18,129 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_federated_search, perform_search, FederatedSearch, RetrieveVectors,
add_search_rules, perform_federated_search, perform_search, FederatedSearch, FederatedSearchResult, RetrieveVectors,
SearchQueryWithIndex, SearchResultWithIndex,
};
use crate::search_queue::SearchQueue;
// OpenAPI description of the multi-search route.
// It registers the `multi_search_with_post` path and declares the "Multi-search" tag
// (description + link to the external reference documentation) that the path is grouped under.
// Plain `//` comments are used on purpose so the input of the `OpenApi` derive is left untouched.
#[derive(OpenApi)]
#[openapi(
paths(multi_search_with_post),
tags((
name = "Multi-search",
description = "The `/multi-search` route allows you to perform multiple search queries on one or more indexes by bundling them into a single HTTP request. Multi-search is also known as federated search.",
external_docs(url = "https://www.meilisearch.com/docs/reference/api/multi_search"),
)),
)]
pub struct MultiSearchApi;
/// Registers the multi-search endpoint on `cfg`.
///
/// A single resource is mounted at the empty path (i.e. directly on the scope this
/// configuration is attached to) and answers `POST` requests with
/// `multi_search_with_post`, wrapped in `SeqHandler`.
pub fn configure(cfg: &mut web::ServiceConfig) {
    let post_route = web::post().to(SeqHandler(multi_search_with_post));
    let root_resource = web::resource("").route(post_route);
    cfg.service(root_resource);
}
#[derive(Serialize)]
struct SearchResults {
#[derive(Serialize, ToSchema)]
/// Response body of a non-federated multi-search.
pub struct SearchResults {
/// The search results, each one carrying the uid of the index it belongs to.
results: Vec<SearchResultWithIndex>,
}
/// Perform a multi-search
///
/// Bundle multiple search queries in a single API request. Use this endpoint to search through multiple indexes at once.
#[utoipa::path(
post,
path = "/",
tag = "Multi-search",
security(("Bearer" = ["search", "*"])),
responses(
(status = OK, description = "Non federated multi-search", body = SearchResults, content_type = "application/json", example = json!(
{
"results":[
{
"indexUid":"movies",
"hits":[
{
"id":13682,
"title":"Pooh's Heffalump Movie",
},
],
"query":"pooh",
"processingTimeMs":26,
"limit":1,
"offset":0,
"estimatedTotalHits":22
},
{
"indexUid":"movies",
"hits":[
{
"id":12,
"title":"Finding Nemo",
},
],
"query":"nemo",
"processingTimeMs":5,
"limit":1,
"offset":0,
"estimatedTotalHits":11
},
{
"indexUid":"movie_ratings",
"hits":[
{
"id":"Us",
"director": "Jordan Peele",
}
],
"query":"Us",
"processingTimeMs":0,
"limit":1,
"offset":0,
"estimatedTotalHits":1
}
]
}
)),
(status = OK, description = "Federated multi-search", body = FederatedSearchResult, content_type = "application/json", example = json!(
{
"hits": [
{
"id": 42,
"title": "Batman returns",
"overview": "The overview of batman returns",
"_federation": {
"indexUid": "movies",
"queriesPosition": 0
}
},
{
"comicsId": "batman-killing-joke",
"description": "This comic is really awesome",
"title": "Batman: the killing joke",
"_federation": {
"indexUid": "comics",
"queriesPosition": 1
}
},
],
"processingTimeMs": 0,
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2,
"semanticHitCount": 0
}
)),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
)),
)
)]
pub async fn multi_search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: Data<SearchQueue>,

View File

@ -22,6 +22,7 @@ use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget};
use roaring::RoaringBitmap;
use serde::Serialize;
use utoipa::ToSchema;
use super::ranking_rules::{self, RankingRules};
use super::{
@ -33,10 +34,11 @@ use crate::routes::indexes::search::search_kind;
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)]
#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
// Federation options holding a single `weight` setting.
// NOTE(review): presumably attached to each query of a federated search — confirm against `SearchQueryWithIndex`.
pub struct FederationOptions {
// Falls back to `Weight`'s default when the field is omitted; invalid values are reported
// with the `InvalidMultiSearchWeight` error code.
// `Weight` is a project wrapper type, so the OpenAPI schema documents it as a plain `f64`.
// NOTE(review): the default is presumably `DEFAULT_FEDERATED_WEIGHT` — confirm in `Weight`'s `Default` impl.
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
#[schema(value_type = f64)]
pub weight: Weight,
}
@ -70,8 +72,9 @@ impl std::ops::Deref for Weight {
}
}
#[derive(Debug, deserr::Deserr)]
#[derive(Debug, deserr::Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
pub struct Federation {
#[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
@ -83,22 +86,26 @@ pub struct Federation {
pub merge_facets: Option<MergeFacets>,
}
#[derive(Copy, Clone, Debug, deserr::Deserr, Default)]
#[derive(Copy, Clone, Debug, deserr::Deserr, Default, ToSchema)]
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
// Settings carried by the `merge_facets` field of `Federation`.
pub struct MergeFacets {
// Optional: deserializes to `None` when omitted. Invalid values are reported with the
// `InvalidMultiSearchMaxValuesPerFacet` error code.
// NOTE(review): presumably caps how many values are kept per facet once facets are merged — confirm.
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
pub max_values_per_facet: Option<usize>,
}
#[derive(Debug, deserr::Deserr)]
#[derive(Debug, deserr::Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
// A list of per-index search queries plus optional federation settings.
// NOTE(review): presumably the request body of the multi-search route — confirm against the handler's signature.
pub struct FederatedSearch {
// No deserr `default` here, so this field must be present in the payload.
pub queries: Vec<SearchQueryWithIndex>,
// Deserializes to `None` when the field is omitted.
#[deserr(default)]
pub federation: Option<Federation>,
}
#[derive(Serialize, Clone)]
#[derive(Serialize, Clone, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct FederatedSearchResult {
pub hits: Vec<SearchHit>,
pub processing_time_ms: u128,
@ -109,6 +116,7 @@ pub struct FederatedSearchResult {
pub semantic_hit_count: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
#[schema(value_type = Option<BTreeMap<String, BTreeMap<String, u64>>>)]
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
@ -355,7 +363,7 @@ struct SearchResultByIndex {
facets: Option<ComputedFacets>,
}
#[derive(Debug, Clone, Default, Serialize)]
#[derive(Debug, Clone, Default, Serialize, ToSchema)]
// Newtype over a map from a string key to the `ComputedFacets` associated with that key.
// NOTE(review): the keys are presumably index uids — confirm against the code that fills the map.
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
impl FederatedFacets {

View File

@ -39,7 +39,10 @@ use utoipa::ToSchema;
use crate::error::MeilisearchHttpError;
mod federated;
pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions};
pub use federated::{
perform_federated_search, FederatedSearch, FederatedSearchResult, Federation,
FederationOptions, MergeFacets,
};
mod ranking_rules;
@ -388,8 +391,9 @@ impl SearchQuery {
// This struct contains the fields of `SearchQuery` inline.
// This is because neither deserr nor serde supports `flatten` when using `deny_unknown_fields`.
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
#[derive(Debug, Clone, PartialEq, Deserr)]
#[derive(Debug, Clone, PartialEq, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
pub struct SearchQueryWithIndex {
#[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
pub index_uid: IndexUid,
@ -734,8 +738,9 @@ pub struct SimilarResult {
pub hits_info: HitsInfo,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
#[derive(Serialize, Debug, Clone, PartialEq, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct SearchResultWithIndex {
pub index_uid: String,
#[serde(flatten)]
@ -746,8 +751,10 @@ pub struct SearchResultWithIndex {
#[serde(untagged)]
// The two shapes the hit-count information of a search result can take.
// The enum is `untagged`, so only the fields of the active variant are serialized (no variant
// name), and both variants rename their fields to camelCase for serde and for the OpenAPI schema.
pub enum HitsInfo {
// Page-based shape: serialized as `hitsPerPage`, `page`, `totalPages`, `totalHits`.
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize },
// Offset/limit-based shape: serialized as `limit`, `offset`, `estimatedTotalHits`.
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}
@ -1034,8 +1041,9 @@ pub fn perform_search(
Ok(result)
}
#[derive(Debug, Clone, Default, Serialize)]
#[derive(Debug, Clone, Default, Serialize, ToSchema)]
// Facet data attached to a search result: a distribution map and a statistics map.
pub struct ComputedFacets {
    // `IndexMap` is documented through a `BTreeMap` stand-in in the OpenAPI schema.
    // The override must not be wrapped in `Option`: this field is a plain map that is always
    // serialized, and an `Option<..>` value type would make the generated schema advertise it
    // as optional.
    // NOTE(review): presumably facet name -> (facet value -> number of matching documents) — confirm.
    #[schema(value_type = BTreeMap<String, BTreeMap<String, u64>>)]
    pub distribution: BTreeMap<String, IndexMap<String, u64>>,
    // One `FacetStats` entry per key.
    // NOTE(review): presumably keyed by facet name — confirm.
    pub stats: BTreeMap<String, FacetStats>,
}