mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-03-27 18:10:31 +01:00
359 lines
15 KiB
Rust
359 lines
15 KiB
Rust
use actix_web::web::Data;
|
|
use actix_web::{web, HttpRequest, HttpResponse};
|
|
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
|
use index_scheduler::{IndexScheduler, RoFeatures};
|
|
use meilisearch_types::deserr::query_params::Param;
|
|
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
|
use meilisearch_types::error::deserr_codes::*;
|
|
use meilisearch_types::error::ResponseError;
|
|
use meilisearch_types::index_uid::IndexUid;
|
|
use meilisearch_types::locales::Locale;
|
|
use meilisearch_types::milli;
|
|
use meilisearch_types::serde_cs::vec::CS;
|
|
use serde_json::Value;
|
|
use tracing::debug;
|
|
|
|
use crate::analytics::Analytics;
|
|
use crate::error::MeilisearchHttpError;
|
|
use crate::extractors::authentication::policies::*;
|
|
use crate::extractors::authentication::GuardedData;
|
|
use crate::extractors::sequential_extractor::SeqHandler;
|
|
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
|
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
|
|
use crate::search::{
|
|
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
|
RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
|
|
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
|
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
|
};
|
|
use crate::search_queue::SearchQueue;
|
|
|
|
pub fn configure(cfg: &mut web::ServiceConfig) {
|
|
cfg.service(
|
|
web::resource("")
|
|
.route(web::get().to(SeqHandler(search_with_url_query)))
|
|
.route(web::post().to(SeqHandler(search_with_post))),
|
|
);
|
|
}
|
|
|
|
#[derive(Debug, deserr::Deserr)]
|
|
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
|
pub struct SearchQueryGet {
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
|
|
q: Option<String>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchVector>)]
|
|
vector: Option<CS<f32>>,
|
|
#[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSearchOffset>)]
|
|
offset: Param<usize>,
|
|
#[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSearchLimit>)]
|
|
limit: Param<usize>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPage>)]
|
|
page: Option<Param<usize>>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchHitsPerPage>)]
|
|
hits_per_page: Option<Param<usize>>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
|
|
attributes_to_retrieve: Option<CS<String>>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRetrieveVectors>)]
|
|
retrieve_vectors: Param<bool>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
|
|
attributes_to_crop: Option<CS<String>>,
|
|
#[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
|
|
crop_length: Param<usize>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToHighlight>)]
|
|
attributes_to_highlight: Option<CS<String>>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFilter>)]
|
|
filter: Option<String>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
|
|
sort: Option<String>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchDistinct>)]
|
|
distinct: Option<String>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
|
|
show_matches_position: Param<bool>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
|
|
show_ranking_score: Param<bool>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScoreDetails>)]
|
|
show_ranking_score_details: Param<bool>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
|
|
facets: Option<CS<String>>,
|
|
#[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
|
|
highlight_pre_tag: String,
|
|
#[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPostTag>)]
|
|
highlight_post_tag: String,
|
|
#[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError<InvalidSearchCropMarker>)]
|
|
crop_marker: String,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
|
|
matching_strategy: MatchingStrategy,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)]
|
|
pub attributes_to_search_on: Option<CS<String>>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
|
|
pub hybrid_embedder: Option<String>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
|
|
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
|
|
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
|
|
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
|
|
pub locales: Option<CS<Locale>>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
|
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
|
|
pub struct RankingScoreThresholdGet(RankingScoreThreshold);
|
|
|
|
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
|
|
type Error = InvalidSearchRankingScoreThreshold;
|
|
|
|
fn try_from(s: String) -> Result<Self, Self::Error> {
|
|
let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?;
|
|
Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?))
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
|
|
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
|
|
pub struct SemanticRatioGet(SemanticRatio);
|
|
|
|
impl std::convert::TryFrom<String> for SemanticRatioGet {
|
|
type Error = InvalidSearchSemanticRatio;
|
|
|
|
fn try_from(s: String) -> Result<Self, Self::Error> {
|
|
let f: f32 = s.parse().map_err(|_| InvalidSearchSemanticRatio)?;
|
|
Ok(SemanticRatioGet(SemanticRatio::try_from(f)?))
|
|
}
|
|
}
|
|
|
|
impl std::ops::Deref for SemanticRatioGet {
|
|
type Target = SemanticRatio;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
&self.0
|
|
}
|
|
}
|
|
|
|
impl TryFrom<SearchQueryGet> for SearchQuery {
|
|
type Error = ResponseError;
|
|
|
|
fn try_from(other: SearchQueryGet) -> Result<Self, Self::Error> {
|
|
let filter = match other.filter {
|
|
Some(f) => match serde_json::from_str(&f) {
|
|
Ok(v) => Some(v),
|
|
_ => Some(Value::String(f)),
|
|
},
|
|
None => None,
|
|
};
|
|
|
|
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
|
|
(None, None) => None,
|
|
(None, Some(_)) => {
|
|
return Err(ResponseError::from_msg(
|
|
"`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(),
|
|
meilisearch_types::error::Code::InvalidHybridQuery,
|
|
));
|
|
}
|
|
(Some(embedder), None) => {
|
|
Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder })
|
|
}
|
|
(Some(embedder), Some(semantic_ratio)) => {
|
|
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder })
|
|
}
|
|
};
|
|
|
|
if other.vector.is_some() && hybrid.is_none() {
|
|
return Err(ResponseError::from_msg(
|
|
"`hybridEmbedder` is mandatory when `vector` is present".into(),
|
|
meilisearch_types::error::Code::MissingSearchHybrid,
|
|
));
|
|
}
|
|
|
|
Ok(Self {
|
|
q: other.q,
|
|
vector: other.vector.map(CS::into_inner),
|
|
offset: other.offset.0,
|
|
limit: other.limit.0,
|
|
page: other.page.as_deref().copied(),
|
|
hits_per_page: other.hits_per_page.as_deref().copied(),
|
|
attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
|
|
retrieve_vectors: other.retrieve_vectors.0,
|
|
attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
|
|
crop_length: other.crop_length.0,
|
|
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
|
|
filter,
|
|
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
|
|
distinct: other.distinct,
|
|
show_matches_position: other.show_matches_position.0,
|
|
show_ranking_score: other.show_ranking_score.0,
|
|
show_ranking_score_details: other.show_ranking_score_details.0,
|
|
facets: other.facets.map(|o| o.into_iter().collect()),
|
|
highlight_pre_tag: other.highlight_pre_tag,
|
|
highlight_post_tag: other.highlight_post_tag,
|
|
crop_marker: other.crop_marker,
|
|
matching_strategy: other.matching_strategy,
|
|
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
|
|
hybrid,
|
|
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
|
|
locales: other.locales.map(|o| o.into_iter().collect()),
|
|
})
|
|
}
|
|
}
|
|
|
|
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
|
|
|
|
/// Transform the sort query parameter into something that matches the post expected format.
|
|
pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
|
|
let mut sort_parameters = Vec::new();
|
|
let mut merge = false;
|
|
for current_sort in sort_query.trim_matches('"').split(',').map(|s| s.trim()) {
|
|
if current_sort.starts_with("_geoPoint(") {
|
|
sort_parameters.push(current_sort.to_string());
|
|
merge = true;
|
|
} else if merge && !sort_parameters.is_empty() {
|
|
let s = sort_parameters.last_mut().unwrap();
|
|
s.push(',');
|
|
s.push_str(current_sort);
|
|
if current_sort.ends_with("):desc") || current_sort.ends_with("):asc") {
|
|
merge = false;
|
|
}
|
|
} else {
|
|
sort_parameters.push(current_sort.to_string());
|
|
merge = false;
|
|
}
|
|
}
|
|
sort_parameters
|
|
}
|
|
|
|
pub async fn search_with_url_query(
|
|
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
|
search_queue: web::Data<SearchQueue>,
|
|
index_uid: web::Path<String>,
|
|
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
|
|
req: HttpRequest,
|
|
analytics: web::Data<Analytics>,
|
|
) -> Result<HttpResponse, ResponseError> {
|
|
debug!(parameters = ?params, "Search get");
|
|
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
|
|
|
let mut query: SearchQuery = params.into_inner().try_into()?;
|
|
|
|
// Tenant token search_rules.
|
|
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
|
add_search_rules(&mut query.filter, search_rules);
|
|
}
|
|
|
|
let mut aggregate = SearchAggregator::<SearchGET>::from_query(&query);
|
|
|
|
let index = index_scheduler.index(&index_uid)?;
|
|
let features = index_scheduler.features();
|
|
|
|
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
|
|
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
|
let permit = search_queue.try_get_search_permit().await?;
|
|
let search_result = tokio::task::spawn_blocking(move || {
|
|
perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
|
|
})
|
|
.await;
|
|
permit.drop().await;
|
|
let search_result = search_result?;
|
|
if let Ok(ref search_result) = search_result {
|
|
aggregate.succeed(search_result);
|
|
}
|
|
analytics.publish(aggregate, &req);
|
|
|
|
let search_result = search_result?;
|
|
|
|
debug!(returns = ?search_result, "Search get");
|
|
Ok(HttpResponse::Ok().json(search_result))
|
|
}
|
|
|
|
pub async fn search_with_post(
|
|
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
|
search_queue: web::Data<SearchQueue>,
|
|
index_uid: web::Path<String>,
|
|
params: AwebJson<SearchQuery, DeserrJsonError>,
|
|
req: HttpRequest,
|
|
analytics: web::Data<Analytics>,
|
|
) -> Result<HttpResponse, ResponseError> {
|
|
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
|
|
|
let mut query = params.into_inner();
|
|
debug!(parameters = ?query, "Search post");
|
|
|
|
// Tenant token search_rules.
|
|
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
|
add_search_rules(&mut query.filter, search_rules);
|
|
}
|
|
|
|
let mut aggregate = SearchAggregator::<SearchPOST>::from_query(&query);
|
|
|
|
let index = index_scheduler.index(&index_uid)?;
|
|
|
|
let features = index_scheduler.features();
|
|
|
|
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
|
|
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
|
|
|
let permit = search_queue.try_get_search_permit().await?;
|
|
let search_result = tokio::task::spawn_blocking(move || {
|
|
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
|
|
})
|
|
.await;
|
|
permit.drop().await;
|
|
let search_result = search_result?;
|
|
if let Ok(ref search_result) = search_result {
|
|
aggregate.succeed(search_result);
|
|
if search_result.degraded {
|
|
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
|
|
}
|
|
}
|
|
analytics.publish(aggregate, &req);
|
|
|
|
let search_result = search_result?;
|
|
|
|
debug!(returns = ?search_result, "Search post");
|
|
Ok(HttpResponse::Ok().json(search_result))
|
|
}
|
|
|
|
pub fn search_kind(
|
|
query: &SearchQuery,
|
|
index_scheduler: &IndexScheduler,
|
|
index: &milli::Index,
|
|
features: RoFeatures,
|
|
) -> Result<SearchKind, ResponseError> {
|
|
if query.vector.is_some() {
|
|
features.check_vector("Passing `vector` as a parameter")?;
|
|
}
|
|
if query.hybrid.is_some() {
|
|
features.check_vector("Passing `hybrid` as a parameter")?;
|
|
}
|
|
|
|
// handle with care, the order of cases matters, the semantics is subtle
|
|
match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
|
|
// empty query, no vector => placeholder search
|
|
(Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
|
|
// no query, no vector => placeholder search
|
|
(None, _, None) => Ok(SearchKind::KeywordOnly),
|
|
// hybrid.semantic_ratio == 1.0 => vector
|
|
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
|
|
SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
|
|
}
|
|
// hybrid.semantic_ratio == 0.0 => keyword
|
|
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
|
|
Ok(SearchKind::KeywordOnly)
|
|
}
|
|
// no query, hybrid, vector => semantic
|
|
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
|
|
SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
|
|
}
|
|
// query, no hybrid, no vector => keyword
|
|
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
|
|
// query, hybrid, maybe vector => hybrid
|
|
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
|
|
index_scheduler,
|
|
index,
|
|
embedder,
|
|
**semantic_ratio,
|
|
v.map(|v| v.len()),
|
|
),
|
|
|
|
(_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
|
}
|
|
}
|