use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use index_scheduler::IndexScheduler; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli; use meilisearch_types::milli::vector::DistributionShift; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; use tracing::{debug, warn}; use crate::analytics::{Analytics, SearchAggregator}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, }; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") .route(web::get().to(SeqHandler(search_with_url_query))) .route(web::post().to(SeqHandler(search_with_post))), ); } #[derive(Debug, deserr::Deserr)] #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] pub struct SearchQueryGet { #[deserr(default, error = DeserrQueryParamError)] q: Option, #[deserr(default, error = DeserrQueryParamError)] vector: Option>, #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError)] offset: Param, #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError)] limit: Param, #[deserr(default, error = DeserrQueryParamError)] page: Option>, #[deserr(default, error = DeserrQueryParamError)] hits_per_page: Option>, #[deserr(default, error = DeserrQueryParamError)] attributes_to_retrieve: Option>, #[deserr(default, error = DeserrQueryParamError)] attributes_to_crop: Option>, #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError)] crop_length: Param, #[deserr(default, error = DeserrQueryParamError)] attributes_to_highlight: Option>, #[deserr(default, error = DeserrQueryParamError)] filter: Option, #[deserr(default, error = DeserrQueryParamError)] sort: Option, #[deserr(default, error = DeserrQueryParamError)] show_matches_position: Param, #[deserr(default, error = DeserrQueryParamError)] show_ranking_score: Param, #[deserr(default, error = DeserrQueryParamError)] show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError)] facets: Option>, #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError)] highlight_pre_tag: String, #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError)] highlight_post_tag: String, #[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError)] crop_marker: String, #[deserr(default, error = DeserrQueryParamError)] matching_strategy: MatchingStrategy, #[deserr(default, error = DeserrQueryParamError)] pub attributes_to_search_on: Option>, #[deserr(default, error = DeserrQueryParamError)] pub hybrid_embedder: Option, #[deserr(default, error = DeserrQueryParamError)] pub hybrid_semantic_ratio: Option, } #[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)] #[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchSemanticRatio)] pub struct SemanticRatioGet(SemanticRatio); impl std::convert::TryFrom for SemanticRatioGet { type Error = InvalidSearchSemanticRatio; fn try_from(s: String) -> Result { let f: f32 = s.parse().map_err(|_| InvalidSearchSemanticRatio)?; Ok(SemanticRatioGet(SemanticRatio::try_from(f)?)) } } impl std::ops::Deref for SemanticRatioGet { type Target = SemanticRatio; fn deref(&self) -> &Self::Target { &self.0 } } impl From for SearchQuery { fn from(other: SearchQueryGet) -> Self { let filter = match other.filter { Some(f) => match serde_json::from_str(&f) { Ok(v) => Some(v), _ => Some(Value::String(f)), }, None => None, }; let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) { (None, None) => None, (None, Some(semantic_ratio)) => { Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None }) } (Some(embedder), None) => Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder: Some(embedder), }), (Some(embedder), Some(semantic_ratio)) => { Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) }) } }; Self { q: other.q, vector: other.vector.map(CS::into_inner), offset: other.offset.0, limit: other.limit.0, page: other.page.as_deref().copied(), hits_per_page: other.hits_per_page.as_deref().copied(), attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()), attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()), crop_length: other.crop_length.0, attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), filter, sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), show_matches_position: other.show_matches_position.0, show_ranking_score: other.show_ranking_score.0, show_ranking_score_details: other.show_ranking_score_details.0, facets: other.facets.map(|o| o.into_iter().collect()), highlight_pre_tag: other.highlight_pre_tag, highlight_post_tag: other.highlight_post_tag, crop_marker: other.crop_marker, matching_strategy: other.matching_strategy, attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()), hybrid, } } } // TODO: TAMO: split on :asc, and :desc, instead of doing some weird things /// Transform the sort query parameter into something that matches the post expected format. fn fix_sort_query_parameters(sort_query: &str) -> Vec { let mut sort_parameters = Vec::new(); let mut merge = false; for current_sort in sort_query.trim_matches('"').split(',').map(|s| s.trim()) { if current_sort.starts_with("_geoPoint(") { sort_parameters.push(current_sort.to_string()); merge = true; } else if merge && !sort_parameters.is_empty() { let s = sort_parameters.last_mut().unwrap(); s.push(','); s.push_str(current_sort); if current_sort.ends_with("):desc") || current_sort.ends_with("):asc") { merge = false; } } else { sort_parameters.push(current_sort.to_string()); merge = false; } } sort_parameters } pub async fn search_with_url_query( index_scheduler: GuardedData, Data>, index_uid: web::Path, params: AwebQueryParameter, req: HttpRequest, analytics: web::Data, ) -> Result { debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let mut query: SearchQuery = params.into_inner().into(); // Tenant token search_rules. if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { add_search_rules(&mut query, search_rules); } let mut aggregate = SearchAggregator::from_query(&query, &req); let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); let distribution = embed(&mut query, index_scheduler.get_ref(), &index)?; let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution)) .await?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } analytics.get_search(aggregate); let search_result = search_result?; debug!(returns = ?search_result, "Search get"); Ok(HttpResponse::Ok().json(search_result)) } pub async fn search_with_post( index_scheduler: GuardedData, Data>, index_uid: web::Path, params: AwebJson, req: HttpRequest, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let mut query = params.into_inner(); debug!(parameters = ?query, "Search post"); // Tenant token search_rules. if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { add_search_rules(&mut query, search_rules); } let mut aggregate = SearchAggregator::from_query(&query, &req); let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); let distribution = embed(&mut query, index_scheduler.get_ref(), &index)?; let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution)) .await?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); if search_result.degraded { MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); } } analytics.post_search(aggregate); let search_result = search_result?; debug!(returns = ?search_result, "Search post"); Ok(HttpResponse::Ok().json(search_result)) } pub fn embed( query: &mut SearchQuery, index_scheduler: &IndexScheduler, index: &milli::Index, ) -> Result, ResponseError> { match (&query.hybrid, &query.vector, &query.q) { (Some(HybridQuery { semantic_ratio: _, embedder }), None, Some(q)) if !q.trim().is_empty() => { let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let embedders = index_scheduler.embedders(embedder_configs)?; let embedder = if let Some(embedder_name) = embedder { embedders.get(embedder_name) } else { embedders.get_default() }; let embedder = embedder .ok_or(milli::UserError::InvalidEmbedder("default".to_owned())) .map_err(milli::Error::from)? .0; let distribution = embedder.distribution(); let embeddings = embedder .embed(vec![q.to_owned()]) .map_err(milli::vector::Error::from) .map_err(milli::Error::from)? .pop() .expect("No vector returned from embedding"); if embeddings.iter().nth(1).is_some() { warn!("Ignoring embeddings past the first one in long search query"); query.vector = Some(embeddings.iter().next().unwrap().to_vec()); } else { query.vector = Some(embeddings.into_inner()); } Ok(distribution) } (Some(hybrid), vector, _) => { let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let embedders = index_scheduler.embedders(embedder_configs)?; let embedder = if let Some(embedder_name) = &hybrid.embedder { embedders.get(embedder_name) } else { embedders.get_default() }; let embedder = embedder .ok_or(milli::UserError::InvalidEmbedder("default".to_owned())) .map_err(milli::Error::from)? .0; if let Some(vector) = vector { if vector.len() != embedder.dimensions() { return Err(meilisearch_types::milli::Error::UserError( meilisearch_types::milli::UserError::InvalidVectorDimensions { expected: embedder.dimensions(), found: vector.len(), }, ) .into()); } } Ok(embedder.distribution()) } _ => Ok(None), } } #[cfg(test)] mod test { use super::*; #[test] fn test_fix_sort_query_parameters() { let sort = fix_sort_query_parameters("_geoPoint(12, 13):asc"); assert_eq!(sort, vec!["_geoPoint(12,13):asc".to_string()]); let sort = fix_sort_query_parameters("doggo:asc,_geoPoint(12.45,13.56):desc"); assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(12.45,13.56):desc".to_string(),]); let sort = fix_sort_query_parameters( "doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc", ); assert_eq!( sort, vec![ "doggo:asc".to_string(), "_geoPoint(12.45,13.56,2590352):desc".to_string(), "catto:desc".to_string(), ] ); let sort = fix_sort_query_parameters("doggo:asc , _geoPoint(1, 2), catto:desc"); // This is ugly but eh, I don't want to write a full parser just for this unused route assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(1,2),catto:desc".to_string(),]); } }