Introduce the facet search route

This commit is contained in:
Kerollmops 2023-04-13 18:16:33 +02:00 committed by Louis Dureuil
parent addb21f110
commit ce7e7f12c8
No known key found for this signature in database
4 changed files with 220 additions and 9 deletions

View File

@ -0,0 +1,178 @@
use std::collections::{BTreeSet, HashSet};
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::facet;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search::{
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
/// Registers the facet search handler on the given service configuration.
///
/// The handler ([`search`]) answers `POST` requests on the root resource (`""`)
/// of whatever scope this configuration is attached to.
pub fn configure(cfg: &mut web::ServiceConfig) {
    let post_route = web::post().to(search);
    let root_resource = web::resource("").route(post_route);
    cfg.service(root_resource);
}
// #[derive(Debug, deserr::Deserr)]
// #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
// pub struct FacetSearchQuery {
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
// facetQuery: Option<String>,
// #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSearchOffset>)]
// offset: Param<usize>,
// #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSearchLimit>)]
// limit: Param<usize>,
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchPage>)]
// page: Option<Param<usize>>,
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchHitsPerPage>)]
// hits_per_page: Option<Param<usize>>,
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
// attributes_to_retrieve: Option<CS<String>>,
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
// attributes_to_crop: Option<CS<String>>,
// #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
// crop_length: Param<usize>,
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToHighlight>)]
// attributes_to_highlight: Option<CS<String>>,
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchFilter>)]
// filter: Option<String>,
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
// sort: Option<String>,
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
// show_matches_position: Param<bool>,
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
// facets: Option<CS<String>>,
// #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
// highlight_pre_tag: String,
// #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPostTag>)]
// highlight_post_tag: String,
// #[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError<InvalidSearchCropMarker>)]
// crop_marker: String,
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
// matching_strategy: MatchingStrategy,
// }
// TODO improve the error messages
/// JSON body accepted by the facet search route.
///
/// Field names are exposed in camelCase (`rename_all = camelCase`) and unknown
/// fields are rejected (`deny_unknown_fields`). Apart from `facet_query` and
/// `facet_name`, every field is copied as-is into a `SearchQuery` by the
/// `From<FacetSearchQuery> for SearchQuery` implementation in this file.
#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FacetSearchQuery {
/// Optional query forwarded to `perform_facet_search` as `facet_query`.
// NOTE(review): reuses the `InvalidSearchQ` error code of `q`; a dedicated code
// is probably wanted (see the "improve the error messages" TODO) — confirm.
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub facet_query: Option<String>,
/// Name of the facet, forwarded to `perform_facet_search` as `facet_name`.
// NOTE(review): the bare `default` presumably lets a missing `facetName`
// deserialize to an empty string instead of being rejected — confirm this is
// intended. Also reuses the `InvalidSearchQ` error code.
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub facet_name: String,
/// Optional regular search query, copied into `SearchQuery::q`.
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub q: Option<String>,
/// Copied into `SearchQuery::offset`; defaults to `DEFAULT_SEARCH_OFFSET()`.
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
/// Copied into `SearchQuery::limit`; defaults to `DEFAULT_SEARCH_LIMIT()`.
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
/// Copied into `SearchQuery::page`.
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>,
/// Copied into `SearchQuery::hits_per_page`.
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
pub hits_per_page: Option<usize>,
/// Copied into `SearchQuery::attributes_to_retrieve`.
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
/// Copied into `SearchQuery::attributes_to_crop`.
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
/// Copied into `SearchQuery::crop_length`.
// NOTE(review): `default` appears twice in this attribute (bare and
// `= DEFAULT_CROP_LENGTH()`); confirm which one deserr honors.
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
pub crop_length: usize,
/// Copied into `SearchQuery::attributes_to_highlight`.
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
/// Copied into `SearchQuery::show_matches_position`.
// NOTE(review): `default` is listed twice in this attribute.
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
/// Copied into `SearchQuery::filter`; kept as a raw JSON value here.
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
/// Copied into `SearchQuery::sort`.
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
/// Copied into `SearchQuery::facets`.
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
/// Copied into `SearchQuery::highlight_pre_tag`.
// NOTE(review): `default` appears twice (bare and
// `= DEFAULT_HIGHLIGHT_PRE_TAG()`); confirm which one deserr honors.
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
pub highlight_pre_tag: String,
/// Copied into `SearchQuery::highlight_post_tag`.
// NOTE(review): `default` appears twice (bare and
// `= DEFAULT_HIGHLIGHT_POST_TAG()`); confirm which one deserr honors.
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
pub highlight_post_tag: String,
/// Copied into `SearchQuery::crop_marker`.
// NOTE(review): `default` appears twice (bare and `= DEFAULT_CROP_MARKER()`);
// confirm which one deserr honors.
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
pub crop_marker: String,
/// Copied into `SearchQuery::matching_strategy`.
// NOTE(review): `default` is listed twice in this attribute.
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
}
/// Actix handler for a facet search request on a single index.
///
/// Deserializes the JSON body into a [`FacetSearchQuery`], converts it into a
/// regular [`SearchQuery`] (restricted by the tenant-token search rules when
/// the token defines some for this index), and runs [`perform_facet_search`]
/// on a blocking thread so the async executor is not stalled.
///
/// `req` and `analytics` are not used yet; they are kept for the analytics
/// TODOs below.
///
/// # Errors
///
/// Returns a [`ResponseError`] when the index uid is invalid, when the index
/// cannot be retrieved from the scheduler, when the blocking task does not
/// complete, or when the facet search itself fails.
pub async fn search(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebJson<FacetSearchQuery, DeserrJsonError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let mut query = params.into_inner();
    debug!("facet search called with params: {:?}", query);

    // Move the facet-specific values out instead of cloning them: the
    // conversion into `SearchQuery` below consumes `query` and ignores both
    // fields, so leaving them empty behind us is not observable.
    let facet_query = query.facet_query.take();
    let facet_name = std::mem::take(&mut query.facet_name);

    let mut search_query = SearchQuery::from(query);

    // Tenant token search_rules.
    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
        add_search_rules(&mut search_query, search_rules);
    }

    // TODO log stuff
    // let mut aggregate = SearchAggregator::from_query(&query, &req);

    let index = index_scheduler.index(&index_uid)?;
    // The search is synchronous and potentially long: run it off the executor.
    let search_result = tokio::task::spawn_blocking(move || {
        perform_facet_search(&index, search_query, facet_query, facet_name)
    })
    .await?;

    // TODO log stuff
    // if let Ok(ref search_result) = search_result {
    //     aggregate.succeed(search_result);
    // }
    // TODO analytics
    // analytics.post_search(aggregate);

    let search_result = search_result?;

    debug!("returns: {:?}", search_result);
    Ok(HttpResponse::Ok().json(search_result))
}
impl From<FacetSearchQuery> for SearchQuery {
fn from(value: FacetSearchQuery) -> Self {
SearchQuery {
q: value.q,
offset: value.offset,
limit: value.limit,
page: value.page,
hits_per_page: value.hits_per_page,
attributes_to_retrieve: value.attributes_to_retrieve,
attributes_to_crop: value.attributes_to_crop,
crop_length: value.crop_length,
attributes_to_highlight: value.attributes_to_highlight,
show_matches_position: value.show_matches_position,
filter: value.filter,
sort: value.sort,
facets: value.facets,
highlight_pre_tag: value.highlight_pre_tag,
highlight_post_tag: value.highlight_post_tag,
crop_marker: value.crop_marker,
matching_strategy: value.matching_strategy,
}
}
}

View File

@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub mod documents;
pub mod facet_search;
pub mod search;
pub mod settings;
@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
.service(web::scope("/settings").configure(settings::configure)),
);
}

View File

@ -3,6 +3,7 @@ use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::str::FromStr;
use std::time::Instant;
use actix_http::header::q;
use deserr::Deserr;
use either::Either;
use index_scheduler::RoFeatures;
@ -10,9 +11,10 @@ use log::warn;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::{dot_product_similarity, InternalError};
use meilisearch_types::milli::{dot_product_similarity, FacetSearchResult, InternalError};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@ -26,6 +28,7 @@ use serde::Serialize;
use serde_json::{json, Value};
use crate::error::MeilisearchHttpError;
use crate::routes::indexes::facet_search::FacetSearchQuery;
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
@ -199,7 +202,7 @@ impl SearchQueryWithIndex {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
/// Remove query words from last to first
@ -298,14 +301,12 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
}
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
fn prepare_search<'t>(
index: &'t Index,
rtxn: &'t RoTxn,
query: &'t SearchQuery,
features: RoFeatures,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(&rtxn);
if query.vector.is_some() && query.q.is_some() {
@ -383,6 +384,20 @@ pub fn perform_search(
search.sort_criteria(sort);
}
Ok((search, is_finite_pagination, max_total_hits, offset))
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
features: RoFeatures,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query, features)?;
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
search.execute()?;
@ -557,6 +572,21 @@ pub fn perform_search(
Ok(result)
}
/// Entry point of the facet search (work in progress).
///
/// At the moment this opens a read transaction on `index`, builds the
/// underlying search with `prepare_search`, and then panics through `todo!`:
/// the search is never executed and no result is ever returned.
///
/// `facet_query` and `facet_name` are accepted but not used yet.
///
/// # Errors
///
/// Propagates the errors of `index.read_txn()` and `prepare_search`.
///
/// # Panics
///
/// Always panics with "Execute the search" once the preparation succeeds.
// NOTE(review): `prepare_search` is declared above with four parameters
// (`index`, `rtxn`, `query`, `features`) while the call below passes three —
// confirm whether a `features: RoFeatures` argument must be threaded through.
pub fn perform_facet_search(
index: &Index,
search_query: SearchQuery,
facet_query: Option<String>,
facet_name: String,
) -> Result<Vec<FacetSearchResult>, MeilisearchHttpError> {
// Not read anywhere below yet.
let before_search = Instant::now();
let rtxn = index.read_txn()?;
// None of the four bindings is consumed yet.
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &search_query)?;
todo!("Execute the search")
}
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
lazy_static::lazy_static! {
static ref GEO_REGEX: Regex =

View File

@ -341,6 +341,7 @@ impl<'a> SearchForFacetValue<'a> {
}
}
#[derive(Debug, serde::Serialize)]
pub struct FacetSearchResult {
/// The original facet value
pub value: String,