Mirror of https://github.com/meilisearch/MeiliSearch
Synced 2024-11-11 15:38:55 +01:00
Commit d4f10800f2
3834: Define searchable fields at runtime r=Kerollmops a=ManyTheFish

## Summary

This feature allows the end-user to search in one or multiple attributes using the search parameter `attributesToSearchOn`:

```json
{
  "q": "Captain Marvel",
  "attributesToSearchOn": ["title"]
}
```

This feature acts like a filter, forcing Meilisearch to only return documents containing the requested words in the attributes-to-search-on. Note that, with the matching strategy `last`, Meilisearch only ensures that the first word is in the attributes-to-search-on; however, the retrieved documents are ordered taking into account the words contained in the attributes-to-search-on.

## Trying the prototype

A dedicated docker image has been released for this feature:

#### last prototype version:

```bash
docker pull getmeili/meilisearch:prototype-define-searchable-fields-at-search-time-1
```

#### other prototype versions:

```bash
docker pull getmeili/meilisearch:prototype-define-searchable-fields-at-search-time-0
```

## Technical Detail

The attributes-to-search-on list is given to the search context; the search context then uses the `fid_word_docids` database, restricted to the allowed field ids, instead of the global `word_docids` database. The same applies to the prefix databases. The database cache is updated with the merged values, meaning the union of the field-id-database values is only computed when the requested key is missing from the cache (a conceptual sketch of this lookup is given right after this description).

### Relevancy limits

Almost all ranking rules behave as expected when ordering the documents. Only `proximity` could mis-order documents when all the searched words are in the restricted attributes but a better proximity is found in an ignored attribute of a document that should be ranked lower. The failing test below shows it:

```rust
#[actix_rt::test]
async fn proximity_ranking_rule_order() {
    let server = Server::new().await;
    let index = index_with_documents(
        &server,
        &json!([
            {
                "title": "Captain super mega cool. A Marvel story",
                // Perfect distance between words in an ignored attribute
                "desc": "Captain Marvel",
                "id": "1",
            },
            {
                "title": "Captain America from Marvel",
                "desc": "a Shazam ersatz",
                "id": "2",
            }]),
    )
    .await;

    // Document 2 should appear before document 1.
    index
        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"],
                json!([
                    {"id": "2"},
                    {"id": "1"},
                ])
            );
        })
        .await;
}
```

Fixing this would force us to create `fid_word_pair_proximity_docids` and `fid_word_prefix_pair_proximity_docids` databases, which may multiply the keys of `word_pair_proximity_docids` and `word_prefix_pair_proximity_docids` by the number of attributes in the searchable_attributes list. If we think we should fix this test, I suggest doing it in another PR.

## Related

Fixes #3772

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
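To make the union-and-cache behaviour described in the Technical Detail section concrete, here is a minimal conceptual sketch. It is not the actual milli implementation: the in-memory maps, the `FieldId` alias, and the `word_docids_restricted` function are hypothetical stand-ins, and only the overall idea (union the per-field postings for the allowed field ids, caching the merged bitmap under the missing key) reflects the description above.

```rust
use std::collections::HashMap;

use roaring::RoaringBitmap;

type FieldId = u16;

/// Hypothetical sketch: resolve the documents containing `word`, restricted to
/// `allowed_fids`, by merging the per-field postings and caching the result.
fn word_docids_restricted(
    word: &str,
    allowed_fids: &[FieldId],
    fid_word_docids: &HashMap<(FieldId, String), RoaringBitmap>,
    cache: &mut HashMap<String, RoaringBitmap>,
) -> RoaringBitmap {
    // The union is only computed when the requested key is missing from the cache.
    if let Some(docids) = cache.get(word) {
        return docids.clone();
    }
    let mut merged = RoaringBitmap::new();
    for fid in allowed_fids {
        if let Some(docids) = fid_word_docids.get(&(*fid, word.to_string())) {
            merged |= docids;
        }
    }
    cache.insert(word.to_string(), merged.clone());
    merged
}
```

The `proximity` limitation described above exists precisely because no such per-field variant exists for the pair-proximity databases.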
235 lines
9.9 KiB
Rust
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;

use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
    add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(
        web::resource("")
            .route(web::get().to(SeqHandler(search_with_url_query)))
            .route(web::post().to(SeqHandler(search_with_post))),
    );
}
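
// Illustrative sketch: `configure` registers the GET and POST handlers on an empty
// resource, and the parent router mounts it under an index-scoped path. The scope
// path below is an assumption for illustration; the real mount point lives in the
// parent routes module.
#[cfg(test)]
mod configure_sketch {
    use super::*;

    #[allow(dead_code)]
    fn app_sketch() {
        let _app = actix_web::App::new()
            .service(web::scope("/indexes/{index_uid}/search").configure(configure));
    }
}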

#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueryGet {
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
    q: Option<String>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchVector>)]
    vector: Option<Vec<f32>>,
    #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSearchOffset>)]
    offset: Param<usize>,
    #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSearchLimit>)]
    limit: Param<usize>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchPage>)]
    page: Option<Param<usize>>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchHitsPerPage>)]
    hits_per_page: Option<Param<usize>>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
    attributes_to_retrieve: Option<CS<String>>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
    attributes_to_crop: Option<CS<String>>,
    #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
    crop_length: Param<usize>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToHighlight>)]
    attributes_to_highlight: Option<CS<String>>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchFilter>)]
    filter: Option<String>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
    sort: Option<String>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
    show_matches_position: Param<bool>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
    show_ranking_score: Param<bool>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScoreDetails>)]
    show_ranking_score_details: Param<bool>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
    facets: Option<CS<String>>,
    #[deserr(default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
    highlight_pre_tag: String,
    #[deserr(default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPostTag>)]
    highlight_post_tag: String,
    #[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError<InvalidSearchCropMarker>)]
    crop_marker: String,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
    matching_strategy: MatchingStrategy,
    #[deserr(default, error = DeserrQueryParamError<InvalidAttributesToSearchOn>)]
    pub attributes_to_search_on: Option<CS<String>>,
}

impl From<SearchQueryGet> for SearchQuery {
    fn from(other: SearchQueryGet) -> Self {
        let filter = match other.filter {
            Some(f) => match serde_json::from_str(&f) {
                Ok(v) => Some(v),
                _ => Some(Value::String(f)),
            },
            None => None,
        };

        Self {
            q: other.q,
            vector: other.vector,
            offset: other.offset.0,
            limit: other.limit.0,
            page: other.page.as_deref().copied(),
            hits_per_page: other.hits_per_page.as_deref().copied(),
            attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
            attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
            crop_length: other.crop_length.0,
            attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
            filter,
            sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
            show_matches_position: other.show_matches_position.0,
            show_ranking_score: other.show_ranking_score.0,
            show_ranking_score_details: other.show_ranking_score_details.0,
            facets: other.facets.map(|o| o.into_iter().collect()),
            highlight_pre_tag: other.highlight_pre_tag,
            highlight_post_tag: other.highlight_post_tag,
            crop_marker: other.crop_marker,
            matching_strategy: other.matching_strategy,
            attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
        }
    }
}
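
// Illustrative sketch of the `filter` conversion above: the raw query-string value is
// first parsed as JSON and, if that fails, kept verbatim as a plain string filter
// expression. The helper and test below only mirror that match arm for illustration;
// they are hypothetical and not part of the original route code.
#[cfg(test)]
mod filter_fallback_sketch {
    use super::*;

    /// Mirrors the JSON-or-string fallback used in `From<SearchQueryGet>`.
    fn filter_value(raw: &str) -> Value {
        serde_json::from_str(raw).unwrap_or_else(|_| Value::String(raw.to_string()))
    }

    #[test]
    fn json_filters_parse_and_plain_expressions_stay_strings() {
        // `filter=["genres = horror"]` is valid JSON and becomes an array.
        assert!(filter_value(r#"["genres = horror"]"#).is_array());
        // `filter=genres = horror` is not valid JSON and stays a plain string.
        assert!(filter_value("genres = horror").is_string());
    }
}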

// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things

/// Transform the sort query parameter into something that matches the post expected format.
fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
    let mut sort_parameters = Vec::new();
    let mut merge = false;
    for current_sort in sort_query.trim_matches('"').split(',').map(|s| s.trim()) {
        if current_sort.starts_with("_geoPoint(") {
            sort_parameters.push(current_sort.to_string());
            merge = true;
        } else if merge && !sort_parameters.is_empty() {
            let s = sort_parameters.last_mut().unwrap();
            s.push(',');
            s.push_str(current_sort);
            if current_sort.ends_with("):desc") || current_sort.ends_with("):asc") {
                merge = false;
            }
        } else {
            sort_parameters.push(current_sort.to_string());
            merge = false;
        }
    }
    sort_parameters
}

pub async fn search_with_url_query(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    debug!("called with params: {:?}", params);
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let mut query: SearchQuery = params.into_inner().into();

    // Tenant token search_rules.
    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
        add_search_rules(&mut query, search_rules);
    }

    let mut aggregate = SearchAggregator::from_query(&query, &req);

    let index = index_scheduler.index(&index_uid)?;
    let features = index_scheduler.features()?;
    let search_result =
        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
    if let Ok(ref search_result) = search_result {
        aggregate.succeed(search_result);
    }
    analytics.get_search(aggregate);

    let search_result = search_result?;

    debug!("returns: {:?}", search_result);
    Ok(HttpResponse::Ok().json(search_result))
}

pub async fn search_with_post(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebJson<SearchQuery, DeserrJsonError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let mut query = params.into_inner();
    debug!("search called with params: {:?}", query);

    // Tenant token search_rules.
    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
        add_search_rules(&mut query, search_rules);
    }

    let mut aggregate = SearchAggregator::from_query(&query, &req);

    let index = index_scheduler.index(&index_uid)?;

    let features = index_scheduler.features()?;
    let search_result =
        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
    if let Ok(ref search_result) = search_result {
        aggregate.succeed(search_result);
    }
    analytics.post_search(aggregate);

    let search_result = search_result?;

    debug!("returns: {:?}", search_result);
    Ok(HttpResponse::Ok().json(search_result))
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_fix_sort_query_parameters() {
        let sort = fix_sort_query_parameters("_geoPoint(12, 13):asc");
        assert_eq!(sort, vec!["_geoPoint(12,13):asc".to_string()]);
        let sort = fix_sort_query_parameters("doggo:asc,_geoPoint(12.45,13.56):desc");
        assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(12.45,13.56):desc".to_string(),]);
        let sort = fix_sort_query_parameters(
            "doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc",
        );
        assert_eq!(
            sort,
            vec![
                "doggo:asc".to_string(),
                "_geoPoint(12.45,13.56,2590352):desc".to_string(),
                "catto:desc".to_string(),
            ]
        );
        let sort = fix_sort_query_parameters("doggo:asc , _geoPoint(1, 2), catto:desc");
        // This is ugly but eh, I don't want to write a full parser just for this unused route
        assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(1,2),catto:desc".to_string(),]);
    }
}