mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-09 22:48:54 +01:00
Merge #4769
4769: Federated search r=ManyTheFish a=dureuill # Pull Request ## Related issue Fixes #4747 [Usage](https://meilisearch.notion.site/v1-10-federated-search-698dfe36ab6b4668b044f735fb40f0b2) ## What does this PR do? - multi-search now allows a top-level federation object. When not `null`, the results of multi-search are modified to be a single list of results rather than a list of lists of results - changed lifetimes around tokenizer et al. to be able to make hits one by one rather than using a vector - adds `roaring` to Meilisearch itself. As the federated search happens at the Meilisearch level (reuses the search functions declared at the Meilisearch level + merge happens after the hits were created), `RoaringBitmap`s are needed to track the candidates: hits that were seen, all candidates. - Refactor `make_hits` to allow for an individual, optimized `make_hit` - Score details comparison no longer fails when sorting on different field names or target point (for geo) Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
1582c7e788
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3394,6 +3394,7 @@ dependencies = [
|
|||||||
"rayon",
|
"rayon",
|
||||||
"regex",
|
"regex",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
|
"roaring",
|
||||||
"rustls 0.21.12",
|
"rustls 0.21.12",
|
||||||
"rustls-pemfile 1.0.4",
|
"rustls-pemfile 1.0.4",
|
||||||
"segment",
|
"segment",
|
||||||
|
@ -192,6 +192,7 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError);
|
|||||||
merge_with_error_impl_take_error_message!(ParseTaskKindError);
|
merge_with_error_impl_take_error_message!(ParseTaskKindError);
|
||||||
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
|
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
|
||||||
merge_with_error_impl_take_error_message!(IndexUidFormatError);
|
merge_with_error_impl_take_error_message!(IndexUidFormatError);
|
||||||
|
merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight);
|
||||||
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
|
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
|
||||||
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
|
merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
|
||||||
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
|
merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);
|
||||||
|
@ -238,6 +238,11 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
|
|||||||
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
|
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
|
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
|
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
|
||||||
@ -512,6 +517,12 @@ impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for deserr_codes::InvalidMultiSearchWeight {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
write!(f, "the value of `weight` is invalid, expected a positive float (>= 0.0).")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl fmt::Display for deserr_codes::InvalidSimilarId {
|
impl fmt::Display for deserr_codes::InvalidSimilarId {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
write!(
|
write!(
|
||||||
|
@ -102,6 +102,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] }
|
|||||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||||
tracing-actix-web = "0.7.11"
|
tracing-actix-web = "0.7.11"
|
||||||
build-info = { version = "1.7.0", path = "../build-info" }
|
build-info = { version = "1.7.0", path = "../build-info" }
|
||||||
|
roaring = "0.10.2"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
actix-rt = "2.10.0"
|
actix-rt = "2.10.0"
|
||||||
|
@ -42,7 +42,7 @@ pub struct MultiSearchAggregator;
|
|||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
impl MultiSearchAggregator {
|
impl MultiSearchAggregator {
|
||||||
pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
|
pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self {
|
||||||
Self
|
Self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,8 +34,8 @@ use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumen
|
|||||||
use crate::routes::indexes::facet_search::FacetSearchQuery;
|
use crate::routes::indexes::facet_search::FacetSearchQuery;
|
||||||
use crate::routes::{create_all_stats, Stats};
|
use crate::routes::{create_all_stats, Stats};
|
||||||
use crate::search::{
|
use crate::search::{
|
||||||
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
|
FacetSearchResult, FederatedSearch, MatchingStrategy, SearchQuery, SearchQueryWithIndex,
|
||||||
SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
SearchResult, SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||||
DEFAULT_SEMANTIC_RATIO,
|
DEFAULT_SEMANTIC_RATIO,
|
||||||
};
|
};
|
||||||
@ -1095,22 +1095,33 @@ pub struct MultiSearchAggregator {
|
|||||||
show_ranking_score: bool,
|
show_ranking_score: bool,
|
||||||
show_ranking_score_details: bool,
|
show_ranking_score_details: bool,
|
||||||
|
|
||||||
|
// federation
|
||||||
|
use_federation: bool,
|
||||||
|
|
||||||
// context
|
// context
|
||||||
user_agents: HashSet<String>,
|
user_agents: HashSet<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MultiSearchAggregator {
|
impl MultiSearchAggregator {
|
||||||
pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self {
|
pub fn from_federated_search(
|
||||||
|
federated_search: &FederatedSearch,
|
||||||
|
request: &HttpRequest,
|
||||||
|
) -> Self {
|
||||||
let timestamp = Some(OffsetDateTime::now_utc());
|
let timestamp = Some(OffsetDateTime::now_utc());
|
||||||
|
|
||||||
let user_agents = extract_user_agents(request).into_iter().collect();
|
let user_agents = extract_user_agents(request).into_iter().collect();
|
||||||
|
|
||||||
let distinct_indexes: HashSet<_> = query
|
let use_federation = federated_search.federation.is_some();
|
||||||
|
|
||||||
|
let distinct_indexes: HashSet<_> = federated_search
|
||||||
|
.queries
|
||||||
.iter()
|
.iter()
|
||||||
.map(|query| {
|
.map(|query| {
|
||||||
|
let query = &query;
|
||||||
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
|
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
|
||||||
let SearchQueryWithIndex {
|
let SearchQueryWithIndex {
|
||||||
index_uid,
|
index_uid,
|
||||||
|
federation_options: _,
|
||||||
q: _,
|
q: _,
|
||||||
vector: _,
|
vector: _,
|
||||||
offset: _,
|
offset: _,
|
||||||
@ -1142,8 +1153,10 @@ impl MultiSearchAggregator {
|
|||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let show_ranking_score = query.iter().any(|query| query.show_ranking_score);
|
let show_ranking_score =
|
||||||
let show_ranking_score_details = query.iter().any(|query| query.show_ranking_score_details);
|
federated_search.queries.iter().any(|query| query.show_ranking_score);
|
||||||
|
let show_ranking_score_details =
|
||||||
|
federated_search.queries.iter().any(|query| query.show_ranking_score_details);
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
timestamp,
|
timestamp,
|
||||||
@ -1151,10 +1164,11 @@ impl MultiSearchAggregator {
|
|||||||
total_succeeded: 0,
|
total_succeeded: 0,
|
||||||
total_distinct_index_count: distinct_indexes.len(),
|
total_distinct_index_count: distinct_indexes.len(),
|
||||||
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
|
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
|
||||||
total_search_count: query.len(),
|
total_search_count: federated_search.queries.len(),
|
||||||
show_ranking_score,
|
show_ranking_score,
|
||||||
show_ranking_score_details,
|
show_ranking_score_details,
|
||||||
user_agents,
|
user_agents,
|
||||||
|
use_federation,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1180,6 +1194,7 @@ impl MultiSearchAggregator {
|
|||||||
let show_ranking_score_details =
|
let show_ranking_score_details =
|
||||||
this.show_ranking_score_details || other.show_ranking_score_details;
|
this.show_ranking_score_details || other.show_ranking_score_details;
|
||||||
let mut user_agents = this.user_agents;
|
let mut user_agents = this.user_agents;
|
||||||
|
let use_federation = this.use_federation || other.use_federation;
|
||||||
|
|
||||||
for user_agent in other.user_agents.into_iter() {
|
for user_agent in other.user_agents.into_iter() {
|
||||||
user_agents.insert(user_agent);
|
user_agents.insert(user_agent);
|
||||||
@ -1196,6 +1211,7 @@ impl MultiSearchAggregator {
|
|||||||
user_agents,
|
user_agents,
|
||||||
show_ranking_score,
|
show_ranking_score,
|
||||||
show_ranking_score_details,
|
show_ranking_score_details,
|
||||||
|
use_federation,
|
||||||
// do not add _ or ..Default::default() here
|
// do not add _ or ..Default::default() here
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1214,6 +1230,7 @@ impl MultiSearchAggregator {
|
|||||||
user_agents,
|
user_agents,
|
||||||
show_ranking_score,
|
show_ranking_score,
|
||||||
show_ranking_score_details,
|
show_ranking_score_details,
|
||||||
|
use_federation,
|
||||||
} = self;
|
} = self;
|
||||||
|
|
||||||
if total_received == 0 {
|
if total_received == 0 {
|
||||||
@ -1238,6 +1255,9 @@ impl MultiSearchAggregator {
|
|||||||
"scoring": {
|
"scoring": {
|
||||||
"show_ranking_score": show_ranking_score,
|
"show_ranking_score": show_ranking_score,
|
||||||
"show_ranking_score_details": show_ranking_score_details,
|
"show_ranking_score_details": show_ranking_score_details,
|
||||||
|
},
|
||||||
|
"federation": {
|
||||||
|
"use_federation": use_federation,
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -25,6 +25,10 @@ pub enum MeilisearchHttpError {
|
|||||||
DocumentNotFound(String),
|
DocumentNotFound(String),
|
||||||
#[error("Sending an empty filter is forbidden.")]
|
#[error("Sending an empty filter is forbidden.")]
|
||||||
EmptyFilter,
|
EmptyFilter,
|
||||||
|
#[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")]
|
||||||
|
FederationOptionsInNonFederatedRequest(usize),
|
||||||
|
#[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")]
|
||||||
|
PaginationInFederatedQuery(usize, &'static str),
|
||||||
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
|
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
|
||||||
InvalidExpression(&'static [&'static str], Value),
|
InvalidExpression(&'static [&'static str], Value),
|
||||||
#[error("A {0} payload is missing.")]
|
#[error("A {0} payload is missing.")]
|
||||||
@ -86,6 +90,12 @@ impl ErrorCode for MeilisearchHttpError {
|
|||||||
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
||||||
MeilisearchHttpError::Join(_) => Code::Internal,
|
MeilisearchHttpError::Join(_) => Code::Internal,
|
||||||
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
|
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
|
||||||
|
MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
|
||||||
|
Code::InvalidMultiSearchFederationOptions
|
||||||
|
}
|
||||||
|
MeilisearchHttpError::PaginationInFederatedQuery(_, _) => {
|
||||||
|
Code::InvalidMultiSearchQueryPagination
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -10,12 +10,14 @@ use serde::Serialize;
|
|||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use crate::analytics::{Analytics, MultiSearchAggregator};
|
use crate::analytics::{Analytics, MultiSearchAggregator};
|
||||||
|
use crate::error::MeilisearchHttpError;
|
||||||
use crate::extractors::authentication::policies::ActionPolicy;
|
use crate::extractors::authentication::policies::ActionPolicy;
|
||||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||||
use crate::extractors::sequential_extractor::SeqHandler;
|
use crate::extractors::sequential_extractor::SeqHandler;
|
||||||
use crate::routes::indexes::search::search_kind;
|
use crate::routes::indexes::search::search_kind;
|
||||||
use crate::search::{
|
use crate::search::{
|
||||||
add_search_rules, perform_search, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
|
add_search_rules, perform_federated_search, perform_search, FederatedSearch, RetrieveVectors,
|
||||||
|
SearchQueryWithIndex, SearchResultWithIndex,
|
||||||
};
|
};
|
||||||
use crate::search_queue::SearchQueue;
|
use crate::search_queue::SearchQueue;
|
||||||
|
|
||||||
@ -28,46 +30,86 @@ struct SearchResults {
|
|||||||
results: Vec<SearchResultWithIndex>,
|
results: Vec<SearchResultWithIndex>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, deserr::Deserr)]
|
|
||||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
|
||||||
pub struct SearchQueries {
|
|
||||||
queries: Vec<SearchQueryWithIndex>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn multi_search_with_post(
|
pub async fn multi_search_with_post(
|
||||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||||
search_queue: Data<SearchQueue>,
|
search_queue: Data<SearchQueue>,
|
||||||
params: AwebJson<SearchQueries, DeserrJsonError>,
|
params: AwebJson<FederatedSearch, DeserrJsonError>,
|
||||||
req: HttpRequest,
|
req: HttpRequest,
|
||||||
analytics: web::Data<dyn Analytics>,
|
analytics: web::Data<dyn Analytics>,
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
let queries = params.into_inner().queries;
|
|
||||||
|
|
||||||
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
|
|
||||||
let features = index_scheduler.features();
|
|
||||||
|
|
||||||
// Since we don't want to process half of the search requests and then get a permit refused
|
// Since we don't want to process half of the search requests and then get a permit refused
|
||||||
// we're going to get one permit for the whole duration of the multi-search request.
|
// we're going to get one permit for the whole duration of the multi-search request.
|
||||||
let _permit = search_queue.try_get_search_permit().await?;
|
let _permit = search_queue.try_get_search_permit().await?;
|
||||||
|
|
||||||
|
let federated_search = params.into_inner();
|
||||||
|
|
||||||
|
let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req);
|
||||||
|
|
||||||
|
let FederatedSearch { mut queries, federation } = federated_search;
|
||||||
|
|
||||||
|
let features = index_scheduler.features();
|
||||||
|
|
||||||
|
// regardless of federation, check authorization and apply search rules
|
||||||
|
let auth = 'check_authorization: {
|
||||||
|
for (query_index, federated_query) in queries.iter_mut().enumerate() {
|
||||||
|
let index_uid = federated_query.index_uid.as_str();
|
||||||
|
// Check index from API key
|
||||||
|
if !index_scheduler.filters().is_index_authorized(index_uid) {
|
||||||
|
break 'check_authorization Err(AuthenticationError::InvalidToken)
|
||||||
|
.with_index(query_index);
|
||||||
|
}
|
||||||
|
// Apply search rules from tenant token
|
||||||
|
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(index_uid)
|
||||||
|
{
|
||||||
|
add_search_rules(&mut federated_query.filter, search_rules);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
};
|
||||||
|
|
||||||
|
auth.map_err(|(mut err, query_index)| {
|
||||||
|
// Add the query index that failed as context for the error message.
|
||||||
|
// We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type
|
||||||
|
// of result and we can benefit from static typing.
|
||||||
|
err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
|
||||||
|
err
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let response = match federation {
|
||||||
|
Some(federation) => {
|
||||||
|
let search_result = tokio::task::spawn_blocking(move || {
|
||||||
|
perform_federated_search(&index_scheduler, queries, federation, features)
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
if let Ok(Ok(_)) = search_result {
|
||||||
|
multi_aggregate.succeed();
|
||||||
|
}
|
||||||
|
|
||||||
|
analytics.post_multi_search(multi_aggregate);
|
||||||
|
HttpResponse::Ok().json(search_result??)
|
||||||
|
}
|
||||||
|
None => {
|
||||||
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
|
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
|
||||||
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
|
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
|
||||||
// changes.
|
// changes.
|
||||||
let search_results: Result<_, (ResponseError, usize)> = async {
|
let search_results: Result<_, (ResponseError, usize)> = async {
|
||||||
let mut search_results = Vec::with_capacity(queries.len());
|
let mut search_results = Vec::with_capacity(queries.len());
|
||||||
for (query_index, (index_uid, mut query)) in
|
for (query_index, (index_uid, query, federation_options)) in queries
|
||||||
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
|
.into_iter()
|
||||||
|
.map(SearchQueryWithIndex::into_index_query_federation)
|
||||||
|
.enumerate()
|
||||||
{
|
{
|
||||||
debug!(on_index = query_index, parameters = ?query, "Multi-search");
|
debug!(on_index = query_index, parameters = ?query, "Multi-search");
|
||||||
|
|
||||||
// Check index from API key
|
if federation_options.is_some() {
|
||||||
if !index_scheduler.filters().is_index_authorized(&index_uid) {
|
return Err((
|
||||||
return Err(AuthenticationError::InvalidToken).with_index(query_index);
|
MeilisearchHttpError::FederationOptionsInNonFederatedRequest(
|
||||||
}
|
query_index,
|
||||||
// Apply search rules from tenant token
|
)
|
||||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
|
.into(),
|
||||||
{
|
query_index,
|
||||||
add_search_rules(&mut query.filter, search_rules);
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let index = index_scheduler
|
let index = index_scheduler
|
||||||
@ -81,10 +123,11 @@ pub async fn multi_search_with_post(
|
|||||||
})
|
})
|
||||||
.with_index(query_index)?;
|
.with_index(query_index)?;
|
||||||
|
|
||||||
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)
|
let search_kind =
|
||||||
|
search_kind(&query, index_scheduler.get_ref(), &index, features)
|
||||||
|
.with_index(query_index)?;
|
||||||
|
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)
|
||||||
.with_index(query_index)?;
|
.with_index(query_index)?;
|
||||||
let retrieve_vector =
|
|
||||||
RetrieveVectors::new(query.retrieve_vectors, features).with_index(query_index)?;
|
|
||||||
|
|
||||||
let search_result = tokio::task::spawn_blocking(move || {
|
let search_result = tokio::task::spawn_blocking(move || {
|
||||||
perform_search(&index, query, search_kind, retrieve_vector)
|
perform_search(&index, query, search_kind, retrieve_vector)
|
||||||
@ -116,7 +159,11 @@ pub async fn multi_search_with_post(
|
|||||||
|
|
||||||
debug!(returns = ?search_results, "Multi-search");
|
debug!(returns = ?search_results, "Multi-search");
|
||||||
|
|
||||||
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
|
HttpResponse::Ok().json(SearchResults { results: search_results })
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(response)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Local `Result` extension trait to avoid `map_err` boilerplate.
|
/// Local `Result` extension trait to avoid `map_err` boilerplate.
|
||||||
|
629
meilisearch/src/search/federated.rs
Normal file
629
meilisearch/src/search/federated.rs
Normal file
@ -0,0 +1,629 @@
|
|||||||
|
use std::cmp::Ordering;
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::fmt;
|
||||||
|
use std::iter::Zip;
|
||||||
|
use std::rc::Rc;
|
||||||
|
use std::str::FromStr as _;
|
||||||
|
use std::time::Duration;
|
||||||
|
use std::vec::{IntoIter, Vec};
|
||||||
|
|
||||||
|
use actix_http::StatusCode;
|
||||||
|
use index_scheduler::{IndexScheduler, RoFeatures};
|
||||||
|
use meilisearch_types::deserr::DeserrJsonError;
|
||||||
|
use meilisearch_types::error::deserr_codes::{
|
||||||
|
InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset,
|
||||||
|
};
|
||||||
|
use meilisearch_types::error::ResponseError;
|
||||||
|
use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
|
||||||
|
use meilisearch_types::milli::{self, DocumentId, TimeBudget};
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use super::ranking_rules::{self, RankingRules};
|
||||||
|
use super::{
|
||||||
|
prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind,
|
||||||
|
SearchQuery, SearchQueryWithIndex,
|
||||||
|
};
|
||||||
|
use crate::error::MeilisearchHttpError;
|
||||||
|
use crate::routes::indexes::search::search_kind;
|
||||||
|
|
||||||
|
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
|
||||||
|
|
||||||
|
#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||||
|
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub struct FederationOptions {
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
|
||||||
|
pub weight: Weight,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||||
|
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
|
||||||
|
pub struct Weight(f64);
|
||||||
|
|
||||||
|
impl Default for Weight {
|
||||||
|
fn default() -> Self {
|
||||||
|
Weight(DEFAULT_FEDERATED_WEIGHT)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::convert::TryFrom<f64> for Weight {
|
||||||
|
type Error = InvalidMultiSearchWeight;
|
||||||
|
|
||||||
|
fn try_from(f: f64) -> Result<Self, Self::Error> {
|
||||||
|
if f < 0.0 {
|
||||||
|
Err(InvalidMultiSearchWeight)
|
||||||
|
} else {
|
||||||
|
Ok(Weight(f))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::Deref for Weight {
|
||||||
|
type Target = f64;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, deserr::Deserr)]
|
||||||
|
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub struct Federation {
|
||||||
|
#[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||||
|
pub limit: usize,
|
||||||
|
#[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||||
|
pub offset: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, deserr::Deserr)]
|
||||||
|
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub struct FederatedSearch {
|
||||||
|
pub queries: Vec<SearchQueryWithIndex>,
|
||||||
|
#[deserr(default)]
|
||||||
|
pub federation: Option<Federation>,
|
||||||
|
}
|
||||||
|
#[derive(Serialize, Clone, PartialEq)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct FederatedSearchResult {
|
||||||
|
pub hits: Vec<SearchHit>,
|
||||||
|
pub processing_time_ms: u128,
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub hits_info: HitsInfo,
|
||||||
|
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub semantic_hit_count: Option<u32>,
|
||||||
|
|
||||||
|
// These fields are only used for analytics purposes
|
||||||
|
#[serde(skip)]
|
||||||
|
pub degraded: bool,
|
||||||
|
#[serde(skip)]
|
||||||
|
pub used_negative_operator: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for FederatedSearchResult {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
let FederatedSearchResult {
|
||||||
|
hits,
|
||||||
|
processing_time_ms,
|
||||||
|
hits_info,
|
||||||
|
semantic_hit_count,
|
||||||
|
degraded,
|
||||||
|
used_negative_operator,
|
||||||
|
} = self;
|
||||||
|
|
||||||
|
let mut debug = f.debug_struct("SearchResult");
|
||||||
|
// The most important thing when looking at a search result is the time it took to process
|
||||||
|
debug.field("processing_time_ms", &processing_time_ms);
|
||||||
|
debug.field("hits", &format!("[{} hits returned]", hits.len()));
|
||||||
|
debug.field("hits_info", &hits_info);
|
||||||
|
if *used_negative_operator {
|
||||||
|
debug.field("used_negative_operator", used_negative_operator);
|
||||||
|
}
|
||||||
|
if *degraded {
|
||||||
|
debug.field("degraded", degraded);
|
||||||
|
}
|
||||||
|
if let Some(semantic_hit_count) = semantic_hit_count {
|
||||||
|
debug.field("semantic_hit_count", &semantic_hit_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
debug.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct WeightedScore<'a> {
|
||||||
|
details: &'a [ScoreDetails],
|
||||||
|
weight: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> WeightedScore<'a> {
|
||||||
|
pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self {
|
||||||
|
Self { details, weight }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn weighted_global_score(&self) -> f64 {
|
||||||
|
ScoreDetails::global_score(self.details.iter()) * self.weight
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering {
|
||||||
|
self.weighted_global_score()
|
||||||
|
.partial_cmp(&other.weighted_global_score())
|
||||||
|
// both are numbers, possibly infinite
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn compare(&self, other: &Self) -> Ordering {
|
||||||
|
let mut left_it = ScoreDetails::score_values(self.details.iter());
|
||||||
|
let mut right_it = ScoreDetails::score_values(other.details.iter());
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let left = left_it.next();
|
||||||
|
let right = right_it.next();
|
||||||
|
|
||||||
|
match (left, right) {
|
||||||
|
(None, None) => return Ordering::Equal,
|
||||||
|
(None, Some(_)) => return Ordering::Less,
|
||||||
|
(Some(_), None) => return Ordering::Greater,
|
||||||
|
(Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => {
|
||||||
|
let left = left * self.weight;
|
||||||
|
let right = right * other.weight;
|
||||||
|
if (left - right).abs() <= f64::EPSILON {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
return left.partial_cmp(&right).unwrap();
|
||||||
|
}
|
||||||
|
(Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => {
|
||||||
|
match left.partial_cmp(right) {
|
||||||
|
Some(Ordering::Equal) => continue,
|
||||||
|
Some(order) => return order,
|
||||||
|
None => return self.compare_weighted_global_scores(other),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => {
|
||||||
|
match left.partial_cmp(right) {
|
||||||
|
Some(Ordering::Equal) => continue,
|
||||||
|
Some(order) => return order,
|
||||||
|
None => {
|
||||||
|
return self.compare_weighted_global_scores(other);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// not comparable details, use global
|
||||||
|
(Some(ScoreValue::Score(_)), Some(_))
|
||||||
|
| (Some(_), Some(ScoreValue::Score(_)))
|
||||||
|
| (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
|
||||||
|
| (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
|
||||||
|
let left_count = left_it.count();
|
||||||
|
let right_count = right_it.count();
|
||||||
|
// compare how many remaining groups of rules each side has.
|
||||||
|
// the group with the most remaining groups wins.
|
||||||
|
return left_count
|
||||||
|
.cmp(&right_count)
|
||||||
|
// breaks ties with the global ranking score
|
||||||
|
.then_with(|| self.compare_weighted_global_scores(other));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct QueryByIndex {
|
||||||
|
query: SearchQuery,
|
||||||
|
federation_options: FederationOptions,
|
||||||
|
query_index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SearchResultByQuery<'a> {
|
||||||
|
documents_ids: Vec<DocumentId>,
|
||||||
|
document_scores: Vec<Vec<ScoreDetails>>,
|
||||||
|
federation_options: FederationOptions,
|
||||||
|
hit_maker: HitMaker<'a>,
|
||||||
|
query_index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SearchResultByQueryIter<'a> {
|
||||||
|
it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
|
||||||
|
federation_options: FederationOptions,
|
||||||
|
hit_maker: Rc<HitMaker<'a>>,
|
||||||
|
query_index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> SearchResultByQueryIter<'a> {
|
||||||
|
fn new(
|
||||||
|
SearchResultByQuery {
|
||||||
|
documents_ids,
|
||||||
|
document_scores,
|
||||||
|
federation_options,
|
||||||
|
hit_maker,
|
||||||
|
query_index,
|
||||||
|
}: SearchResultByQuery<'a>,
|
||||||
|
) -> Self {
|
||||||
|
let it = documents_ids.into_iter().zip(document_scores);
|
||||||
|
Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SearchResultByQueryIterItem<'a> {
|
||||||
|
docid: DocumentId,
|
||||||
|
score: Vec<ScoreDetails>,
|
||||||
|
federation_options: FederationOptions,
|
||||||
|
hit_maker: Rc<HitMaker<'a>>,
|
||||||
|
query_index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn merge_index_local_results(
|
||||||
|
results_by_query: Vec<SearchResultByQuery<'_>>,
|
||||||
|
) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
|
||||||
|
itertools::kmerge_by(
|
||||||
|
results_by_query.into_iter().map(SearchResultByQueryIter::new),
|
||||||
|
|left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
|
||||||
|
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
|
||||||
|
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
|
||||||
|
|
||||||
|
match left_score.compare(&right_score) {
|
||||||
|
// the biggest score goes first
|
||||||
|
Ordering::Greater => true,
|
||||||
|
// break ties using query index
|
||||||
|
Ordering::Equal => left.query_index < right.query_index,
|
||||||
|
Ordering::Less => false,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn merge_index_global_results(
|
||||||
|
results_by_index: Vec<SearchResultByIndex>,
|
||||||
|
) -> impl Iterator<Item = SearchHitByIndex> {
|
||||||
|
itertools::kmerge_by(
|
||||||
|
results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()),
|
||||||
|
|left: &SearchHitByIndex, right: &SearchHitByIndex| {
|
||||||
|
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
|
||||||
|
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
|
||||||
|
|
||||||
|
match left_score.compare(&right_score) {
|
||||||
|
// the biggest score goes first
|
||||||
|
Ordering::Greater => true,
|
||||||
|
// break ties using query index
|
||||||
|
Ordering::Equal => left.query_index < right.query_index,
|
||||||
|
Ordering::Less => false,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for SearchResultByQueryIter<'a> {
|
||||||
|
type Item = SearchResultByQueryIterItem<'a>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
let (docid, score) = self.it.next()?;
|
||||||
|
Some(SearchResultByQueryIterItem {
|
||||||
|
docid,
|
||||||
|
score,
|
||||||
|
federation_options: self.federation_options,
|
||||||
|
hit_maker: Rc::clone(&self.hit_maker),
|
||||||
|
query_index: self.query_index,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SearchHitByIndex {
|
||||||
|
hit: SearchHit,
|
||||||
|
score: Vec<ScoreDetails>,
|
||||||
|
federation_options: FederationOptions,
|
||||||
|
query_index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SearchResultByIndex {
|
||||||
|
hits: Vec<SearchHitByIndex>,
|
||||||
|
candidates: RoaringBitmap,
|
||||||
|
degraded: bool,
|
||||||
|
used_negative_operator: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn perform_federated_search(
|
||||||
|
index_scheduler: &IndexScheduler,
|
||||||
|
queries: Vec<SearchQueryWithIndex>,
|
||||||
|
federation: Federation,
|
||||||
|
features: RoFeatures,
|
||||||
|
) -> Result<FederatedSearchResult, ResponseError> {
|
||||||
|
let before_search = std::time::Instant::now();
|
||||||
|
|
||||||
|
// this implementation partition the queries by index to guarantee an important property:
|
||||||
|
// - all the queries to a particular index use the same read transaction.
|
||||||
|
// This is an important property, otherwise we cannot guarantee the self-consistency of the results.
|
||||||
|
|
||||||
|
// 1. partition queries by index
|
||||||
|
let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default();
|
||||||
|
for (query_index, federated_query) in queries.into_iter().enumerate() {
|
||||||
|
if let Some(pagination_field) = federated_query.has_pagination() {
|
||||||
|
return Err(MeilisearchHttpError::PaginationInFederatedQuery(
|
||||||
|
query_index,
|
||||||
|
pagination_field,
|
||||||
|
)
|
||||||
|
.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
|
||||||
|
|
||||||
|
queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
|
||||||
|
query,
|
||||||
|
federation_options: federation_options.unwrap_or_default(),
|
||||||
|
query_index,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. perform queries, merge and make hits index by index
|
||||||
|
let required_hit_count = federation.limit + federation.offset;
|
||||||
|
// In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
|
||||||
|
// Then in step (3), we'll update its value if there is any semantic search
|
||||||
|
let mut semantic_hit_count = None;
|
||||||
|
let mut results_by_index = Vec::with_capacity(queries_by_index.len());
|
||||||
|
let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
|
||||||
|
|
||||||
|
for (index_uid, queries) in queries_by_index {
|
||||||
|
let index = match index_scheduler.index(&index_uid) {
|
||||||
|
Ok(index) => index,
|
||||||
|
Err(err) => {
|
||||||
|
let mut err = ResponseError::from(err);
|
||||||
|
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
|
||||||
|
// here the resource not found is not part of the URL.
|
||||||
|
err.code = StatusCode::BAD_REQUEST;
|
||||||
|
if let Some(query) = queries.first() {
|
||||||
|
err.message =
|
||||||
|
format!("Inside `.queries[{}]`: {}", query.query_index, err.message);
|
||||||
|
}
|
||||||
|
return Err(err);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Important: this is the only transaction we'll use for this index during this federated search
|
||||||
|
let rtxn = index.read_txn()?;
|
||||||
|
|
||||||
|
let criteria = index.criteria(&rtxn)?;
|
||||||
|
|
||||||
|
// stuff we need for the hitmaker
|
||||||
|
let script_lang_map = index.script_language(&rtxn)?;
|
||||||
|
|
||||||
|
let dictionary = index.dictionary(&rtxn)?;
|
||||||
|
let dictionary: Option<Vec<_>> =
|
||||||
|
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||||
|
let separators = index.allowed_separators(&rtxn)?;
|
||||||
|
let separators: Option<Vec<_>> =
|
||||||
|
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||||
|
|
||||||
|
// each query gets its individual cutoff
|
||||||
|
let cutoff = index.search_cutoff(&rtxn)?;
|
||||||
|
|
||||||
|
let mut degraded = false;
|
||||||
|
let mut used_negative_operator = false;
|
||||||
|
let mut candidates = RoaringBitmap::new();
|
||||||
|
|
||||||
|
// 2.1. Compute all candidates for each query in the index
|
||||||
|
let mut results_by_query = Vec::with_capacity(queries.len());
|
||||||
|
|
||||||
|
for QueryByIndex { query, federation_options, query_index } in queries {
|
||||||
|
// use an immediately invoked lambda to capture the result without returning from the function
|
||||||
|
|
||||||
|
let res: Result<(), ResponseError> = (|| {
|
||||||
|
let search_kind = search_kind(&query, index_scheduler, &index, features)?;
|
||||||
|
|
||||||
|
let canonicalization_kind = match (&search_kind, &query.q) {
|
||||||
|
(SearchKind::SemanticOnly { .. }, _) => {
|
||||||
|
ranking_rules::CanonicalizationKind::Vector
|
||||||
|
}
|
||||||
|
(_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword,
|
||||||
|
_ => ranking_rules::CanonicalizationKind::Placeholder,
|
||||||
|
};
|
||||||
|
|
||||||
|
let sort = if let Some(sort) = &query.sort {
|
||||||
|
let sorts: Vec<_> =
|
||||||
|
match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
|
||||||
|
Ok(sorts) => sorts,
|
||||||
|
Err(asc_desc_error) => {
|
||||||
|
return Err(milli::Error::from(milli::SortError::from(
|
||||||
|
asc_desc_error,
|
||||||
|
))
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Some(sorts)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let ranking_rules = ranking_rules::RankingRules::new(
|
||||||
|
criteria.clone(),
|
||||||
|
sort,
|
||||||
|
query.matching_strategy.into(),
|
||||||
|
canonicalization_kind,
|
||||||
|
);
|
||||||
|
|
||||||
|
if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) =
|
||||||
|
previous_query_data.take()
|
||||||
|
{
|
||||||
|
if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) {
|
||||||
|
return Err(error.to_response_error(
|
||||||
|
&ranking_rules,
|
||||||
|
&previous_ranking_rules,
|
||||||
|
query_index,
|
||||||
|
previous_query_index,
|
||||||
|
&index_uid,
|
||||||
|
&previous_index_uid,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
previous_query_data = if previous_ranking_rules.constraint_count()
|
||||||
|
> ranking_rules.constraint_count()
|
||||||
|
{
|
||||||
|
Some((previous_ranking_rules, previous_query_index, previous_index_uid))
|
||||||
|
} else {
|
||||||
|
Some((ranking_rules, query_index, index_uid.clone()))
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
previous_query_data = Some((ranking_rules, query_index, index_uid.clone()));
|
||||||
|
}
|
||||||
|
|
||||||
|
match search_kind {
|
||||||
|
SearchKind::KeywordOnly => {}
|
||||||
|
_ => semantic_hit_count = Some(0),
|
||||||
|
}
|
||||||
|
|
||||||
|
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||||
|
|
||||||
|
let time_budget = match cutoff {
|
||||||
|
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||||
|
None => TimeBudget::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
|
||||||
|
prepare_search(&index, &rtxn, &query, &search_kind, time_budget)?;
|
||||||
|
|
||||||
|
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
|
||||||
|
search.offset(0);
|
||||||
|
search.limit(required_hit_count);
|
||||||
|
|
||||||
|
let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?;
|
||||||
|
let format = AttributesFormat {
|
||||||
|
attributes_to_retrieve: query.attributes_to_retrieve,
|
||||||
|
retrieve_vectors,
|
||||||
|
attributes_to_highlight: query.attributes_to_highlight,
|
||||||
|
attributes_to_crop: query.attributes_to_crop,
|
||||||
|
crop_length: query.crop_length,
|
||||||
|
crop_marker: query.crop_marker,
|
||||||
|
highlight_pre_tag: query.highlight_pre_tag,
|
||||||
|
highlight_post_tag: query.highlight_post_tag,
|
||||||
|
show_matches_position: query.show_matches_position,
|
||||||
|
sort: query.sort,
|
||||||
|
show_ranking_score: query.show_ranking_score,
|
||||||
|
show_ranking_score_details: query.show_ranking_score_details,
|
||||||
|
};
|
||||||
|
|
||||||
|
let milli::SearchResult {
|
||||||
|
matching_words,
|
||||||
|
candidates: query_candidates,
|
||||||
|
documents_ids,
|
||||||
|
document_scores,
|
||||||
|
degraded: query_degraded,
|
||||||
|
used_negative_operator: query_used_negative_operator,
|
||||||
|
} = result;
|
||||||
|
|
||||||
|
candidates |= query_candidates;
|
||||||
|
degraded |= query_degraded;
|
||||||
|
used_negative_operator |= query_used_negative_operator;
|
||||||
|
|
||||||
|
let tokenizer = HitMaker::tokenizer(
|
||||||
|
&script_lang_map,
|
||||||
|
dictionary.as_deref(),
|
||||||
|
separators.as_deref(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
|
||||||
|
|
||||||
|
let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?;
|
||||||
|
|
||||||
|
results_by_query.push(SearchResultByQuery {
|
||||||
|
federation_options,
|
||||||
|
hit_maker,
|
||||||
|
query_index,
|
||||||
|
documents_ids,
|
||||||
|
document_scores,
|
||||||
|
});
|
||||||
|
Ok(())
|
||||||
|
})();
|
||||||
|
|
||||||
|
if let Err(mut error) = res {
|
||||||
|
error.message = format!("Inside `.queries[{query_index}]`: {}", error.message);
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 2.2. merge inside index
|
||||||
|
let mut documents_seen = RoaringBitmap::new();
|
||||||
|
let merged_result: Result<Vec<_>, ResponseError> =
|
||||||
|
merge_index_local_results(results_by_query)
|
||||||
|
// skip documents we've already seen & mark that we saw the current document
|
||||||
|
.filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid))
|
||||||
|
.take(required_hit_count)
|
||||||
|
// 2.3 make hits
|
||||||
|
.map(
|
||||||
|
|SearchResultByQueryIterItem {
|
||||||
|
docid,
|
||||||
|
score,
|
||||||
|
federation_options,
|
||||||
|
hit_maker,
|
||||||
|
query_index,
|
||||||
|
}| {
|
||||||
|
let mut hit = hit_maker.make_hit(docid, &score)?;
|
||||||
|
let weighted_score =
|
||||||
|
ScoreDetails::global_score(score.iter()) * (*federation_options.weight);
|
||||||
|
|
||||||
|
let _federation = serde_json::json!(
|
||||||
|
{
|
||||||
|
"indexUid": index_uid,
|
||||||
|
"queriesPosition": query_index,
|
||||||
|
"weightedRankingScore": weighted_score,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
hit.document.insert("_federation".to_string(), _federation);
|
||||||
|
Ok(SearchHitByIndex { hit, score, federation_options, query_index })
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let merged_result = merged_result?;
|
||||||
|
results_by_index.push(SearchResultByIndex {
|
||||||
|
hits: merged_result,
|
||||||
|
candidates,
|
||||||
|
degraded,
|
||||||
|
used_negative_operator,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. merge hits and metadata across indexes
|
||||||
|
// 3.1 merge metadata
|
||||||
|
let (estimated_total_hits, degraded, used_negative_operator) = {
|
||||||
|
let mut estimated_total_hits = 0;
|
||||||
|
let mut degraded = false;
|
||||||
|
let mut used_negative_operator = false;
|
||||||
|
|
||||||
|
for SearchResultByIndex {
|
||||||
|
hits: _,
|
||||||
|
candidates,
|
||||||
|
degraded: degraded_by_index,
|
||||||
|
used_negative_operator: used_negative_operator_by_index,
|
||||||
|
} in &results_by_index
|
||||||
|
{
|
||||||
|
estimated_total_hits += candidates.len() as usize;
|
||||||
|
degraded |= *degraded_by_index;
|
||||||
|
used_negative_operator |= *used_negative_operator_by_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
(estimated_total_hits, degraded, used_negative_operator)
|
||||||
|
};
|
||||||
|
|
||||||
|
// 3.2 merge hits
|
||||||
|
let merged_hits: Vec<_> = merge_index_global_results(results_by_index)
|
||||||
|
.skip(federation.offset)
|
||||||
|
.take(federation.limit)
|
||||||
|
.inspect(|hit| {
|
||||||
|
if let Some(semantic_hit_count) = &mut semantic_hit_count {
|
||||||
|
if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) {
|
||||||
|
*semantic_hit_count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.map(|hit| hit.hit)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let search_result = FederatedSearchResult {
|
||||||
|
hits: merged_hits,
|
||||||
|
processing_time_ms: before_search.elapsed().as_millis(),
|
||||||
|
hits_info: HitsInfo::OffsetLimit {
|
||||||
|
limit: federation.limit,
|
||||||
|
offset: federation.offset,
|
||||||
|
estimated_total_hits,
|
||||||
|
},
|
||||||
|
semantic_hit_count,
|
||||||
|
degraded,
|
||||||
|
used_negative_operator,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(search_result)
|
||||||
|
}
|
@ -1,6 +1,6 @@
|
|||||||
use core::fmt;
|
use core::fmt;
|
||||||
use std::cmp::min;
|
use std::cmp::min;
|
||||||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
@ -31,6 +31,11 @@ use serde_json::{json, Value};
|
|||||||
|
|
||||||
use crate::error::MeilisearchHttpError;
|
use crate::error::MeilisearchHttpError;
|
||||||
|
|
||||||
|
mod federated;
|
||||||
|
pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions};
|
||||||
|
|
||||||
|
mod ranking_rules;
|
||||||
|
|
||||||
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
|
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
|
||||||
|
|
||||||
pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
|
pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
|
||||||
@ -257,11 +262,13 @@ pub struct HybridQuery {
|
|||||||
pub embedder: Option<String>,
|
pub embedder: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
pub enum SearchKind {
|
pub enum SearchKind {
|
||||||
KeywordOnly,
|
KeywordOnly,
|
||||||
SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
|
SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
|
||||||
Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
|
Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SearchKind {
|
impl SearchKind {
|
||||||
pub(crate) fn semantic(
|
pub(crate) fn semantic(
|
||||||
index_scheduler: &index_scheduler::IndexScheduler,
|
index_scheduler: &index_scheduler::IndexScheduler,
|
||||||
@ -358,7 +365,7 @@ impl SearchQuery {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A `SearchQuery` + an index UID.
|
/// A `SearchQuery` + an index UID and optional FederationOptions.
|
||||||
// This struct contains the fields of `SearchQuery` inline.
|
// This struct contains the fields of `SearchQuery` inline.
|
||||||
// This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields`.
|
// This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields`.
|
||||||
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
|
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
|
||||||
@ -373,10 +380,10 @@ pub struct SearchQueryWithIndex {
|
|||||||
pub vector: Option<Vec<f32>>,
|
pub vector: Option<Vec<f32>>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||||
pub hybrid: Option<HybridQuery>,
|
pub hybrid: Option<HybridQuery>,
|
||||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchOffset>)]
|
||||||
pub offset: usize,
|
pub offset: Option<usize>,
|
||||||
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchLimit>)]
|
||||||
pub limit: usize,
|
pub limit: Option<usize>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
|
||||||
pub page: Option<usize>,
|
pub page: Option<usize>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
|
||||||
@ -417,12 +424,33 @@ pub struct SearchQueryWithIndex {
|
|||||||
pub attributes_to_search_on: Option<Vec<String>>,
|
pub attributes_to_search_on: Option<Vec<String>>,
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
|
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
|
||||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||||
|
|
||||||
|
#[deserr(default)]
|
||||||
|
pub federation_options: Option<FederationOptions>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SearchQueryWithIndex {
|
impl SearchQueryWithIndex {
|
||||||
pub fn into_index_query(self) -> (IndexUid, SearchQuery) {
|
pub fn has_federation_options(&self) -> bool {
|
||||||
|
self.federation_options.is_some()
|
||||||
|
}
|
||||||
|
pub fn has_pagination(&self) -> Option<&'static str> {
|
||||||
|
if self.offset.is_some() {
|
||||||
|
Some("offset")
|
||||||
|
} else if self.limit.is_some() {
|
||||||
|
Some("limit")
|
||||||
|
} else if self.page.is_some() {
|
||||||
|
Some("page")
|
||||||
|
} else if self.hits_per_page.is_some() {
|
||||||
|
Some("hitsPerPage")
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
|
||||||
let SearchQueryWithIndex {
|
let SearchQueryWithIndex {
|
||||||
index_uid,
|
index_uid,
|
||||||
|
federation_options,
|
||||||
q,
|
q,
|
||||||
vector,
|
vector,
|
||||||
offset,
|
offset,
|
||||||
@ -454,8 +482,8 @@ impl SearchQueryWithIndex {
|
|||||||
SearchQuery {
|
SearchQuery {
|
||||||
q,
|
q,
|
||||||
vector,
|
vector,
|
||||||
offset,
|
offset: offset.unwrap_or(DEFAULT_SEARCH_OFFSET()),
|
||||||
limit,
|
limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT()),
|
||||||
page,
|
page,
|
||||||
hits_per_page,
|
hits_per_page,
|
||||||
attributes_to_retrieve,
|
attributes_to_retrieve,
|
||||||
@ -480,6 +508,7 @@ impl SearchQueryWithIndex {
|
|||||||
// do not use ..Default::default() here,
|
// do not use ..Default::default() here,
|
||||||
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
||||||
},
|
},
|
||||||
|
federation_options,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -864,15 +893,7 @@ pub fn perform_search(
|
|||||||
used_negative_operator,
|
used_negative_operator,
|
||||||
},
|
},
|
||||||
semantic_hit_count,
|
semantic_hit_count,
|
||||||
) = match &search_kind {
|
) = search_from_kind(search_kind, search)?;
|
||||||
SearchKind::KeywordOnly => (search.execute()?, None),
|
|
||||||
SearchKind::SemanticOnly { .. } => {
|
|
||||||
let results = search.execute()?;
|
|
||||||
let semantic_hit_count = results.document_scores.len() as u32;
|
|
||||||
(results, Some(semantic_hit_count))
|
|
||||||
}
|
|
||||||
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
|
|
||||||
};
|
|
||||||
|
|
||||||
let SearchQuery {
|
let SearchQuery {
|
||||||
q,
|
q,
|
||||||
@ -919,8 +940,13 @@ pub fn perform_search(
|
|||||||
show_ranking_score_details,
|
show_ranking_score_details,
|
||||||
};
|
};
|
||||||
|
|
||||||
let documents =
|
let documents = make_hits(
|
||||||
make_hits(index, &rtxn, format, matching_words, documents_ids, document_scores)?;
|
index,
|
||||||
|
&rtxn,
|
||||||
|
format,
|
||||||
|
matching_words,
|
||||||
|
documents_ids.iter().copied().zip(document_scores.iter()),
|
||||||
|
)?;
|
||||||
|
|
||||||
let number_of_hits = min(candidates.len() as usize, max_total_hits);
|
let number_of_hits = min(candidates.len() as usize, max_total_hits);
|
||||||
let hits_info = if is_finite_pagination {
|
let hits_info = if is_finite_pagination {
|
||||||
@ -988,6 +1014,22 @@ pub fn perform_search(
|
|||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn search_from_kind(
|
||||||
|
search_kind: SearchKind,
|
||||||
|
search: milli::Search<'_>,
|
||||||
|
) -> Result<(milli::SearchResult, Option<u32>), MeilisearchHttpError> {
|
||||||
|
let (milli_result, semantic_hit_count) = match &search_kind {
|
||||||
|
SearchKind::KeywordOnly => (search.execute()?, None),
|
||||||
|
SearchKind::SemanticOnly { .. } => {
|
||||||
|
let results = search.execute()?;
|
||||||
|
let semantic_hit_count = results.document_scores.len() as u32;
|
||||||
|
(results, Some(semantic_hit_count))
|
||||||
|
}
|
||||||
|
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
|
||||||
|
};
|
||||||
|
Ok((milli_result, semantic_hit_count))
|
||||||
|
}
|
||||||
|
|
||||||
struct AttributesFormat {
|
struct AttributesFormat {
|
||||||
attributes_to_retrieve: Option<BTreeSet<String>>,
|
attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||||
retrieve_vectors: RetrieveVectors,
|
retrieve_vectors: RetrieveVectors,
|
||||||
@ -1033,19 +1075,72 @@ impl RetrieveVectors {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn make_hits(
|
struct HitMaker<'a> {
|
||||||
index: &Index,
|
index: &'a Index,
|
||||||
rtxn: &RoTxn<'_>,
|
rtxn: &'a RoTxn<'a>,
|
||||||
format: AttributesFormat,
|
fields_ids_map: FieldsIdsMap,
|
||||||
matching_words: milli::MatchingWords,
|
displayed_ids: BTreeSet<FieldId>,
|
||||||
documents_ids: Vec<u32>,
|
vectors_fid: Option<FieldId>,
|
||||||
document_scores: Vec<Vec<ScoreDetails>>,
|
retrieve_vectors: RetrieveVectors,
|
||||||
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
|
to_retrieve_ids: BTreeSet<FieldId>,
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn).unwrap();
|
embedding_configs: Vec<milli::index::IndexEmbeddingConfig>,
|
||||||
let displayed_ids =
|
formatter_builder: MatcherBuilder<'a>,
|
||||||
index.displayed_fields_ids(rtxn)?.map(|fields| fields.into_iter().collect::<BTreeSet<_>>());
|
formatted_options: BTreeMap<FieldId, FormatOptions>,
|
||||||
|
show_ranking_score: bool,
|
||||||
|
show_ranking_score_details: bool,
|
||||||
|
sort: Option<Vec<String>>,
|
||||||
|
show_matches_position: bool,
|
||||||
|
}
|
||||||
|
|
||||||
let vectors_fid = fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
|
impl<'a> HitMaker<'a> {
|
||||||
|
pub fn tokenizer<'b>(
|
||||||
|
script_lang_map: &'b HashMap<milli::tokenizer::Script, Vec<milli::tokenizer::Language>>,
|
||||||
|
dictionary: Option<&'b [&'b str]>,
|
||||||
|
separators: Option<&'b [&'b str]>,
|
||||||
|
) -> milli::tokenizer::Tokenizer<'b> {
|
||||||
|
let mut tokenizer_builder = TokenizerBuilder::default();
|
||||||
|
tokenizer_builder.create_char_map(true);
|
||||||
|
if !script_lang_map.is_empty() {
|
||||||
|
tokenizer_builder.allow_list(script_lang_map);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(separators) = separators {
|
||||||
|
tokenizer_builder.separators(separators);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(dictionary) = dictionary {
|
||||||
|
tokenizer_builder.words_dict(dictionary);
|
||||||
|
}
|
||||||
|
|
||||||
|
tokenizer_builder.into_tokenizer()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn formatter_builder(
|
||||||
|
matching_words: milli::MatchingWords,
|
||||||
|
tokenizer: milli::tokenizer::Tokenizer<'_>,
|
||||||
|
) -> MatcherBuilder<'_> {
|
||||||
|
let formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
|
||||||
|
|
||||||
|
formatter_builder
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new(
|
||||||
|
index: &'a Index,
|
||||||
|
rtxn: &'a RoTxn<'a>,
|
||||||
|
format: AttributesFormat,
|
||||||
|
mut formatter_builder: MatcherBuilder<'a>,
|
||||||
|
) -> Result<Self, MeilisearchHttpError> {
|
||||||
|
formatter_builder.crop_marker(format.crop_marker);
|
||||||
|
formatter_builder.highlight_prefix(format.highlight_pre_tag);
|
||||||
|
formatter_builder.highlight_suffix(format.highlight_post_tag);
|
||||||
|
|
||||||
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
|
let displayed_ids = index
|
||||||
|
.displayed_fields_ids(rtxn)?
|
||||||
|
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>());
|
||||||
|
|
||||||
|
let vectors_fid =
|
||||||
|
fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
|
||||||
|
|
||||||
let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
|
let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
|
||||||
// displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
|
// displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
|
||||||
@ -1056,6 +1151,9 @@ fn make_hits(
|
|||||||
(Some(map), Some(vectors_fid)) => map.contains(&vectors_fid),
|
(Some(map), Some(vectors_fid)) => map.contains(&vectors_fid),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let displayed_ids =
|
||||||
|
displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
||||||
|
|
||||||
let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
|
let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
|
||||||
if vectors_is_hidden {
|
if vectors_is_hidden {
|
||||||
RetrieveVectors::Hide
|
RetrieveVectors::Hide
|
||||||
@ -1066,8 +1164,6 @@ fn make_hits(
|
|||||||
format.retrieve_vectors
|
format.retrieve_vectors
|
||||||
};
|
};
|
||||||
|
|
||||||
let displayed_ids =
|
|
||||||
displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
|
||||||
let fids = |attrs: &BTreeSet<String>| {
|
let fids = |attrs: &BTreeSet<String>| {
|
||||||
let mut ids = BTreeSet::new();
|
let mut ids = BTreeSet::new();
|
||||||
for attr in attrs {
|
for attr in attrs {
|
||||||
@ -1101,63 +1197,69 @@ fn make_hits(
|
|||||||
&fields_ids_map,
|
&fields_ids_map,
|
||||||
&displayed_ids,
|
&displayed_ids,
|
||||||
);
|
);
|
||||||
let mut tokenizer_builder = TokenizerBuilder::default();
|
|
||||||
tokenizer_builder.create_char_map(true);
|
|
||||||
let script_lang_map = index.script_language(rtxn)?;
|
|
||||||
if !script_lang_map.is_empty() {
|
|
||||||
tokenizer_builder.allow_list(&script_lang_map);
|
|
||||||
}
|
|
||||||
let separators = index.allowed_separators(rtxn)?;
|
|
||||||
let separators: Option<Vec<_>> =
|
|
||||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
|
||||||
if let Some(ref separators) = separators {
|
|
||||||
tokenizer_builder.separators(separators);
|
|
||||||
}
|
|
||||||
let dictionary = index.dictionary(rtxn)?;
|
|
||||||
let dictionary: Option<Vec<_>> =
|
|
||||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
|
||||||
if let Some(ref dictionary) = dictionary {
|
|
||||||
tokenizer_builder.words_dict(dictionary);
|
|
||||||
}
|
|
||||||
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
|
|
||||||
formatter_builder.crop_marker(format.crop_marker);
|
|
||||||
formatter_builder.highlight_prefix(format.highlight_pre_tag);
|
|
||||||
formatter_builder.highlight_suffix(format.highlight_post_tag);
|
|
||||||
let mut documents = Vec::new();
|
|
||||||
let embedding_configs = index.embedding_configs(rtxn)?;
|
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||||
let documents_iter = index.documents(rtxn, documents_ids)?;
|
|
||||||
for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
|
Ok(Self {
|
||||||
|
index,
|
||||||
|
rtxn,
|
||||||
|
fields_ids_map,
|
||||||
|
displayed_ids,
|
||||||
|
vectors_fid,
|
||||||
|
retrieve_vectors,
|
||||||
|
to_retrieve_ids,
|
||||||
|
embedding_configs,
|
||||||
|
formatter_builder,
|
||||||
|
formatted_options,
|
||||||
|
show_ranking_score: format.show_ranking_score,
|
||||||
|
show_ranking_score_details: format.show_ranking_score_details,
|
||||||
|
show_matches_position: format.show_matches_position,
|
||||||
|
sort: format.sort,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn make_hit(
|
||||||
|
&self,
|
||||||
|
id: u32,
|
||||||
|
score: &[ScoreDetails],
|
||||||
|
) -> Result<SearchHit, MeilisearchHttpError> {
|
||||||
|
let (_, obkv) =
|
||||||
|
self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
|
||||||
|
|
||||||
// First generate a document with all the displayed fields
|
// First generate a document with all the displayed fields
|
||||||
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?;
|
||||||
|
|
||||||
let add_vectors_fid =
|
let add_vectors_fid =
|
||||||
vectors_fid.filter(|_fid| retrieve_vectors == RetrieveVectors::Retrieve);
|
self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);
|
||||||
|
|
||||||
// select the attributes to retrieve
|
// select the attributes to retrieve
|
||||||
let attributes_to_retrieve = to_retrieve_ids
|
let attributes_to_retrieve = self
|
||||||
|
.to_retrieve_ids
|
||||||
.iter()
|
.iter()
|
||||||
// skip the vectors_fid if RetrieveVectors::Hide
|
// skip the vectors_fid if RetrieveVectors::Hide
|
||||||
.filter(|fid| match vectors_fid {
|
.filter(|fid| match self.vectors_fid {
|
||||||
Some(vectors_fid) => {
|
Some(vectors_fid) => {
|
||||||
!(retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
|
!(self.retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
|
||||||
}
|
}
|
||||||
None => true,
|
None => true,
|
||||||
})
|
})
|
||||||
// need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
|
// need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
|
||||||
.chain(add_vectors_fid.iter())
|
.chain(add_vectors_fid.iter())
|
||||||
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
|
.map(|&fid| self.fields_ids_map.name(fid).expect("Missing field name"));
|
||||||
|
|
||||||
let mut document =
|
let mut document =
|
||||||
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
|
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
|
||||||
|
|
||||||
if retrieve_vectors == RetrieveVectors::Retrieve {
|
if self.retrieve_vectors == RetrieveVectors::Retrieve {
|
||||||
// Clippy is wrong
|
// Clippy is wrong
|
||||||
#[allow(clippy::manual_unwrap_or_default)]
|
#[allow(clippy::manual_unwrap_or_default)]
|
||||||
let mut vectors = match document.remove("_vectors") {
|
let mut vectors = match document.remove("_vectors") {
|
||||||
Some(Value::Object(map)) => map,
|
Some(Value::Object(map)) => map,
|
||||||
_ => Default::default(),
|
_ => Default::default(),
|
||||||
};
|
};
|
||||||
for (name, vector) in index.embeddings(rtxn, id)? {
|
for (name, vector) in self.index.embeddings(self.rtxn, id)? {
|
||||||
let user_provided = embedding_configs
|
let user_provided = self
|
||||||
|
.embedding_configs
|
||||||
.iter()
|
.iter()
|
||||||
.find(|conf| conf.name == name)
|
.find(|conf| conf.name == name)
|
||||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||||
@ -1170,21 +1272,21 @@ fn make_hits(
|
|||||||
|
|
||||||
let (matches_position, formatted) = format_fields(
|
let (matches_position, formatted) = format_fields(
|
||||||
&displayed_document,
|
&displayed_document,
|
||||||
&fields_ids_map,
|
&self.fields_ids_map,
|
||||||
&formatter_builder,
|
&self.formatter_builder,
|
||||||
&formatted_options,
|
&self.formatted_options,
|
||||||
format.show_matches_position,
|
self.show_matches_position,
|
||||||
&displayed_ids,
|
&self.displayed_ids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
if let Some(sort) = format.sort.as_ref() {
|
if let Some(sort) = self.sort.as_ref() {
|
||||||
insert_geo_distance(sort, &mut document);
|
insert_geo_distance(sort, &mut document);
|
||||||
}
|
}
|
||||||
|
|
||||||
let ranking_score =
|
let ranking_score =
|
||||||
format.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
|
self.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
|
||||||
let ranking_score_details =
|
let ranking_score_details =
|
||||||
format.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
|
self.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
|
||||||
|
|
||||||
let hit = SearchHit {
|
let hit = SearchHit {
|
||||||
document,
|
document,
|
||||||
@ -1193,7 +1295,38 @@ fn make_hits(
|
|||||||
ranking_score_details,
|
ranking_score_details,
|
||||||
ranking_score,
|
ranking_score,
|
||||||
};
|
};
|
||||||
documents.push(hit);
|
|
||||||
|
Ok(hit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_hits<'a>(
|
||||||
|
index: &Index,
|
||||||
|
rtxn: &RoTxn<'_>,
|
||||||
|
format: AttributesFormat,
|
||||||
|
matching_words: milli::MatchingWords,
|
||||||
|
documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a,
|
||||||
|
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
|
||||||
|
let mut documents = Vec::new();
|
||||||
|
|
||||||
|
let script_lang_map = index.script_language(rtxn)?;
|
||||||
|
|
||||||
|
let dictionary = index.dictionary(rtxn)?;
|
||||||
|
let dictionary: Option<Vec<_>> =
|
||||||
|
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||||
|
let separators = index.allowed_separators(rtxn)?;
|
||||||
|
let separators: Option<Vec<_>> =
|
||||||
|
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||||
|
|
||||||
|
let tokenizer =
|
||||||
|
HitMaker::tokenizer(&script_lang_map, dictionary.as_deref(), separators.as_deref());
|
||||||
|
|
||||||
|
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
|
||||||
|
|
||||||
|
let hit_maker = HitMaker::new(index, rtxn, format, formatter_builder)?;
|
||||||
|
|
||||||
|
for (id, score) in documents_ids_scores {
|
||||||
|
documents.push(hit_maker.make_hit(id, score)?);
|
||||||
}
|
}
|
||||||
Ok(documents)
|
Ok(documents)
|
||||||
}
|
}
|
||||||
@ -1309,7 +1442,13 @@ pub fn perform_similar(
|
|||||||
show_ranking_score_details,
|
show_ranking_score_details,
|
||||||
};
|
};
|
||||||
|
|
||||||
let hits = make_hits(index, &rtxn, format, Default::default(), documents_ids, document_scores)?;
|
let hits = make_hits(
|
||||||
|
index,
|
||||||
|
&rtxn,
|
||||||
|
format,
|
||||||
|
Default::default(),
|
||||||
|
documents_ids.iter().copied().zip(document_scores.iter()),
|
||||||
|
)?;
|
||||||
|
|
||||||
let max_total_hits = index
|
let max_total_hits = index
|
||||||
.pagination_max_total_hits(&rtxn)
|
.pagination_max_total_hits(&rtxn)
|
||||||
@ -1482,10 +1621,10 @@ fn make_document(
|
|||||||
Ok(document)
|
Ok(document)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn format_fields<'a>(
|
fn format_fields(
|
||||||
document: &Document,
|
document: &Document,
|
||||||
field_ids_map: &FieldsIdsMap,
|
field_ids_map: &FieldsIdsMap,
|
||||||
builder: &'a MatcherBuilder<'a>,
|
builder: &MatcherBuilder<'_>,
|
||||||
formatted_options: &BTreeMap<FieldId, FormatOptions>,
|
formatted_options: &BTreeMap<FieldId, FormatOptions>,
|
||||||
compute_matches: bool,
|
compute_matches: bool,
|
||||||
displayable_ids: &BTreeSet<FieldId>,
|
displayable_ids: &BTreeSet<FieldId>,
|
||||||
@ -1540,9 +1679,9 @@ fn format_fields<'a>(
|
|||||||
Ok((matches_position, document))
|
Ok((matches_position, document))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn format_value<'a>(
|
fn format_value(
|
||||||
value: Value,
|
value: Value,
|
||||||
builder: &'a MatcherBuilder<'a>,
|
builder: &MatcherBuilder<'_>,
|
||||||
format_options: Option<FormatOptions>,
|
format_options: Option<FormatOptions>,
|
||||||
infos: &mut Vec<MatchBounds>,
|
infos: &mut Vec<MatchBounds>,
|
||||||
compute_matches: bool,
|
compute_matches: bool,
|
823
meilisearch/src/search/ranking_rules.rs
Normal file
823
meilisearch/src/search/ranking_rules.rs
Normal file
@ -0,0 +1,823 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use std::fmt::Write;
|
||||||
|
|
||||||
|
use itertools::Itertools as _;
|
||||||
|
use meilisearch_types::error::{Code, ResponseError};
|
||||||
|
use meilisearch_types::milli::{AscDesc, Criterion, Member, TermsMatchingStrategy};
|
||||||
|
|
||||||
|
pub struct RankingRules {
|
||||||
|
canonical_criteria: Vec<Criterion>,
|
||||||
|
canonical_sort: Option<Vec<AscDesc>>,
|
||||||
|
canonicalization_actions: Vec<CanonicalizationAction>,
|
||||||
|
source_criteria: Vec<Criterion>,
|
||||||
|
source_sort: Option<Vec<AscDesc>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum CanonicalizationAction {
|
||||||
|
PrependedWords {
|
||||||
|
prepended_index: RankingRuleSource,
|
||||||
|
},
|
||||||
|
RemovedDuplicate {
|
||||||
|
earlier_occurrence: RankingRuleSource,
|
||||||
|
removed_occurrence: RankingRuleSource,
|
||||||
|
},
|
||||||
|
RemovedWords {
|
||||||
|
reason: RemoveWords,
|
||||||
|
removed_occurrence: RankingRuleSource,
|
||||||
|
},
|
||||||
|
RemovedPlaceholder {
|
||||||
|
removed_occurrence: RankingRuleSource,
|
||||||
|
},
|
||||||
|
TruncatedVector {
|
||||||
|
vector_rule: RankingRuleSource,
|
||||||
|
truncated_from: RankingRuleSource,
|
||||||
|
},
|
||||||
|
RemovedVector {
|
||||||
|
vector_rule: RankingRuleSource,
|
||||||
|
removed_occurrence: RankingRuleSource,
|
||||||
|
},
|
||||||
|
RemovedSort {
|
||||||
|
removed_occurrence: RankingRuleSource,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Why a `words` ranking rule was removed during canonicalization.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RemoveWords {
    /// A `words` rule had already been prepended before an earlier relevancy rule.
    WasPrepended,
    /// The query uses the `all` terms matching strategy.
    MatchingStrategyAll,
}

impl std::fmt::Display for RemoveWords {
    /// Writes the reason as a sentence fragment meant to follow the word "because".
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            RemoveWords::WasPrepended => "it was previously prepended",
            // The search parameter is named `matchingStrategy`; there is no
            // `matchingWords` parameter the user could look up.
            RemoveWords::MatchingStrategyAll => "`query.matchingStrategy` is set to `all`",
        })
    }
}
|
||||||
|
|
||||||
|
/// Selects which canonicalization procedure `RankingRules::new` applies to the
/// source ranking rules.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CanonicalizationKind {
    /// Placeholder search: every relevancy rule is removed.
    Placeholder,
    /// Keyword search: canonicalization also depends on the terms matching strategy.
    Keyword,
    /// Vector search: only the first relevancy rule is kept.
    Vector,
}
|
||||||
|
|
||||||
|
pub struct CompatibilityError {
|
||||||
|
previous: RankingRule,
|
||||||
|
current: RankingRule,
|
||||||
|
}
|
||||||
|
impl CompatibilityError {
|
||||||
|
pub(crate) fn to_response_error(
|
||||||
|
&self,
|
||||||
|
ranking_rules: &RankingRules,
|
||||||
|
previous_ranking_rules: &RankingRules,
|
||||||
|
query_index: usize,
|
||||||
|
previous_query_index: usize,
|
||||||
|
index_uid: &str,
|
||||||
|
previous_index_uid: &str,
|
||||||
|
) -> meilisearch_types::error::ResponseError {
|
||||||
|
let rule = self.current.as_string(
|
||||||
|
&ranking_rules.canonical_criteria,
|
||||||
|
&ranking_rules.canonical_sort,
|
||||||
|
query_index,
|
||||||
|
index_uid,
|
||||||
|
);
|
||||||
|
let previous_rule = self.previous.as_string(
|
||||||
|
&previous_ranking_rules.canonical_criteria,
|
||||||
|
&previous_ranking_rules.canonical_sort,
|
||||||
|
previous_query_index,
|
||||||
|
previous_index_uid,
|
||||||
|
);
|
||||||
|
|
||||||
|
let canonicalization_actions = ranking_rules.canonicalization_notes();
|
||||||
|
let previous_canonicalization_actions = previous_ranking_rules.canonicalization_notes();
|
||||||
|
|
||||||
|
let mut msg = String::new();
|
||||||
|
let reason = self.reason();
|
||||||
|
let _ = writeln!(
|
||||||
|
&mut msg,
|
||||||
|
"The results of queries #{previous_query_index} and #{query_index} are incompatible: "
|
||||||
|
);
|
||||||
|
let _ = writeln!(&mut msg, " 1. {previous_rule}");
|
||||||
|
let _ = writeln!(&mut msg, " 2. {rule}");
|
||||||
|
let _ = writeln!(&mut msg, " - {reason}");
|
||||||
|
|
||||||
|
if !previous_canonicalization_actions.is_empty() {
|
||||||
|
let _ = write!(&mut msg, " - note: The ranking rules of query #{previous_query_index} were modified during canonicalization:\n{previous_canonicalization_actions}");
|
||||||
|
}
|
||||||
|
|
||||||
|
if !canonicalization_actions.is_empty() {
|
||||||
|
let _ = write!(&mut msg, " - note: The ranking rules of query #{query_index} were modified during canonicalization:\n{canonicalization_actions}");
|
||||||
|
}
|
||||||
|
|
||||||
|
ResponseError::from_msg(msg, Code::InvalidMultiSearchQueryRankingRules)
|
||||||
|
}
|
||||||
|
pub fn reason(&self) -> &'static str {
|
||||||
|
match (self.previous.kind, self.current.kind) {
|
||||||
|
(RankingRuleKind::Relevancy, RankingRuleKind::AscendingSort)
|
||||||
|
| (RankingRuleKind::Relevancy, RankingRuleKind::DescendingSort)
|
||||||
|
| (RankingRuleKind::AscendingSort, RankingRuleKind::Relevancy)
|
||||||
|
| (RankingRuleKind::DescendingSort, RankingRuleKind::Relevancy) => {
|
||||||
|
"cannot compare a relevancy rule with a sort rule"
|
||||||
|
}
|
||||||
|
|
||||||
|
(RankingRuleKind::Relevancy, RankingRuleKind::AscendingGeoSort)
|
||||||
|
| (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort)
|
||||||
|
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy)
|
||||||
|
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => {
|
||||||
|
"cannot compare a relevancy rule with a geosort rule"
|
||||||
|
}
|
||||||
|
|
||||||
|
(RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort)
|
||||||
|
| (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingSort) => {
|
||||||
|
"cannot compare two sort rules in opposite directions"
|
||||||
|
}
|
||||||
|
|
||||||
|
(RankingRuleKind::AscendingSort, RankingRuleKind::AscendingGeoSort)
|
||||||
|
| (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingGeoSort)
|
||||||
|
| (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingGeoSort)
|
||||||
|
| (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingGeoSort)
|
||||||
|
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingSort)
|
||||||
|
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingSort)
|
||||||
|
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingSort)
|
||||||
|
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingSort) => {
|
||||||
|
"cannot compare a sort rule with a geosort rule"
|
||||||
|
}
|
||||||
|
|
||||||
|
(RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingGeoSort)
|
||||||
|
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingGeoSort) => {
|
||||||
|
"cannot compare two geosort rules in opposite directions"
|
||||||
|
}
|
||||||
|
(RankingRuleKind::Relevancy, RankingRuleKind::Relevancy)
|
||||||
|
| (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingSort)
|
||||||
|
| (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingSort)
|
||||||
|
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingGeoSort)
|
||||||
|
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingGeoSort) => {
|
||||||
|
"internal error, comparison should be possible"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RankingRules {
|
||||||
|
pub fn new(
|
||||||
|
criteria: Vec<Criterion>,
|
||||||
|
sort: Option<Vec<AscDesc>>,
|
||||||
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
|
canonicalization_kind: CanonicalizationKind,
|
||||||
|
) -> Self {
|
||||||
|
let (canonical_criteria, canonical_sort, canonicalization_actions) =
|
||||||
|
Self::canonicalize(&criteria, &sort, terms_matching_strategy, canonicalization_kind);
|
||||||
|
Self {
|
||||||
|
canonical_criteria,
|
||||||
|
canonical_sort,
|
||||||
|
canonicalization_actions,
|
||||||
|
source_criteria: criteria,
|
||||||
|
source_sort: sort,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonicalize(
|
||||||
|
criteria: &[Criterion],
|
||||||
|
sort: &Option<Vec<AscDesc>>,
|
||||||
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
|
canonicalization_kind: CanonicalizationKind,
|
||||||
|
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
|
||||||
|
match canonicalization_kind {
|
||||||
|
CanonicalizationKind::Placeholder => Self::canonicalize_placeholder(criteria, sort),
|
||||||
|
CanonicalizationKind::Keyword => {
|
||||||
|
Self::canonicalize_keyword(criteria, sort, terms_matching_strategy)
|
||||||
|
}
|
||||||
|
CanonicalizationKind::Vector => Self::canonicalize_vector(criteria, sort),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonicalize_placeholder(
|
||||||
|
criteria: &[Criterion],
|
||||||
|
sort_query: &Option<Vec<AscDesc>>,
|
||||||
|
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
|
||||||
|
let mut sort = None;
|
||||||
|
|
||||||
|
let mut sorted_fields = HashMap::new();
|
||||||
|
let mut canonicalization_actions = Vec::new();
|
||||||
|
let mut canonical_criteria = Vec::new();
|
||||||
|
let mut canonical_sort = None;
|
||||||
|
|
||||||
|
for (criterion_index, criterion) in criteria.iter().enumerate() {
|
||||||
|
match criterion.clone() {
|
||||||
|
Criterion::Words
|
||||||
|
| Criterion::Typo
|
||||||
|
| Criterion::Proximity
|
||||||
|
| Criterion::Attribute
|
||||||
|
| Criterion::Exactness => {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedPlaceholder {
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
Criterion::Sort => {
|
||||||
|
if let Some(previous_index) = sort {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
|
||||||
|
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
});
|
||||||
|
} else if let Some(sort_query) = sort_query {
|
||||||
|
sort = Some(criterion_index);
|
||||||
|
canonical_criteria.push(criterion.clone());
|
||||||
|
canonical_sort = Some(canonicalize_sort(
|
||||||
|
&mut sorted_fields,
|
||||||
|
sort_query.as_slice(),
|
||||||
|
criterion_index,
|
||||||
|
&mut canonicalization_actions,
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
|
||||||
|
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
|
||||||
|
.push(CanonicalizationAction::RemovedDuplicate {
|
||||||
|
earlier_occurrence: *entry.get(),
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
}),
|
||||||
|
std::collections::hash_map::Entry::Vacant(entry) => {
|
||||||
|
entry.insert(RankingRuleSource::Criterion(criterion_index));
|
||||||
|
canonical_criteria.push(criterion.clone())
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(canonical_criteria, canonical_sort, canonicalization_actions)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonicalize_vector(
|
||||||
|
criteria: &[Criterion],
|
||||||
|
sort_query: &Option<Vec<AscDesc>>,
|
||||||
|
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
|
||||||
|
let mut sort = None;
|
||||||
|
|
||||||
|
let mut sorted_fields = HashMap::new();
|
||||||
|
let mut canonicalization_actions = Vec::new();
|
||||||
|
let mut canonical_criteria = Vec::new();
|
||||||
|
let mut canonical_sort = None;
|
||||||
|
|
||||||
|
let mut vector = None;
|
||||||
|
|
||||||
|
'criteria: for (criterion_index, criterion) in criteria.iter().enumerate() {
|
||||||
|
match criterion.clone() {
|
||||||
|
Criterion::Words
|
||||||
|
| Criterion::Typo
|
||||||
|
| Criterion::Proximity
|
||||||
|
| Criterion::Attribute
|
||||||
|
| Criterion::Exactness => match vector {
|
||||||
|
Some(previous_occurrence) => {
|
||||||
|
if sorted_fields.is_empty() {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedVector {
|
||||||
|
vector_rule: RankingRuleSource::Criterion(previous_occurrence),
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
canonicalization_actions.push(
|
||||||
|
CanonicalizationAction::TruncatedVector {
|
||||||
|
vector_rule: RankingRuleSource::Criterion(previous_occurrence),
|
||||||
|
truncated_from: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
break 'criteria;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
canonical_criteria.push(criterion.clone());
|
||||||
|
vector = Some(criterion_index);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
Criterion::Sort => {
|
||||||
|
if let Some(previous_index) = sort {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
|
||||||
|
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
});
|
||||||
|
} else if let Some(sort_query) = sort_query {
|
||||||
|
sort = Some(criterion_index);
|
||||||
|
canonical_criteria.push(criterion.clone());
|
||||||
|
canonical_sort = Some(canonicalize_sort(
|
||||||
|
&mut sorted_fields,
|
||||||
|
sort_query.as_slice(),
|
||||||
|
criterion_index,
|
||||||
|
&mut canonicalization_actions,
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
|
||||||
|
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
|
||||||
|
.push(CanonicalizationAction::RemovedDuplicate {
|
||||||
|
earlier_occurrence: *entry.get(),
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
}),
|
||||||
|
std::collections::hash_map::Entry::Vacant(entry) => {
|
||||||
|
entry.insert(RankingRuleSource::Criterion(criterion_index));
|
||||||
|
canonical_criteria.push(criterion.clone())
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(canonical_criteria, canonical_sort, canonicalization_actions)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonicalize_keyword(
|
||||||
|
criteria: &[Criterion],
|
||||||
|
sort_query: &Option<Vec<AscDesc>>,
|
||||||
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
|
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
|
||||||
|
let mut words = None;
|
||||||
|
let mut typo = None;
|
||||||
|
let mut proximity = None;
|
||||||
|
let mut sort = None;
|
||||||
|
let mut attribute = None;
|
||||||
|
let mut exactness = None;
|
||||||
|
let mut sorted_fields = HashMap::new();
|
||||||
|
|
||||||
|
let mut canonical_criteria = Vec::new();
|
||||||
|
let mut canonical_sort = None;
|
||||||
|
|
||||||
|
let mut canonicalization_actions = Vec::new();
|
||||||
|
|
||||||
|
for (criterion_index, criterion) in criteria.iter().enumerate() {
|
||||||
|
let criterion = criterion.clone();
|
||||||
|
match criterion.clone() {
|
||||||
|
Criterion::Words => {
|
||||||
|
if let TermsMatchingStrategy::All = terms_matching_strategy {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedWords {
|
||||||
|
reason: RemoveWords::MatchingStrategyAll,
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if let Some(maybe_previous_index) = words {
|
||||||
|
if let Some(previous_index) = maybe_previous_index {
|
||||||
|
canonicalization_actions.push(
|
||||||
|
CanonicalizationAction::RemovedDuplicate {
|
||||||
|
earlier_occurrence: RankingRuleSource::Criterion(
|
||||||
|
previous_index,
|
||||||
|
),
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(
|
||||||
|
criterion_index,
|
||||||
|
),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedWords {
|
||||||
|
reason: RemoveWords::WasPrepended,
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
words = Some(Some(criterion_index));
|
||||||
|
canonical_criteria.push(criterion);
|
||||||
|
}
|
||||||
|
Criterion::Typo => {
|
||||||
|
canonicalize_criterion(
|
||||||
|
criterion,
|
||||||
|
criterion_index,
|
||||||
|
terms_matching_strategy,
|
||||||
|
&mut words,
|
||||||
|
&mut canonicalization_actions,
|
||||||
|
&mut canonical_criteria,
|
||||||
|
&mut typo,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Criterion::Proximity => {
|
||||||
|
canonicalize_criterion(
|
||||||
|
criterion,
|
||||||
|
criterion_index,
|
||||||
|
terms_matching_strategy,
|
||||||
|
&mut words,
|
||||||
|
&mut canonicalization_actions,
|
||||||
|
&mut canonical_criteria,
|
||||||
|
&mut proximity,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Criterion::Attribute => {
|
||||||
|
canonicalize_criterion(
|
||||||
|
criterion,
|
||||||
|
criterion_index,
|
||||||
|
terms_matching_strategy,
|
||||||
|
&mut words,
|
||||||
|
&mut canonicalization_actions,
|
||||||
|
&mut canonical_criteria,
|
||||||
|
&mut attribute,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Criterion::Exactness => {
|
||||||
|
canonicalize_criterion(
|
||||||
|
criterion,
|
||||||
|
criterion_index,
|
||||||
|
terms_matching_strategy,
|
||||||
|
&mut words,
|
||||||
|
&mut canonicalization_actions,
|
||||||
|
&mut canonical_criteria,
|
||||||
|
&mut exactness,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Criterion::Sort => {
|
||||||
|
if let Some(previous_index) = sort {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
|
||||||
|
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
});
|
||||||
|
} else if let Some(sort_query) = sort_query {
|
||||||
|
sort = Some(criterion_index);
|
||||||
|
canonical_criteria.push(criterion);
|
||||||
|
canonical_sort = Some(canonicalize_sort(
|
||||||
|
&mut sorted_fields,
|
||||||
|
sort_query.as_slice(),
|
||||||
|
criterion_index,
|
||||||
|
&mut canonicalization_actions,
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
|
||||||
|
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
|
||||||
|
.push(CanonicalizationAction::RemovedDuplicate {
|
||||||
|
earlier_occurrence: *entry.get(),
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
}),
|
||||||
|
std::collections::hash_map::Entry::Vacant(entry) => {
|
||||||
|
entry.insert(RankingRuleSource::Criterion(criterion_index));
|
||||||
|
canonical_criteria.push(criterion)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(canonical_criteria, canonical_sort, canonicalization_actions)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_compatible_with(&self, previous: &Self) -> Result<(), CompatibilityError> {
|
||||||
|
for (current, previous) in self.coalesce_iterator().zip(previous.coalesce_iterator()) {
|
||||||
|
if current.kind != previous.kind {
|
||||||
|
return Err(CompatibilityError { current, previous });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn constraint_count(&self) -> usize {
|
||||||
|
self.coalesce_iterator().count()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn coalesce_iterator(&self) -> impl Iterator<Item = RankingRule> + '_ {
|
||||||
|
self.canonical_criteria
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.flat_map(|(criterion_index, criterion)| {
|
||||||
|
RankingRule::from_criterion(criterion_index, criterion, &self.canonical_sort)
|
||||||
|
})
|
||||||
|
.coalesce(
|
||||||
|
|previous @ RankingRule { source: previous_source, kind: previous_kind },
|
||||||
|
current @ RankingRule { source, kind }| {
|
||||||
|
match (previous_kind, kind) {
|
||||||
|
(RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) => {
|
||||||
|
let merged_source = match (previous_source, source) {
|
||||||
|
(
|
||||||
|
RankingRuleSource::Criterion(previous),
|
||||||
|
RankingRuleSource::Criterion(current),
|
||||||
|
) => RankingRuleSource::CoalescedCriteria(previous, current),
|
||||||
|
(
|
||||||
|
RankingRuleSource::CoalescedCriteria(begin, _end),
|
||||||
|
RankingRuleSource::Criterion(current),
|
||||||
|
) => RankingRuleSource::CoalescedCriteria(begin, current),
|
||||||
|
(_previous, current) => current,
|
||||||
|
};
|
||||||
|
Ok(RankingRule { source: merged_source, kind })
|
||||||
|
}
|
||||||
|
_ => Err((previous, current)),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonicalization_notes(&self) -> String {
|
||||||
|
use CanonicalizationAction::*;
|
||||||
|
let mut notes = String::new();
|
||||||
|
for (index, action) in self.canonicalization_actions.iter().enumerate() {
|
||||||
|
let index = index + 1;
|
||||||
|
let _ = match action {
|
||||||
|
PrependedWords { prepended_index } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Prepended rule `words` before first relevancy rule `{}` at position {}",
|
||||||
|
prepended_index.rule_name(&self.source_criteria, &self.source_sort),
|
||||||
|
prepended_index.rule_position()
|
||||||
|
),
|
||||||
|
RemovedDuplicate { earlier_occurrence, removed_occurrence } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Removed duplicate rule `{}` at position {} as it already appears at position {}",
|
||||||
|
earlier_occurrence.rule_name(&self.source_criteria, &self.source_sort),
|
||||||
|
removed_occurrence.rule_position(),
|
||||||
|
earlier_occurrence.rule_position(),
|
||||||
|
),
|
||||||
|
RemovedWords { reason, removed_occurrence } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Removed rule `words` at position {} because {reason}",
|
||||||
|
removed_occurrence.rule_position()
|
||||||
|
),
|
||||||
|
RemovedPlaceholder { removed_occurrence } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Removed relevancy rule `{}` at position {} because the query is a placeholder search (`q`: \"\")",
|
||||||
|
removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
|
||||||
|
removed_occurrence.rule_position()
|
||||||
|
),
|
||||||
|
TruncatedVector { vector_rule, truncated_from } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Truncated relevancy rule `{}` at position {} and later rules because the query is a vector search and `vector` was inserted at position {}",
|
||||||
|
truncated_from.rule_name(&self.source_criteria, &self.source_sort),
|
||||||
|
truncated_from.rule_position(),
|
||||||
|
vector_rule.rule_position(),
|
||||||
|
),
|
||||||
|
RemovedVector { vector_rule, removed_occurrence } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Removed relevancy rule `{}` at position {} because the query is a vector search and `vector` was already inserted at position {}",
|
||||||
|
removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
|
||||||
|
removed_occurrence.rule_position(),
|
||||||
|
vector_rule.rule_position(),
|
||||||
|
),
|
||||||
|
RemovedSort { removed_occurrence } => writeln!(
|
||||||
|
&mut notes,
|
||||||
|
" {index}. Removed rule `sort` at position {} because `query.sort` is empty",
|
||||||
|
removed_occurrence.rule_position()
|
||||||
|
),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
notes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonicalize_sort(
|
||||||
|
sorted_fields: &mut HashMap<String, RankingRuleSource>,
|
||||||
|
sort_query: &[AscDesc],
|
||||||
|
criterion_index: usize,
|
||||||
|
canonicalization_actions: &mut Vec<CanonicalizationAction>,
|
||||||
|
) -> Vec<AscDesc> {
|
||||||
|
let mut geo_sorted = None;
|
||||||
|
let mut canonical_sort = Vec::new();
|
||||||
|
for (sort_index, asc_desc) in sort_query.iter().enumerate() {
|
||||||
|
let source = RankingRuleSource::Sort { criterion_index, sort_index };
|
||||||
|
let asc_desc = asc_desc.clone();
|
||||||
|
match asc_desc.clone() {
|
||||||
|
AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
|
||||||
|
match sorted_fields.entry(s) {
|
||||||
|
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
|
||||||
|
.push(CanonicalizationAction::RemovedDuplicate {
|
||||||
|
earlier_occurrence: *entry.get(),
|
||||||
|
removed_occurrence: source,
|
||||||
|
}),
|
||||||
|
std::collections::hash_map::Entry::Vacant(entry) => {
|
||||||
|
entry.insert(source);
|
||||||
|
canonical_sort.push(asc_desc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => match geo_sorted {
|
||||||
|
Some(earlier_sort_index) => {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
|
||||||
|
earlier_occurrence: RankingRuleSource::Sort {
|
||||||
|
criterion_index,
|
||||||
|
sort_index: earlier_sort_index,
|
||||||
|
},
|
||||||
|
removed_occurrence: source,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
geo_sorted = Some(sort_index);
|
||||||
|
canonical_sort.push(asc_desc);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
canonical_sort
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonicalize_criterion(
|
||||||
|
criterion: Criterion,
|
||||||
|
criterion_index: usize,
|
||||||
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
|
words: &mut Option<Option<usize>>,
|
||||||
|
canonicalization_actions: &mut Vec<CanonicalizationAction>,
|
||||||
|
canonical_criteria: &mut Vec<Criterion>,
|
||||||
|
rule: &mut Option<usize>,
|
||||||
|
) {
|
||||||
|
*words = match (terms_matching_strategy, words.take()) {
|
||||||
|
(TermsMatchingStrategy::All, words) => words,
|
||||||
|
(_, None) => {
|
||||||
|
// inject words
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::PrependedWords {
|
||||||
|
prepended_index: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
});
|
||||||
|
canonical_criteria.push(Criterion::Words);
|
||||||
|
Some(None)
|
||||||
|
}
|
||||||
|
(_, words) => words,
|
||||||
|
};
|
||||||
|
if let Some(previous_index) = *rule {
|
||||||
|
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
|
||||||
|
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
|
||||||
|
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
*rule = Some(criterion_index);
|
||||||
|
canonical_criteria.push(criterion)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The kind of ordering a ranking rule applies to documents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RankingRuleKind {
    /// One of the `words`, `typo`, `proximity`, `attribute` or `exactness` rules.
    Relevancy,
    /// Ascending sort on a field.
    AscendingSort,
    /// Descending sort on a field.
    DescendingSort,
    /// Ascending sort on `_geo`.
    AscendingGeoSort,
    /// Descending sort on `_geo`.
    DescendingGeoSort,
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub struct RankingRule {
|
||||||
|
source: RankingRuleSource,
|
||||||
|
kind: RankingRuleKind,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Where a ranking rule comes from.
#[derive(Debug, Clone, Copy)]
pub enum RankingRuleSource {
    /// The rule at this index in the ranking rules.
    Criterion(usize),
    /// The rules in the inclusive index range `(begin, end)` of the ranking rules.
    CoalescedCriteria(usize, usize),
    /// The entry at `sort_index` in `query.sort`, where the `sort` rule itself sits at
    /// `criterion_index` in the ranking rules.
    Sort { criterion_index: usize, sort_index: usize },
}
|
||||||
|
|
||||||
|
impl RankingRuleSource {
|
||||||
|
fn rule_name(&self, criteria: &[Criterion], sort: &Option<Vec<AscDesc>>) -> String {
|
||||||
|
match self {
|
||||||
|
RankingRuleSource::Criterion(criterion_index) => criteria
|
||||||
|
.get(*criterion_index)
|
||||||
|
.map(|c| c.to_string())
|
||||||
|
.unwrap_or_else(|| "unknown".into()),
|
||||||
|
RankingRuleSource::CoalescedCriteria(begin, end) => {
|
||||||
|
let rules: Vec<_> = criteria
|
||||||
|
.get(*begin..=*end)
|
||||||
|
.iter()
|
||||||
|
.flat_map(|c| c.iter())
|
||||||
|
.map(|c| c.to_string())
|
||||||
|
.collect();
|
||||||
|
rules.join(", ")
|
||||||
|
}
|
||||||
|
RankingRuleSource::Sort { criterion_index: _, sort_index } => {
|
||||||
|
match sort.as_deref().and_then(|sort| sort.get(*sort_index)) {
|
||||||
|
Some(sort) => match sort {
|
||||||
|
AscDesc::Asc(Member::Field(field_name)) => format!("{field_name}:asc"),
|
||||||
|
AscDesc::Desc(Member::Field(field_name)) => {
|
||||||
|
format!("{field_name}:desc")
|
||||||
|
}
|
||||||
|
AscDesc::Asc(Member::Geo(_)) => "_geo(..):asc".to_string(),
|
||||||
|
AscDesc::Desc(Member::Geo(_)) => "_geo(..):desc".to_string(),
|
||||||
|
},
|
||||||
|
None => "unknown".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rule_position(&self) -> String {
|
||||||
|
match self {
|
||||||
|
RankingRuleSource::Criterion(criterion_index) => {
|
||||||
|
format!("#{criterion_index} in ranking rules")
|
||||||
|
}
|
||||||
|
RankingRuleSource::CoalescedCriteria(begin, end) => {
|
||||||
|
format!("#{begin} to #{end} in ranking rules")
|
||||||
|
}
|
||||||
|
RankingRuleSource::Sort { criterion_index, sort_index } => format!(
|
||||||
|
"#{sort_index} in `query.sort` (as `sort` is #{criterion_index} in ranking rules)"
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RankingRule {
|
||||||
|
fn from_criterion<'a>(
|
||||||
|
criterion_index: usize,
|
||||||
|
criterion: &'a Criterion,
|
||||||
|
sort: &'a Option<Vec<AscDesc>>,
|
||||||
|
) -> impl Iterator<Item = Self> + 'a {
|
||||||
|
let kind = match criterion {
|
||||||
|
Criterion::Words
|
||||||
|
| Criterion::Typo
|
||||||
|
| Criterion::Proximity
|
||||||
|
| Criterion::Attribute
|
||||||
|
| Criterion::Exactness => RankingRuleKind::Relevancy,
|
||||||
|
Criterion::Asc(s) if s == "_geo" => RankingRuleKind::AscendingGeoSort,
|
||||||
|
|
||||||
|
Criterion::Asc(_) => RankingRuleKind::AscendingSort,
|
||||||
|
Criterion::Desc(s) if s == "_geo" => RankingRuleKind::DescendingGeoSort,
|
||||||
|
|
||||||
|
Criterion::Desc(_) => RankingRuleKind::DescendingSort,
|
||||||
|
Criterion::Sort => {
|
||||||
|
return either::Right(sort.iter().flatten().enumerate().map(
|
||||||
|
move |(rule_index, asc_desc)| {
|
||||||
|
Self::from_asc_desc(asc_desc, criterion_index, rule_index)
|
||||||
|
},
|
||||||
|
))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
either::Left(std::iter::once(Self {
|
||||||
|
source: RankingRuleSource::Criterion(criterion_index),
|
||||||
|
kind,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn from_asc_desc(asc_desc: &AscDesc, sort_index: usize, rule_index_in_sort: usize) -> Self {
|
||||||
|
let kind = match asc_desc {
|
||||||
|
AscDesc::Asc(Member::Field(_)) => RankingRuleKind::AscendingSort,
|
||||||
|
AscDesc::Desc(Member::Field(_)) => RankingRuleKind::DescendingSort,
|
||||||
|
AscDesc::Asc(Member::Geo(_)) => RankingRuleKind::AscendingGeoSort,
|
||||||
|
AscDesc::Desc(Member::Geo(_)) => RankingRuleKind::DescendingGeoSort,
|
||||||
|
};
|
||||||
|
Self {
|
||||||
|
source: RankingRuleSource::Sort {
|
||||||
|
criterion_index: sort_index,
|
||||||
|
sort_index: rule_index_in_sort,
|
||||||
|
},
|
||||||
|
kind,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_string(
|
||||||
|
&self,
|
||||||
|
canonical_criteria: &[Criterion],
|
||||||
|
canonical_sort: &Option<Vec<AscDesc>>,
|
||||||
|
query_index: usize,
|
||||||
|
index_uid: &str,
|
||||||
|
) -> String {
|
||||||
|
let kind = match self.kind {
|
||||||
|
RankingRuleKind::Relevancy => "relevancy",
|
||||||
|
RankingRuleKind::AscendingSort => "ascending sort",
|
||||||
|
RankingRuleKind::DescendingSort => "descending sort",
|
||||||
|
RankingRuleKind::AscendingGeoSort => "ascending geo sort",
|
||||||
|
RankingRuleKind::DescendingGeoSort => "descending geo sort",
|
||||||
|
};
|
||||||
|
let rules = self.fetch_from_source(canonical_criteria, canonical_sort);
|
||||||
|
|
||||||
|
let source = match self.source {
|
||||||
|
RankingRuleSource::Criterion(criterion_index) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
|
||||||
|
RankingRuleSource::CoalescedCriteria(begin, end) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{begin}..={end}]`"),
|
||||||
|
RankingRuleSource::Sort { criterion_index, sort_index } => format!("`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
|
||||||
|
};
|
||||||
|
|
||||||
|
format!("{source}: {kind} {rules}")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fetch_from_source(
|
||||||
|
&self,
|
||||||
|
canonical_criteria: &[Criterion],
|
||||||
|
canonical_sort: &Option<Vec<AscDesc>>,
|
||||||
|
) -> String {
|
||||||
|
let rule_name = match self.source {
|
||||||
|
RankingRuleSource::Criterion(index) => {
|
||||||
|
canonical_criteria.get(index).map(|criterion| criterion.to_string())
|
||||||
|
}
|
||||||
|
RankingRuleSource::CoalescedCriteria(begin, end) => {
|
||||||
|
let rules: Vec<String> = canonical_criteria
|
||||||
|
.get(begin..=end)
|
||||||
|
.into_iter()
|
||||||
|
.flat_map(|criteria| criteria.iter())
|
||||||
|
.map(|criterion| criterion.to_string())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
(!rules.is_empty()).then_some(rules.join(", "))
|
||||||
|
}
|
||||||
|
RankingRuleSource::Sort { criterion_index: _, sort_index } => canonical_sort
|
||||||
|
.as_deref()
|
||||||
|
.and_then(|canonical_sort| canonical_sort.get(sort_index))
|
||||||
|
.and_then(|asc_desc: &AscDesc| match asc_desc {
|
||||||
|
AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
|
||||||
|
Some(format!("on field `{s}`"))
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
let rule_name = rule_name.unwrap_or_else(|| "default".into());
|
||||||
|
|
||||||
|
format!("rule(s) {rule_name}")
|
||||||
|
}
|
||||||
|
}
|
@ -310,6 +310,23 @@ macro_rules! compute_authorized_single_search {
|
|||||||
tenant_token,
|
tenant_token,
|
||||||
key_content
|
key_content
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// federated
|
||||||
|
let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales", "filter": $filter}]})).await;
|
||||||
|
assert_eq!(
|
||||||
|
200, code,
|
||||||
|
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
||||||
|
response, tenant_token, key_content
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
// same count as the search is federated over a single query
|
||||||
|
$expected_count,
|
||||||
|
response["hits"].as_array().unwrap().len(),
|
||||||
|
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
||||||
|
response,
|
||||||
|
tenant_token,
|
||||||
|
key_content
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -375,6 +392,25 @@ macro_rules! compute_authorized_multiple_search {
|
|||||||
tenant_token,
|
tenant_token,
|
||||||
key_content
|
key_content
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [
|
||||||
|
{"indexUid": "sales", "filter": $filter1},
|
||||||
|
{"indexUid": "products", "filter": $filter2},
|
||||||
|
]})).await;
|
||||||
|
assert_eq!(
|
||||||
|
code, 200,
|
||||||
|
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
||||||
|
response, tenant_token, key_content
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
response["hits"].as_array().unwrap().len(),
|
||||||
|
// sum of counts as the search is federated across two queries in different indexes
|
||||||
|
$expected_count1 + $expected_count2,
|
||||||
|
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
||||||
|
response,
|
||||||
|
tenant_token,
|
||||||
|
key_content
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -433,6 +469,24 @@ macro_rules! compute_forbidden_single_search {
|
|||||||
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
||||||
response, tenant_token, key_content
|
response, tenant_token, key_content
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales"}]})).await;
|
||||||
|
if failed_query_index.is_none() && !response["message"].is_null() {
|
||||||
|
response["message"] = serde_json::json!(null);
|
||||||
|
}
|
||||||
|
assert_eq!(
|
||||||
|
response,
|
||||||
|
invalid_response(failed_query_index),
|
||||||
|
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
||||||
|
response,
|
||||||
|
tenant_token,
|
||||||
|
key_content
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
code, 403,
|
||||||
|
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
||||||
|
response, tenant_token, key_content
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -494,6 +548,27 @@ macro_rules! compute_forbidden_multiple_search {
|
|||||||
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
||||||
response, tenant_token, key_content
|
response, tenant_token, key_content
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [
|
||||||
|
{"indexUid": "sales"},
|
||||||
|
{"indexUid": "products"},
|
||||||
|
]})).await;
|
||||||
|
if failed_query_index.is_none() && !response["message"].is_null() {
|
||||||
|
response["message"] = serde_json::json!(null);
|
||||||
|
}
|
||||||
|
assert_eq!(
|
||||||
|
response,
|
||||||
|
invalid_response(failed_query_index),
|
||||||
|
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
||||||
|
response,
|
||||||
|
tenant_token,
|
||||||
|
key_content
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
code, 403,
|
||||||
|
"{} using tenant_token: {:?} generated with parent_key: {:?}",
|
||||||
|
response, tenant_token, key_content
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -132,6 +132,79 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
|||||||
])
|
])
|
||||||
});
|
});
|
||||||
|
|
||||||
|
static FRUITS_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||||
|
json!([
|
||||||
|
{
|
||||||
|
"name": "Exclusive sale: green apple",
|
||||||
|
"id": "green-apple-boosted",
|
||||||
|
"BOOST": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Pear",
|
||||||
|
"id": "pear",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Red apple gala",
|
||||||
|
"id": "red-apple-gala",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Exclusive sale: Red Tomato",
|
||||||
|
"id": "red-tomatoes-boosted",
|
||||||
|
"BOOST": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Exclusive sale: Red delicious apple",
|
||||||
|
"id": "red-delicious-boosted",
|
||||||
|
"BOOST": true,
|
||||||
|
}
|
||||||
|
])
|
||||||
|
});
|
||||||
|
|
||||||
|
static VECTOR_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||||
|
json!([
|
||||||
|
{
|
||||||
|
"id": "A",
|
||||||
|
"description": "the dog barks at the cat",
|
||||||
|
"_vectors": {
|
||||||
|
// dimensions [canine, feline, young]
|
||||||
|
"animal": [0.9, 0.8, 0.05],
|
||||||
|
// dimensions [negative/positive, energy]
|
||||||
|
"sentiment": [-0.1, 0.55]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "B",
|
||||||
|
"description": "the kitten scratched the beagle",
|
||||||
|
"_vectors": {
|
||||||
|
// dimensions [canine, feline, young]
|
||||||
|
"animal": [0.8, 0.9, 0.5],
|
||||||
|
// dimensions [negative/positive, energy]
|
||||||
|
"sentiment": [-0.2, 0.65]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "C",
|
||||||
|
"description": "the dog had to stay alone today",
|
||||||
|
"_vectors": {
|
||||||
|
// dimensions [canine, feline, young]
|
||||||
|
"animal": [0.85, 0.02, 0.1],
|
||||||
|
// dimensions [negative/positive, energy]
|
||||||
|
"sentiment": [-1.0, 0.1]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "D",
|
||||||
|
"description": "the little boy pets the puppy",
|
||||||
|
"_vectors": {
|
||||||
|
// dimensions [canine, feline, young]
|
||||||
|
"animal": [0.8, 0.09, 0.8],
|
||||||
|
// dimensions [negative/positive, energy]
|
||||||
|
"sentiment": [0.8, 0.3]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
])
|
||||||
|
});
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn simple_placeholder_search() {
|
async fn simple_placeholder_search() {
|
||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -425,9 +425,6 @@ pub struct Sort {
|
|||||||
|
|
||||||
impl PartialOrd for Sort {
|
impl PartialOrd for Sort {
|
||||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
if self.field_name != other.field_name {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
if self.ascending != other.ascending {
|
if self.ascending != other.ascending {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
@ -466,9 +463,6 @@ pub struct GeoSort {
|
|||||||
|
|
||||||
impl PartialOrd for GeoSort {
|
impl PartialOrd for GeoSort {
|
||||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
if self.target_point != other.target_point {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
if self.ascending != other.ascending {
|
if self.ascending != other.ascending {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,7 @@ impl<'m> MatcherBuilder<'m> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build<'t>(&'m self, text: &'t str) -> Matcher<'t, 'm> {
|
pub fn build<'t>(&self, text: &'t str) -> Matcher<'t, 'm, '_> {
|
||||||
let crop_marker = match &self.crop_marker {
|
let crop_marker = match &self.crop_marker {
|
||||||
Some(marker) => marker.as_str(),
|
Some(marker) => marker.as_str(),
|
||||||
None => DEFAULT_CROP_MARKER,
|
None => DEFAULT_CROP_MARKER,
|
||||||
@ -105,19 +105,19 @@ pub struct MatchBounds {
|
|||||||
pub length: usize,
|
pub length: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Structure used to analize a string, compute words that match,
|
/// Structure used to analyze a string, compute words that match,
|
||||||
/// and format the source string, returning a highlighted and cropped sub-string.
|
/// and format the source string, returning a highlighted and cropped sub-string.
|
||||||
pub struct Matcher<'t, 'm> {
|
pub struct Matcher<'t, 'tokenizer, 'b> {
|
||||||
text: &'t str,
|
text: &'t str,
|
||||||
matching_words: &'m MatchingWords,
|
matching_words: &'b MatchingWords,
|
||||||
tokenizer: &'m Tokenizer<'m>,
|
tokenizer: &'b Tokenizer<'tokenizer>,
|
||||||
crop_marker: &'m str,
|
crop_marker: &'b str,
|
||||||
highlight_prefix: &'m str,
|
highlight_prefix: &'b str,
|
||||||
highlight_suffix: &'m str,
|
highlight_suffix: &'b str,
|
||||||
matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
|
matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t> Matcher<'t, '_> {
|
impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> {
|
||||||
/// Iterates over tokens and save any of them that matches the query.
|
/// Iterates over tokens and save any of them that matches the query.
|
||||||
fn compute_matches(&mut self) -> &mut Self {
|
fn compute_matches(&mut self) -> &mut Self {
|
||||||
/// some words are counted as matches only if they are close together and in the good order,
|
/// some words are counted as matches only if they are close together and in the good order,
|
||||||
|
Loading…
Reference in New Issue
Block a user