mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Implements the experimental contains filter operator
This commit is contained in:
parent
1582c7e788
commit
2af9481804
34 changed files with 484 additions and 122 deletions
|
@ -261,6 +261,7 @@ impl super::Analytics for SegmentAnalytics {
|
|||
#[derive(Debug, Clone, Serialize)]
|
||||
struct Infos {
|
||||
env: String,
|
||||
experimental_contains_filter: bool,
|
||||
experimental_enable_metrics: bool,
|
||||
experimental_search_queue_size: usize,
|
||||
experimental_logs_mode: LogMode,
|
||||
|
@ -303,6 +304,7 @@ impl From<Opt> for Infos {
|
|||
// Thus we must not insert `..` at the end.
|
||||
let Opt {
|
||||
db_path,
|
||||
experimental_contains_filter,
|
||||
experimental_enable_metrics,
|
||||
experimental_search_queue_size,
|
||||
experimental_logs_mode,
|
||||
|
@ -353,6 +355,7 @@ impl From<Opt> for Infos {
|
|||
// We consider information sensitive if it contains a path, an address, or a key.
|
||||
Self {
|
||||
env,
|
||||
experimental_contains_filter,
|
||||
experimental_enable_metrics,
|
||||
experimental_search_queue_size,
|
||||
experimental_logs_mode,
|
||||
|
|
|
@ -25,12 +25,12 @@ pub enum MeilisearchHttpError {
|
|||
DocumentNotFound(String),
|
||||
#[error("Sending an empty filter is forbidden.")]
|
||||
EmptyFilter,
|
||||
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
|
||||
InvalidExpression(&'static [&'static str], Value),
|
||||
#[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")]
|
||||
FederationOptionsInNonFederatedRequest(usize),
|
||||
#[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")]
|
||||
PaginationInFederatedQuery(usize, &'static str),
|
||||
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
|
||||
InvalidExpression(&'static [&'static str], Value),
|
||||
#[error("A {0} payload is missing.")]
|
||||
MissingPayload(PayloadType),
|
||||
#[error("Too many search requests running at the same time: {0}. Retry after 10s.")]
|
||||
|
|
|
@ -54,6 +54,7 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
|
|||
const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE";
|
||||
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
|
||||
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
|
||||
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
||||
|
@ -339,6 +340,13 @@ pub struct Opt {
|
|||
#[serde(default)]
|
||||
pub log_level: LogLevel,
|
||||
|
||||
/// Experimental contains filter feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/763>
|
||||
///
|
||||
/// Enables the experimental contains filter operator.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_CONTAINS_FILTER)]
|
||||
#[serde(default)]
|
||||
pub experimental_contains_filter: bool,
|
||||
|
||||
/// Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
|
||||
///
|
||||
/// Enables the Prometheus metrics on the `GET /metrics` endpoint.
|
||||
|
@ -483,6 +491,7 @@ impl Opt {
|
|||
config_file_path: _,
|
||||
#[cfg(feature = "analytics")]
|
||||
no_analytics,
|
||||
experimental_contains_filter,
|
||||
experimental_enable_metrics,
|
||||
experimental_search_queue_size,
|
||||
experimental_logs_mode,
|
||||
|
@ -540,6 +549,10 @@ impl Opt {
|
|||
|
||||
export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir);
|
||||
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_CONTAINS_FILTER,
|
||||
experimental_contains_filter.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_ENABLE_METRICS,
|
||||
experimental_enable_metrics.to_string(),
|
||||
|
@ -617,6 +630,7 @@ impl Opt {
|
|||
InstanceTogglableFeatures {
|
||||
metrics: self.experimental_enable_metrics,
|
||||
logs_route: self.experimental_enable_logs_route,
|
||||
contains_filter: self.experimental_contains_filter,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,6 +49,8 @@ pub struct RuntimeTogglableFeatures {
|
|||
pub logs_route: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub edit_documents_by_function: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub contains_filter: Option<bool>,
|
||||
}
|
||||
|
||||
async fn patch_features(
|
||||
|
@ -72,6 +74,7 @@ async fn patch_features(
|
|||
.0
|
||||
.edit_documents_by_function
|
||||
.unwrap_or(old_features.edit_documents_by_function),
|
||||
contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
|
@ -82,6 +85,7 @@ async fn patch_features(
|
|||
metrics,
|
||||
logs_route,
|
||||
edit_documents_by_function,
|
||||
contains_filter,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
|
@ -91,6 +95,7 @@ async fn patch_features(
|
|||
"metrics": metrics,
|
||||
"logs_route": logs_route,
|
||||
"edit_documents_by_function": edit_documents_by_function,
|
||||
"contains_filter": contains_filter,
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
|
|
|
@ -7,7 +7,7 @@ use bstr::ByteSlice as _;
|
|||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
use futures::StreamExt;
|
||||
use index_scheduler::{IndexScheduler, TaskId};
|
||||
use index_scheduler::{IndexScheduler, RoFeatures, TaskId};
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
||||
|
@ -260,8 +260,15 @@ fn documents_by_query(
|
|||
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?;
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let (total, documents) =
|
||||
retrieve_documents(&index, offset, limit, filter, fields, retrieve_vectors)?;
|
||||
let (total, documents) = retrieve_documents(
|
||||
&index,
|
||||
offset,
|
||||
limit,
|
||||
filter,
|
||||
fields,
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
)?;
|
||||
|
||||
let ret = PaginationView::new(offset, limit, total as usize, documents);
|
||||
|
||||
|
@ -565,11 +572,9 @@ pub async fn delete_documents_by_filter(
|
|||
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
|
||||
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
|| -> Result<_, ResponseError> {
|
||||
Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
|
||||
}()
|
||||
// and whatever was the error, the error code should always be an InvalidDocumentFilter
|
||||
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
|
||||
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
|
||||
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||
|
||||
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
|
@ -626,11 +631,12 @@ pub async fn edit_documents_by_function(
|
|||
|
||||
if let Some(ref filter) = filter {
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
|| -> Result<_, ResponseError> {
|
||||
Ok(crate::search::parse_filter(filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
|
||||
}()
|
||||
// and whatever was the error, the error code should always be an InvalidDocumentFilter
|
||||
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
|
||||
crate::search::parse_filter(
|
||||
filter,
|
||||
Code::InvalidDocumentFilter,
|
||||
index_scheduler.features(),
|
||||
)?
|
||||
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||
}
|
||||
let task = KindWithContent::DocumentEdition {
|
||||
index_uid,
|
||||
|
@ -736,12 +742,12 @@ fn retrieve_documents<S: AsRef<str>>(
|
|||
filter: Option<Value>,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
features: RoFeatures,
|
||||
) -> Result<(u64, Vec<Document>), ResponseError> {
|
||||
let rtxn = index.read_txn()?;
|
||||
let filter = &filter;
|
||||
let filter = if let Some(filter) = filter {
|
||||
parse_filter(filter)
|
||||
.map_err(|err| ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter))?
|
||||
parse_filter(filter, Code::InvalidDocumentFilter, features)?
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
|
|
@ -79,7 +79,14 @@ pub async fn search(
|
|||
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
|
||||
let _permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_facet_search(&index, search_query, facet_query, facet_name, search_kind)
|
||||
perform_facet_search(
|
||||
&index,
|
||||
search_query,
|
||||
facet_query,
|
||||
facet_name,
|
||||
search_kind,
|
||||
index_scheduler.features(),
|
||||
)
|
||||
})
|
||||
.await?;
|
||||
|
||||
|
|
|
@ -231,7 +231,7 @@ pub async fn search_with_url_query(
|
|||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||
let _permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vector)
|
||||
perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
|
||||
})
|
||||
.await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
|
@ -274,7 +274,7 @@ pub async fn search_with_post(
|
|||
|
||||
let _permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vectors)
|
||||
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
|
||||
})
|
||||
.await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
|
|
|
@ -106,7 +106,14 @@ async fn similar(
|
|||
SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
perform_similar(&index, query, embedder_name, embedder, retrieve_vectors)
|
||||
perform_similar(
|
||||
&index,
|
||||
query,
|
||||
embedder_name,
|
||||
embedder,
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
|
|
@ -112,6 +112,7 @@ pub async fn multi_search_with_post(
|
|||
));
|
||||
}
|
||||
|
||||
let features = index_scheduler.features();
|
||||
let index = index_scheduler
|
||||
.index(&index_uid)
|
||||
.map_err(|err| {
|
||||
|
@ -130,7 +131,7 @@ pub async fn multi_search_with_post(
|
|||
.with_index(query_index)?;
|
||||
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vector)
|
||||
perform_search(&index, query, search_kind, retrieve_vector, features)
|
||||
})
|
||||
.await
|
||||
.with_index(query_index)?;
|
||||
|
|
|
@ -473,8 +473,14 @@ pub fn perform_federated_search(
|
|||
None => TimeBudget::default(),
|
||||
};
|
||||
|
||||
let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
|
||||
prepare_search(&index, &rtxn, &query, &search_kind, time_budget)?;
|
||||
let (mut search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search(
|
||||
&index,
|
||||
&rtxn,
|
||||
&query,
|
||||
&search_kind,
|
||||
time_budget,
|
||||
index_scheduler.features(),
|
||||
)?;
|
||||
|
||||
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
|
||||
search.offset(0);
|
||||
|
|
|
@ -7,6 +7,7 @@ use std::time::{Duration, Instant};
|
|||
|
||||
use deserr::Deserr;
|
||||
use either::Either;
|
||||
use index_scheduler::RoFeatures;
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_auth::IndexSearchRules;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
|
@ -761,7 +762,8 @@ fn prepare_search<'t>(
|
|||
query: &'t SearchQuery,
|
||||
search_kind: &SearchKind,
|
||||
time_budget: TimeBudget,
|
||||
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
|
||||
features: RoFeatures,
|
||||
) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> {
|
||||
let mut search = index.search(rtxn);
|
||||
search.time_budget(time_budget);
|
||||
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
|
||||
|
@ -848,7 +850,7 @@ fn prepare_search<'t>(
|
|||
search.limit(limit);
|
||||
|
||||
if let Some(ref filter) = query.filter {
|
||||
if let Some(facets) = parse_filter(filter)? {
|
||||
if let Some(facets) = parse_filter(filter, Code::InvalidSearchFilter, features)? {
|
||||
search.filter(facets);
|
||||
}
|
||||
}
|
||||
|
@ -872,7 +874,8 @@ pub fn perform_search(
|
|||
query: SearchQuery,
|
||||
search_kind: SearchKind,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
) -> Result<SearchResult, MeilisearchHttpError> {
|
||||
features: RoFeatures,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
let time_budget = match index.search_cutoff(&rtxn)? {
|
||||
|
@ -881,7 +884,7 @@ pub fn perform_search(
|
|||
};
|
||||
|
||||
let (search, is_finite_pagination, max_total_hits, offset) =
|
||||
prepare_search(index, &rtxn, &query, &search_kind, time_budget)?;
|
||||
prepare_search(index, &rtxn, &query, &search_kind, time_budget, features)?;
|
||||
|
||||
let (
|
||||
milli::SearchResult {
|
||||
|
@ -1337,7 +1340,8 @@ pub fn perform_facet_search(
|
|||
facet_query: Option<String>,
|
||||
facet_name: String,
|
||||
search_kind: SearchKind,
|
||||
) -> Result<FacetSearchResult, MeilisearchHttpError> {
|
||||
features: RoFeatures,
|
||||
) -> Result<FacetSearchResult, ResponseError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
let time_budget = match index.search_cutoff(&rtxn)? {
|
||||
|
@ -1345,7 +1349,8 @@ pub fn perform_facet_search(
|
|||
None => TimeBudget::default(),
|
||||
};
|
||||
|
||||
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, &search_kind, time_budget)?;
|
||||
let (search, _, _, _) =
|
||||
prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?;
|
||||
let mut facet_search = SearchForFacetValues::new(
|
||||
facet_name,
|
||||
search,
|
||||
|
@ -1371,6 +1376,7 @@ pub fn perform_similar(
|
|||
embedder_name: String,
|
||||
embedder: Arc<Embedder>,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
features: RoFeatures,
|
||||
) -> Result<SimilarResult, ResponseError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
@ -1401,10 +1407,7 @@ pub fn perform_similar(
|
|||
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);
|
||||
|
||||
if let Some(ref filter) = query.filter {
|
||||
if let Some(facets) = parse_filter(filter)
|
||||
// inject InvalidSimilarFilter code
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::InvalidSimilarFilter))?
|
||||
{
|
||||
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
|
||||
similar.filter(facets);
|
||||
}
|
||||
}
|
||||
|
@ -1760,15 +1763,33 @@ fn format_value(
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
|
||||
match facets {
|
||||
Value::String(expr) => {
|
||||
let condition = Filter::from_str(expr)?;
|
||||
Ok(condition)
|
||||
pub(crate) fn parse_filter(
|
||||
facets: &Value,
|
||||
filter_parsing_error_code: Code,
|
||||
features: RoFeatures,
|
||||
) -> Result<Option<Filter>, ResponseError> {
|
||||
let filter = match facets {
|
||||
Value::String(expr) => Filter::from_str(expr).map_err(|e| e.into()),
|
||||
Value::Array(arr) => parse_filter_array(arr).map_err(|e| e.into()),
|
||||
v => Err(MeilisearchHttpError::InvalidExpression(&["String", "Array"], v.clone()).into()),
|
||||
};
|
||||
let filter = filter.map_err(|err: ResponseError| {
|
||||
ResponseError::from_msg(err.to_string(), filter_parsing_error_code)
|
||||
})?;
|
||||
|
||||
if let Some(ref filter) = filter {
|
||||
// If the contains operator is used while the contains filter feature is not enabled, error out
|
||||
if let Some((token, error)) =
|
||||
filter.use_contains_operator().zip(features.check_contains_filter().err())
|
||||
{
|
||||
return Err(ResponseError::from_msg(
|
||||
token.as_external_error(error).to_string(),
|
||||
Code::FeatureNotEnabled,
|
||||
));
|
||||
}
|
||||
Value::Array(arr) => parse_filter_array(arr),
|
||||
v => Err(MeilisearchHttpError::InvalidExpression(&["String", "Array"], v.clone())),
|
||||
}
|
||||
|
||||
Ok(filter)
|
||||
}
|
||||
|
||||
fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue