REvert metadata creation when computing filters at search time

This commit is contained in:
ManyTheFish 2025-03-10 14:29:44 +01:00
parent 689e69d6d2
commit c9a4c6ed96
8 changed files with 96 additions and 70 deletions

View File

@ -10,7 +10,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes patterns are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
5 {uid: 5, batch_uid: 2, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
----------------------------------------------------------------------
### Status:

View File

@ -636,7 +636,7 @@ async fn delete_document_by_filter() {
"originalFilter": "\"catto = jorts\""
},
"error": {
"message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts",
"message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attributes patterns are: `id`, `title`.\n1:6 catto = jorts",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -738,7 +738,7 @@ async fn fetch_document_by_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
"message": "Attribute `doggo` is not filterable. Available filterable attributes patterns are: `color`.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"

View File

@ -716,7 +716,7 @@ async fn filter_invalid_attribute_array() {
|response, code| {
snapshot!(response, @r###"
{
"message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
"message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes patterns are: `title`.\n1:5 many = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -737,7 +737,7 @@ async fn filter_invalid_attribute_string() {
|response, code| {
snapshot!(response, @r###"
{
"message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
"message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes patterns are: `title`.\n1:5 many = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"

View File

@ -720,7 +720,7 @@ async fn test_filterable_attributes_priority() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attributes are: `doggos.name`.\n1:11 doggos.age > 2",
"message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attributes patterns are: `doggos.*`.\n1:11 doggos.age > 2",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -746,7 +746,7 @@ async fn test_filterable_attributes_priority() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Attribute `doggos` is not filterable. Available filterable attributes are: `doggos.age`, `doggos.name`.\n1:7 doggos EXISTS",
"message": "Index `test`: Attribute `doggos` is not filterable. Available filterable attributes patterns are: `doggos.*`.\n1:7 doggos EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"

View File

@ -1753,7 +1753,7 @@ async fn test_nested_fields() {
assert_eq!(code, 400, "{}", response);
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Attribute `nested` is not filterable. Available filterable attributes are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = array",
"message": "Index `test`: Attribute `nested` is not filterable. Available filterable attributes patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = array",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -1772,7 +1772,7 @@ async fn test_nested_fields() {
assert_eq!(code, 400, "{}", response);
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Attribute `nested` is not filterable. Available filterable attributes are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = \"I lied\"",
"message": "Index `test`: Attribute `nested` is not filterable. Available filterable attributes patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = \"I lied\"",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"

View File

@ -138,12 +138,12 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidFilter(String),
#[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)]
InvalidFilterExpression(&'static [&'static str], Value),
#[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` {} in `filterableAttributes`", allowed_operators.join(", "), rule_index.map_or("did not match any rule".to_string(), |rule_index| format!("matched rule #{rule_index}")))]
#[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` {} in `filterableAttributes`", allowed_operators.join(", "), format!("matched rule #{rule_index}"))]
FilterOperatorNotAllowed {
field: String,
allowed_operators: Vec<String>,
operator: String,
rule_index: Option<usize>,
rule_index: usize,
},
#[error("Attribute `{}` is not sortable. {}",
.field,

View File

@ -249,6 +249,38 @@ pub fn filtered_matching_field_names<'fim>(
result
}
/// Match a field against a set of filterable attributes rules.
///
/// This function will return the set of patterns that match the given filter.
///
/// # Arguments
///
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
/// * `filter` - The filter function to apply to the filterable attributes rules.
pub fn filtered_matching_patterns<'patterns>(
filterable_attributes: &'patterns [FilterableAttributesRule],
filter: &impl Fn(FilterableAttributesFeatures) -> bool,
) -> BTreeSet<&'patterns str> {
let mut result = BTreeSet::new();
for rule in filterable_attributes {
if filter(rule.features()) {
match rule {
FilterableAttributesRule::Field(field) => {
result.insert(field.as_str());
}
FilterableAttributesRule::Pattern(patterns) => {
patterns.attribute_patterns.patterns.iter().for_each(|pattern| {
result.insert(pattern);
});
}
}
}
}
result
}
/// Match a field against a set of filterable attributes rules.
///
/// This function will return the features that match the given field name.

View File

@ -12,15 +12,17 @@ use serde_json::Value;
use super::facet_range_search;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::{Error, UserError};
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable};
use crate::filterable_attributes_rules::{
filtered_matching_patterns, is_field_filterable, matching_features,
};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{
distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures,
FilterableAttributesRule, Index, InternalError, Result, SerializationError,
distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap,
FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result,
SerializationError,
};
/// The maximum number of filters the filter AST can process.
@ -62,7 +64,7 @@ impl Display for BadGeoError {
#[derive(Debug)]
enum FilterError<'a> {
AttributeNotFilterable { attribute: &'a str, filterable_fields: BTreeSet<&'a str> },
AttributeNotFilterable { attribute: &'a str, filterable_patterns: BTreeSet<&'a str> },
ParseGeoError(BadGeoError),
TooDeep,
}
@ -77,14 +79,14 @@ impl<'a> From<BadGeoError> for FilterError<'a> {
impl<'a> Display for FilterError<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::AttributeNotFilterable { attribute, filterable_fields } => {
Self::AttributeNotFilterable { attribute, filterable_patterns } => {
write!(f, "Attribute `{attribute}` is not filterable.")?;
if filterable_fields.is_empty() {
if filterable_patterns.is_empty() {
write!(f, " This index does not have configured filterable attributes.")
} else {
write!(f, " Available filterable attributes are: ")?;
write!(f, " Available filterable attributes patterns are: ")?;
let mut filterables_list =
filterable_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>();
filterable_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>();
filterables_list.sort_unstable();
for (idx, filterable) in filterables_list.iter().enumerate() {
write!(f, "`{filterable}`")?;
@ -232,27 +234,19 @@ impl<'a> Filter<'a> {
impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?;
let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
let attribute = fid.value();
if let Some((_, metadata)) = fields_ids_map.id_with_metadata(fid.value()) {
if metadata
.filterable_attributes_features(&filterable_attributes_rules)
.is_filterable()
{
continue;
}
} else if is_field_filterable(attribute, &filterable_attributes_rules) {
if is_field_filterable(attribute, &filterable_attributes_rules) {
continue;
}
// If the field is not filterable, return an error
return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute,
filterable_fields: filtered_matching_field_names(
filterable_patterns: filtered_matching_patterns(
&filterable_attributes_rules,
&fields_ids_map,
&|features| features.is_filterable(),
),
}))?;
@ -268,7 +262,7 @@ impl<'a> Filter<'a> {
universe: Option<&RoaringBitmap>,
operator: &Condition<'a>,
features: &FilterableAttributesFeatures,
rule_index: Option<usize>,
rule_index: usize,
) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
let strings_db = index.facet_id_string_docids;
@ -463,7 +457,7 @@ impl<'a> Filter<'a> {
&self,
rtxn: &heed::RoTxn<'_>,
index: &Index,
field_ids_map: &FieldIdMapWithMetadata,
field_ids_map: &FieldsIdsMap,
filterable_attribute_rules: &[FilterableAttributesRule],
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
@ -489,34 +483,36 @@ impl<'a> Filter<'a> {
}
}
}
FilterCondition::In { fid, els } => match field_ids_map.id_with_metadata(fid.value()) {
Some((fid, metadata)) => {
let (rule_index, features) = metadata
.filterable_attributes_features_with_rule_index(filterable_attribute_rules);
FilterCondition::In { fid, els } => {
let Some(field_id) = field_ids_map.id(fid.value()) else {
return Ok(RoaringBitmap::new());
};
let Some((rule_index, features)) =
matching_features(fid.value(), filterable_attribute_rules)
else {
return Ok(RoaringBitmap::new());
};
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| {
Self::evaluate_operator(
rtxn, index, fid, universe, &op, &features, rule_index,
rtxn, index, field_id, universe, &op, &features, rule_index,
)
})
.union()
}
None => Ok(RoaringBitmap::new()),
},
FilterCondition::Condition { fid, op } => {
match field_ids_map.id_with_metadata(fid.value()) {
Some((fid, metadata)) => {
let (rule_index, features) = metadata
.filterable_attributes_features_with_rule_index(
filterable_attribute_rules,
);
Self::evaluate_operator(
rtxn, index, fid, universe, op, &features, rule_index,
)
}
None => Ok(RoaringBitmap::new()),
}
let Some(field_id) = field_ids_map.id(fid.value()) else {
return Ok(RoaringBitmap::new());
};
let Some((rule_index, features)) =
matching_features(fid.value(), filterable_attribute_rules)
else {
return Ok(RoaringBitmap::new());
};
Self::evaluate_operator(rtxn, index, field_id, universe, op, &features, rule_index)
}
FilterCondition::Or(subfilters) => subfilters
.iter()
@ -595,9 +591,8 @@ impl<'a> Filter<'a> {
} else {
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
filterable_fields: filtered_matching_field_names(
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&field_ids_map,
&|features| features.is_filterable(),
),
}))?
@ -736,9 +731,8 @@ impl<'a> Filter<'a> {
Err(top_right_point[0].as_external_error(
FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
filterable_fields: filtered_matching_field_names(
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&field_ids_map,
&|features| features.is_filterable(),
),
},
@ -755,7 +749,7 @@ fn generate_filter_error(
field_id: FieldId,
operator: &Condition<'_>,
features: &FilterableAttributesFeatures,
rule_index: Option<usize>,
rule_index: usize,
) -> Error {
match index.fields_ids_map(rtxn) {
Ok(fields_ids_map) => {
@ -917,42 +911,42 @@ mod tests {
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
Attribute `_geo` is not filterable. Available filterable attributes patterns are: `title`.
12:16 _geoRadius(-100, 150, 10)
"###);
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
Attribute `_geo` is not filterable. Available filterable attributes patterns are: `title`.
18:20 _geoBoundingBox([42, 150], [30, 10])
"###);
let filter = Filter::from_str("name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. This index does not have configured filterable attributes.
Attribute `name` is not filterable. Available filterable attributes patterns are: `title`.
1:5 name = 12
"###);
let filter = Filter::from_str("title = \"test\" AND name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. This index does not have configured filterable attributes.
Attribute `name` is not filterable. Available filterable attributes patterns are: `title`.
20:24 title = "test" AND name = 12
"###);
let filter = Filter::from_str("title = \"test\" AND name IN [12]").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. This index does not have configured filterable attributes.
Attribute `name` is not filterable. Available filterable attributes patterns are: `title`.
20:24 title = "test" AND name IN [12]
"###);
let filter = Filter::from_str("title = \"test\" AND name != 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. This index does not have configured filterable attributes.
Attribute `name` is not filterable. Available filterable attributes patterns are: `title`.
20:24 title = "test" AND name != 12
"###);
}