Refactor search and facet-search

**Changes:**
The search filters are now using the FilterableAttributesFeatures from the FilterableAttributesRules to know if a field is filterable.
Moreover, the FilterableAttributesFeatures is more precise and an error will be returned if an operator is used on a field that doesn't have the related feature.
The facet-search is now checking if the feature is allowed in the FilterableAttributesFeatures and an error will be returned if the field doesn't have the related feature.

**Impact:**
- facet-search is now relying on AttributePatterns to match the locales
- search using filters is now relying on FilterableAttributesFeatures
- distinct attribute is now relying on FilterableAttributesRules
This commit is contained in:
ManyTheFish 2025-03-03 10:25:32 +01:00
parent 0200c65ebf
commit 967033579d
9 changed files with 365 additions and 154 deletions

View file

@ -1,4 +1,4 @@
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
@ -12,13 +12,16 @@ use serde_json::Value;
use super::facet_range_search;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::{Error, UserError};
use crate::filterable_attributes_rules::{
filtered_matching_field_names, is_field_filterable, matching_features,
};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{
distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result,
SerializationError,
distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures,
FilterableAttributesRule, Index, InternalError, Result, SerializationError,
};
/// The maximum number of filters the filter AST can process.
@ -60,7 +63,7 @@ impl Display for BadGeoError {
#[derive(Debug)]
enum FilterError<'a> {
AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
AttributeNotFilterable { attribute: &'a str, filterable_fields: BTreeSet<&'a str> },
ParseGeoError(BadGeoError),
TooDeep,
}
@ -230,17 +233,22 @@ impl<'a> Filter<'a> {
impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let filterable_fields = index.filterable_fields(rtxn)?;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
let attribute = fid.value();
if !crate::is_faceted(attribute, &filterable_fields) {
if !is_field_filterable(attribute, &filterable_attributes_rules) {
let fields_ids_map = index.fields_ids_map(rtxn)?;
return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute,
filterable_fields,
filterable_fields: filtered_matching_field_names(
&filterable_attributes_rules,
&fields_ids_map,
&|features| features.is_filterable(),
),
}))?;
}
}
self.inner_evaluate(rtxn, index, &filterable_fields, None)
self.inner_evaluate(rtxn, index, &filterable_attributes_rules, None)
}
fn evaluate_operator(
@ -249,6 +257,7 @@ impl<'a> Filter<'a> {
field_id: FieldId,
universe: Option<&RoaringBitmap>,
operator: &Condition<'a>,
features: &FilterableAttributesFeatures,
) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
let strings_db = index.facet_id_string_docids;
@ -258,6 +267,28 @@ impl<'a> Filter<'a> {
// field id and the level.
let (left, right) = match operator {
// return an error if the filter is not allowed for this field
Condition::GreaterThan(_)
| Condition::GreaterThanOrEqual(_)
| Condition::LowerThan(_)
| Condition::LowerThanOrEqual(_)
| Condition::Between { .. }
if !features.is_filterable_comparison() =>
{
return Err(generate_filter_error(rtxn, index, field_id, operator, features));
}
Condition::Empty if !features.is_filterable_empty() => {
return Err(generate_filter_error(rtxn, index, field_id, operator, features));
}
Condition::Null if !features.is_filterable_null() => {
return Err(generate_filter_error(rtxn, index, field_id, operator, features));
}
Condition::Exists if !features.is_filterable_exists() => {
return Err(generate_filter_error(rtxn, index, field_id, operator, features));
}
Condition::Equal(_) | Condition::NotEqual(_) if !features.is_filterable_equality() => {
return Err(generate_filter_error(rtxn, index, field_id, operator, features));
}
Condition::GreaterThan(val) => {
(Excluded(val.parse_finite_float()?), Included(f64::MAX))
}
@ -307,7 +338,8 @@ impl<'a> Filter<'a> {
}
Condition::NotEqual(val) => {
let operator = Condition::Equal(val.clone());
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
let docids =
Self::evaluate_operator(rtxn, index, field_id, None, &operator, features)?;
let all_ids = index.documents_ids(rtxn)?;
return Ok(all_ids - docids);
}
@ -409,7 +441,7 @@ impl<'a> Filter<'a> {
&self,
rtxn: &heed::RoTxn<'_>,
index: &Index,
filterable_fields: &HashSet<String>,
filterable_fields: &[FilterableAttributesRule],
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
if universe.map_or(false, |u| u.is_empty()) {
@ -434,36 +466,56 @@ impl<'a> Filter<'a> {
}
}
FilterCondition::In { fid, els } => {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
.union()
} else {
Ok(RoaringBitmap::new())
match matching_features(fid.value(), filterable_fields) {
Some(features) if features.is_filterable() => {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| {
Self::evaluate_operator(
rtxn, index, fid, universe, &op, &features,
)
})
.union()
} else {
Ok(RoaringBitmap::new())
}
}
_ => {
let field_ids_map = index.fields_ids_map(rtxn)?;
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filtered_matching_field_names(
filterable_fields,
&field_ids_map,
&|features| features.is_filterable(),
),
}))?
}
} else {
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filterable_fields.clone(),
}))?
}
}
FilterCondition::Condition { fid, op } => {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
Self::evaluate_operator(rtxn, index, fid, universe, op)
} else {
Ok(RoaringBitmap::new())
match matching_features(fid.value(), filterable_fields) {
Some(features) if features.is_filterable() => {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
Self::evaluate_operator(rtxn, index, fid, universe, op, &features)
} else {
Ok(RoaringBitmap::new())
}
}
_ => {
let field_ids_map = index.fields_ids_map(rtxn)?;
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filtered_matching_field_names(
filterable_fields,
&field_ids_map,
&|features| features.is_filterable(),
),
}))?
}
} else {
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filterable_fields.clone(),
}))?
}
}
FilterCondition::Or(subfilters) => subfilters
@ -502,7 +554,7 @@ impl<'a> Filter<'a> {
}
}
FilterCondition::GeoLowerThan { point, radius } => {
if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) {
if index.is_geo_filtering_enabled(rtxn)? {
let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
if !(-90.0..=90.0).contains(&base_point[0]) {
@ -530,14 +582,19 @@ impl<'a> Filter<'a> {
Ok(result)
} else {
let field_ids_map = index.fields_ids_map(rtxn)?;
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
filterable_fields: filterable_fields.clone(),
filterable_fields: filtered_matching_field_names(
filterable_fields,
&field_ids_map,
&|features| features.is_filterable(),
),
}))?
}
}
FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) {
if index.is_geo_filtering_enabled(rtxn)? {
let top_right: [f64; 2] = [
top_right_point[0].parse_finite_float()?,
top_right_point[1].parse_finite_float()?,
@ -662,10 +719,15 @@ impl<'a> Filter<'a> {
Ok(selected_lat & selected_lng)
} else {
let field_ids_map = index.fields_ids_map(rtxn)?;
Err(top_right_point[0].as_external_error(
FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
filterable_fields: filterable_fields.clone(),
filterable_fields: filtered_matching_field_names(
filterable_fields,
&field_ids_map,
&|features| features.is_filterable(),
),
},
))?
}
@ -674,6 +736,26 @@ impl<'a> Filter<'a> {
}
}
fn generate_filter_error(
rtxn: &heed::RoTxn<'_>,
index: &Index,
field_id: FieldId,
operator: &Condition<'_>,
features: &FilterableAttributesFeatures,
) -> Error {
match index.fields_ids_map(rtxn) {
Ok(fields_ids_map) => {
let field = fields_ids_map.name(field_id).unwrap_or_default();
Error::UserError(UserError::FilterOperatorNotAllowed {
field: field.to_string(),
allowed_operators: features.allowed_filter_operators(),
operator: operator.operator().to_string(),
})
}
Err(e) => e.into(),
}
}
impl<'a> From<FilterCondition<'a>> for Filter<'a> {
fn from(fc: FilterCondition<'a>) -> Self {
Self { condition: fc }
@ -687,12 +769,12 @@ mod tests {
use big_s::S;
use either::Either;
use maplit::hashset;
use meili_snap::snapshot;
use roaring::RoaringBitmap;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::index::tests::TempIndex;
use crate::Filter;
use crate::{Filter, FilterableAttributesRule};
#[test]
fn empty_db() {
@ -700,7 +782,9 @@ mod tests {
//Set the filterable fields to be the channel.
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("PrIcE") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"PrIcE".to_string(),
)]);
})
.unwrap();
@ -784,27 +868,32 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
));
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
12:14 _geoRadius(42, 150, 10)
"###);
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
));
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
18:20 _geoBoundingBox([42, 150], [30, 10])
"###);
let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `dog` is not filterable. This index does not have configured filterable attributes."
));
snapshot!(error.to_string(), @r###"
Attribute `dog` is not filterable. This index does not have configured filterable attributes.
1:4 dog = "bernese mountain"
"###);
drop(rtxn);
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("title")]);
settings.set_filterable_fields(hashset! { S("title") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"title".to_string(),
)]);
})
.unwrap();
@ -812,39 +901,45 @@ mod tests {
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
12:16 _geoRadius(-100, 150, 10)
"###);
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
18:20 _geoBoundingBox([42, 150], [30, 10])
"###);
let filter = Filter::from_str("name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `name` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. This index does not have configured filterable attributes.
1:5 name = 12
"###);
let filter = Filter::from_str("title = \"test\" AND name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `name` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. This index does not have configured filterable attributes.
20:24 title = "test" AND name = 12
"###);
let filter = Filter::from_str("title = \"test\" AND name IN [12]").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `name` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. This index does not have configured filterable attributes.
20:24 title = "test" AND name IN [12]
"###);
let filter = Filter::from_str("title = \"test\" AND name != 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `name` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. This index does not have configured filterable attributes.
20:24 title = "test" AND name != 12
"###);
}
#[test]
@ -870,7 +965,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(S("monitor_diagonal")));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"monitor_diagonal".to_string(),
)]);
})
.unwrap();
@ -901,7 +998,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S(
RESERVED_GEO_FIELD_NAME,
))]);
})
.unwrap();
@ -948,7 +1047,10 @@ mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") });
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)),
FilterableAttributesRule::Field("price".to_string()),
]);
})
.unwrap();
@ -998,7 +1100,10 @@ mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") });
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)),
FilterableAttributesRule::Field("price".to_string()),
]);
})
.unwrap();
@ -1108,7 +1213,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S("price") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"price".to_string(),
)]);
})
.unwrap();
index
@ -1164,7 +1271,11 @@ mod tests {
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("id"), S("one"), S("two") });
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field("id".to_string()),
FilterableAttributesRule::Field("one".to_string()),
FilterableAttributesRule::Field("two".to_string()),
]);
})
.unwrap();

View file

@ -10,6 +10,9 @@ use roaring::RoaringBitmap;
use tracing::error;
use crate::error::UserError;
use crate::filterable_attributes_rules::{
filtered_matching_field_names, is_field_facet_searchable,
};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::search::build_dfa;
use crate::{DocumentId, FieldId, OrderBy, Result, Search};
@ -73,10 +76,16 @@ impl<'a> SearchForFacetValues<'a> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let filterable_fields = index.filterable_fields(rtxn)?;
if !filterable_fields.contains(&self.facet) {
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
if !is_field_facet_searchable(&self.facet, &filterable_attributes_rules) {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let matching_field_names = filtered_matching_field_names(
&filterable_attributes_rules,
&fields_ids_map,
&|features| features.is_facet_searchable(),
);
let (valid_fields, hidden_fields) =
index.remove_hidden_fields(rtxn, filterable_fields)?;
index.remove_hidden_fields(rtxn, matching_field_names)?;
return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),

View file

@ -9,6 +9,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult};
use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::Embedder;
use crate::{
@ -187,10 +188,19 @@ impl<'a> Search<'a> {
}
if let Some(distinct) = &self.distinct {
let filterable_fields = ctx.index.filterable_fields(ctx.txn)?;
if !crate::is_faceted(distinct, &filterable_fields) {
let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?;
// check if the distinct field is in the filterable fields
if !is_field_filterable(distinct, &filterable_fields) {
// if not, remove the hidden fields from the filterable fields to generate the error message
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
let matching_field_names = filtered_matching_field_names(
&filterable_fields,
&fields_ids_map,
&|features| features.is_filterable(),
);
let (valid_fields, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?;
ctx.index.remove_hidden_fields(ctx.txn, matching_field_names)?;
// and return the error
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
field: distinct.clone(),
valid_fields,