Revert metadata creation when computing facet search and distinct

This commit is contained in:
ManyTheFish 2025-03-10 15:27:17 +01:00
parent b12ffd1356
commit abef655849
6 changed files with 45 additions and 156 deletions

View File

@ -548,7 +548,7 @@ async fn facet_search_with_filterable_attributes_rules_errors() {
&json!({"facetName": "invalid", "facetQuery": "a"}), &json!({"facetName": "invalid", "facetQuery": "a"}),
|response, code| { |response, code| {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response["message"], @r###""Attribute `invalid` is not facet-searchable. Available facet-searchable attributes are: `genres`. To make it facet-searchable add it to the `filterableAttributes` index settings.""###); snapshot!(response["message"], @r###""Attribute `invalid` is not facet-searchable. Available facet-searchable attributes patterns are: `genres`. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
}, },
) )
.await; .await;

View File

@ -158,28 +158,32 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool }, InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
#[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}", #[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
.field, .field,
match .valid_fields.is_empty() { match .valid_patterns.is_empty() {
true => "This index does not have configured filterable attributes.".to_string(), true => "This index does not have configured filterable attributes.".to_string(),
false => format!("Available filterable attributes are: `{}{}`.", false => format!("Available filterable attributes patterns are: `{}{}`.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "), valid_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""), .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
), ),
} }
)] )]
InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool }, InvalidDistinctAttribute {
field: String,
valid_patterns: BTreeSet<String>,
hidden_fields: bool,
},
#[error("Attribute `{}` is not facet-searchable. {}", #[error("Attribute `{}` is not facet-searchable. {}",
.field, .field,
match .valid_fields.is_empty() { match .valid_patterns.is_empty() {
true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(), true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(),
false => format!("Available facet-searchable attributes are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.", false => format!("Available facet-searchable attributes patterns are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "), valid_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""), .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
), ),
} }
)] )]
InvalidFacetSearchFacetName { InvalidFacetSearchFacetName {
field: String, field: String,
valid_fields: BTreeSet<String>, valid_patterns: BTreeSet<String>,
hidden_fields: bool, hidden_fields: bool,
}, },
#[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.", #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",

View File

@ -6,7 +6,6 @@ use utoipa::ToSchema;
use crate::{ use crate::{
attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch}, attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch},
constants::RESERVED_GEO_FIELD_NAME, constants::RESERVED_GEO_FIELD_NAME,
fields_ids_map::metadata::FieldIdMapWithMetadata,
AttributePatterns, AttributePatterns,
}; };
@ -225,30 +224,6 @@ impl Default for FilterFeatures {
} }
} }
/// Match a field against a set of filterable attributes rules.
///
/// This function will return the set of field names that match the given filter.
///
/// # Arguments
///
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
/// * `fields_ids_map` - The map of field names to field ids.
/// * `filter` - The filter function to apply to the filterable attributes rules.
pub fn filtered_matching_field_names<'fim>(
filterable_attributes: &[FilterableAttributesRule],
fields_ids_map: &'fim FieldIdMapWithMetadata,
filter: &impl Fn(FilterableAttributesFeatures) -> bool,
) -> BTreeSet<&'fim str> {
let mut result = BTreeSet::new();
for (_, field_name, metadata) in fields_ids_map.iter() {
let features = metadata.filterable_attributes_features(filterable_attributes);
if filter(features) {
result.insert(field_name);
}
}
result
}
/// Match a field against a set of filterable attributes rules. /// Match a field against a set of filterable attributes rules.
/// ///
/// This function will return the set of patterns that match the given filter. /// This function will return the set of patterns that match the given filter.
@ -306,34 +281,6 @@ pub fn matching_features(
None None
} }
/// Check if a field is filterable calling the method `FilterableAttributesFeatures::is_filterable()`.
///
/// # Arguments
///
/// * `field_name` - The field name to check.
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
pub fn is_field_filterable(
field_name: &str,
filterable_attributes: &[FilterableAttributesRule],
) -> bool {
matching_features(field_name, filterable_attributes)
.map_or(false, |(_, features)| features.is_filterable())
}
/// Check if a field is facet searchable calling the method `FilterableAttributesFeatures::is_facet_searchable()`.
///
/// # Arguments
///
/// * `field_name` - The field name to check.
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
pub fn is_field_facet_searchable(
field_name: &str,
filterable_attributes: &[FilterableAttributesRule],
) -> bool {
matching_features(field_name, filterable_attributes)
.map_or(false, |(_, features)| features.is_facet_searchable())
}
/// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields. /// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields.
pub fn match_faceted_field( pub fn match_faceted_field(
field_name: &str, field_name: &str,

View File

@ -10,9 +10,7 @@ use roaring::RoaringBitmap;
use tracing::error; use tracing::error;
use crate::error::UserError; use crate::error::UserError;
use crate::filterable_attributes_rules::{ use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
filtered_matching_field_names, is_field_facet_searchable,
};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::search::build_dfa; use crate::search::build_dfa;
use crate::{DocumentId, FieldId, OrderBy, Result, Search}; use crate::{DocumentId, FieldId, OrderBy, Result, Search};
@ -77,37 +75,27 @@ impl<'a> SearchForFacetValues<'a> {
let rtxn = self.search_query.rtxn; let rtxn = self.search_query.rtxn;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?; if !matching_features(&self.facet, &filterable_attributes_rules)
let fid = match fields_ids_map.id_with_metadata(&self.facet) { .map_or(false, |(_, features)| features.is_facet_searchable())
Some((fid, metadata)) {
if metadata let matching_field_names =
.filterable_attributes_features(&filterable_attributes_rules) filtered_matching_patterns(&filterable_attributes_rules, &|features| {
.is_facet_searchable() => features.is_facet_searchable()
{ });
fid let (valid_patterns, hidden_fields) =
} index.remove_hidden_fields(rtxn, matching_field_names)?;
// we return an empty list of results when the attribute has been
// set as filterable but no document contains this field (yet).
None if is_field_facet_searchable(&self.facet, &filterable_attributes_rules) => {
return Ok(Vec::new());
}
// we return an error when the attribute is not facet searchable
_otherwise => {
let matching_field_names = filtered_matching_field_names(
&filterable_attributes_rules,
&fields_ids_map,
&|features| features.is_facet_searchable(),
);
let (valid_fields, hidden_fields) =
index.remove_hidden_fields(rtxn, matching_field_names)?;
return Err(UserError::InvalidFacetSearchFacetName { return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(), field: self.facet.clone(),
valid_fields, valid_patterns,
hidden_fields, hidden_fields,
}
.into());
} }
.into());
};
let fields_ids_map = index.fields_ids_map(rtxn)?;
let Some(fid) = fields_ids_map.id(&self.facet) else {
return Ok(Vec::new());
}; };
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {

View File

@ -9,7 +9,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult}; use self::new::{execute_vector_search, PartialSearchResult};
use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable}; use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::Embedder; use crate::vector::Embedder;
use crate::{ use crate::{
@ -190,20 +190,20 @@ impl<'a> Search<'a> {
if let Some(distinct) = &self.distinct { if let Some(distinct) = &self.distinct {
let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?; let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?;
// check if the distinct field is in the filterable fields // check if the distinct field is in the filterable fields
if !is_field_filterable(distinct, &filterable_fields) { if !matching_features(distinct, &filterable_fields)
.map_or(false, |(_, features)| features.is_filterable())
{
// if not, remove the hidden fields from the filterable fields to generate the error message // if not, remove the hidden fields from the filterable fields to generate the error message
let fields_ids_map = ctx.index.fields_ids_map_with_metadata(ctx.txn)?; let matching_patterns =
let matching_field_names = filtered_matching_field_names( filtered_matching_patterns(&filterable_fields, &|features| {
&filterable_fields, features.is_filterable()
&fields_ids_map, });
&|features| features.is_filterable(), let (valid_patterns, hidden_fields) =
); ctx.index.remove_hidden_fields(ctx.txn, matching_patterns)?;
let (valid_fields, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, matching_field_names)?;
// and return the error // and return the error
return Err(Error::UserError(UserError::InvalidDistinctAttribute { return Err(Error::UserError(UserError::InvalidDistinctAttribute {
field: distinct.clone(), field: distinct.clone(),
valid_fields, valid_patterns,
hidden_fields, hidden_fields,
})); }));
} }

View File

@ -770,12 +770,11 @@ mod tests {
use bumpalo::Bump; use bumpalo::Bump;
use fst::IntoStreamer; use fst::IntoStreamer;
use heed::RwTxn; use heed::RwTxn;
use maplit::{btreeset, hashset}; use maplit::hashset;
use super::*; use super::*;
use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::documents::mmap_from_objects; use crate::documents::mmap_from_objects;
use crate::filterable_attributes_rules::filtered_matching_field_names;
use crate::index::tests::TempIndex; use crate::index::tests::TempIndex;
use crate::index::IndexEmbeddingConfig; use crate::index::IndexEmbeddingConfig;
use crate::progress::Progress; use crate::progress::Progress;
@ -1255,14 +1254,6 @@ mod tests {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("title", "nested.object", "nested.machin"));
// testing the simple query search // testing the simple query search
let mut search = crate::Search::new(&rtxn, &index); let mut search = crate::Search::new(&rtxn, &index);
search.query("document"); search.query("document");
@ -1478,15 +1469,6 @@ mod tests {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("dog", "dog.race", "dog.race.bernese mountain"));
for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] { for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
let mut search = crate::Search::new(&rtxn, &index); let mut search = crate::Search::new(&rtxn, &index);
let filter = format!(r#""dog.race.bernese mountain" = {s}"#); let filter = format!(r#""dog.race.bernese mountain" = {s}"#);
@ -1504,17 +1486,6 @@ mod tests {
db_snap!(index, facet_id_string_docids, @""); db_snap!(index, facet_id_string_docids, @"");
db_snap!(index, field_id_docid_facet_strings, @""); db_snap!(index, field_id_docid_facet_strings, @"");
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!());
// update the settings to test the sortable // update the settings to test the sortable
index index
.update_settings(|settings| { .update_settings(|settings| {
@ -1744,13 +1715,6 @@ mod tests {
let check_ok = |index: &Index| { let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
@ -1855,13 +1819,6 @@ mod tests {
let check_ok = |index: &Index| { let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
@ -1924,13 +1881,6 @@ mod tests {
let check_ok = |index: &Index| { let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("tags", "tags.green", "tags.green.blue"));
let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap(); let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap();
let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap(); let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap();