ManyTheFish 2025-01-27 15:11:18 +01:00
parent eaf3be1702
commit ed882f26b1
24 changed files with 865 additions and 434 deletions

View File

@ -1,5 +1,5 @@
use deserr::Deserr;
use milli::LocalizedAttributesRule;
use milli::{AttributePatterns, LocalizedAttributesRule};
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
@ -7,7 +7,7 @@ use utoipa::ToSchema;
#[deserr(rename_all = camelCase)]
#[serde(rename_all = "camelCase")]
pub struct LocalizedAttributesRuleView {
pub attribute_patterns: Vec<String>,
pub attribute_patterns: AttributePatterns,
pub locales: Vec<Locale>,
}

View File

@ -2,6 +2,8 @@ use deserr::Deserr;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::is_faceted_by;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Deserr, ToSchema)]
#[repr(transparent)]
#[serde(transparent)]
@ -17,23 +19,69 @@ impl From<Vec<String>> for AttributePatterns {
}
impl AttributePatterns {
pub fn match_str(&self, str: &str) -> bool {
self.patterns.iter().any(|pattern| match_pattern(pattern, str))
pub fn match_str(&self, str: &str) -> PatternMatch {
let mut pattern_match = PatternMatch::NoMatch;
for pattern in &self.patterns {
match match_pattern(pattern, str) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => pattern_match = PatternMatch::Parent,
PatternMatch::NoMatch => {}
}
}
pattern_match
}
}
fn match_pattern(pattern: &str, str: &str) -> bool {
fn match_pattern(pattern: &str, str: &str) -> PatternMatch {
if pattern == "*" {
true
return PatternMatch::Match;
} else if pattern.starts_with('*') && pattern.ends_with('*') {
str.contains(&pattern[1..pattern.len() - 1])
} else if let Some(pattern) = pattern.strip_prefix('*') {
str.ends_with(pattern)
} else if let Some(pattern) = pattern.strip_suffix('*') {
str.starts_with(pattern)
} else {
pattern == str
if str.contains(&pattern[1..pattern.len() - 1]) {
return PatternMatch::Match;
}
} else if let Some(pattern) = pattern.strip_prefix('*') {
if str.ends_with(pattern) {
return PatternMatch::Match;
}
} else if let Some(pattern) = pattern.strip_suffix('*') {
if str.starts_with(pattern) {
return PatternMatch::Match;
}
} else {
if pattern == str {
return PatternMatch::Match;
}
}
// If the field is a parent field of the pattern, return Parent
if is_faceted_by(pattern, str) {
PatternMatch::Parent
} else {
PatternMatch::NoMatch
}
}
pub fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch {
if is_faceted_by(field, pattern) {
// If the field matches the pattern or is a nested field of the pattern, return Match (legacy behavior)
PatternMatch::Match
} else if is_faceted_by(pattern, field) {
// If the field is a parent field of the pattern, return Parent
PatternMatch::Parent
} else {
// If the field does not match the pattern and is not a parent of a nested field that matches the pattern, return NoMatch
PatternMatch::NoMatch
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatternMatch {
/// The field is the parent of a nested field that matches the pattern
Parent,
/// The field matches the pattern
Match,
/// The field does not match the pattern
NoMatch,
}
#[cfg(test)]
@ -42,17 +90,17 @@ mod tests {
#[test]
fn test_match_pattern() {
assert!(match_pattern("*", "test"));
assert!(match_pattern("test*", "test"));
assert!(match_pattern("test*", "testa"));
assert!(match_pattern("*test", "test"));
assert!(match_pattern("*test", "atest"));
assert!(match_pattern("*test*", "test"));
assert!(match_pattern("*test*", "atesta"));
assert!(match_pattern("*test*", "atest"));
assert!(match_pattern("*test*", "testa"));
assert!(!match_pattern("test*test", "test"));
assert!(!match_pattern("*test", "testa"));
assert!(!match_pattern("test*", "atest"));
assert_eq!(match_pattern("*", "test"), PatternMatch::Match);
assert_eq!(match_pattern("test*", "test"), PatternMatch::Match);
assert_eq!(match_pattern("test*", "testa"), PatternMatch::Match);
assert_eq!(match_pattern("*test", "test"), PatternMatch::Match);
assert_eq!(match_pattern("*test", "atest"), PatternMatch::Match);
assert_eq!(match_pattern("*test*", "test"), PatternMatch::Match);
assert_eq!(match_pattern("*test*", "atesta"), PatternMatch::Match);
assert_eq!(match_pattern("*test*", "atest"), PatternMatch::Match);
assert_eq!(match_pattern("*test*", "testa"), PatternMatch::Match);
assert_eq!(match_pattern("test*test", "test"), PatternMatch::NoMatch);
assert_eq!(match_pattern("*test", "testa"), PatternMatch::NoMatch);
assert_eq!(match_pattern("test*", "atest"), PatternMatch::NoMatch);
}
}
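
For reference, a standalone sketch (not the milli implementation) of how the new three-valued matching behaves for legacy `Field` rules, assuming `is_faceted_by(field, facet)` means that `facet` is `field` itself or a dotted-path ancestor of it:

// Standalone sketch, not the milli implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PatternMatch { Parent, Match, NoMatch }

// Assumption: `is_faceted_by(field, facet)` is true when `facet` is `field`
// itself or a dotted-path ancestor of it (e.g. "doggo" facets "doggo.race").
fn is_faceted_by(field: &str, facet: &str) -> bool {
    field == facet || (field.starts_with(facet) && field[facet.len()..].starts_with('.'))
}

fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch {
    if is_faceted_by(field, pattern) {
        PatternMatch::Match // the field is the pattern or nested below it (legacy behavior)
    } else if is_faceted_by(pattern, field) {
        PatternMatch::Parent // the field is an ancestor of the pattern
    } else {
        PatternMatch::NoMatch
    }
}

fn main() {
    assert_eq!(match_field_legacy("doggo", "doggo.race"), PatternMatch::Match);
    assert_eq!(match_field_legacy("doggo.race", "doggo"), PatternMatch::Parent);
    assert_eq!(match_field_legacy("doggo", "catto"), PatternMatch::NoMatch);
}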

View File

@ -5,7 +5,11 @@ use charabia::Language;
use heed::RoTxn;
use super::FieldsIdsMap;
use crate::{FieldId, FilterableAttributesSettings, Index, LocalizedAttributesRule, Result};
use crate::attribute_patterns::PatternMatch;
use crate::{
FieldId, FilterableAttributesFeatures, FilterableAttributesSettings, Index,
LocalizedAttributesRule, Result,
};
#[derive(Debug, Clone, Copy)]
pub struct Metadata {
@ -106,6 +110,34 @@ impl Metadata {
let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap();
Some(rule.locales())
}
pub fn filterable_attributes<'rules>(
&self,
rules: &'rules [FilterableAttributesSettings],
) -> Option<&'rules FilterableAttributesSettings> {
let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get();
// - 1: `filterable_attributes_rule_id` is NonZero
let rule = rules.get((filterable_attributes_rule_id - 1) as usize).unwrap();
Some(rule)
}
pub fn filterable_attributes_features(
&self,
rules: &[FilterableAttributesSettings],
) -> FilterableAttributesFeatures {
self.filterable_attributes(rules)
.map(|rule| rule.features())
// if there is no filterable attributes rule, return no features
.unwrap_or_else(FilterableAttributesFeatures::no_features)
}
pub fn is_sortable(&self) -> bool {
self.sortable
}
pub fn is_searchable(&self) -> bool {
self.searchable
}
}
#[derive(Debug, Clone)]
@ -158,19 +190,14 @@ impl MetadataBuilder {
.localized_attributes
.iter()
.flat_map(|v| v.iter())
.position(|rule| rule.match_str(field))
.position(|rule| rule.match_str(field) == PatternMatch::Match)
// saturating_add(1): make `id` `NonZero`
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
let filterable_attributes_rule_id = self
.filterable_attributes
.iter()
.position(|attribute| match attribute {
FilterableAttributesSettings::Field(field_name) => field_name == field,
FilterableAttributesSettings::Pattern(patterns) => {
patterns.patterns.match_str(field)
}
})
.position(|attribute| attribute.match_str(field) == PatternMatch::Match)
// saturating_add(1): make `id` `NonZero`
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
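
As a side note, the rule-id bookkeeping above boils down to a minimal standalone round trip: the matching rule's position is shifted by one so it fits in a `NonZeroU16`, and shifted back when the rule is fetched (the `saturating_add(1)` / `- 1` comments in the diff).

use std::num::NonZeroU16;

// The builder stores `position + 1` so the id is non-zero (and `Option<NonZeroU16>`
// stays as small as a plain u16); readers subtract 1 before indexing the rules.
fn encode_rule_id(position: usize) -> NonZeroU16 {
    NonZeroU16::new(position.saturating_add(1).try_into().unwrap()).unwrap()
}

fn decode_rule_id(id: NonZeroU16) -> usize {
    (id.get() - 1) as usize // safe: the stored id is NonZero
}

fn main() {
    let rules = ["rule_a", "rule_b", "rule_c"];
    let id = encode_rule_id(2);
    assert_eq!(rules[decode_rule_id(id)], "rule_c");
}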

View File

@ -1,10 +1,12 @@
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::collections::{BTreeSet, HashSet};
use utoipa::ToSchema;
use crate::{
constants::RESERVED_GEO_FIELD_NAME, is_faceted_by, AttributePatterns, FieldId, FieldsIdsMap,
attribute_patterns::{match_field_legacy, PatternMatch},
constants::RESERVED_GEO_FIELD_NAME,
AttributePatterns, FieldId, FieldsIdsMap,
};
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)]
@ -15,17 +17,25 @@ pub enum FilterableAttributesSettings {
}
impl FilterableAttributesSettings {
pub fn match_str(&self, field: &str) -> bool {
pub fn match_str(&self, field: &str) -> PatternMatch {
match self {
FilterableAttributesSettings::Field(field_name) => is_faceted_by(field, field_name),
FilterableAttributesSettings::Pattern(patterns) => patterns.patterns.match_str(field),
FilterableAttributesSettings::Field(pattern) => match_field_legacy(pattern, field),
FilterableAttributesSettings::Pattern(patterns) => patterns.match_str(field),
}
}
pub fn has_geo(&self) -> bool {
// TODO: This is a temporary solution to check if the geo field is activated.
matches!(self, FilterableAttributesSettings::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME)
}
pub fn features(&self) -> FilterableAttributesFeatures {
match self {
FilterableAttributesSettings::Field(_) => {
FilterableAttributesFeatures::legacy_default()
}
FilterableAttributesSettings::Pattern(patterns) => patterns.features(),
}
}
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)]
@ -36,12 +46,62 @@ pub struct FilterableAttributesPatterns {
pub features: Option<FilterableAttributesFeatures>,
}
impl FilterableAttributesPatterns {
pub fn match_str(&self, field: &str) -> PatternMatch {
self.patterns.match_str(field)
}
pub fn features(&self) -> FilterableAttributesFeatures {
self.features.clone().unwrap_or_default()
}
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct FilterableAttributesFeatures {
facet_search: Option<String>,
filter: Option<String>,
facet_search: bool,
filter: FilterFeature,
}
impl Default for FilterableAttributesFeatures {
fn default() -> Self {
Self { facet_search: false, filter: FilterFeature::Equal }
}
}
impl FilterableAttributesFeatures {
pub fn legacy_default() -> Self {
Self { facet_search: true, filter: FilterFeature::Order }
}
pub fn no_features() -> Self {
Self { facet_search: false, filter: FilterFeature::Disabled }
}
pub fn is_filterable(&self) -> bool {
self.filter != FilterFeature::Disabled
}
/// Check if `IS NULL` is allowed
pub fn is_filterable_null(&self) -> bool {
self.filter != FilterFeature::Disabled
}
/// Check if `IS EMPTY` is allowed
pub fn is_filterable_empty(&self) -> bool {
self.filter != FilterFeature::Disabled
}
/// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed
pub fn is_filterable_order(&self) -> bool {
self.filter == FilterFeature::Order
}
/// Check if the facet search is allowed
pub fn is_facet_searchable(&self) -> bool {
self.facet_search
}
}
impl<E: DeserializeError> Deserr<E> for FilterableAttributesSettings {
@ -59,6 +119,13 @@ impl<E: DeserializeError> Deserr<E> for FilterableAttributesSettings {
}
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)]
pub enum FilterFeature {
Disabled,
Equal,
Order,
}
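
To make the three presets above concrete, here is a standalone sketch (local stand-in types, not the milli ones) of the capabilities they grant: `legacy_default` for plain `Field` rules, the plain `Default` for pattern rules without explicit features, and `no_features` when no rule matches.

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FilterFeature { Disabled, Equal, Order }

struct Features { facet_search: bool, filter: FilterFeature }

impl Features {
    fn is_filterable(&self) -> bool { self.filter != FilterFeature::Disabled }
    fn is_filterable_order(&self) -> bool { self.filter == FilterFeature::Order }
}

fn main() {
    // `Field("...")` rules keep the legacy behavior: facet search plus ordering filters.
    let legacy = Features { facet_search: true, filter: FilterFeature::Order };
    // Pattern rules without explicit features get the new default: equality filters only.
    let default = Features { facet_search: false, filter: FilterFeature::Equal };
    // Fields matched by no rule expose nothing at all.
    let none = Features { facet_search: false, filter: FilterFeature::Disabled };

    assert!(legacy.facet_search && legacy.is_filterable_order());
    assert!(!default.facet_search && default.is_filterable() && !default.is_filterable_order());
    assert!(!none.is_filterable());
}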
pub fn matching_field_ids(
filterable_attributes: &[FilterableAttributesSettings],
fields_ids_map: &FieldsIdsMap,
@ -66,10 +133,77 @@ pub fn matching_field_ids(
let mut result = HashSet::new();
for (field_id, field_name) in fields_ids_map.iter() {
for filterable_attribute in filterable_attributes {
if filterable_attribute.match_str(field_name) {
if filterable_attribute.match_str(field_name) == PatternMatch::Match {
result.insert(field_id);
}
}
}
result
}
pub fn matching_field_names<'fim>(
filterable_attributes: &[FilterableAttributesSettings],
fields_ids_map: &'fim FieldsIdsMap,
) -> BTreeSet<&'fim str> {
filtered_matching_field_names(filterable_attributes, fields_ids_map, &|_| true)
}
pub fn filtered_matching_field_names<'fim>(
filterable_attributes: &[FilterableAttributesSettings],
fields_ids_map: &'fim FieldsIdsMap,
filter: &impl Fn(&FilterableAttributesFeatures) -> bool,
) -> BTreeSet<&'fim str> {
let mut result = BTreeSet::new();
for (_, field_name) in fields_ids_map.iter() {
for filterable_attribute in filterable_attributes {
if filterable_attribute.match_str(field_name) == PatternMatch::Match {
let features = filterable_attribute.features();
if filter(&features) {
result.insert(field_name);
}
}
}
}
result
}
pub fn matching_features(
field_name: &str,
filterable_attributes: &[FilterableAttributesSettings],
) -> Option<FilterableAttributesFeatures> {
for filterable_attribute in filterable_attributes {
if filterable_attribute.match_str(field_name) == PatternMatch::Match {
return Some(filterable_attribute.features());
}
}
None
}
pub fn is_field_filterable(
field_name: &str,
filterable_attributes: &[FilterableAttributesSettings],
) -> bool {
matching_features(field_name, filterable_attributes)
.map_or(false, |features| features.is_filterable())
}
pub fn match_pattern_by_features(
field_name: &str,
filterable_attributes: &[FilterableAttributesSettings],
filter: &impl Fn(&FilterableAttributesFeatures) -> bool,
) -> PatternMatch {
let mut selection = PatternMatch::NoMatch;
// Check if the field name matches any pattern whose features satisfy the given filter
for pattern in filterable_attributes {
let features = pattern.features();
if filter(&features) {
match pattern.match_str(field_name) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => selection = PatternMatch::Parent,
PatternMatch::NoMatch => (),
}
}
}
selection
}
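
A simplified standalone model of the selection loop in `match_pattern_by_features`: the first rule that passes the feature filter and fully matches wins, while an ancestor match is remembered as `Parent` so callers know to keep descending into nested objects.

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PatternMatch { Parent, Match, NoMatch }

struct Rule { pattern: &'static str, filterable: bool }

// Exact-or-ancestor matching, standing in for the real pattern matching above.
fn matches(pattern: &str, field: &str) -> PatternMatch {
    if field == pattern {
        PatternMatch::Match
    } else if pattern.starts_with(field) && pattern[field.len()..].starts_with('.') {
        PatternMatch::Parent
    } else {
        PatternMatch::NoMatch
    }
}

fn select(field: &str, rules: &[Rule]) -> PatternMatch {
    let mut selection = PatternMatch::NoMatch;
    for rule in rules.iter().filter(|rule| rule.filterable) {
        match matches(rule.pattern, field) {
            // A full match short-circuits, a parent match is only remembered.
            PatternMatch::Match => return PatternMatch::Match,
            PatternMatch::Parent => selection = PatternMatch::Parent,
            PatternMatch::NoMatch => {}
        }
    }
    selection
}

fn main() {
    let rules = [Rule { pattern: "doggo.race", filterable: true }];
    // "doggo" only contains a filterable field, so the caller must descend into it.
    assert_eq!(select("doggo", &rules), PatternMatch::Parent);
    assert_eq!(select("doggo.race", &rules), PatternMatch::Match);
    assert_eq!(select("catto", &rules), PatternMatch::NoMatch);
}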

View File

@ -803,7 +803,7 @@ impl Index {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY)
}
/// Returns the filterable fields names.
/// Returns the filterable fields setting value.
pub fn filterable_fields(
&self,
rtxn: &RoTxn<'_>,
@ -815,7 +815,7 @@ impl Index {
.unwrap_or_default())
}
/// Identical to `filterable_fields`, but returns ids instead.
/// Returns the filterable fields ids.
pub fn filterable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
let fields = self.filterable_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
@ -878,67 +878,79 @@ impl Index {
/// Returns true if the geo feature is activated.
pub fn is_geo_activated(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
let geo_filter = self.filterable_fields(rtxn)?.iter().any(|field| field.has_geo());
let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
let geo_filter = self.is_geo_filtering_activated(rtxn)?;
let geo_sortable = self.is_geo_sorting_activated(rtxn)?;
Ok(geo_filter || geo_sortable)
}
/// Returns the faceted fields names.
pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
Ok(self
.main
.remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
.unwrap_or_default())
/// Returns true if the geo sorting feature is activated.
pub fn is_geo_sorting_activated(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
Ok(geo_sortable)
}
/// Identical to `faceted_fields`, but returns ids instead.
pub fn faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
let fields = self.faceted_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
let mut fields_ids = HashSet::new();
for name in fields {
if let Some(field_id) = fields_ids_map.id(&name) {
fields_ids.insert(field_id);
}
/// Returns true if the geo filtering feature is activated.
pub fn is_geo_filtering_activated(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
let geo_filter = self.filterable_fields(rtxn)?.iter().any(|field| field.has_geo());
Ok(geo_filter)
}
Ok(fields_ids)
}
// /// Returns the faceted fields names.
// pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
// Ok(self
// .main
// .remap_types::<Str, SerdeJson<_>>()
// .get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
// .unwrap_or_default())
// }
// /// Identical to `faceted_fields`, but returns ids instead.
// pub fn faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
// let fields = self.faceted_fields(rtxn)?;
// let fields_ids_map = self.fields_ids_map(rtxn)?;
// let mut fields_ids = HashSet::new();
// for name in fields {
// if let Some(field_id) = fields_ids_map.id(&name) {
// fields_ids.insert(field_id);
// }
// }
// Ok(fields_ids)
// }
/* faceted documents ids */
/// Returns the user defined faceted fields names.
///
/// The user faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
let fields_ids_map = self.fields_ids_map(rtxn)?;
let filterable_fields = self.filterable_fields_ids(rtxn)?;
let sortable_fields = self.sortable_fields(rtxn)?;
let distinct_field = self.distinct_field(rtxn)?;
let asc_desc_fields =
self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
_otherwise => None,
});
// pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
// let fields_ids_map = self.fields_ids_map(rtxn)?;
// let filterable_fields = self.filterable_fields_ids(rtxn)?;
// let sortable_fields = self.sortable_fields(rtxn)?;
// let distinct_field = self.distinct_field(rtxn)?;
// let asc_desc_fields =
// self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
// Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
// _otherwise => None,
// });
let mut faceted_fields: HashSet<_> = filterable_fields
.into_iter()
.filter_map(|field_id| {
let field_name = fields_ids_map.name(field_id);
debug_assert!(field_name.is_some(), "field name not found for {field_id}");
field_name.map(|field| field.to_string())
})
.collect();
faceted_fields.extend(sortable_fields);
faceted_fields.extend(asc_desc_fields);
if let Some(field) = distinct_field {
faceted_fields.insert(field.to_owned());
}
// let mut faceted_fields: HashSet<_> = filterable_fields
// .into_iter()
// .filter_map(|field_id| {
// let field_name = fields_ids_map.name(field_id);
// debug_assert!(field_name.is_some(), "field name not found for {field_id}");
// field_name.map(|field| field.to_string())
// })
// .collect();
// faceted_fields.extend(sortable_fields);
// faceted_fields.extend(asc_desc_fields);
// if let Some(field) = distinct_field {
// faceted_fields.insert(field.to_owned());
// }
Ok(faceted_fields)
}
// Ok(faceted_fields)
// }
/* faceted documents ids */
@ -1728,7 +1740,7 @@ pub(crate) mod tests {
use big_s::S;
use bumpalo::Bump;
use heed::{EnvOpenOptions, RwTxn};
use maplit::{btreemap, hashset};
use maplit::btreemap;
use memmap2::Mmap;
use tempfile::TempDir;
@ -1744,7 +1756,8 @@ pub(crate) mod tests {
use crate::vector::settings::{EmbedderSource, EmbeddingSettings};
use crate::vector::EmbeddingConfigs;
use crate::{
db_snap, obkv_to_json, Filter, Index, Search, SearchResult, ThreadPoolNoAbortBuilder,
db_snap, obkv_to_json, Filter, FilterableAttributesSettings, Index, Search, SearchResult,
ThreadPoolNoAbortBuilder,
};
pub(crate) struct TempIndex {
@ -2174,7 +2187,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
index
@ -2282,7 +2297,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("doggo") });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"doggo".to_string(),
)]);
})
.unwrap();
index
@ -2319,15 +2336,14 @@ pub(crate) mod tests {
#[test]
fn replace_documents_external_ids_and_soft_deletion_check() {
use big_s::S;
use maplit::hashset;
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("doggo") });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"doggo".to_string(),
)]);
})
.unwrap();
@ -2860,8 +2876,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_primary_key("id".to_string());
settings
.set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()]));
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
@ -2895,8 +2912,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_primary_key("id".to_string());
settings
.set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()]));
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
@ -2929,7 +2947,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name")]);
settings.set_filterable_fields(HashSet::from([S("age")]));
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"age".to_string(),
)]);
})
.unwrap();
@ -2951,7 +2971,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name"), S("realName")]);
settings.set_filterable_fields(HashSet::from([S("age")]));
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"age".to_string(),
)]);
})
.unwrap();
@ -3050,7 +3072,10 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]);
settings.set_filterable_fields(hashset![S("_vectors"), S("_vectors.doggo")]);
settings.set_filterable_fields(vec![
FilterableAttributesSettings::Field("_vectors".to_string()),
FilterableAttributesSettings::Field("_vectors.doggo".to_string()),
]);
})
.unwrap();

View File

@ -4,6 +4,7 @@ use charabia::Language;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::attribute_patterns::PatternMatch;
use crate::fields_ids_map::FieldsIdsMap;
use crate::{AttributePatterns, FieldId};
@ -27,7 +28,7 @@ impl LocalizedAttributesRule {
Self { attribute_patterns: AttributePatterns::from(attribute_patterns), locales }
}
pub fn match_str(&self, str: &str) -> bool {
pub fn match_str(&self, str: &str) -> PatternMatch {
self.attribute_patterns.match_str(str)
}
@ -57,7 +58,7 @@ impl LocalizedFieldIds {
for (field_id, field_name) in fields {
let mut locales = Vec::new();
for rule in rules {
if rule.match_str(field_name) {
if rule.match_str(field_name) == PatternMatch::Match {
locales.extend(rule.locales.iter());
// Take the first rule that matches
break;

View File

@ -11,6 +11,7 @@ use serde::{Deserialize, Serialize};
use crate::error::UserError;
use crate::facet::FacetType;
use crate::filterable_fields::{is_field_filterable, matching_field_names};
use crate::heed_codec::facet::{
FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
};
@ -18,7 +19,7 @@ use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::search::facet::facet_distribution_iter::{
count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
};
use crate::{FieldId, Index, Result};
use crate::{FieldId, FieldsIdsMap, Index, Result};
/// The default number of values by facets that will
/// be fetched from the key-value store.
@ -281,32 +282,13 @@ impl<'a> FacetDistribution<'a> {
pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let candidates = if let Some(candidates) = self.candidates.clone() {
candidates
} else {
return Ok(Default::default());
};
let fields = match &self.facets {
Some(facets) => {
let invalid_fields: HashSet<_> = facets
.iter()
.map(|(name, _)| name)
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
.collect();
if !invalid_fields.is_empty() {
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
valid_facets_name: filterable_fields.into_iter().collect(),
}
.into());
} else {
facets.iter().map(|(name, _)| name).cloned().collect()
}
}
None => filterable_fields,
};
let fields = self.faceted_fields_names(&fields_ids_map)?;
let mut distribution = BTreeMap::new();
for (fid, name) in fields_ids_map.iter() {
@ -341,27 +323,8 @@ impl<'a> FacetDistribution<'a> {
pub fn execute(&self) -> Result<BTreeMap<String, IndexMap<String, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let fields = match self.facets {
Some(ref facets) => {
let invalid_fields: HashSet<_> = facets
.iter()
.map(|(name, _)| name)
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
.collect();
if !invalid_fields.is_empty() {
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
valid_facets_name: filterable_fields.into_iter().collect(),
}
.into());
} else {
facets.iter().map(|(name, _)| name).cloned().collect()
}
}
None => filterable_fields,
};
let fields = self.faceted_fields_names(&fields_ids_map)?;
let mut distribution = BTreeMap::new();
for (fid, name) in fields_ids_map.iter() {
@ -378,6 +341,40 @@ impl<'a> FacetDistribution<'a> {
Ok(distribution)
}
fn faceted_fields_names(&self, fields_ids_map: &FieldsIdsMap) -> Result<Vec<String>> {
// TODO: @many: this is a bit of a mess, we should refactor it
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let fields = match &self.facets {
Some(facets) => {
let invalid_fields: HashSet<_> = facets
.iter()
.map(|(name, _)| name)
.filter(|facet| !is_field_filterable(facet, &filterable_fields))
.collect();
if !invalid_fields.is_empty() {
let valid_facets_name =
matching_field_names(&filterable_fields, &fields_ids_map);
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
valid_facets_name: valid_facets_name
.into_iter()
.map(String::from)
.collect(),
}
.into());
} else {
facets.iter().map(|(name, _)| name).cloned().collect()
}
}
None => matching_field_names(&filterable_fields, &fields_ids_map)
.into_iter()
.map(String::from)
.collect(),
};
Ok(fields)
}
}
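
The extracted `faceted_fields_names` helper above boils down to the following standalone sketch (simplified types, not the milli ones): explicitly requested facets must be filterable, otherwise the error carries the names that would have been accepted; without an explicit list, every filterable field is used.

use std::collections::BTreeSet;

// Simplified stand-ins: filterable fields as plain names, the error as a tuple of
// (invalid names, valid candidates).
fn faceted_fields_names<'a>(
    requested: Option<&[&'a str]>,
    filterable: &[&'a str],
) -> Result<Vec<&'a str>, (Vec<&'a str>, BTreeSet<&'a str>)> {
    match requested {
        Some(facets) => {
            let invalid: Vec<_> =
                facets.iter().copied().filter(|facet| !filterable.contains(facet)).collect();
            if invalid.is_empty() {
                Ok(facets.to_vec())
            } else {
                Err((invalid, filterable.iter().copied().collect()))
            }
        }
        // No explicit facet list: fall back to every filterable field.
        None => Ok(filterable.to_vec()),
    }
}

fn main() {
    let filterable = ["colour", "size"];
    assert_eq!(faceted_fields_names(None, &filterable).unwrap(), vec!["colour", "size"]);
    assert!(faceted_fields_names(Some(&["price"]), &filterable).is_err());
}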
impl fmt::Debug for FacetDistribution<'_> {
@ -405,11 +402,10 @@ mod tests {
use std::iter;
use big_s::S;
use maplit::hashset;
use crate::documents::mmap_from_objects;
use crate::index::tests::TempIndex;
use crate::{milli_snap, FacetDistribution, OrderBy};
use crate::{milli_snap, FacetDistribution, FilterableAttributesSettings, OrderBy};
#[test]
fn few_candidates_few_facet_values() {
@ -419,7 +415,10 @@ mod tests {
let index = TempIndex::new();
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
})
.unwrap();
let documents = documents!([
@ -490,7 +489,10 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
})
.unwrap();
let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"];
@ -575,7 +577,10 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
})
.unwrap();
let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::<Vec<_>>();
@ -634,7 +639,10 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
})
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();
@ -685,7 +693,10 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
})
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();
@ -736,7 +747,10 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
})
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();
@ -787,7 +801,10 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
})
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();

View File

@ -1,4 +1,4 @@
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
@ -12,13 +12,16 @@ use serde_json::Value;
use super::facet_range_search;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::{Error, UserError};
use crate::filterable_fields::{
filtered_matching_field_names, is_field_filterable, matching_features,
};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{
distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result,
SerializationError,
distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures,
FilterableAttributesSettings, Index, InternalError, Result, SerializationError,
};
/// The maximum number of filters the filter AST can process.
@ -60,7 +63,7 @@ impl Display for BadGeoError {
#[derive(Debug)]
enum FilterError<'a> {
AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
AttributeNotFilterable { attribute: &'a str, filterable_fields: BTreeSet<&'a str> },
ParseGeoError(BadGeoError),
TooDeep,
}
@ -233,10 +236,15 @@ impl<'a> Filter<'a> {
let filterable_fields = index.filterable_fields(rtxn)?;
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
let attribute = fid.value();
if !crate::is_faceted(attribute, &filterable_fields) {
if !is_field_filterable(attribute, &filterable_fields) {
let fields_ids_map = index.fields_ids_map(rtxn)?;
return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute,
filterable_fields,
filterable_fields: filtered_matching_field_names(
&filterable_fields,
&fields_ids_map,
&|features| features.is_filterable(),
),
}))?;
}
}
@ -249,6 +257,7 @@ impl<'a> Filter<'a> {
field_id: FieldId,
universe: Option<&RoaringBitmap>,
operator: &Condition<'a>,
features: &FilterableAttributesFeatures,
) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
let strings_db = index.facet_id_string_docids;
@ -258,6 +267,17 @@ impl<'a> Filter<'a> {
// field id and the level.
let (left, right) = match operator {
// return an error if the filter is not allowed for this field
Condition::GreaterThan(_)
| Condition::GreaterThanOrEqual(_)
| Condition::LowerThan(_)
| Condition::LowerThanOrEqual(_)
| Condition::Between { .. }
if !features.is_filterable_order() =>
{
// TODO: produce a dedicated error for this
todo!("filtering on non-ordered fields is not supported, return an error")
}
Condition::GreaterThan(val) => {
(Excluded(val.parse_finite_float()?), Included(f64::MAX))
}
@ -307,7 +327,8 @@ impl<'a> Filter<'a> {
}
Condition::NotEqual(val) => {
let operator = Condition::Equal(val.clone());
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
let docids =
Self::evaluate_operator(rtxn, index, field_id, None, &operator, features)?;
let all_ids = index.documents_ids(rtxn)?;
return Ok(all_ids - docids);
}
@ -409,7 +430,7 @@ impl<'a> Filter<'a> {
&self,
rtxn: &heed::RoTxn<'_>,
index: &Index,
filterable_fields: &HashSet<String>,
filterable_fields: &[FilterableAttributesSettings],
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
if universe.map_or(false, |u| u.is_empty()) {
@ -434,38 +455,58 @@ impl<'a> Filter<'a> {
}
}
FilterCondition::In { fid, els } => {
if crate::is_faceted(fid.value(), filterable_fields) {
match matching_features(fid.value(), filterable_fields) {
Some(features) if features.is_filterable() => {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
.map(|op| {
Self::evaluate_operator(
rtxn, index, fid, universe, &op, &features,
)
})
.union()
} else {
Ok(RoaringBitmap::new())
}
} else {
}
_ => {
let field_ids_map = index.fields_ids_map(rtxn)?;
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filterable_fields.clone(),
filterable_fields: filtered_matching_field_names(
filterable_fields,
&field_ids_map,
&|features| features.is_filterable(),
),
}))?
}
}
}
FilterCondition::Condition { fid, op } => {
if crate::is_faceted(fid.value(), filterable_fields) {
match matching_features(fid.value(), filterable_fields) {
Some(features) if features.is_filterable() => {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
Self::evaluate_operator(rtxn, index, fid, universe, op)
Self::evaluate_operator(rtxn, index, fid, universe, op, &features)
} else {
Ok(RoaringBitmap::new())
}
} else {
}
_ => {
let field_ids_map = index.fields_ids_map(rtxn)?;
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filterable_fields.clone(),
filterable_fields: filtered_matching_field_names(
filterable_fields,
&field_ids_map,
&|features| features.is_filterable(),
),
}))?
}
}
}
FilterCondition::Or(subfilters) => subfilters
.iter()
.cloned()
@ -502,7 +543,7 @@ impl<'a> Filter<'a> {
}
}
FilterCondition::GeoLowerThan { point, radius } => {
if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) {
if index.is_geo_filtering_activated(rtxn)? {
let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
if !(-90.0..=90.0).contains(&base_point[0]) {
@ -530,14 +571,19 @@ impl<'a> Filter<'a> {
Ok(result)
} else {
let field_ids_map = index.fields_ids_map(rtxn)?;
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
filterable_fields: filterable_fields.clone(),
filterable_fields: filtered_matching_field_names(
filterable_fields,
&field_ids_map,
&|features| features.is_filterable(),
),
}))?
}
}
FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) {
if index.is_geo_filtering_activated(rtxn)? {
let top_right: [f64; 2] = [
top_right_point[0].parse_finite_float()?,
top_right_point[1].parse_finite_float()?,
@ -662,10 +708,15 @@ impl<'a> Filter<'a> {
Ok(selected_lat & selected_lng)
} else {
let field_ids_map = index.fields_ids_map(rtxn)?;
Err(top_right_point[0].as_external_error(
FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
filterable_fields: filterable_fields.clone(),
filterable_fields: filtered_matching_field_names(
filterable_fields,
&field_ids_map,
&|features| features.is_filterable(),
),
},
))?
}
@ -687,12 +738,11 @@ mod tests {
use big_s::S;
use either::Either;
use maplit::hashset;
use roaring::RoaringBitmap;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::index::tests::TempIndex;
use crate::Filter;
use crate::{Filter, FilterableAttributesSettings};
#[test]
fn empty_db() {
@ -700,7 +750,9 @@ mod tests {
//Set the filterable fields to be the channel.
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("PrIcE") });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"PrIcE".to_string(),
)]);
})
.unwrap();
@ -804,7 +856,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("title")]);
settings.set_filterable_fields(hashset! { S("title") });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"title".to_string(),
)]);
})
.unwrap();
@ -870,7 +924,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(S("monitor_diagonal")));
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"monitor_diagonal".to_string(),
)]);
})
.unwrap();
@ -901,7 +957,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(S(
RESERVED_GEO_FIELD_NAME,
))]);
})
.unwrap();
@ -948,7 +1006,10 @@ mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") });
settings.set_filterable_fields(vec![
FilterableAttributesSettings::Field(S(RESERVED_GEO_FIELD_NAME)),
FilterableAttributesSettings::Field("price".to_string()),
]);
})
.unwrap();
@ -998,7 +1059,10 @@ mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") });
settings.set_filterable_fields(vec![
FilterableAttributesSettings::Field(S(RESERVED_GEO_FIELD_NAME)),
FilterableAttributesSettings::Field("price".to_string()),
]);
})
.unwrap();
@ -1108,7 +1172,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S("price") });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"price".to_string(),
)]);
})
.unwrap();
index
@ -1164,7 +1230,11 @@ mod tests {
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("id"), S("one"), S("two") });
settings.set_filterable_fields(vec![
FilterableAttributesSettings::Field("id".to_string()),
FilterableAttributesSettings::Field("one".to_string()),
FilterableAttributesSettings::Field("two".to_string()),
]);
})
.unwrap();
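
The guard added to `evaluate_operator` earlier in this file can be summarized with a standalone sketch (the dedicated error type is still a TODO in the diff): ordering comparisons are only accepted when the matched rule grants the `Order` filter feature.

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FilterFeature { Disabled, Equal, Order }

enum Condition<'a> { Equal(&'a str), GreaterThan(&'a str) }

fn check_operator(op: &Condition<'_>, filter: FilterFeature) -> Result<(), String> {
    match op {
        // `<`, `>`, `<=`, `>=` and `TO` all require the `Order` feature.
        Condition::GreaterThan(_) if filter != FilterFeature::Order => {
            Err("this field only supports equality filters".to_string())
        }
        // Any remaining operator still requires the field to be filterable at all.
        _ if filter == FilterFeature::Disabled => Err("this field is not filterable".to_string()),
        _ => Ok(()),
    }
}

fn main() {
    assert!(check_operator(&Condition::GreaterThan("10"), FilterFeature::Order).is_ok());
    assert!(check_operator(&Condition::GreaterThan("10"), FilterFeature::Equal).is_err());
    assert!(check_operator(&Condition::Equal("10"), FilterFeature::Equal).is_ok());
    assert!(check_operator(&Condition::Equal("10"), FilterFeature::Disabled).is_err());
}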

View File

@ -10,6 +10,7 @@ use roaring::RoaringBitmap;
use tracing::error;
use crate::error::UserError;
use crate::filterable_fields::{is_field_filterable, matching_field_names};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::search::build_dfa;
use crate::{DocumentId, FieldId, OrderBy, Result, Search};
@ -74,9 +75,11 @@ impl<'a> SearchForFacetValues<'a> {
let rtxn = self.search_query.rtxn;
let filterable_fields = index.filterable_fields(rtxn)?;
if !filterable_fields.contains(&self.facet) {
if !is_field_filterable(&self.facet, &filterable_fields) {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let matching_field_names = matching_field_names(&filterable_fields, &fields_ids_map);
let (valid_fields, hidden_fields) =
index.remove_hidden_fields(rtxn, filterable_fields)?;
index.remove_hidden_fields(rtxn, matching_field_names)?;
return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),

View File

@ -9,6 +9,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult};
use crate::filterable_fields::{is_field_filterable, matching_field_names};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::Embedder;
use crate::{
@ -188,9 +189,15 @@ impl<'a> Search<'a> {
if let Some(distinct) = &self.distinct {
let filterable_fields = ctx.index.filterable_fields(ctx.txn)?;
if !crate::is_faceted(distinct, &filterable_fields) {
// check if the distinct field is in the filterable fields
if !is_field_filterable(distinct, &filterable_fields) {
// if not, remove the hidden fields from the filterable fields to generate the error message
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
let matching_field_names =
matching_field_names(&filterable_fields, &fields_ids_map);
let (valid_fields, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?;
ctx.index.remove_hidden_fields(ctx.txn, matching_field_names)?;
// and return the error
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
field: distinct.clone(),
valid_fields,

View File

@ -5,13 +5,11 @@
use std::time::Duration;
use big_s::S;
use maplit::hashset;
use meili_snap::snapshot;
use crate::index::tests::TempIndex;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::{Criterion, Filter, Search, TimeBudget};
use crate::{Criterion, Filter, FilterableAttributesSettings, Search, TimeBudget};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -20,7 +18,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_filterable_fields(hashset! { S("id") });
s.set_filterable_fields(vec![FilterableAttributesSettings::Field("id".to_owned())]);
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
})
.unwrap();

View File

@ -19,7 +19,10 @@ use maplit::hashset;
use super::collect_field_values;
use crate::index::tests::TempIndex;
use crate::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
use crate::{
AscDesc, Criterion, FilterableAttributesSettings, Index, Member, Search, SearchResult,
TermsMatchingStrategy,
};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -236,7 +239,7 @@ fn test_distinct_placeholder_no_ranking_rules() {
// Set the letter as filterable and unset the distinct attribute.
index
.update_settings(|s| {
s.set_filterable_fields(hashset! { S("letter") });
s.set_filterable_fields(vec![FilterableAttributesSettings::Field("letter".to_owned())]);
s.reset_distinct_field();
})
.unwrap();

View File

@ -9,7 +9,7 @@ use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use crate::vector::EmbeddingConfigs;
use crate::{db_snap, Criterion, Index};
use crate::{db_snap, Criterion, FilterableAttributesSettings, Index};
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");
use crate::constants::RESERVED_GEO_FIELD_NAME;
@ -25,14 +25,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(criteria.to_vec());
builder.set_filterable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),
S(RESERVED_GEO_FIELD_NAME),
S("opt1"),
S("opt1.opt2"),
S("tag_in")
});
builder.set_filterable_fields(vec![
FilterableAttributesSettings::Field(S("tag")),
FilterableAttributesSettings::Field(S("asc_desc_rank")),
FilterableAttributesSettings::Field(S(RESERVED_GEO_FIELD_NAME)),
FilterableAttributesSettings::Field(S("opt1")),
FilterableAttributesSettings::Field(S("opt1.opt2")),
FilterableAttributesSettings::Field(S("tag_in")),
]);
builder.set_sortable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),

View File

@ -374,7 +374,7 @@ mod tests {
use crate::heed_codec::StrRefCodec;
use crate::index::tests::TempIndex;
use crate::update::facet::test_helpers::{ordered_string, FacetIndex};
use crate::{db_snap, milli_snap};
use crate::{db_snap, milli_snap, FilterableAttributesSettings};
#[test]
fn insert() {
@ -474,7 +474,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("id") });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"id".to_string(),
)]);
})
.unwrap();

View File

@ -760,18 +760,19 @@ mod tests {
use bumpalo::Bump;
use fst::IntoStreamer;
use heed::RwTxn;
use maplit::hashset;
use maplit::{btreeset, hashset};
use super::*;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::documents::mmap_from_objects;
use crate::filterable_fields::filtered_matching_field_names;
use crate::index::tests::TempIndex;
use crate::index::IndexEmbeddingConfig;
use crate::progress::Progress;
use crate::search::TermsMatchingStrategy;
use crate::update::new::indexer;
use crate::update::Setting;
use crate::{db_snap, Filter, Search, UserError};
use crate::{db_snap, Filter, FilterableAttributesSettings, Search, UserError};
#[test]
fn simple_document_replacement() {
@ -1001,7 +1002,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
}
@ -1013,7 +1016,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
@ -1229,15 +1234,24 @@ mod tests {
let searchable_fields = vec![S("title"), S("nested.object"), S("nested.machin")];
settings.set_searchable_fields(searchable_fields);
let faceted_fields = hashset!(S("title"), S("nested.object"), S("nested.machin"));
let faceted_fields = vec![
FilterableAttributesSettings::Field("title".to_string()),
FilterableAttributesSettings::Field("nested.object".to_string()),
FilterableAttributesSettings::Field("nested.machin".to_string()),
];
settings.set_filterable_fields(faceted_fields);
})
.unwrap();
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("title"), S("nested.object"), S("nested.machin")));
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("title", "nested.object", "nested.machin"));
// testing the simple query search
let mut search = crate::Search::new(&rtxn, &index);
@ -1433,7 +1447,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(String::from("dog")));
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"dog".to_string(),
)]);
})
.unwrap();
@ -1452,9 +1468,14 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let hidden = index.faceted_fields(&rtxn).unwrap();
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(hidden, hashset!(S("dog"), S("dog.race"), S("dog.race.bernese mountain")));
assert_eq!(facets, btreeset!("dog", "dog.race", "dog.race.bernese mountain"));
for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
let mut search = crate::Search::new(&rtxn, &index);
@ -1475,9 +1496,14 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, hashset!());
assert_eq!(facets, btreeset!());
// update the settings to test the sortable
index
@ -1501,9 +1527,14 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, hashset!(S("dog.race"), S("dog.race.bernese mountain")));
assert_eq!(facets, btreeset!("dog.race", "dog.race.bernese mountain"));
let mut search = crate::Search::new(&rtxn, &index);
search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
@ -1712,8 +1743,13 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
@ -1733,7 +1769,7 @@ mod tests {
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![7]);
};
let faceted_fields = hashset!(S("colour"));
let faceted_fields = vec![FilterableAttributesSettings::Field("colour".to_string())];
let index = TempIndex::new();
index.add_documents(content()).unwrap();
@ -1818,8 +1854,13 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
@ -1839,7 +1880,7 @@ mod tests {
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
};
let faceted_fields = hashset!(S("colour"));
let faceted_fields = vec![FilterableAttributesSettings::Field("colour".to_string())];
let index = TempIndex::new();
index.add_documents(content()).unwrap();
@ -1882,8 +1923,13 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue")));
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
});
assert_eq!(facets, btreeset!("tags", "tags.green", "tags.green.blue"));
let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap();
let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap();
@ -1902,7 +1948,7 @@ mod tests {
assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
};
let faceted_fields = hashset!(S("tags"));
let faceted_fields = vec![FilterableAttributesSettings::Field("tags".to_string())];
let index = TempIndex::new();
index.add_documents(content()).unwrap();
@ -2086,7 +2132,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("title") });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"title".to_string(),
)]);
})
.unwrap();
@ -2941,7 +2989,10 @@ mod tests {
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("docid"));
settings.set_filterable_fields(hashset! { S("label"), S("label2") });
settings.set_filterable_fields(vec![
FilterableAttributesSettings::Field("label".to_string()),
FilterableAttributesSettings::Field("label2".to_string()),
]);
})
.unwrap();
wtxn.commit().unwrap();
@ -3120,7 +3171,9 @@ mod tests {
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("id"));
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
settings.set_sortable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
})
.unwrap();

View File

@ -4,10 +4,10 @@ use heed::RoTxn;
use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
};
use super::extract::perm_json_p;
use super::vector_document::{
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
};
use crate::attribute_patterns::PatternMatch;
use crate::documents::FieldIdMapper;
use crate::vector::EmbeddingConfigs;
use crate::{DocumentId, Index, Result};
@ -173,7 +173,7 @@ impl<'doc> Update<'doc> {
/// Otherwise `false`.
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
&self,
fields: Option<&[&str]>,
selector: &mut impl FnMut(&str) -> PatternMatch,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
@ -185,7 +185,7 @@ impl<'doc> Update<'doc> {
for entry in self.updated().iter_top_level_fields() {
let (key, updated_value) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
if selector(key) == PatternMatch::NoMatch {
continue;
}
@ -229,7 +229,7 @@ impl<'doc> Update<'doc> {
for entry in current.iter_top_level_fields() {
let (key, _) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
if selector(key) == PatternMatch::NoMatch {
continue;
}
current_selected_field_count += 1;

View File

@ -5,12 +5,12 @@ use std::ops::DerefMut as _;
use bumpalo::collections::Vec as BVec;
use bumpalo::Bump;
use hashbrown::HashMap;
use heed::RoTxn;
use serde_json::Value;
use super::super::cache::BalancedCaches;
use super::facet_document::extract_document_facets;
use super::FacetKind;
use super::{match_faceted_field, FacetKind};
use crate::fields_ids_map::metadata::Metadata;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
@ -23,13 +23,17 @@ use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
use crate::{
DocumentId, FieldId, FilterableAttributesFeatures, FilterableAttributesSettings, Result,
MAX_FACET_VALUE_LENGTH,
};
pub struct FacetedExtractorData<'a, 'b> {
attributes_to_extract: &'a [&'a str],
sender: &'a FieldIdDocidFacetSender<'a, 'b>,
grenad_parameters: &'a GrenadParameters,
buckets: usize,
filterable_attributes: Vec<FilterableAttributesSettings>,
sortable_fields: HashSet<String>,
}
impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
@ -52,7 +56,8 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b>
let change = change?;
FacetedDocidsExtractor::extract_document_change(
context,
self.attributes_to_extract,
&self.filterable_attributes,
&self.sortable_fields,
change,
self.sender,
)?
@ -66,11 +71,12 @@ pub struct FacetedDocidsExtractor;
impl FacetedDocidsExtractor {
fn extract_document_change(
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
attributes_to_extract: &[&str],
filterable_attributes: &[FilterableAttributesSettings],
sortable_fields: &HashSet<String>,
document_change: DocumentChange,
sender: &FieldIdDocidFacetSender,
) -> Result<()> {
let index = &context.index;
let index = context.index;
let rtxn = &context.rtxn;
let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield();
let mut cached_sorter = context.data.borrow_mut_or_yield();
@ -78,11 +84,13 @@ impl FacetedDocidsExtractor {
let docid = document_change.docid();
let res = match document_change {
DocumentChange::Deletion(inner) => extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
&mut |fid, meta, depth, value| {
let features = meta.filterable_attributes_features(filterable_attributes);
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -91,6 +99,8 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_del,
docid,
fid,
meta,
features,
depth,
value,
)
@ -98,7 +108,9 @@ impl FacetedDocidsExtractor {
),
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
Some(attributes_to_extract),
&mut |field_name| {
match_faceted_field(field_name, filterable_attributes, sortable_fields)
},
rtxn,
index,
context.db_fields_ids_map,
@ -107,11 +119,13 @@ impl FacetedDocidsExtractor {
}
extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
&mut |fid, meta, depth, value| {
let features = meta.filterable_attributes_features(filterable_attributes);
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -120,6 +134,8 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_del,
docid,
fid,
meta,
features,
depth,
value,
)
@ -127,11 +143,13 @@ impl FacetedDocidsExtractor {
)?;
extract_document_facets(
attributes_to_extract,
inner.merged(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
&mut |fid, meta, depth, value| {
let features = meta.filterable_attributes_features(filterable_attributes);
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -140,6 +158,8 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_add,
docid,
fid,
meta,
features,
depth,
value,
)
@ -147,11 +167,13 @@ impl FacetedDocidsExtractor {
)
}
DocumentChange::Insertion(inner) => extract_document_facets(
attributes_to_extract,
inner.inserted(),
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
&mut |fid, meta, depth, value| {
let features = meta.filterable_attributes_features(filterable_attributes);
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -160,6 +182,8 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_add,
docid,
fid,
meta,
features,
depth,
value,
)
@ -180,9 +204,16 @@ impl FacetedDocidsExtractor {
facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind),
docid: DocumentId,
fid: FieldId,
meta: Metadata,
features: FilterableAttributesFeatures,
depth: perm_json_p::Depth,
value: &Value,
) -> Result<()> {
// if the field is not filterable, facet searchable or sortable, do nothing
if !(features.is_filterable() || features.is_facet_searchable() || meta.is_sortable()) {
return Ok(());
}
let mut buffer = BVec::new_in(doc_alloc);
// Exists
// key: fid
@ -246,7 +277,9 @@ impl FacetedDocidsExtractor {
}
// Null
// key: fid
Value::Null if depth == perm_json_p::Depth::OnBaseKey => {
Value::Null
if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_null() =>
{
buffer.clear();
buffer.push(FacetKind::Null as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
@ -254,19 +287,29 @@ impl FacetedDocidsExtractor {
}
// Empty
// key: fid
Value::Array(a) if a.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
Value::Array(a)
if a.is_empty()
&& depth == perm_json_p::Depth::OnBaseKey
&& features.is_filterable_empty() =>
{
buffer.clear();
buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
cache_fn(cached_sorter, &buffer, docid)
}
Value::String(_) if depth == perm_json_p::Depth::OnBaseKey => {
Value::String(_)
if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_empty() =>
{
buffer.clear();
buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
cache_fn(cached_sorter, &buffer, docid)
}
Value::Object(o) if o.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
Value::Object(o)
if o.is_empty()
&& depth == perm_json_p::Depth::OnBaseKey
&& features.is_filterable_empty() =>
{
buffer.clear();
buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
@ -276,10 +319,6 @@ impl FacetedDocidsExtractor {
_ => Ok(()),
}
}
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
index.user_defined_faceted_fields(rtxn)
}
}
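
For illustration, the new guard at the top of `facet_fn_with_options` is what keeps this extractor from emitting facet entries for fields that are neither filterable, facet searchable, nor sortable, while the `is_filterable_null()` and `is_filterable_empty()` checks further gate the Null and Empty facet kinds. A minimal standalone sketch of that decision, using hypothetical stand-in types rather than the real `Metadata` and `FilterableAttributesFeatures`:

// Hypothetical mirrors of the real feature flags, for illustration only.
struct Features {
    filterable: bool,
    facet_searchable: bool,
    filterable_null: bool,
    filterable_empty: bool,
}
struct Meta {
    sortable: bool,
}

// A facet entry is produced at all only for filterable, facet-searchable or sortable fields.
fn should_extract_facet(features: &Features, meta: &Meta) -> bool {
    features.filterable || features.facet_searchable || meta.sortable
}

// Null and Empty facet kinds are additionally gated by their own feature flags.
fn should_index_null(features: &Features) -> bool {
    features.filterable_null
}
fn should_index_empty(features: &Features) -> bool {
    features.filterable_empty
}

fn main() {
    let features = Features {
        filterable: false,
        facet_searchable: true,
        filterable_null: false,
        filterable_empty: false,
    };
    let meta = Meta { sortable: false };
    assert!(should_extract_facet(&features, &meta)); // facet searchable alone is enough
    assert!(!should_index_null(&features)); // but Null entries stay disabled
    assert!(!should_index_empty(&features)); // and so do Empty entries
}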
struct DelAddFacetValue<'doc> {
@ -385,9 +424,8 @@ impl FacetedDocidsExtractor {
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
let attributes_to_extract: Vec<_> =
attributes_to_extract.iter().map(|s| s.as_ref()).collect();
let filterable_attributes = index.filterable_fields(&rtxn)?;
let sortable_fields = index.sortable_fields(&rtxn)?;
let datastore = ThreadLocal::new();
{
@ -396,10 +434,11 @@ impl FacetedDocidsExtractor {
let _entered = span.enter();
let extractor = FacetedExtractorData {
attributes_to_extract: &attributes_to_extract,
grenad_parameters: indexing_context.grenad_parameters,
buckets: rayon::current_num_threads(),
sender,
filterable_attributes,
sortable_fields,
};
extract(
document_changes,

View File

@ -1,46 +1,70 @@
use std::collections::HashSet;
use serde_json::Value;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::attribute_patterns::PatternMatch;
use crate::fields_ids_map::metadata::Metadata;
use crate::update::new::document::Document;
use crate::update::new::extract::geo::extract_geo_coordinates;
use crate::update::new::extract::perm_json_p;
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
use crate::{
FieldId, FilterableAttributesSettings, GlobalFieldsIdsMap, InternalError, Result, UserError,
};
use super::match_faceted_field;
pub fn extract_document_facets<'doc>(
attributes_to_extract: &[&str],
document: impl Document<'doc>,
external_document_id: &str,
field_id_map: &mut GlobalFieldsIdsMap,
facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>,
filterable_attributes: &[FilterableAttributesSettings],
sortable_fields: &HashSet<String>,
facet_fn: &mut impl FnMut(FieldId, Metadata, perm_json_p::Depth, &Value) -> Result<()>,
) -> Result<()> {
for res in document.iter_top_level_fields() {
let (field_name, value) = res?;
let mut tokenize_field =
|name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map
.id_or_insert(name)
{
Some(field_id) => facet_fn(field_id, depth, value),
None => Err(UserError::AttributeLimitReached.into()),
// return the match result for the given field name.
let match_field = |field_name: &str| -> PatternMatch {
match_faceted_field(field_name, filterable_attributes, sortable_fields)
};
// if the current field is searchable or contains a searchable attribute
let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
if selection != perm_json_p::Selection::Skip {
// extract the field if it is faceted (facet searchable, filterable, sortable)
let mut extract_field = |name: &str, depth: perm_json_p::Depth, value: &Value| -> Result<()> {
match field_id_map.id_with_metadata_or_insert(name) {
Some((field_id, meta)) => {
facet_fn(field_id, meta, depth, value)?;
Ok(())
}
None => Err(UserError::AttributeLimitReached.into()),
}
};
for res in document.iter_top_level_fields() {
let (field_name, value) = res?;
let selection = match_field(field_name);
// extract the field if it matches a pattern and if it is faceted (facet searchable, filterable, sortable)
let mut match_and_extract = |name: &str, depth: perm_json_p::Depth, value: &Value| {
let selection = match_field(name);
if selection == PatternMatch::Match {
extract_field(name, depth, value)?;
}
Ok(selection)
};
if selection != PatternMatch::NoMatch {
// parse json.
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
Value::Object(object) => {
perm_json_p::seek_leaf_values_in_object(
&object,
Some(attributes_to_extract),
&[], // skip no attributes
field_name,
perm_json_p::Depth::OnBaseKey,
&mut tokenize_field,
&mut match_and_extract,
)?;
if selection == perm_json_p::Selection::Select {
tokenize_field(
if selection == PatternMatch::Match {
extract_field(
field_name,
perm_json_p::Depth::OnBaseKey,
&Value::Object(object),
@ -50,36 +74,34 @@ pub fn extract_document_facets<'doc>(
Value::Array(array) => {
perm_json_p::seek_leaf_values_in_array(
&array,
Some(attributes_to_extract),
&[], // skip no attributes
field_name,
perm_json_p::Depth::OnBaseKey,
&mut tokenize_field,
&mut match_and_extract,
)?;
if selection == perm_json_p::Selection::Select {
tokenize_field(
if selection == PatternMatch::Match {
extract_field(
field_name,
perm_json_p::Depth::OnBaseKey,
&Value::Array(array),
)?;
}
}
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
value => extract_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
}
}
}
if attributes_to_extract.contains(&RESERVED_GEO_FIELD_NAME) {
if filterable_attributes.iter().any(|rule| rule.has_geo()) {
if let Some(geo_value) = document.geo_field()? {
if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? {
let (lat_fid, lng_fid) = field_id_map
.id_or_insert("_geo.lat")
.zip(field_id_map.id_or_insert("_geo.lng"))
let ((lat_fid, lat_meta), (lng_fid, lng_meta)) = field_id_map
.id_with_metadata_or_insert("_geo.lat")
.zip(field_id_map.id_with_metadata_or_insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?;
facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?;
facet_fn(lat_fid, lat_meta, perm_json_p::Depth::OnBaseKey, &lat.into())?;
facet_fn(lng_fid, lng_meta, perm_json_p::Depth::OnBaseKey, &lng.into())?;
}
}
}
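
To make the callback contract above concrete: the per-field closure answers with a `PatternMatch`, where `Match` extracts the value, `Parent` only authorises descending into nested keys, and `NoMatch` prunes the branch. A small self-contained sketch of that flow (the matcher, the walker, and the `PatternMatch` enum below are stand-ins, not the real milli API):

use serde_json::{json, Value};

// Stand-in for crate::attribute_patterns::PatternMatch.
#[derive(Clone, Copy, PartialEq)]
enum PatternMatch {
    Match,
    Parent,
    NoMatch,
}

fn walk(key: &str, value: &Value, matcher: &impl Fn(&str) -> PatternMatch, out: &mut Vec<String>) {
    let selection = matcher(key);
    if selection == PatternMatch::NoMatch {
        return; // prune: neither this field nor anything nested below it is faceted
    }
    if selection == PatternMatch::Match {
        out.push(key.to_string()); // extract this field
    }
    // On Match or Parent, keep descending so nested faceted fields are still reached.
    if let Value::Object(object) = value {
        for (k, v) in object {
            walk(&format!("{key}.{k}"), v, matcher, out);
        }
    }
}

fn main() {
    let document = json!({ "address": { "city": "Lyon", "zip": "69000" }, "title": "home" });
    // Hypothetical configuration: only `address.city` is faceted.
    let matcher = |name: &str| match name {
        "address.city" => PatternMatch::Match,
        "address" => PatternMatch::Parent,
        _ => PatternMatch::NoMatch,
    };
    let mut extracted = Vec::new();
    for (key, value) in document.as_object().unwrap() {
        walk(key, value, &matcher, &mut extracted);
    }
    assert_eq!(extracted, vec!["address.city"]); // "title" and "address.zip" are skipped
}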

View File

@ -1,8 +1,16 @@
mod extract_facets;
mod facet_document;
use std::collections::HashSet;
pub use extract_facets::FacetedDocidsExtractor;
use crate::{
attribute_patterns::{match_field_legacy, PatternMatch},
filterable_fields::match_pattern_by_features,
FilterableAttributesSettings,
};
#[repr(u8)]
#[derive(Debug, Clone, Copy)]
pub enum FacetKind {
@ -31,3 +39,30 @@ impl FacetKind {
(FacetKind::from(key[0]), &key[1..])
}
}
pub fn match_faceted_field(
field_name: &str,
filterable_fields: &[FilterableAttributesSettings],
sortable_fields: &HashSet<String>,
) -> PatternMatch {
// Check if the field matches any filterable or facet searchable field
let mut selection = match_pattern_by_features(field_name, &filterable_fields, &|features| {
features.is_facet_searchable() || features.is_filterable()
});
// If the field matches the pattern, return Match
if selection == PatternMatch::Match {
return selection;
}
// Otherwise, check if the field matches any sortable field
for pattern in sortable_fields {
match match_field_legacy(&pattern, field_name) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => selection = PatternMatch::Parent,
PatternMatch::NoMatch => (),
}
}
selection
}
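
The sortable branch above goes through `match_field_legacy`, which in turn relies on `is_faceted_by` to decide whether a field equals the sortable attribute, is nested under it, or is one of its ancestors. A hedged sketch of what that prefix check is assumed to do (stand-in code, not the real helper):

// Assumed semantics of `is_faceted_by`: `field` is "faceted by" `facet` when it is
// the same dotted path or a nested path directly under it.
fn is_faceted_by(field: &str, facet: &str) -> bool {
    field == facet
        || (field.starts_with(facet) && field.as_bytes().get(facet.len()) == Some(&b'.'))
}

fn main() {
    assert!(is_faceted_by("address.city", "address")); // nested field
    assert!(is_faceted_by("address", "address")); // exact match
    assert!(!is_faceted_by("address", "address.city")); // ancestor, not nested
    assert!(!is_faceted_by("addressbook", "address")); // shares a prefix but not a path segment
}

Under that assumption, a sortable entry such as `address.city` yields `Parent` for `address` (so the traversal keeps descending) and `Match` for `address.city` and anything nested below it.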

View File

@ -35,7 +35,7 @@ pub trait DocidsExtractor {
pub mod perm_json_p {
use serde_json::{Map, Value};
use crate::Result;
use crate::{attribute_patterns::PatternMatch, Result};
const SPLIT_SYMBOL: char = '.';
/// Returns `true` if the `selector` matches the `key`.
@ -68,11 +68,9 @@ pub mod perm_json_p {
pub fn seek_leaf_values_in_object(
value: &Map<String, Value>,
selectors: Option<&[&str]>,
skip_selectors: &[&str],
base_key: &str,
base_depth: Depth,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>,
) -> Result<()> {
if value.is_empty() {
seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?;
@ -85,40 +83,16 @@ pub mod perm_json_p {
format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
};
// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
// so we check the contained_in on both side
let selection = select_field(&base_key, selectors, skip_selectors);
if selection != Selection::Skip {
let selection = seeker(&base_key, Depth::OnBaseKey, value)?;
if selection != PatternMatch::NoMatch {
match value {
Value::Object(object) => {
if selection == Selection::Select {
seeker(&base_key, Depth::OnBaseKey, value)?;
}
seek_leaf_values_in_object(
object,
selectors,
skip_selectors,
&base_key,
Depth::OnBaseKey,
seeker,
)
seek_leaf_values_in_object(object, &base_key, Depth::OnBaseKey, seeker)
}
Value::Array(array) => {
if selection == Selection::Select {
seeker(&base_key, Depth::OnBaseKey, value)?;
seek_leaf_values_in_array(array, &base_key, Depth::OnBaseKey, seeker)
}
seek_leaf_values_in_array(
array,
selectors,
skip_selectors,
&base_key,
Depth::OnBaseKey,
seeker,
)
}
value => seeker(&base_key, Depth::OnBaseKey, value),
_ => Ok(()),
}?;
}
}
@ -128,11 +102,9 @@ pub mod perm_json_p {
pub fn seek_leaf_values_in_array(
values: &[Value],
selectors: Option<&[&str]>,
skip_selectors: &[&str],
base_key: &str,
base_depth: Depth,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>,
) -> Result<()> {
if values.is_empty() {
seeker(base_key, base_depth, &Value::Array(vec![]))?;
@ -140,61 +112,16 @@ pub mod perm_json_p {
for value in values {
match value {
Value::Object(object) => seek_leaf_values_in_object(
object,
selectors,
skip_selectors,
base_key,
Depth::InsideArray,
seeker,
),
Value::Array(array) => seek_leaf_values_in_array(
array,
selectors,
skip_selectors,
base_key,
Depth::InsideArray,
seeker,
),
value => seeker(base_key, Depth::InsideArray, value),
Value::Object(object) => {
seek_leaf_values_in_object(object, base_key, Depth::InsideArray, seeker)
}
Value::Array(array) => {
seek_leaf_values_in_array(array, base_key, Depth::InsideArray, seeker)
}
value => seeker(base_key, Depth::InsideArray, value).map(|_| ()),
}?;
}
Ok(())
}
pub fn select_field(
field_name: &str,
selectors: Option<&[&str]>,
skip_selectors: &[&str],
) -> Selection {
if skip_selectors.iter().any(|skip_selector| {
contained_in(skip_selector, field_name) || contained_in(field_name, skip_selector)
}) {
Selection::Skip
} else if let Some(selectors) = selectors {
let mut selection = Selection::Skip;
for selector in selectors {
if contained_in(field_name, selector) {
selection = Selection::Select;
break;
} else if contained_in(selector, field_name) {
selection = Selection::Parent;
}
}
selection
} else {
Selection::Select
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Selection {
/// The field is a parent of a nested field that must be selected
Parent,
/// The field must be selected
Select,
/// The field must be skipped
Skip,
}
}
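
With `select_field` and the `skip_selectors` parameter gone, callers now express the whole selection policy in the seeker closure itself: returning `PatternMatch::NoMatch` prunes a subtree, any other value lets the traversal continue. A hedged usage sketch of the new signature; the import paths and the `price` filter below are hypothetical, only the call shape follows the code above:

use serde_json::{Map, Value};
// Hypothetical paths, shown only to illustrate the call shape.
use milli::attribute_patterns::PatternMatch;
use milli::update::new::extract::perm_json_p::{seek_leaf_values_in_object, Depth};

// Collect every scalar leaf stored under a key starting with "price".
fn collect_price_leaves(object: &Map<String, Value>) -> milli::Result<Vec<String>> {
    let mut leaves = Vec::new();
    seek_leaf_values_in_object(object, "", Depth::OnBaseKey, &mut |key, _depth, value| {
        if key.starts_with("price") {
            if !value.is_object() && !value.is_array() {
                leaves.push(format!("{key} = {value}"));
            }
            Ok(PatternMatch::Match) // keep descending under matching keys
        } else {
            Ok(PatternMatch::NoMatch) // prune everything else
        }
    })?;
    Ok(leaves)
}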

View File

@ -9,6 +9,7 @@ use heed::{BytesDecode, BytesEncode, RoTxn, RwTxn};
use super::fst_merger_builder::FstMergerBuilder;
use super::KvReaderDelAdd;
use crate::attribute_patterns::PatternMatch;
use crate::heed_codec::facet::FacetGroupKey;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::{create_sorter, MergeDeladdBtreesetString};
@ -92,7 +93,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
let locales = self
.localized_attributes_rules
.iter()
.find(|rule| rule.match_str(field_name))
.find(|rule| rule.match_str(field_name) == PatternMatch::Match)
.map(|rule| rule.locales.clone());
e.insert(locales);
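
For reference, the locale resolution above is a first-match lookup: the first localized attributes rule whose patterns report `PatternMatch::Match` for the field provides the locales, and a field matched by no rule gets `None`. A minimal stand-in sketch (hypothetical `Rule` type, not the real `LocalizedAttributesRule`):

// Stand-ins for PatternMatch and the localized rules, for illustration only.
#[derive(Clone, Copy, PartialEq)]
enum PatternMatch {
    Match,
    NoMatch,
}

struct Rule {
    fields: Vec<String>,
    locales: Vec<String>,
}

impl Rule {
    // Simplified matcher: exact field names only, no wildcard patterns.
    fn match_str(&self, field: &str) -> PatternMatch {
        if self.fields.iter().any(|f| f == field) {
            PatternMatch::Match
        } else {
            PatternMatch::NoMatch
        }
    }
}

// First rule that strictly matches wins; no matching rule means no locales.
fn locales_for<'a>(rules: &'a [Rule], field: &str) -> Option<&'a [String]> {
    rules
        .iter()
        .find(|rule| rule.match_str(field) == PatternMatch::Match)
        .map(|rule| rule.locales.as_slice())
}

fn main() {
    let rules = vec![Rule { fields: vec!["title_ja".into()], locales: vec!["jpn".into()] }];
    assert_eq!(locales_for(&rules, "title_ja"), Some(&["jpn".to_string()][..]));
    assert_eq!(locales_for(&rules, "overview"), None);
}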

View File

@ -31,7 +31,10 @@ use crate::vector::settings::{
WriteBackToDocuments,
};
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result};
use crate::{
FieldId, FieldsIdsMap, FilterableAttributesSettings, Index, LocalizedAttributesRule,
LocalizedFieldIds, Result,
};
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum Setting<T> {
@ -155,7 +158,7 @@ pub struct Settings<'a, 't, 'i> {
searchable_fields: Setting<Vec<String>>,
displayed_fields: Setting<Vec<String>>,
filterable_fields: Setting<HashSet<String>>,
filterable_fields: Setting<Vec<FilterableAttributesSettings>>,
sortable_fields: Setting<HashSet<String>>,
criteria: Setting<Vec<Criterion>>,
stop_words: Setting<BTreeSet<String>>,
@ -241,8 +244,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.filterable_fields = Setting::Reset;
}
pub fn set_filterable_fields(&mut self, names: HashSet<String>) {
self.filterable_fields = Setting::Set(names);
pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesSettings>) {
self.filterable_fields = Setting::Set(rules);
}
pub fn set_sortable_fields(&mut self, names: HashSet<String>) {
@ -760,11 +763,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
fn update_filterable(&mut self) -> Result<()> {
match self.filterable_fields {
Setting::Set(ref fields) => {
let mut new_facets = HashSet::new();
for name in fields {
new_facets.insert(name.clone());
}
self.index.put_filterable_fields(self.wtxn, &new_facets)?;
self.index.put_filterable_fields(self.wtxn, fields)?;
}
Setting::Reset => {
self.index.delete_filterable_fields(self.wtxn)?;
@ -1895,7 +1894,7 @@ pub fn validate_embedding_settings(
mod tests {
use big_s::S;
use heed::types::Bytes;
use maplit::{btreemap, btreeset, hashset};
use maplit::{btreemap, btreeset};
use meili_snap::snapshot;
use super::*;
@ -2105,7 +2104,9 @@ mod tests {
// Set the filterable fields to be the age.
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("age") });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"age".to_string(),
)]);
})
.unwrap();
@ -2121,7 +2122,7 @@ mod tests {
// Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap();
let fields_ids = index.filterable_fields(&rtxn).unwrap();
assert_eq!(fields_ids, hashset! { S("age") });
assert_eq!(fields_ids, vec![FilterableAttributesSettings::Field("age".to_string(),)]);
// Only count the field_id 0 and level 0 facet values.
// TODO we must support typed CSVs for numbers to be understood.
let fidmap = index.fields_ids_map(&rtxn).unwrap();
@ -2163,14 +2164,23 @@ mod tests {
// Set the filterable fields to be the age and the name.
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("age"), S("name") });
settings.set_filterable_fields(vec![
FilterableAttributesSettings::Field("age".to_string()),
FilterableAttributesSettings::Field("name".to_string()),
]);
})
.unwrap();
// Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap();
let fields_ids = index.filterable_fields(&rtxn).unwrap();
assert_eq!(fields_ids, hashset! { S("age"), S("name") });
assert_eq!(
fields_ids,
vec![
FilterableAttributesSettings::Field("age".to_string()),
FilterableAttributesSettings::Field("name".to_string()),
]
);
let rtxn = index.read_txn().unwrap();
// Only count the field_id 2 and level 0 facet values.
@ -2195,14 +2205,16 @@ mod tests {
// Remove the age from the filterable fields.
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("name") });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"name".to_string(),
)]);
})
.unwrap();
// Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap();
let fields_ids = index.filterable_fields(&rtxn).unwrap();
assert_eq!(fields_ids, hashset! { S("name") });
assert_eq!(fields_ids, vec![FilterableAttributesSettings::Field("name".to_string())]);
let rtxn = index.read_txn().unwrap();
// Only count the field_id 2 and level 0 facet values.
@ -2532,7 +2544,10 @@ mod tests {
index
.update_settings(|settings| {
settings.set_displayed_fields(vec!["hello".to_string()]);
settings.set_filterable_fields(hashset! { S("age"), S("toto") });
settings.set_filterable_fields(vec![
FilterableAttributesSettings::Field("age".to_string()),
FilterableAttributesSettings::Field("toto".to_string()),
]);
settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
})
.unwrap();
@ -2649,7 +2664,9 @@ mod tests {
// Set the genres setting
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("genres") });
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
"genres".to_string(),
)]);
})
.unwrap();

View File

@ -1,13 +1,12 @@
use big_s::S;
use bumpalo::Bump;
use heed::EnvOpenOptions;
use maplit::hashset;
use milli::documents::mmap_from_objects;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{FacetDistribution, Index, Object, OrderBy};
use milli::{FacetDistribution, FilterableAttributesSettings, Index, Object, OrderBy};
use serde_json::{from_value, json};
#[test]
@ -21,10 +20,10 @@ fn test_facet_distribution_with_no_facet_values() {
let config = IndexerConfig::default();
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_filterable_fields(hashset! {
S("genres"),
S("tags"),
});
builder.set_filterable_fields(vec![
FilterableAttributesSettings::Field(S("genres")),
FilterableAttributesSettings::Field(S("tags")),
]);
builder.execute(|_| (), || false).unwrap();
wtxn.commit().unwrap();

View File

@ -11,7 +11,10 @@ use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{AscDesc, Criterion, DocumentId, Index, Member, TermsMatchingStrategy};
use milli::{
AscDesc, Criterion, DocumentId, FilterableAttributesSettings, Index, Member,
TermsMatchingStrategy,
};
use serde::{Deserialize, Deserializer};
use slice_group_by::GroupBy;
@ -42,14 +45,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(criteria.to_vec());
builder.set_filterable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),
S("_geo"),
S("opt1"),
S("opt1.opt2"),
S("tag_in")
});
builder.set_filterable_fields(vec![
FilterableAttributesSettings::Field(S("tag")),
FilterableAttributesSettings::Field(S("asc_desc_rank")),
FilterableAttributesSettings::Field(S("_geo")),
FilterableAttributesSettings::Field(S("opt1")),
FilterableAttributesSettings::Field(S("opt1.opt2")),
FilterableAttributesSettings::Field(S("tag_in")),
]);
builder.set_sortable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),