mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-02-04 17:43:28 +01:00
WIP
This commit is contained in:
parent
eaf3be1702
commit
ed882f26b1
@ -1,5 +1,5 @@
|
||||
use deserr::Deserr;
|
||||
use milli::LocalizedAttributesRule;
|
||||
use milli::{AttributePatterns, LocalizedAttributesRule};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
@ -7,7 +7,7 @@ use utoipa::ToSchema;
|
||||
#[deserr(rename_all = camelCase)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct LocalizedAttributesRuleView {
|
||||
pub attribute_patterns: Vec<String>,
|
||||
pub attribute_patterns: AttributePatterns,
|
||||
pub locales: Vec<Locale>,
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,8 @@ use deserr::Deserr;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::is_faceted_by;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Deserr, ToSchema)]
|
||||
#[repr(transparent)]
|
||||
#[serde(transparent)]
|
||||
@ -17,23 +19,69 @@ impl From<Vec<String>> for AttributePatterns {
|
||||
}
|
||||
|
||||
impl AttributePatterns {
|
||||
pub fn match_str(&self, str: &str) -> bool {
|
||||
self.patterns.iter().any(|pattern| match_pattern(pattern, str))
|
||||
pub fn match_str(&self, str: &str) -> PatternMatch {
|
||||
let mut pattern_match = PatternMatch::NoMatch;
|
||||
for pattern in &self.patterns {
|
||||
match match_pattern(pattern, str) {
|
||||
PatternMatch::Match => return PatternMatch::Match,
|
||||
PatternMatch::Parent => pattern_match = PatternMatch::Parent,
|
||||
PatternMatch::NoMatch => {}
|
||||
}
|
||||
}
|
||||
pattern_match
|
||||
}
|
||||
}
|
||||
|
||||
fn match_pattern(pattern: &str, str: &str) -> bool {
|
||||
fn match_pattern(pattern: &str, str: &str) -> PatternMatch {
|
||||
if pattern == "*" {
|
||||
true
|
||||
return PatternMatch::Match;
|
||||
} else if pattern.starts_with('*') && pattern.ends_with('*') {
|
||||
str.contains(&pattern[1..pattern.len() - 1])
|
||||
if str.contains(&pattern[1..pattern.len() - 1]) {
|
||||
return PatternMatch::Match;
|
||||
}
|
||||
} else if let Some(pattern) = pattern.strip_prefix('*') {
|
||||
str.ends_with(pattern)
|
||||
if str.ends_with(pattern) {
|
||||
return PatternMatch::Match;
|
||||
}
|
||||
} else if let Some(pattern) = pattern.strip_suffix('*') {
|
||||
str.starts_with(pattern)
|
||||
if str.starts_with(pattern) {
|
||||
return PatternMatch::Match;
|
||||
}
|
||||
} else {
|
||||
pattern == str
|
||||
if pattern == str {
|
||||
return PatternMatch::Match;
|
||||
}
|
||||
}
|
||||
|
||||
// If the field is a parent field of the pattern, return Parent
|
||||
if is_faceted_by(pattern, str) {
|
||||
PatternMatch::Parent
|
||||
} else {
|
||||
PatternMatch::NoMatch
|
||||
}
|
||||
}
|
||||
|
||||
pub fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch {
|
||||
if is_faceted_by(field, pattern) {
|
||||
// If the field matches the pattern or is a nested field of the pattern, return Match (legacy behavior)
|
||||
PatternMatch::Match
|
||||
} else if is_faceted_by(pattern, field) {
|
||||
// If the field is a parent field of the pattern, return Parent
|
||||
PatternMatch::Parent
|
||||
} else {
|
||||
// If the field does not match the pattern and is not a parent of a nested field that matches the pattern, return NoMatch
|
||||
PatternMatch::NoMatch
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum PatternMatch {
|
||||
/// The field is a parent of the of a nested field that matches the pattern
|
||||
Parent,
|
||||
/// The field matches the pattern
|
||||
Match,
|
||||
/// The field does not match the pattern
|
||||
NoMatch,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@ -42,17 +90,17 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_match_pattern() {
|
||||
assert!(match_pattern("*", "test"));
|
||||
assert!(match_pattern("test*", "test"));
|
||||
assert!(match_pattern("test*", "testa"));
|
||||
assert!(match_pattern("*test", "test"));
|
||||
assert!(match_pattern("*test", "atest"));
|
||||
assert!(match_pattern("*test*", "test"));
|
||||
assert!(match_pattern("*test*", "atesta"));
|
||||
assert!(match_pattern("*test*", "atest"));
|
||||
assert!(match_pattern("*test*", "testa"));
|
||||
assert!(!match_pattern("test*test", "test"));
|
||||
assert!(!match_pattern("*test", "testa"));
|
||||
assert!(!match_pattern("test*", "atest"));
|
||||
assert_eq!(match_pattern("*", "test"), PatternMatch::Match);
|
||||
assert_eq!(match_pattern("test*", "test"), PatternMatch::Match);
|
||||
assert_eq!(match_pattern("test*", "testa"), PatternMatch::Match);
|
||||
assert_eq!(match_pattern("*test", "test"), PatternMatch::Match);
|
||||
assert_eq!(match_pattern("*test", "atest"), PatternMatch::Match);
|
||||
assert_eq!(match_pattern("*test*", "test"), PatternMatch::Match);
|
||||
assert_eq!(match_pattern("*test*", "atesta"), PatternMatch::Match);
|
||||
assert_eq!(match_pattern("*test*", "atest"), PatternMatch::Match);
|
||||
assert_eq!(match_pattern("*test*", "testa"), PatternMatch::Match);
|
||||
assert_eq!(match_pattern("test*test", "test"), PatternMatch::NoMatch);
|
||||
assert_eq!(match_pattern("*test", "testa"), PatternMatch::NoMatch);
|
||||
assert_eq!(match_pattern("test*", "atest"), PatternMatch::NoMatch);
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,11 @@ use charabia::Language;
|
||||
use heed::RoTxn;
|
||||
|
||||
use super::FieldsIdsMap;
|
||||
use crate::{FieldId, FilterableAttributesSettings, Index, LocalizedAttributesRule, Result};
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::{
|
||||
FieldId, FilterableAttributesFeatures, FilterableAttributesSettings, Index,
|
||||
LocalizedAttributesRule, Result,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct Metadata {
|
||||
@ -106,6 +110,34 @@ impl Metadata {
|
||||
let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap();
|
||||
Some(rule.locales())
|
||||
}
|
||||
|
||||
pub fn filterable_attributes<'rules>(
|
||||
&self,
|
||||
rules: &'rules [FilterableAttributesSettings],
|
||||
) -> Option<&'rules FilterableAttributesSettings> {
|
||||
let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get();
|
||||
// - 1: `filterable_attributes_rule_id` is NonZero
|
||||
let rule = rules.get((filterable_attributes_rule_id - 1) as usize).unwrap();
|
||||
Some(rule)
|
||||
}
|
||||
|
||||
pub fn filterable_attributes_features(
|
||||
&self,
|
||||
rules: &[FilterableAttributesSettings],
|
||||
) -> FilterableAttributesFeatures {
|
||||
self.filterable_attributes(rules)
|
||||
.map(|rule| rule.features())
|
||||
// if there is no filterable attributes rule, return no features
|
||||
.unwrap_or_else(FilterableAttributesFeatures::no_features)
|
||||
}
|
||||
|
||||
pub fn is_sortable(&self) -> bool {
|
||||
self.sortable
|
||||
}
|
||||
|
||||
pub fn is_searchable(&self) -> bool {
|
||||
self.searchable
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@ -158,19 +190,14 @@ impl MetadataBuilder {
|
||||
.localized_attributes
|
||||
.iter()
|
||||
.flat_map(|v| v.iter())
|
||||
.position(|rule| rule.match_str(field))
|
||||
.position(|rule| rule.match_str(field) == PatternMatch::Match)
|
||||
// saturating_add(1): make `id` `NonZero`
|
||||
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
|
||||
|
||||
let filterable_attributes_rule_id = self
|
||||
.filterable_attributes
|
||||
.iter()
|
||||
.position(|attribute| match attribute {
|
||||
FilterableAttributesSettings::Field(field_name) => field_name == field,
|
||||
FilterableAttributesSettings::Pattern(patterns) => {
|
||||
patterns.patterns.match_str(field)
|
||||
}
|
||||
})
|
||||
.position(|attribute| attribute.match_str(field) == PatternMatch::Match)
|
||||
// saturating_add(1): make `id` `NonZero`
|
||||
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
|
||||
|
||||
|
@ -1,10 +1,12 @@
|
||||
use deserr::{DeserializeError, Deserr, ValuePointerRef};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::{
|
||||
constants::RESERVED_GEO_FIELD_NAME, is_faceted_by, AttributePatterns, FieldId, FieldsIdsMap,
|
||||
attribute_patterns::{match_field_legacy, PatternMatch},
|
||||
constants::RESERVED_GEO_FIELD_NAME,
|
||||
AttributePatterns, FieldId, FieldsIdsMap,
|
||||
};
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)]
|
||||
@ -15,17 +17,25 @@ pub enum FilterableAttributesSettings {
|
||||
}
|
||||
|
||||
impl FilterableAttributesSettings {
|
||||
pub fn match_str(&self, field: &str) -> bool {
|
||||
pub fn match_str(&self, field: &str) -> PatternMatch {
|
||||
match self {
|
||||
FilterableAttributesSettings::Field(field_name) => is_faceted_by(field, field_name),
|
||||
FilterableAttributesSettings::Pattern(patterns) => patterns.patterns.match_str(field),
|
||||
FilterableAttributesSettings::Field(pattern) => match_field_legacy(pattern, field),
|
||||
FilterableAttributesSettings::Pattern(patterns) => patterns.match_str(field),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_geo(&self) -> bool {
|
||||
/// TODO: This is a temporary solution to check if the geo field is activated.
|
||||
matches!(self, FilterableAttributesSettings::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME)
|
||||
}
|
||||
|
||||
pub fn features(&self) -> FilterableAttributesFeatures {
|
||||
match self {
|
||||
FilterableAttributesSettings::Field(_) => {
|
||||
FilterableAttributesFeatures::legacy_default()
|
||||
}
|
||||
FilterableAttributesSettings::Pattern(patterns) => patterns.features(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)]
|
||||
@ -36,12 +46,62 @@ pub struct FilterableAttributesPatterns {
|
||||
pub features: Option<FilterableAttributesFeatures>,
|
||||
}
|
||||
|
||||
impl FilterableAttributesPatterns {
|
||||
pub fn match_str(&self, field: &str) -> PatternMatch {
|
||||
self.patterns.match_str(field)
|
||||
}
|
||||
|
||||
pub fn features(&self) -> FilterableAttributesFeatures {
|
||||
self.features.clone().unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)]
|
||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct FilterableAttributesFeatures {
|
||||
facet_search: Option<String>,
|
||||
filter: Option<String>,
|
||||
facet_search: bool,
|
||||
filter: FilterFeature,
|
||||
}
|
||||
|
||||
impl Default for FilterableAttributesFeatures {
|
||||
fn default() -> Self {
|
||||
Self { facet_search: false, filter: FilterFeature::Equal }
|
||||
}
|
||||
}
|
||||
|
||||
impl FilterableAttributesFeatures {
|
||||
pub fn legacy_default() -> Self {
|
||||
Self { facet_search: true, filter: FilterFeature::Order }
|
||||
}
|
||||
|
||||
pub fn no_features() -> Self {
|
||||
Self { facet_search: false, filter: FilterFeature::Disabled }
|
||||
}
|
||||
|
||||
pub fn is_filterable(&self) -> bool {
|
||||
self.filter != FilterFeature::Disabled
|
||||
}
|
||||
|
||||
/// Check if `IS NULL` is allowed
|
||||
pub fn is_filterable_null(&self) -> bool {
|
||||
self.filter != FilterFeature::Disabled
|
||||
}
|
||||
|
||||
/// Check if `IS EMPTY` is allowed
|
||||
pub fn is_filterable_empty(&self) -> bool {
|
||||
self.filter != FilterFeature::Disabled
|
||||
}
|
||||
|
||||
/// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed
|
||||
pub fn is_filterable_order(&self) -> bool {
|
||||
self.filter == FilterFeature::Order
|
||||
}
|
||||
|
||||
/// Check if the facet search is allowed
|
||||
pub fn is_facet_searchable(&self) -> bool {
|
||||
self.facet_search
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: DeserializeError> Deserr<E> for FilterableAttributesSettings {
|
||||
@ -59,6 +119,13 @@ impl<E: DeserializeError> Deserr<E> for FilterableAttributesSettings {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)]
|
||||
pub enum FilterFeature {
|
||||
Disabled,
|
||||
Equal,
|
||||
Order,
|
||||
}
|
||||
|
||||
pub fn matching_field_ids(
|
||||
filterable_attributes: &[FilterableAttributesSettings],
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
@ -66,10 +133,77 @@ pub fn matching_field_ids(
|
||||
let mut result = HashSet::new();
|
||||
for (field_id, field_name) in fields_ids_map.iter() {
|
||||
for filterable_attribute in filterable_attributes {
|
||||
if filterable_attribute.match_str(field_name) {
|
||||
if filterable_attribute.match_str(field_name) == PatternMatch::Match {
|
||||
result.insert(field_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn matching_field_names<'fim>(
|
||||
filterable_attributes: &[FilterableAttributesSettings],
|
||||
fields_ids_map: &'fim FieldsIdsMap,
|
||||
) -> BTreeSet<&'fim str> {
|
||||
filtered_matching_field_names(filterable_attributes, fields_ids_map, &|_| true)
|
||||
}
|
||||
|
||||
pub fn filtered_matching_field_names<'fim>(
|
||||
filterable_attributes: &[FilterableAttributesSettings],
|
||||
fields_ids_map: &'fim FieldsIdsMap,
|
||||
filter: &impl Fn(&FilterableAttributesFeatures) -> bool,
|
||||
) -> BTreeSet<&'fim str> {
|
||||
let mut result = BTreeSet::new();
|
||||
for (_, field_name) in fields_ids_map.iter() {
|
||||
for filterable_attribute in filterable_attributes {
|
||||
if filterable_attribute.match_str(field_name) == PatternMatch::Match {
|
||||
let features = filterable_attribute.features();
|
||||
if filter(&features) {
|
||||
result.insert(field_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn matching_features(
|
||||
field_name: &str,
|
||||
filterable_attributes: &[FilterableAttributesSettings],
|
||||
) -> Option<FilterableAttributesFeatures> {
|
||||
for filterable_attribute in filterable_attributes {
|
||||
if filterable_attribute.match_str(field_name) == PatternMatch::Match {
|
||||
return Some(filterable_attribute.features());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn is_field_filterable(
|
||||
field_name: &str,
|
||||
filterable_attributes: &[FilterableAttributesSettings],
|
||||
) -> bool {
|
||||
matching_features(field_name, filterable_attributes)
|
||||
.map_or(false, |features| features.is_filterable())
|
||||
}
|
||||
|
||||
pub fn match_pattern_by_features(
|
||||
field_name: &str,
|
||||
filterable_attributes: &[FilterableAttributesSettings],
|
||||
filter: &impl Fn(&FilterableAttributesFeatures) -> bool,
|
||||
) -> PatternMatch {
|
||||
let mut selection = PatternMatch::NoMatch;
|
||||
// Check if the field name matches any pattern that is facet searchable or filterable
|
||||
for pattern in filterable_attributes {
|
||||
let features = pattern.features();
|
||||
if filter(&features) {
|
||||
match pattern.match_str(field_name) {
|
||||
PatternMatch::Match => return PatternMatch::Match,
|
||||
PatternMatch::Parent => selection = PatternMatch::Parent,
|
||||
PatternMatch::NoMatch => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
selection
|
||||
}
|
||||
|
@ -803,7 +803,7 @@ impl Index {
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the filterable fields names.
|
||||
/// Returns the filterable fields setting value.
|
||||
pub fn filterable_fields(
|
||||
&self,
|
||||
rtxn: &RoTxn<'_>,
|
||||
@ -815,7 +815,7 @@ impl Index {
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Identical to `filterable_fields`, but returns ids instead.
|
||||
/// Returns the filterable fields ids.
|
||||
pub fn filterable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
|
||||
let fields = self.filterable_fields(rtxn)?;
|
||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
@ -878,67 +878,79 @@ impl Index {
|
||||
|
||||
/// Returns true if the geo feature is activated.
|
||||
pub fn is_geo_activated(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
|
||||
let geo_filter = self.filterable_fields(rtxn)?.iter().any(|field| field.has_geo());
|
||||
let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
|
||||
let geo_filter = self.is_geo_filtering_activated(rtxn)?;
|
||||
let geo_sortable = self.is_geo_sorting_activated(rtxn)?;
|
||||
Ok(geo_filter || geo_sortable)
|
||||
}
|
||||
|
||||
/// Returns the faceted fields names.
|
||||
pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
|
||||
Ok(self
|
||||
.main
|
||||
.remap_types::<Str, SerdeJson<_>>()
|
||||
.get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
|
||||
.unwrap_or_default())
|
||||
/// Returns true if the geo sorting feature is activated.
|
||||
pub fn is_geo_sorting_activated(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
|
||||
let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
|
||||
Ok(geo_sortable)
|
||||
}
|
||||
|
||||
/// Identical to `faceted_fields`, but returns ids instead.
|
||||
pub fn faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
|
||||
let fields = self.faceted_fields(rtxn)?;
|
||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
|
||||
let mut fields_ids = HashSet::new();
|
||||
for name in fields {
|
||||
if let Some(field_id) = fields_ids_map.id(&name) {
|
||||
fields_ids.insert(field_id);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(fields_ids)
|
||||
/// Returns true if the geo filtering feature is activated.
|
||||
pub fn is_geo_filtering_activated(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
|
||||
let geo_filter = self.filterable_fields(rtxn)?.iter().any(|field| field.has_geo());
|
||||
Ok(geo_filter)
|
||||
}
|
||||
|
||||
// /// Returns the faceted fields names.
|
||||
// pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
|
||||
// Ok(self
|
||||
// .main
|
||||
// .remap_types::<Str, SerdeJson<_>>()
|
||||
// .get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
|
||||
// .unwrap_or_default())
|
||||
// }
|
||||
|
||||
// /// Identical to `faceted_fields`, but returns ids instead.
|
||||
// pub fn faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
|
||||
// let fields = self.faceted_fields(rtxn)?;
|
||||
// let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
|
||||
// let mut fields_ids = HashSet::new();
|
||||
// for name in fields {
|
||||
// if let Some(field_id) = fields_ids_map.id(&name) {
|
||||
// fields_ids.insert(field_id);
|
||||
// }
|
||||
// }
|
||||
|
||||
// Ok(fields_ids)
|
||||
// }
|
||||
|
||||
/* faceted documents ids */
|
||||
|
||||
/// Returns the user defined faceted fields names.
|
||||
///
|
||||
/// The user faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
|
||||
pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
|
||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
let filterable_fields = self.filterable_fields_ids(rtxn)?;
|
||||
let sortable_fields = self.sortable_fields(rtxn)?;
|
||||
let distinct_field = self.distinct_field(rtxn)?;
|
||||
let asc_desc_fields =
|
||||
self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
|
||||
Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
|
||||
_otherwise => None,
|
||||
});
|
||||
// pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
|
||||
// let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
// let filterable_fields = self.filterable_fields_ids(rtxn)?;
|
||||
// let sortable_fields = self.sortable_fields(rtxn)?;
|
||||
// let distinct_field = self.distinct_field(rtxn)?;
|
||||
// let asc_desc_fields =
|
||||
// self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
|
||||
// Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
|
||||
// _otherwise => None,
|
||||
// });
|
||||
|
||||
let mut faceted_fields: HashSet<_> = filterable_fields
|
||||
.into_iter()
|
||||
.filter_map(|field_id| {
|
||||
let field_name = fields_ids_map.name(field_id);
|
||||
debug_assert!(field_name.is_some(), "field name not found for {field_id}");
|
||||
field_name.map(|field| field.to_string())
|
||||
})
|
||||
.collect();
|
||||
faceted_fields.extend(sortable_fields);
|
||||
faceted_fields.extend(asc_desc_fields);
|
||||
if let Some(field) = distinct_field {
|
||||
faceted_fields.insert(field.to_owned());
|
||||
}
|
||||
// let mut faceted_fields: HashSet<_> = filterable_fields
|
||||
// .into_iter()
|
||||
// .filter_map(|field_id| {
|
||||
// let field_name = fields_ids_map.name(field_id);
|
||||
// debug_assert!(field_name.is_some(), "field name not found for {field_id}");
|
||||
// field_name.map(|field| field.to_string())
|
||||
// })
|
||||
// .collect();
|
||||
// faceted_fields.extend(sortable_fields);
|
||||
// faceted_fields.extend(asc_desc_fields);
|
||||
// if let Some(field) = distinct_field {
|
||||
// faceted_fields.insert(field.to_owned());
|
||||
// }
|
||||
|
||||
Ok(faceted_fields)
|
||||
}
|
||||
// Ok(faceted_fields)
|
||||
// }
|
||||
|
||||
/* faceted documents ids */
|
||||
|
||||
@ -1728,7 +1740,7 @@ pub(crate) mod tests {
|
||||
use big_s::S;
|
||||
use bumpalo::Bump;
|
||||
use heed::{EnvOpenOptions, RwTxn};
|
||||
use maplit::{btreemap, hashset};
|
||||
use maplit::btreemap;
|
||||
use memmap2::Mmap;
|
||||
use tempfile::TempDir;
|
||||
|
||||
@ -1744,7 +1756,8 @@ pub(crate) mod tests {
|
||||
use crate::vector::settings::{EmbedderSource, EmbeddingSettings};
|
||||
use crate::vector::EmbeddingConfigs;
|
||||
use crate::{
|
||||
db_snap, obkv_to_json, Filter, Index, Search, SearchResult, ThreadPoolNoAbortBuilder,
|
||||
db_snap, obkv_to_json, Filter, FilterableAttributesSettings, Index, Search, SearchResult,
|
||||
ThreadPoolNoAbortBuilder,
|
||||
};
|
||||
|
||||
pub(crate) struct TempIndex {
|
||||
@ -2174,7 +2187,9 @@ pub(crate) mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
RESERVED_GEO_FIELD_NAME.to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
index
|
||||
@ -2282,7 +2297,9 @@ pub(crate) mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset! { S("doggo") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"doggo".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
index
|
||||
@ -2319,15 +2336,14 @@ pub(crate) mod tests {
|
||||
|
||||
#[test]
|
||||
fn replace_documents_external_ids_and_soft_deletion_check() {
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_owned());
|
||||
settings.set_filterable_fields(hashset! { S("doggo") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"doggo".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -2860,8 +2876,9 @@ pub(crate) mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_string());
|
||||
settings
|
||||
.set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()]));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
RESERVED_GEO_FIELD_NAME.to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -2895,8 +2912,9 @@ pub(crate) mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_string());
|
||||
settings
|
||||
.set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()]));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
RESERVED_GEO_FIELD_NAME.to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -2929,7 +2947,9 @@ pub(crate) mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_searchable_fields(vec![S("name")]);
|
||||
settings.set_filterable_fields(HashSet::from([S("age")]));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"age".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -2951,7 +2971,9 @@ pub(crate) mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_searchable_fields(vec![S("name"), S("realName")]);
|
||||
settings.set_filterable_fields(HashSet::from([S("age")]));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"age".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -3050,7 +3072,10 @@ pub(crate) mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]);
|
||||
settings.set_filterable_fields(hashset![S("_vectors"), S("_vectors.doggo")]);
|
||||
settings.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field("_vectors".to_string()),
|
||||
FilterableAttributesSettings::Field("_vectors.doggo".to_string()),
|
||||
]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
|
@ -4,6 +4,7 @@ use charabia::Language;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
use crate::{AttributePatterns, FieldId};
|
||||
|
||||
@ -27,7 +28,7 @@ impl LocalizedAttributesRule {
|
||||
Self { attribute_patterns: AttributePatterns::from(attribute_patterns), locales }
|
||||
}
|
||||
|
||||
pub fn match_str(&self, str: &str) -> bool {
|
||||
pub fn match_str(&self, str: &str) -> PatternMatch {
|
||||
self.attribute_patterns.match_str(str)
|
||||
}
|
||||
|
||||
@ -57,7 +58,7 @@ impl LocalizedFieldIds {
|
||||
for (field_id, field_name) in fields {
|
||||
let mut locales = Vec::new();
|
||||
for rule in rules {
|
||||
if rule.match_str(field_name) {
|
||||
if rule.match_str(field_name) == PatternMatch::Match {
|
||||
locales.extend(rule.locales.iter());
|
||||
// Take the first rule that matches
|
||||
break;
|
||||
|
@ -11,6 +11,7 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::UserError;
|
||||
use crate::facet::FacetType;
|
||||
use crate::filterable_fields::{is_field_filterable, matching_field_names};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
|
||||
};
|
||||
@ -18,7 +19,7 @@ use crate::heed_codec::{BytesRefCodec, StrRefCodec};
|
||||
use crate::search::facet::facet_distribution_iter::{
|
||||
count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
|
||||
};
|
||||
use crate::{FieldId, Index, Result};
|
||||
use crate::{FieldId, FieldsIdsMap, Index, Result};
|
||||
|
||||
/// The default number of values by facets that will
|
||||
/// be fetched from the key-value store.
|
||||
@ -281,32 +282,13 @@ impl<'a> FacetDistribution<'a> {
|
||||
|
||||
pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
|
||||
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
|
||||
let candidates = if let Some(candidates) = self.candidates.clone() {
|
||||
candidates
|
||||
} else {
|
||||
return Ok(Default::default());
|
||||
};
|
||||
|
||||
let fields = match &self.facets {
|
||||
Some(facets) => {
|
||||
let invalid_fields: HashSet<_> = facets
|
||||
.iter()
|
||||
.map(|(name, _)| name)
|
||||
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
|
||||
.collect();
|
||||
if !invalid_fields.is_empty() {
|
||||
return Err(UserError::InvalidFacetsDistribution {
|
||||
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
|
||||
valid_facets_name: filterable_fields.into_iter().collect(),
|
||||
}
|
||||
.into());
|
||||
} else {
|
||||
facets.iter().map(|(name, _)| name).cloned().collect()
|
||||
}
|
||||
}
|
||||
None => filterable_fields,
|
||||
};
|
||||
let fields = self.faceted_fields_names(&fields_ids_map)?;
|
||||
|
||||
let mut distribution = BTreeMap::new();
|
||||
for (fid, name) in fields_ids_map.iter() {
|
||||
@ -341,27 +323,8 @@ impl<'a> FacetDistribution<'a> {
|
||||
|
||||
pub fn execute(&self) -> Result<BTreeMap<String, IndexMap<String, u64>>> {
|
||||
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
|
||||
|
||||
let fields = match self.facets {
|
||||
Some(ref facets) => {
|
||||
let invalid_fields: HashSet<_> = facets
|
||||
.iter()
|
||||
.map(|(name, _)| name)
|
||||
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
|
||||
.collect();
|
||||
if !invalid_fields.is_empty() {
|
||||
return Err(UserError::InvalidFacetsDistribution {
|
||||
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
|
||||
valid_facets_name: filterable_fields.into_iter().collect(),
|
||||
}
|
||||
.into());
|
||||
} else {
|
||||
facets.iter().map(|(name, _)| name).cloned().collect()
|
||||
}
|
||||
}
|
||||
None => filterable_fields,
|
||||
};
|
||||
let fields = self.faceted_fields_names(&fields_ids_map)?;
|
||||
|
||||
let mut distribution = BTreeMap::new();
|
||||
for (fid, name) in fields_ids_map.iter() {
|
||||
@ -378,6 +341,40 @@ impl<'a> FacetDistribution<'a> {
|
||||
|
||||
Ok(distribution)
|
||||
}
|
||||
|
||||
fn faceted_fields_names(&self, fields_ids_map: &FieldsIdsMap) -> Result<Vec<String>> {
|
||||
/// TODO: @many: this is a bit of a mess, we should refactor it
|
||||
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
|
||||
let fields = match &self.facets {
|
||||
Some(facets) => {
|
||||
let invalid_fields: HashSet<_> = facets
|
||||
.iter()
|
||||
.map(|(name, _)| name)
|
||||
.filter(|facet| !is_field_filterable(facet, &filterable_fields))
|
||||
.collect();
|
||||
if !invalid_fields.is_empty() {
|
||||
let valid_facets_name =
|
||||
matching_field_names(&filterable_fields, &fields_ids_map);
|
||||
return Err(UserError::InvalidFacetsDistribution {
|
||||
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
|
||||
valid_facets_name: valid_facets_name
|
||||
.into_iter()
|
||||
.map(String::from)
|
||||
.collect(),
|
||||
}
|
||||
.into());
|
||||
} else {
|
||||
facets.iter().map(|(name, _)| name).cloned().collect()
|
||||
}
|
||||
}
|
||||
None => matching_field_names(&filterable_fields, &fields_ids_map)
|
||||
.into_iter()
|
||||
.map(String::from)
|
||||
.collect(),
|
||||
};
|
||||
|
||||
Ok(fields)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for FacetDistribution<'_> {
|
||||
@ -405,11 +402,10 @@ mod tests {
|
||||
use std::iter;
|
||||
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
|
||||
use crate::documents::mmap_from_objects;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::{milli_snap, FacetDistribution, OrderBy};
|
||||
use crate::{milli_snap, FacetDistribution, FilterableAttributesSettings, OrderBy};
|
||||
|
||||
#[test]
|
||||
fn few_candidates_few_facet_values() {
|
||||
@ -419,7 +415,10 @@ mod tests {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||
.update_settings(|settings| {
|
||||
settings
|
||||
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let documents = documents!([
|
||||
@ -490,7 +489,10 @@ mod tests {
|
||||
let index = TempIndex::new_with_map_size(4096 * 10_000);
|
||||
|
||||
index
|
||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||
.update_settings(|settings| {
|
||||
settings
|
||||
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"];
|
||||
@ -575,7 +577,10 @@ mod tests {
|
||||
let index = TempIndex::new_with_map_size(4096 * 10_000);
|
||||
|
||||
index
|
||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||
.update_settings(|settings| {
|
||||
settings
|
||||
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::<Vec<_>>();
|
||||
@ -634,7 +639,10 @@ mod tests {
|
||||
let index = TempIndex::new_with_map_size(4096 * 10_000);
|
||||
|
||||
index
|
||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||
.update_settings(|settings| {
|
||||
settings
|
||||
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let facet_values = (0..1000).collect::<Vec<_>>();
|
||||
@ -685,7 +693,10 @@ mod tests {
|
||||
let index = TempIndex::new_with_map_size(4096 * 10_000);
|
||||
|
||||
index
|
||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||
.update_settings(|settings| {
|
||||
settings
|
||||
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let facet_values = (0..1000).collect::<Vec<_>>();
|
||||
@ -736,7 +747,10 @@ mod tests {
|
||||
let index = TempIndex::new_with_map_size(4096 * 10_000);
|
||||
|
||||
index
|
||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||
.update_settings(|settings| {
|
||||
settings
|
||||
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let facet_values = (0..1000).collect::<Vec<_>>();
|
||||
@ -787,7 +801,10 @@ mod tests {
|
||||
let index = TempIndex::new_with_map_size(4096 * 10_000);
|
||||
|
||||
index
|
||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||
.update_settings(|settings| {
|
||||
settings
|
||||
.set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))])
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let facet_values = (0..1000).collect::<Vec<_>>();
|
||||
|
@ -1,4 +1,4 @@
|
||||
use std::collections::HashSet;
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::{Debug, Display};
|
||||
use std::ops::Bound::{self, Excluded, Included};
|
||||
|
||||
@ -12,13 +12,16 @@ use serde_json::Value;
|
||||
use super::facet_range_search;
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use crate::error::{Error, UserError};
|
||||
use crate::filterable_fields::{
|
||||
filtered_matching_field_names, is_field_filterable, matching_features,
|
||||
};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
|
||||
};
|
||||
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
|
||||
use crate::{
|
||||
distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result,
|
||||
SerializationError,
|
||||
distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures,
|
||||
FilterableAttributesSettings, Index, InternalError, Result, SerializationError,
|
||||
};
|
||||
|
||||
/// The maximum number of filters the filter AST can process.
|
||||
@ -60,7 +63,7 @@ impl Display for BadGeoError {
|
||||
|
||||
#[derive(Debug)]
|
||||
enum FilterError<'a> {
|
||||
AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
|
||||
AttributeNotFilterable { attribute: &'a str, filterable_fields: BTreeSet<&'a str> },
|
||||
ParseGeoError(BadGeoError),
|
||||
TooDeep,
|
||||
}
|
||||
@ -233,10 +236,15 @@ impl<'a> Filter<'a> {
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
|
||||
let attribute = fid.value();
|
||||
if !crate::is_faceted(attribute, &filterable_fields) {
|
||||
if !is_field_filterable(attribute, &filterable_fields) {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
|
||||
attribute,
|
||||
filterable_fields,
|
||||
filterable_fields: filtered_matching_field_names(
|
||||
&filterable_fields,
|
||||
&fields_ids_map,
|
||||
&|features| features.is_filterable(),
|
||||
),
|
||||
}))?;
|
||||
}
|
||||
}
|
||||
@ -249,6 +257,7 @@ impl<'a> Filter<'a> {
|
||||
field_id: FieldId,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
operator: &Condition<'a>,
|
||||
features: &FilterableAttributesFeatures,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let numbers_db = index.facet_id_f64_docids;
|
||||
let strings_db = index.facet_id_string_docids;
|
||||
@ -258,6 +267,17 @@ impl<'a> Filter<'a> {
|
||||
// field id and the level.
|
||||
|
||||
let (left, right) = match operator {
|
||||
// return an error if the filter is not allowed for this field
|
||||
Condition::GreaterThan(_)
|
||||
| Condition::GreaterThanOrEqual(_)
|
||||
| Condition::LowerThan(_)
|
||||
| Condition::LowerThanOrEqual(_)
|
||||
| Condition::Between { .. }
|
||||
if !features.is_filterable_order() =>
|
||||
{
|
||||
/// TODO produce an dedicated error for this
|
||||
todo!("filtering on non-ordered fields is not supported, return an error")
|
||||
}
|
||||
Condition::GreaterThan(val) => {
|
||||
(Excluded(val.parse_finite_float()?), Included(f64::MAX))
|
||||
}
|
||||
@ -307,7 +327,8 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
Condition::NotEqual(val) => {
|
||||
let operator = Condition::Equal(val.clone());
|
||||
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
|
||||
let docids =
|
||||
Self::evaluate_operator(rtxn, index, field_id, None, &operator, features)?;
|
||||
let all_ids = index.documents_ids(rtxn)?;
|
||||
return Ok(all_ids - docids);
|
||||
}
|
||||
@ -409,7 +430,7 @@ impl<'a> Filter<'a> {
|
||||
&self,
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
filterable_fields: &HashSet<String>,
|
||||
filterable_fields: &[FilterableAttributesSettings],
|
||||
universe: Option<&RoaringBitmap>,
|
||||
) -> Result<RoaringBitmap> {
|
||||
if universe.map_or(false, |u| u.is_empty()) {
|
||||
@ -434,36 +455,56 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
}
|
||||
FilterCondition::In { fid, els } => {
|
||||
if crate::is_faceted(fid.value(), filterable_fields) {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
if let Some(fid) = field_ids_map.id(fid.value()) {
|
||||
els.iter()
|
||||
.map(|el| Condition::Equal(el.clone()))
|
||||
.map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
|
||||
.union()
|
||||
} else {
|
||||
Ok(RoaringBitmap::new())
|
||||
match matching_features(fid.value(), filterable_fields) {
|
||||
Some(features) if features.is_filterable() => {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
if let Some(fid) = field_ids_map.id(fid.value()) {
|
||||
els.iter()
|
||||
.map(|el| Condition::Equal(el.clone()))
|
||||
.map(|op| {
|
||||
Self::evaluate_operator(
|
||||
rtxn, index, fid, universe, &op, &features,
|
||||
)
|
||||
})
|
||||
.union()
|
||||
} else {
|
||||
Ok(RoaringBitmap::new())
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
|
||||
attribute: fid.value(),
|
||||
filterable_fields: filtered_matching_field_names(
|
||||
filterable_fields,
|
||||
&field_ids_map,
|
||||
&|features| features.is_filterable(),
|
||||
),
|
||||
}))?
|
||||
}
|
||||
} else {
|
||||
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
|
||||
attribute: fid.value(),
|
||||
filterable_fields: filterable_fields.clone(),
|
||||
}))?
|
||||
}
|
||||
}
|
||||
FilterCondition::Condition { fid, op } => {
|
||||
if crate::is_faceted(fid.value(), filterable_fields) {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
if let Some(fid) = field_ids_map.id(fid.value()) {
|
||||
Self::evaluate_operator(rtxn, index, fid, universe, op)
|
||||
} else {
|
||||
Ok(RoaringBitmap::new())
|
||||
match matching_features(fid.value(), filterable_fields) {
|
||||
Some(features) if features.is_filterable() => {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
if let Some(fid) = field_ids_map.id(fid.value()) {
|
||||
Self::evaluate_operator(rtxn, index, fid, universe, op, &features)
|
||||
} else {
|
||||
Ok(RoaringBitmap::new())
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
|
||||
attribute: fid.value(),
|
||||
filterable_fields: filtered_matching_field_names(
|
||||
filterable_fields,
|
||||
&field_ids_map,
|
||||
&|features| features.is_filterable(),
|
||||
),
|
||||
}))?
|
||||
}
|
||||
} else {
|
||||
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
|
||||
attribute: fid.value(),
|
||||
filterable_fields: filterable_fields.clone(),
|
||||
}))?
|
||||
}
|
||||
}
|
||||
FilterCondition::Or(subfilters) => subfilters
|
||||
@ -502,7 +543,7 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point, radius } => {
|
||||
if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) {
|
||||
if index.is_geo_filtering_activated(rtxn)? {
|
||||
let base_point: [f64; 2] =
|
||||
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
|
||||
if !(-90.0..=90.0).contains(&base_point[0]) {
|
||||
@ -530,14 +571,19 @@ impl<'a> Filter<'a> {
|
||||
|
||||
Ok(result)
|
||||
} else {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
|
||||
attribute: RESERVED_GEO_FIELD_NAME,
|
||||
filterable_fields: filterable_fields.clone(),
|
||||
filterable_fields: filtered_matching_field_names(
|
||||
filterable_fields,
|
||||
&field_ids_map,
|
||||
&|features| features.is_filterable(),
|
||||
),
|
||||
}))?
|
||||
}
|
||||
}
|
||||
FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
|
||||
if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) {
|
||||
if index.is_geo_filtering_activated(rtxn)? {
|
||||
let top_right: [f64; 2] = [
|
||||
top_right_point[0].parse_finite_float()?,
|
||||
top_right_point[1].parse_finite_float()?,
|
||||
@ -662,10 +708,15 @@ impl<'a> Filter<'a> {
|
||||
|
||||
Ok(selected_lat & selected_lng)
|
||||
} else {
|
||||
let field_ids_map = index.fields_ids_map(rtxn)?;
|
||||
Err(top_right_point[0].as_external_error(
|
||||
FilterError::AttributeNotFilterable {
|
||||
attribute: RESERVED_GEO_FIELD_NAME,
|
||||
filterable_fields: filterable_fields.clone(),
|
||||
filterable_fields: filtered_matching_field_names(
|
||||
filterable_fields,
|
||||
&field_ids_map,
|
||||
&|features| features.is_filterable(),
|
||||
),
|
||||
},
|
||||
))?
|
||||
}
|
||||
@ -687,12 +738,11 @@ mod tests {
|
||||
|
||||
use big_s::S;
|
||||
use either::Either;
|
||||
use maplit::hashset;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::Filter;
|
||||
use crate::{Filter, FilterableAttributesSettings};
|
||||
|
||||
#[test]
|
||||
fn empty_db() {
|
||||
@ -700,7 +750,9 @@ mod tests {
|
||||
//Set the filterable fields to be the channel.
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset! { S("PrIcE") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"PrIcE".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -804,7 +856,9 @@ mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_searchable_fields(vec![S("title")]);
|
||||
settings.set_filterable_fields(hashset! { S("title") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"title".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -870,7 +924,9 @@ mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset!(S("monitor_diagonal")));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"monitor_diagonal".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -901,7 +957,9 @@ mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(S(
|
||||
RESERVED_GEO_FIELD_NAME,
|
||||
))]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -948,7 +1006,10 @@ mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order
|
||||
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") });
|
||||
settings.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field(S(RESERVED_GEO_FIELD_NAME)),
|
||||
FilterableAttributesSettings::Field("price".to_string()),
|
||||
]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -998,7 +1059,10 @@ mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order
|
||||
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") });
|
||||
settings.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field(S(RESERVED_GEO_FIELD_NAME)),
|
||||
FilterableAttributesSettings::Field("price".to_string()),
|
||||
]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -1108,7 +1172,9 @@ mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_searchable_fields(vec![S("price")]); // to keep the fields order
|
||||
settings.set_filterable_fields(hashset! { S("price") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"price".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
index
|
||||
@ -1164,7 +1230,11 @@ mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_owned());
|
||||
settings.set_filterable_fields(hashset! { S("id"), S("one"), S("two") });
|
||||
settings.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field("id".to_string()),
|
||||
FilterableAttributesSettings::Field("one".to_string()),
|
||||
FilterableAttributesSettings::Field("two".to_string()),
|
||||
]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
|
@ -10,6 +10,7 @@ use roaring::RoaringBitmap;
|
||||
use tracing::error;
|
||||
|
||||
use crate::error::UserError;
|
||||
use crate::filterable_fields::{is_field_filterable, matching_field_names};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||
use crate::search::build_dfa;
|
||||
use crate::{DocumentId, FieldId, OrderBy, Result, Search};
|
||||
@ -74,9 +75,11 @@ impl<'a> SearchForFacetValues<'a> {
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
if !filterable_fields.contains(&self.facet) {
|
||||
if !is_field_filterable(&self.facet, &filterable_fields) {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let matching_field_names = matching_field_names(&filterable_fields, &fields_ids_map);
|
||||
let (valid_fields, hidden_fields) =
|
||||
index.remove_hidden_fields(rtxn, filterable_fields)?;
|
||||
index.remove_hidden_fields(rtxn, matching_field_names)?;
|
||||
|
||||
return Err(UserError::InvalidFacetSearchFacetName {
|
||||
field: self.facet.clone(),
|
||||
|
@ -9,6 +9,7 @@ use roaring::bitmap::RoaringBitmap;
|
||||
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
|
||||
use self::new::{execute_vector_search, PartialSearchResult};
|
||||
use crate::filterable_fields::{is_field_filterable, matching_field_names};
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::vector::Embedder;
|
||||
use crate::{
|
||||
@ -188,9 +189,15 @@ impl<'a> Search<'a> {
|
||||
|
||||
if let Some(distinct) = &self.distinct {
|
||||
let filterable_fields = ctx.index.filterable_fields(ctx.txn)?;
|
||||
if !crate::is_faceted(distinct, &filterable_fields) {
|
||||
// check if the distinct field is in the filterable fields
|
||||
if !is_field_filterable(distinct, &filterable_fields) {
|
||||
// if not, remove the hidden fields from the filterable fields to generate the error message
|
||||
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
|
||||
let matching_field_names =
|
||||
matching_field_names(&filterable_fields, &fields_ids_map);
|
||||
let (valid_fields, hidden_fields) =
|
||||
ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?;
|
||||
ctx.index.remove_hidden_fields(ctx.txn, matching_field_names)?;
|
||||
// and return the error
|
||||
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
|
||||
field: distinct.clone(),
|
||||
valid_fields,
|
||||
|
@ -5,13 +5,11 @@
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
use meili_snap::snapshot;
|
||||
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::{Criterion, Filter, Search, TimeBudget};
|
||||
use crate::{Criterion, Filter, FilterableAttributesSettings, Search, TimeBudget};
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
@ -20,7 +18,7 @@ fn create_index() -> TempIndex {
|
||||
.update_settings(|s| {
|
||||
s.set_primary_key("id".to_owned());
|
||||
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||
s.set_filterable_fields(hashset! { S("id") });
|
||||
s.set_filterable_fields(vec![FilterableAttributesSettings::Field("id".to_owned())]);
|
||||
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
|
||||
})
|
||||
.unwrap();
|
||||
|
@ -19,7 +19,10 @@ use maplit::hashset;
|
||||
|
||||
use super::collect_field_values;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
|
||||
use crate::{
|
||||
AscDesc, Criterion, FilterableAttributesSettings, Index, Member, Search, SearchResult,
|
||||
TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
@ -236,7 +239,7 @@ fn test_distinct_placeholder_no_ranking_rules() {
|
||||
// Set the letter as filterable and unset the distinct attribute.
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_filterable_fields(hashset! { S("letter") });
|
||||
s.set_filterable_fields(vec![FilterableAttributesSettings::Field("letter".to_owned())]);
|
||||
s.reset_distinct_field();
|
||||
})
|
||||
.unwrap();
|
||||
|
@ -9,7 +9,7 @@ use crate::progress::Progress;
|
||||
use crate::update::new::indexer;
|
||||
use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||
use crate::vector::EmbeddingConfigs;
|
||||
use crate::{db_snap, Criterion, Index};
|
||||
use crate::{db_snap, Criterion, FilterableAttributesSettings, Index};
|
||||
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
|
||||
@ -25,14 +25,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_criteria(criteria.to_vec());
|
||||
builder.set_filterable_fields(hashset! {
|
||||
S("tag"),
|
||||
S("asc_desc_rank"),
|
||||
S(RESERVED_GEO_FIELD_NAME),
|
||||
S("opt1"),
|
||||
S("opt1.opt2"),
|
||||
S("tag_in")
|
||||
});
|
||||
builder.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field(S("tag")),
|
||||
FilterableAttributesSettings::Field(S("asc_desc_rank")),
|
||||
FilterableAttributesSettings::Field(S(RESERVED_GEO_FIELD_NAME)),
|
||||
FilterableAttributesSettings::Field(S("opt1")),
|
||||
FilterableAttributesSettings::Field(S("opt1.opt2")),
|
||||
FilterableAttributesSettings::Field(S("tag_in")),
|
||||
]);
|
||||
builder.set_sortable_fields(hashset! {
|
||||
S("tag"),
|
||||
S("asc_desc_rank"),
|
||||
|
@ -374,7 +374,7 @@ mod tests {
|
||||
use crate::heed_codec::StrRefCodec;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::update::facet::test_helpers::{ordered_string, FacetIndex};
|
||||
use crate::{db_snap, milli_snap};
|
||||
use crate::{db_snap, milli_snap, FilterableAttributesSettings};
|
||||
|
||||
#[test]
|
||||
fn insert() {
|
||||
@ -474,7 +474,9 @@ mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_owned());
|
||||
settings.set_filterable_fields(hashset! { S("id") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"id".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
|
@ -760,18 +760,19 @@ mod tests {
|
||||
use bumpalo::Bump;
|
||||
use fst::IntoStreamer;
|
||||
use heed::RwTxn;
|
||||
use maplit::hashset;
|
||||
use maplit::{btreeset, hashset};
|
||||
|
||||
use super::*;
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use crate::documents::mmap_from_objects;
|
||||
use crate::filterable_fields::filtered_matching_field_names;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::progress::Progress;
|
||||
use crate::search::TermsMatchingStrategy;
|
||||
use crate::update::new::indexer;
|
||||
use crate::update::Setting;
|
||||
use crate::{db_snap, Filter, Search, UserError};
|
||||
use crate::{db_snap, Filter, FilterableAttributesSettings, Search, UserError};
|
||||
|
||||
#[test]
|
||||
fn simple_document_replacement() {
|
||||
@ -1001,7 +1002,9 @@ mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
RESERVED_GEO_FIELD_NAME.to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
@ -1013,7 +1016,9 @@ mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
RESERVED_GEO_FIELD_NAME.to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -1229,15 +1234,24 @@ mod tests {
|
||||
let searchable_fields = vec![S("title"), S("nested.object"), S("nested.machin")];
|
||||
settings.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields = hashset!(S("title"), S("nested.object"), S("nested.machin"));
|
||||
let faceted_fields = vec![
|
||||
FilterableAttributesSettings::Field("title".to_string()),
|
||||
FilterableAttributesSettings::Field("nested.object".to_string()),
|
||||
FilterableAttributesSettings::Field("nested.machin".to_string()),
|
||||
];
|
||||
settings.set_filterable_fields(faceted_fields);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
assert_eq!(facets, hashset!(S("title"), S("nested.object"), S("nested.machin")));
|
||||
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
assert_eq!(facets, btreeset!("title", "nested.object", "nested.machin"));
|
||||
|
||||
// testing the simple query search
|
||||
let mut search = crate::Search::new(&rtxn, &index);
|
||||
@ -1433,7 +1447,9 @@ mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset!(String::from("dog")));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"dog".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -1452,9 +1468,14 @@ mod tests {
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let hidden = index.faceted_fields(&rtxn).unwrap();
|
||||
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
|
||||
assert_eq!(hidden, hashset!(S("dog"), S("dog.race"), S("dog.race.bernese mountain")));
|
||||
assert_eq!(facets, btreeset!("dog", "dog.race", "dog.race.bernese mountain"));
|
||||
|
||||
for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
|
||||
let mut search = crate::Search::new(&rtxn, &index);
|
||||
@ -1475,9 +1496,14 @@ mod tests {
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
|
||||
assert_eq!(facets, hashset!());
|
||||
assert_eq!(facets, btreeset!());
|
||||
|
||||
// update the settings to test the sortable
|
||||
index
|
||||
@ -1501,9 +1527,14 @@ mod tests {
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
|
||||
assert_eq!(facets, hashset!(S("dog.race"), S("dog.race.bernese mountain")));
|
||||
assert_eq!(facets, btreeset!("dog.race", "dog.race.bernese mountain"));
|
||||
|
||||
let mut search = crate::Search::new(&rtxn, &index);
|
||||
search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
|
||||
@ -1712,8 +1743,13 @@ mod tests {
|
||||
|
||||
let check_ok = |index: &Index| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
|
||||
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
|
||||
|
||||
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
|
||||
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
|
||||
@ -1733,7 +1769,7 @@ mod tests {
|
||||
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![7]);
|
||||
};
|
||||
|
||||
let faceted_fields = hashset!(S("colour"));
|
||||
let faceted_fields = vec![FilterableAttributesSettings::Field("colour".to_string())];
|
||||
|
||||
let index = TempIndex::new();
|
||||
index.add_documents(content()).unwrap();
|
||||
@ -1818,8 +1854,13 @@ mod tests {
|
||||
|
||||
let check_ok = |index: &Index| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
|
||||
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
|
||||
|
||||
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
|
||||
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
|
||||
@ -1839,7 +1880,7 @@ mod tests {
|
||||
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
|
||||
};
|
||||
|
||||
let faceted_fields = hashset!(S("colour"));
|
||||
let faceted_fields = vec![FilterableAttributesSettings::Field("colour".to_string())];
|
||||
|
||||
let index = TempIndex::new();
|
||||
index.add_documents(content()).unwrap();
|
||||
@ -1882,8 +1923,13 @@ mod tests {
|
||||
|
||||
let check_ok = |index: &Index| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue")));
|
||||
let filterable_fields = index.filterable_fields(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
assert_eq!(facets, btreeset!("tags", "tags.green", "tags.green.blue"));
|
||||
|
||||
let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap();
|
||||
let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap();
|
||||
@ -1902,7 +1948,7 @@ mod tests {
|
||||
assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
|
||||
};
|
||||
|
||||
let faceted_fields = hashset!(S("tags"));
|
||||
let faceted_fields = vec![FilterableAttributesSettings::Field("tags".to_string())];
|
||||
|
||||
let index = TempIndex::new();
|
||||
index.add_documents(content()).unwrap();
|
||||
@ -2086,7 +2132,9 @@ mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset! { S("title") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"title".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -2941,7 +2989,10 @@ mod tests {
|
||||
index
|
||||
.update_settings_using_wtxn(&mut wtxn, |settings| {
|
||||
settings.set_primary_key(S("docid"));
|
||||
settings.set_filterable_fields(hashset! { S("label"), S("label2") });
|
||||
settings.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field("label".to_string()),
|
||||
FilterableAttributesSettings::Field("label2".to_string()),
|
||||
]);
|
||||
})
|
||||
.unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
@ -3120,7 +3171,9 @@ mod tests {
|
||||
index
|
||||
.update_settings_using_wtxn(&mut wtxn, |settings| {
|
||||
settings.set_primary_key(S("id"));
|
||||
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
RESERVED_GEO_FIELD_NAME.to_string(),
|
||||
)]);
|
||||
settings.set_sortable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
|
||||
})
|
||||
.unwrap();
|
||||
|
@ -4,10 +4,10 @@ use heed::RoTxn;
|
||||
use super::document::{
|
||||
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
|
||||
};
|
||||
use super::extract::perm_json_p;
|
||||
use super::vector_document::{
|
||||
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
|
||||
};
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::documents::FieldIdMapper;
|
||||
use crate::vector::EmbeddingConfigs;
|
||||
use crate::{DocumentId, Index, Result};
|
||||
@ -173,7 +173,7 @@ impl<'doc> Update<'doc> {
|
||||
/// Otherwise `false`.
|
||||
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
|
||||
&self,
|
||||
fields: Option<&[&str]>,
|
||||
selector: &mut impl FnMut(&str) -> PatternMatch,
|
||||
rtxn: &'t RoTxn,
|
||||
index: &'t Index,
|
||||
mapper: &'t Mapper,
|
||||
@ -185,7 +185,7 @@ impl<'doc> Update<'doc> {
|
||||
for entry in self.updated().iter_top_level_fields() {
|
||||
let (key, updated_value) = entry?;
|
||||
|
||||
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
|
||||
if selector(key) == PatternMatch::NoMatch {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -229,7 +229,7 @@ impl<'doc> Update<'doc> {
|
||||
for entry in current.iter_top_level_fields() {
|
||||
let (key, _) = entry?;
|
||||
|
||||
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
|
||||
if selector(key) == PatternMatch::NoMatch {
|
||||
continue;
|
||||
}
|
||||
current_selected_field_count += 1;
|
||||
|
@ -5,12 +5,12 @@ use std::ops::DerefMut as _;
|
||||
use bumpalo::collections::Vec as BVec;
|
||||
use bumpalo::Bump;
|
||||
use hashbrown::HashMap;
|
||||
use heed::RoTxn;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::super::cache::BalancedCaches;
|
||||
use super::facet_document::extract_document_facets;
|
||||
use super::FacetKind;
|
||||
use super::{match_faceted_field, FacetKind};
|
||||
use crate::fields_ids_map::metadata::Metadata;
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::update::new::channel::FieldIdDocidFacetSender;
|
||||
@ -23,13 +23,17 @@ use crate::update::new::steps::IndexingStep;
|
||||
use crate::update::new::thread_local::{FullySend, ThreadLocal};
|
||||
use crate::update::new::DocumentChange;
|
||||
use crate::update::GrenadParameters;
|
||||
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
|
||||
use crate::{
|
||||
DocumentId, FieldId, FilterableAttributesFeatures, FilterableAttributesSettings, Result,
|
||||
MAX_FACET_VALUE_LENGTH,
|
||||
};
|
||||
|
||||
pub struct FacetedExtractorData<'a, 'b> {
|
||||
attributes_to_extract: &'a [&'a str],
|
||||
sender: &'a FieldIdDocidFacetSender<'a, 'b>,
|
||||
grenad_parameters: &'a GrenadParameters,
|
||||
buckets: usize,
|
||||
filterable_attributes: Vec<FilterableAttributesSettings>,
|
||||
sortable_fields: HashSet<String>,
|
||||
}
|
||||
|
||||
impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
|
||||
@ -52,7 +56,8 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b>
|
||||
let change = change?;
|
||||
FacetedDocidsExtractor::extract_document_change(
|
||||
context,
|
||||
self.attributes_to_extract,
|
||||
&self.filterable_attributes,
|
||||
&self.sortable_fields,
|
||||
change,
|
||||
self.sender,
|
||||
)?
|
||||
@ -66,11 +71,12 @@ pub struct FacetedDocidsExtractor;
|
||||
impl FacetedDocidsExtractor {
|
||||
fn extract_document_change(
|
||||
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
|
||||
attributes_to_extract: &[&str],
|
||||
filterable_attributes: &[FilterableAttributesSettings],
|
||||
sortable_fields: &HashSet<String>,
|
||||
document_change: DocumentChange,
|
||||
sender: &FieldIdDocidFacetSender,
|
||||
) -> Result<()> {
|
||||
let index = &context.index;
|
||||
let index = context.index;
|
||||
let rtxn = &context.rtxn;
|
||||
let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield();
|
||||
let mut cached_sorter = context.data.borrow_mut_or_yield();
|
||||
@ -78,11 +84,13 @@ impl FacetedDocidsExtractor {
|
||||
let docid = document_change.docid();
|
||||
let res = match document_change {
|
||||
DocumentChange::Deletion(inner) => extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||
inner.external_document_id(),
|
||||
new_fields_ids_map.deref_mut(),
|
||||
&mut |fid, depth, value| {
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
&mut |fid, meta, depth, value| {
|
||||
let features = meta.filterable_attributes_features(filterable_attributes);
|
||||
Self::facet_fn_with_options(
|
||||
&context.doc_alloc,
|
||||
cached_sorter.deref_mut(),
|
||||
@ -91,6 +99,8 @@ impl FacetedDocidsExtractor {
|
||||
DelAddFacetValue::insert_del,
|
||||
docid,
|
||||
fid,
|
||||
meta,
|
||||
features,
|
||||
depth,
|
||||
value,
|
||||
)
|
||||
@ -98,7 +108,9 @@ impl FacetedDocidsExtractor {
|
||||
),
|
||||
DocumentChange::Update(inner) => {
|
||||
if !inner.has_changed_for_fields(
|
||||
Some(attributes_to_extract),
|
||||
&mut |field_name| {
|
||||
match_faceted_field(field_name, filterable_attributes, sortable_fields)
|
||||
},
|
||||
rtxn,
|
||||
index,
|
||||
context.db_fields_ids_map,
|
||||
@ -107,11 +119,13 @@ impl FacetedDocidsExtractor {
|
||||
}
|
||||
|
||||
extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||
inner.external_document_id(),
|
||||
new_fields_ids_map.deref_mut(),
|
||||
&mut |fid, depth, value| {
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
&mut |fid, meta, depth, value| {
|
||||
let features = meta.filterable_attributes_features(filterable_attributes);
|
||||
Self::facet_fn_with_options(
|
||||
&context.doc_alloc,
|
||||
cached_sorter.deref_mut(),
|
||||
@ -120,6 +134,8 @@ impl FacetedDocidsExtractor {
|
||||
DelAddFacetValue::insert_del,
|
||||
docid,
|
||||
fid,
|
||||
meta,
|
||||
features,
|
||||
depth,
|
||||
value,
|
||||
)
|
||||
@ -127,11 +143,13 @@ impl FacetedDocidsExtractor {
|
||||
)?;
|
||||
|
||||
extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
||||
inner.external_document_id(),
|
||||
new_fields_ids_map.deref_mut(),
|
||||
&mut |fid, depth, value| {
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
&mut |fid, meta, depth, value| {
|
||||
let features = meta.filterable_attributes_features(filterable_attributes);
|
||||
Self::facet_fn_with_options(
|
||||
&context.doc_alloc,
|
||||
cached_sorter.deref_mut(),
|
||||
@ -140,6 +158,8 @@ impl FacetedDocidsExtractor {
|
||||
DelAddFacetValue::insert_add,
|
||||
docid,
|
||||
fid,
|
||||
meta,
|
||||
features,
|
||||
depth,
|
||||
value,
|
||||
)
|
||||
@ -147,11 +167,13 @@ impl FacetedDocidsExtractor {
|
||||
)
|
||||
}
|
||||
DocumentChange::Insertion(inner) => extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.inserted(),
|
||||
inner.external_document_id(),
|
||||
new_fields_ids_map.deref_mut(),
|
||||
&mut |fid, depth, value| {
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
&mut |fid, meta, depth, value| {
|
||||
let features = meta.filterable_attributes_features(filterable_attributes);
|
||||
Self::facet_fn_with_options(
|
||||
&context.doc_alloc,
|
||||
cached_sorter.deref_mut(),
|
||||
@ -160,6 +182,8 @@ impl FacetedDocidsExtractor {
|
||||
DelAddFacetValue::insert_add,
|
||||
docid,
|
||||
fid,
|
||||
meta,
|
||||
features,
|
||||
depth,
|
||||
value,
|
||||
)
|
||||
@ -180,9 +204,16 @@ impl FacetedDocidsExtractor {
|
||||
facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind),
|
||||
docid: DocumentId,
|
||||
fid: FieldId,
|
||||
meta: Metadata,
|
||||
features: FilterableAttributesFeatures,
|
||||
depth: perm_json_p::Depth,
|
||||
value: &Value,
|
||||
) -> Result<()> {
|
||||
// if the field is not filterable, facet searchable or sortable, do nothing
|
||||
if !(features.is_filterable() || features.is_facet_searchable() || meta.is_sortable()) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut buffer = BVec::new_in(doc_alloc);
|
||||
// Exists
|
||||
// key: fid
|
||||
@ -246,7 +277,9 @@ impl FacetedDocidsExtractor {
|
||||
}
|
||||
// Null
|
||||
// key: fid
|
||||
Value::Null if depth == perm_json_p::Depth::OnBaseKey => {
|
||||
Value::Null
|
||||
if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_null() =>
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Null as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
@ -254,19 +287,29 @@ impl FacetedDocidsExtractor {
|
||||
}
|
||||
// Empty
|
||||
// key: fid
|
||||
Value::Array(a) if a.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
|
||||
Value::Array(a)
|
||||
if a.is_empty()
|
||||
&& depth == perm_json_p::Depth::OnBaseKey
|
||||
&& features.is_filterable_empty() =>
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Empty as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
cache_fn(cached_sorter, &buffer, docid)
|
||||
}
|
||||
Value::String(_) if depth == perm_json_p::Depth::OnBaseKey => {
|
||||
Value::String(_)
|
||||
if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_empty() =>
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Empty as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
cache_fn(cached_sorter, &buffer, docid)
|
||||
}
|
||||
Value::Object(o) if o.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
|
||||
Value::Object(o)
|
||||
if o.is_empty()
|
||||
&& depth == perm_json_p::Depth::OnBaseKey
|
||||
&& features.is_filterable_empty() =>
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Empty as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
@ -276,10 +319,6 @@ impl FacetedDocidsExtractor {
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||
index.user_defined_faceted_fields(rtxn)
|
||||
}
|
||||
}
|
||||
|
||||
struct DelAddFacetValue<'doc> {
|
||||
@ -385,9 +424,8 @@ impl FacetedDocidsExtractor {
|
||||
{
|
||||
let index = indexing_context.index;
|
||||
let rtxn = index.read_txn()?;
|
||||
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
|
||||
let attributes_to_extract: Vec<_> =
|
||||
attributes_to_extract.iter().map(|s| s.as_ref()).collect();
|
||||
let filterable_attributes = index.filterable_fields(&rtxn)?;
|
||||
let sortable_fields = index.sortable_fields(&rtxn)?;
|
||||
let datastore = ThreadLocal::new();
|
||||
|
||||
{
|
||||
@ -396,10 +434,11 @@ impl FacetedDocidsExtractor {
|
||||
let _entered = span.enter();
|
||||
|
||||
let extractor = FacetedExtractorData {
|
||||
attributes_to_extract: &attributes_to_extract,
|
||||
grenad_parameters: indexing_context.grenad_parameters,
|
||||
buckets: rayon::current_num_threads(),
|
||||
sender,
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
};
|
||||
extract(
|
||||
document_changes,
|
||||
|
@ -1,46 +1,70 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::fields_ids_map::metadata::Metadata;
|
||||
use crate::update::new::document::Document;
|
||||
use crate::update::new::extract::geo::extract_geo_coordinates;
|
||||
use crate::update::new::extract::perm_json_p;
|
||||
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
|
||||
use crate::{
|
||||
FieldId, FilterableAttributesSettings, GlobalFieldsIdsMap, InternalError, Result, UserError,
|
||||
};
|
||||
|
||||
use super::match_faceted_field;
|
||||
|
||||
pub fn extract_document_facets<'doc>(
|
||||
attributes_to_extract: &[&str],
|
||||
document: impl Document<'doc>,
|
||||
external_document_id: &str,
|
||||
field_id_map: &mut GlobalFieldsIdsMap,
|
||||
facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>,
|
||||
filterable_attributes: &[FilterableAttributesSettings],
|
||||
sortable_fields: &HashSet<String>,
|
||||
facet_fn: &mut impl FnMut(FieldId, Metadata, perm_json_p::Depth, &Value) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
// return the match result for the given field name.
|
||||
let match_field = |field_name: &str| -> PatternMatch {
|
||||
match_faceted_field(field_name, filterable_attributes, sortable_fields)
|
||||
};
|
||||
|
||||
// extract the field if it is faceted (facet searchable, filterable, sortable)
|
||||
let mut extract_field = |name: &str, depth: perm_json_p::Depth, value: &Value| -> Result<()> {
|
||||
match field_id_map.id_with_metadata_or_insert(name) {
|
||||
Some((field_id, meta)) => {
|
||||
facet_fn(field_id, meta, depth, value)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
None => Err(UserError::AttributeLimitReached.into()),
|
||||
}
|
||||
};
|
||||
|
||||
for res in document.iter_top_level_fields() {
|
||||
let (field_name, value) = res?;
|
||||
let selection = match_field(field_name);
|
||||
|
||||
let mut tokenize_field =
|
||||
|name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map
|
||||
.id_or_insert(name)
|
||||
{
|
||||
Some(field_id) => facet_fn(field_id, depth, value),
|
||||
None => Err(UserError::AttributeLimitReached.into()),
|
||||
};
|
||||
// extract the field if it matches a pattern and if it is faceted (facet searchable, filterable, sortable)
|
||||
let mut match_and_extract = |name: &str, depth: perm_json_p::Depth, value: &Value| {
|
||||
let selection = match_field(name);
|
||||
if selection == PatternMatch::Match {
|
||||
extract_field(name, depth, value)?;
|
||||
}
|
||||
|
||||
// if the current field is searchable or contains a searchable attribute
|
||||
let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
|
||||
if selection != perm_json_p::Selection::Skip {
|
||||
Ok(selection)
|
||||
};
|
||||
|
||||
if selection != PatternMatch::NoMatch {
|
||||
// parse json.
|
||||
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||
Value::Object(object) => {
|
||||
perm_json_p::seek_leaf_values_in_object(
|
||||
&object,
|
||||
Some(attributes_to_extract),
|
||||
&[], // skip no attributes
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
&mut match_and_extract,
|
||||
)?;
|
||||
|
||||
if selection == perm_json_p::Selection::Select {
|
||||
tokenize_field(
|
||||
if selection == PatternMatch::Match {
|
||||
extract_field(
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&Value::Object(object),
|
||||
@ -50,36 +74,34 @@ pub fn extract_document_facets<'doc>(
|
||||
Value::Array(array) => {
|
||||
perm_json_p::seek_leaf_values_in_array(
|
||||
&array,
|
||||
Some(attributes_to_extract),
|
||||
&[], // skip no attributes
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
&mut match_and_extract,
|
||||
)?;
|
||||
|
||||
if selection == perm_json_p::Selection::Select {
|
||||
tokenize_field(
|
||||
if selection == PatternMatch::Match {
|
||||
extract_field(
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&Value::Array(array),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
|
||||
value => extract_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if attributes_to_extract.contains(&RESERVED_GEO_FIELD_NAME) {
|
||||
if filterable_attributes.iter().any(|rule| rule.has_geo()) {
|
||||
if let Some(geo_value) = document.geo_field()? {
|
||||
if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? {
|
||||
let (lat_fid, lng_fid) = field_id_map
|
||||
.id_or_insert("_geo.lat")
|
||||
.zip(field_id_map.id_or_insert("_geo.lng"))
|
||||
let ((lat_fid, lat_meta), (lng_fid, lng_meta)) = field_id_map
|
||||
.id_with_metadata_or_insert("_geo.lat")
|
||||
.zip(field_id_map.id_with_metadata_or_insert("_geo.lng"))
|
||||
.ok_or(UserError::AttributeLimitReached)?;
|
||||
|
||||
facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?;
|
||||
facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?;
|
||||
facet_fn(lat_fid, lat_meta, perm_json_p::Depth::OnBaseKey, &lat.into())?;
|
||||
facet_fn(lng_fid, lng_meta, perm_json_p::Depth::OnBaseKey, &lng.into())?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,16 @@
|
||||
mod extract_facets;
|
||||
mod facet_document;
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
pub use extract_facets::FacetedDocidsExtractor;
|
||||
|
||||
use crate::{
|
||||
attribute_patterns::{match_field_legacy, PatternMatch},
|
||||
filterable_fields::match_pattern_by_features,
|
||||
FilterableAttributesSettings,
|
||||
};
|
||||
|
||||
#[repr(u8)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum FacetKind {
|
||||
@ -31,3 +39,30 @@ impl FacetKind {
|
||||
(FacetKind::from(key[0]), &key[1..])
|
||||
}
|
||||
}
|
||||
|
||||
pub fn match_faceted_field(
|
||||
field_name: &str,
|
||||
filterable_fields: &[FilterableAttributesSettings],
|
||||
sortable_fields: &HashSet<String>,
|
||||
) -> PatternMatch {
|
||||
// Check if the field matches any filterable or facet searchable field
|
||||
let mut selection = match_pattern_by_features(field_name, &filterable_fields, &|features| {
|
||||
features.is_facet_searchable() || features.is_filterable()
|
||||
});
|
||||
|
||||
// If the field matches the pattern, return Match
|
||||
if selection == PatternMatch::Match {
|
||||
return selection;
|
||||
}
|
||||
|
||||
// Otherwise, check if the field matches any sortable field
|
||||
for pattern in sortable_fields {
|
||||
match match_field_legacy(&pattern, field_name) {
|
||||
PatternMatch::Match => return PatternMatch::Match,
|
||||
PatternMatch::Parent => selection = PatternMatch::Parent,
|
||||
PatternMatch::NoMatch => (),
|
||||
}
|
||||
}
|
||||
|
||||
selection
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ pub trait DocidsExtractor {
|
||||
pub mod perm_json_p {
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
use crate::Result;
|
||||
use crate::{attribute_patterns::PatternMatch, Result};
|
||||
const SPLIT_SYMBOL: char = '.';
|
||||
|
||||
/// Returns `true` if the `selector` match the `key`.
|
||||
@ -68,11 +68,9 @@ pub mod perm_json_p {
|
||||
|
||||
pub fn seek_leaf_values_in_object(
|
||||
value: &Map<String, Value>,
|
||||
selectors: Option<&[&str]>,
|
||||
skip_selectors: &[&str],
|
||||
base_key: &str,
|
||||
base_depth: Depth,
|
||||
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
|
||||
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>,
|
||||
) -> Result<()> {
|
||||
if value.is_empty() {
|
||||
seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?;
|
||||
@ -85,40 +83,16 @@ pub mod perm_json_p {
|
||||
format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
|
||||
};
|
||||
|
||||
// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
|
||||
// so we check the contained_in on both side
|
||||
let selection = select_field(&base_key, selectors, skip_selectors);
|
||||
if selection != Selection::Skip {
|
||||
let selection = seeker(&base_key, Depth::OnBaseKey, value)?;
|
||||
if selection != PatternMatch::NoMatch {
|
||||
match value {
|
||||
Value::Object(object) => {
|
||||
if selection == Selection::Select {
|
||||
seeker(&base_key, Depth::OnBaseKey, value)?;
|
||||
}
|
||||
|
||||
seek_leaf_values_in_object(
|
||||
object,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
&base_key,
|
||||
Depth::OnBaseKey,
|
||||
seeker,
|
||||
)
|
||||
seek_leaf_values_in_object(object, &base_key, Depth::OnBaseKey, seeker)
|
||||
}
|
||||
Value::Array(array) => {
|
||||
if selection == Selection::Select {
|
||||
seeker(&base_key, Depth::OnBaseKey, value)?;
|
||||
}
|
||||
|
||||
seek_leaf_values_in_array(
|
||||
array,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
&base_key,
|
||||
Depth::OnBaseKey,
|
||||
seeker,
|
||||
)
|
||||
seek_leaf_values_in_array(array, &base_key, Depth::OnBaseKey, seeker)
|
||||
}
|
||||
value => seeker(&base_key, Depth::OnBaseKey, value),
|
||||
_ => Ok(()),
|
||||
}?;
|
||||
}
|
||||
}
|
||||
@ -128,11 +102,9 @@ pub mod perm_json_p {
|
||||
|
||||
pub fn seek_leaf_values_in_array(
|
||||
values: &[Value],
|
||||
selectors: Option<&[&str]>,
|
||||
skip_selectors: &[&str],
|
||||
base_key: &str,
|
||||
base_depth: Depth,
|
||||
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
|
||||
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>,
|
||||
) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
seeker(base_key, base_depth, &Value::Array(vec![]))?;
|
||||
@ -140,61 +112,16 @@ pub mod perm_json_p {
|
||||
|
||||
for value in values {
|
||||
match value {
|
||||
Value::Object(object) => seek_leaf_values_in_object(
|
||||
object,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
base_key,
|
||||
Depth::InsideArray,
|
||||
seeker,
|
||||
),
|
||||
Value::Array(array) => seek_leaf_values_in_array(
|
||||
array,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
base_key,
|
||||
Depth::InsideArray,
|
||||
seeker,
|
||||
),
|
||||
value => seeker(base_key, Depth::InsideArray, value),
|
||||
Value::Object(object) => {
|
||||
seek_leaf_values_in_object(object, base_key, Depth::InsideArray, seeker)
|
||||
}
|
||||
Value::Array(array) => {
|
||||
seek_leaf_values_in_array(array, base_key, Depth::InsideArray, seeker)
|
||||
}
|
||||
value => seeker(base_key, Depth::InsideArray, value).map(|_| ()),
|
||||
}?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn select_field(
|
||||
field_name: &str,
|
||||
selectors: Option<&[&str]>,
|
||||
skip_selectors: &[&str],
|
||||
) -> Selection {
|
||||
if skip_selectors.iter().any(|skip_selector| {
|
||||
contained_in(skip_selector, field_name) || contained_in(field_name, skip_selector)
|
||||
}) {
|
||||
Selection::Skip
|
||||
} else if let Some(selectors) = selectors {
|
||||
let mut selection = Selection::Skip;
|
||||
for selector in selectors {
|
||||
if contained_in(field_name, selector) {
|
||||
selection = Selection::Select;
|
||||
break;
|
||||
} else if contained_in(selector, field_name) {
|
||||
selection = Selection::Parent;
|
||||
}
|
||||
}
|
||||
selection
|
||||
} else {
|
||||
Selection::Select
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Selection {
|
||||
/// The field is a parent of the of a nested field that must be selected
|
||||
Parent,
|
||||
/// The field must be selected
|
||||
Select,
|
||||
/// The field must be skipped
|
||||
Skip,
|
||||
}
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ use heed::{BytesDecode, BytesEncode, RoTxn, RwTxn};
|
||||
|
||||
use super::fst_merger_builder::FstMergerBuilder;
|
||||
use super::KvReaderDelAdd;
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::heed_codec::facet::FacetGroupKey;
|
||||
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
|
||||
use crate::update::{create_sorter, MergeDeladdBtreesetString};
|
||||
@ -92,7 +93,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
|
||||
let locales = self
|
||||
.localized_attributes_rules
|
||||
.iter()
|
||||
.find(|rule| rule.match_str(field_name))
|
||||
.find(|rule| rule.match_str(field_name) == PatternMatch::Match)
|
||||
.map(|rule| rule.locales.clone());
|
||||
|
||||
e.insert(locales);
|
||||
|
@ -31,7 +31,10 @@ use crate::vector::settings::{
|
||||
WriteBackToDocuments,
|
||||
};
|
||||
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
|
||||
use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result};
|
||||
use crate::{
|
||||
FieldId, FieldsIdsMap, FilterableAttributesSettings, Index, LocalizedAttributesRule,
|
||||
LocalizedFieldIds, Result,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
||||
pub enum Setting<T> {
|
||||
@ -155,7 +158,7 @@ pub struct Settings<'a, 't, 'i> {
|
||||
|
||||
searchable_fields: Setting<Vec<String>>,
|
||||
displayed_fields: Setting<Vec<String>>,
|
||||
filterable_fields: Setting<HashSet<String>>,
|
||||
filterable_fields: Setting<Vec<FilterableAttributesSettings>>,
|
||||
sortable_fields: Setting<HashSet<String>>,
|
||||
criteria: Setting<Vec<Criterion>>,
|
||||
stop_words: Setting<BTreeSet<String>>,
|
||||
@ -241,8 +244,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
self.filterable_fields = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_filterable_fields(&mut self, names: HashSet<String>) {
|
||||
self.filterable_fields = Setting::Set(names);
|
||||
pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesSettings>) {
|
||||
self.filterable_fields = Setting::Set(rules);
|
||||
}
|
||||
|
||||
pub fn set_sortable_fields(&mut self, names: HashSet<String>) {
|
||||
@ -760,11 +763,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
fn update_filterable(&mut self) -> Result<()> {
|
||||
match self.filterable_fields {
|
||||
Setting::Set(ref fields) => {
|
||||
let mut new_facets = HashSet::new();
|
||||
for name in fields {
|
||||
new_facets.insert(name.clone());
|
||||
}
|
||||
self.index.put_filterable_fields(self.wtxn, &new_facets)?;
|
||||
self.index.put_filterable_fields(self.wtxn, fields)?;
|
||||
}
|
||||
Setting::Reset => {
|
||||
self.index.delete_filterable_fields(self.wtxn)?;
|
||||
@ -1895,7 +1894,7 @@ pub fn validate_embedding_settings(
|
||||
mod tests {
|
||||
use big_s::S;
|
||||
use heed::types::Bytes;
|
||||
use maplit::{btreemap, btreeset, hashset};
|
||||
use maplit::{btreemap, btreeset};
|
||||
use meili_snap::snapshot;
|
||||
|
||||
use super::*;
|
||||
@ -2105,7 +2104,9 @@ mod tests {
|
||||
// Set the filterable fields to be the age.
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset! { S("age") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"age".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -2121,7 +2122,7 @@ mod tests {
|
||||
// Check that the displayed fields are correctly set.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids = index.filterable_fields(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids, hashset! { S("age") });
|
||||
assert_eq!(fields_ids, vec![FilterableAttributesSettings::Field("age".to_string(),)]);
|
||||
// Only count the field_id 0 and level 0 facet values.
|
||||
// TODO we must support typed CSVs for numbers to be understood.
|
||||
let fidmap = index.fields_ids_map(&rtxn).unwrap();
|
||||
@ -2163,14 +2164,23 @@ mod tests {
|
||||
// Set the filterable fields to be the age and the name.
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset! { S("age"), S("name") });
|
||||
settings.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field("age".to_string()),
|
||||
FilterableAttributesSettings::Field("name".to_string()),
|
||||
]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// Check that the displayed fields are correctly set.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids = index.filterable_fields(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids, hashset! { S("age"), S("name") });
|
||||
assert_eq!(
|
||||
fields_ids,
|
||||
vec![
|
||||
FilterableAttributesSettings::Field("age".to_string()),
|
||||
FilterableAttributesSettings::Field("name".to_string()),
|
||||
]
|
||||
);
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
// Only count the field_id 2 and level 0 facet values.
|
||||
@ -2195,14 +2205,16 @@ mod tests {
|
||||
// Remove the age from the filterable fields.
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset! { S("name") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"name".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// Check that the displayed fields are correctly set.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids = index.filterable_fields(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids, hashset! { S("name") });
|
||||
assert_eq!(fields_ids, vec![FilterableAttributesSettings::Field("name".to_string())]);
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
// Only count the field_id 2 and level 0 facet values.
|
||||
@ -2532,7 +2544,10 @@ mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_displayed_fields(vec!["hello".to_string()]);
|
||||
settings.set_filterable_fields(hashset! { S("age"), S("toto") });
|
||||
settings.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field("age".to_string()),
|
||||
FilterableAttributesSettings::Field("toto".to_string()),
|
||||
]);
|
||||
settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
|
||||
})
|
||||
.unwrap();
|
||||
@ -2649,7 +2664,9 @@ mod tests {
|
||||
// Set the genres setting
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset! { S("genres") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
"genres".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
|
@ -1,13 +1,12 @@
|
||||
use big_s::S;
|
||||
use bumpalo::Bump;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashset;
|
||||
use milli::documents::mmap_from_objects;
|
||||
use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::{FacetDistribution, Index, Object, OrderBy};
|
||||
use milli::{FacetDistribution, FilterableAttributesSettings, Index, Object, OrderBy};
|
||||
use serde_json::{from_value, json};
|
||||
|
||||
#[test]
|
||||
@ -21,10 +20,10 @@ fn test_facet_distribution_with_no_facet_values() {
|
||||
let config = IndexerConfig::default();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_filterable_fields(hashset! {
|
||||
S("genres"),
|
||||
S("tags"),
|
||||
});
|
||||
builder.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field(S("genres")),
|
||||
FilterableAttributesSettings::Field(S("tags")),
|
||||
]);
|
||||
builder.execute(|_| (), || false).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
|
@ -11,7 +11,10 @@ use milli::progress::Progress;
|
||||
use milli::update::new::indexer;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||
use milli::vector::EmbeddingConfigs;
|
||||
use milli::{AscDesc, Criterion, DocumentId, Index, Member, TermsMatchingStrategy};
|
||||
use milli::{
|
||||
AscDesc, Criterion, DocumentId, FilterableAttributesSettings, Index, Member,
|
||||
TermsMatchingStrategy,
|
||||
};
|
||||
use serde::{Deserialize, Deserializer};
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
@ -42,14 +45,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
builder.set_criteria(criteria.to_vec());
|
||||
builder.set_filterable_fields(hashset! {
|
||||
S("tag"),
|
||||
S("asc_desc_rank"),
|
||||
S("_geo"),
|
||||
S("opt1"),
|
||||
S("opt1.opt2"),
|
||||
S("tag_in")
|
||||
});
|
||||
builder.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field(S("tag")),
|
||||
FilterableAttributesSettings::Field(S("asc_desc_rank")),
|
||||
FilterableAttributesSettings::Field(S("_geo")),
|
||||
FilterableAttributesSettings::Field(S("opt1")),
|
||||
FilterableAttributesSettings::Field(S("opt1.opt2")),
|
||||
FilterableAttributesSettings::Field(S("tag_in")),
|
||||
]);
|
||||
builder.set_sortable_fields(hashset! {
|
||||
S("tag"),
|
||||
S("asc_desc_rank"),
|
||||
|
Loading…
x
Reference in New Issue
Block a user