mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-02-05 01:53:28 +01:00
WIP
This commit is contained in:
parent
e568dbbabb
commit
eaf3be1702
@ -11,7 +11,9 @@ use fst::IntoStreamer;
|
|||||||
use milli::index::{IndexEmbeddingConfig, PrefixSearch};
|
use milli::index::{IndexEmbeddingConfig, PrefixSearch};
|
||||||
use milli::proximity::ProximityPrecision;
|
use milli::proximity::ProximityPrecision;
|
||||||
use milli::update::Setting;
|
use milli::update::Setting;
|
||||||
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
|
use milli::{
|
||||||
|
Criterion, CriterionError, FilterableAttributesSettings, Index, DEFAULT_VALUES_PER_FACET,
|
||||||
|
};
|
||||||
use serde::{Deserialize, Serialize, Serializer};
|
use serde::{Deserialize, Serialize, Serializer};
|
||||||
use utoipa::ToSchema;
|
use utoipa::ToSchema;
|
||||||
|
|
||||||
@ -202,8 +204,8 @@ pub struct Settings<T> {
|
|||||||
/// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters).
|
/// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters).
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
|
||||||
#[schema(value_type = Option<Vec<String>>, example = json!(["release_date", "genre"]))]
|
#[schema(value_type = Option<Vec<FilterableAttributesSettings>>, example = json!(["release_date", "genre"]))]
|
||||||
pub filterable_attributes: Setting<BTreeSet<String>>,
|
pub filterable_attributes: Setting<Vec<FilterableAttributesSettings>>,
|
||||||
/// Attributes to use when sorting search results.
|
/// Attributes to use when sorting search results.
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsSortableAttributes>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSettingsSortableAttributes>)]
|
||||||
|
58
crates/milli/src/attribute_patterns.rs
Normal file
58
crates/milli/src/attribute_patterns.rs
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
use deserr::Deserr;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use utoipa::ToSchema;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Deserr, ToSchema)]
|
||||||
|
#[repr(transparent)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub struct AttributePatterns {
|
||||||
|
#[schema(value_type = Vec<String>)]
|
||||||
|
pub patterns: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Vec<String>> for AttributePatterns {
|
||||||
|
fn from(patterns: Vec<String>) -> Self {
|
||||||
|
Self { patterns }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AttributePatterns {
|
||||||
|
pub fn match_str(&self, str: &str) -> bool {
|
||||||
|
self.patterns.iter().any(|pattern| match_pattern(pattern, str))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when `str` matches the wildcard `pattern`.
///
/// Only a leading and/or trailing `*` acts as a wildcard:
/// - `*` matches every string, including the empty one;
/// - `*name*` matches any string containing `name`;
/// - `*name` matches any string ending with `name`;
/// - `name*` matches any string starting with `name`;
/// - any other pattern must be equal to `str` (a `*` in the middle is a literal character).
fn match_pattern(pattern: &str, str: &str) -> bool {
    if pattern == "*" {
        return true;
    }

    // Both a leading and a trailing wildcard: what is left in between must appear anywhere.
    // `pattern` is not the single `*` here, so the two stripped stars are distinct characters.
    if let Some(infix) = pattern.strip_prefix('*').and_then(|rest| rest.strip_suffix('*')) {
        return str.contains(infix);
    }

    if let Some(suffix) = pattern.strip_prefix('*') {
        str.ends_with(suffix)
    } else if let Some(prefix) = pattern.strip_suffix('*') {
        str.starts_with(prefix)
    } else {
        pattern == str
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_match_pattern() {
        // (pattern, candidate) pairs that are expected to match.
        let accepted = [
            ("*", "test"),
            ("test*", "test"),
            ("test*", "testa"),
            ("*test", "test"),
            ("*test", "atest"),
            ("*test*", "test"),
            ("*test*", "atesta"),
            ("*test*", "atest"),
            ("*test*", "testa"),
        ];
        for (pattern, candidate) in accepted {
            assert!(match_pattern(pattern, candidate));
        }

        // (pattern, candidate) pairs that are expected to be rejected.
        let rejected = [("test*test", "test"), ("*test", "testa"), ("test*", "atest")];
        for (pattern, candidate) in rejected {
            assert!(!match_pattern(pattern, candidate));
        }
    }
}
|
@ -5,14 +5,14 @@ use charabia::Language;
|
|||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
|
||||||
use super::FieldsIdsMap;
|
use super::FieldsIdsMap;
|
||||||
use crate::{FieldId, Index, LocalizedAttributesRule, Result};
|
use crate::{FieldId, FilterableAttributesSettings, Index, LocalizedAttributesRule, Result};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub struct Metadata {
|
pub struct Metadata {
|
||||||
pub searchable: bool,
|
pub searchable: bool,
|
||||||
pub filterable: bool,
|
|
||||||
pub sortable: bool,
|
pub sortable: bool,
|
||||||
localized_attributes_rule_id: Option<NonZeroU16>,
|
localized_attributes_rule_id: Option<NonZeroU16>,
|
||||||
|
filterable_attributes_rule_id: Option<NonZeroU16>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@ -111,7 +111,7 @@ impl Metadata {
|
|||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct MetadataBuilder {
|
pub struct MetadataBuilder {
|
||||||
searchable_attributes: Vec<String>,
|
searchable_attributes: Vec<String>,
|
||||||
filterable_attributes: HashSet<String>,
|
filterable_attributes: Vec<FilterableAttributesSettings>,
|
||||||
sortable_attributes: HashSet<String>,
|
sortable_attributes: HashSet<String>,
|
||||||
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
||||||
}
|
}
|
||||||
@ -134,7 +134,7 @@ impl MetadataBuilder {
|
|||||||
|
|
||||||
pub fn new(
|
pub fn new(
|
||||||
searchable_attributes: Vec<String>,
|
searchable_attributes: Vec<String>,
|
||||||
filterable_attributes: HashSet<String>,
|
filterable_attributes: Vec<FilterableAttributesSettings>,
|
||||||
sortable_attributes: HashSet<String>,
|
sortable_attributes: HashSet<String>,
|
||||||
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
@ -152,8 +152,6 @@ impl MetadataBuilder {
|
|||||||
.iter()
|
.iter()
|
||||||
.any(|attribute| attribute == "*" || attribute == field);
|
.any(|attribute| attribute == "*" || attribute == field);
|
||||||
|
|
||||||
let filterable = self.filterable_attributes.contains(field);
|
|
||||||
|
|
||||||
let sortable = self.sortable_attributes.contains(field);
|
let sortable = self.sortable_attributes.contains(field);
|
||||||
|
|
||||||
let localized_attributes_rule_id = self
|
let localized_attributes_rule_id = self
|
||||||
@ -164,7 +162,24 @@ impl MetadataBuilder {
|
|||||||
// saturating_add(1): make `id` `NonZero`
|
// saturating_add(1): make `id` `NonZero`
|
||||||
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
|
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
|
||||||
|
|
||||||
Metadata { searchable, filterable, sortable, localized_attributes_rule_id }
|
let filterable_attributes_rule_id = self
|
||||||
|
.filterable_attributes
|
||||||
|
.iter()
|
||||||
|
.position(|attribute| match attribute {
|
||||||
|
FilterableAttributesSettings::Field(field_name) => field_name == field,
|
||||||
|
FilterableAttributesSettings::Pattern(patterns) => {
|
||||||
|
patterns.patterns.match_str(field)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
// saturating_add(1): make `id` `NonZero`
|
||||||
|
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
|
||||||
|
|
||||||
|
Metadata {
|
||||||
|
searchable,
|
||||||
|
sortable,
|
||||||
|
localized_attributes_rule_id,
|
||||||
|
filterable_attributes_rule_id,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn searchable_attributes(&self) -> &[String] {
|
pub fn searchable_attributes(&self) -> &[String] {
|
||||||
@ -175,7 +190,7 @@ impl MetadataBuilder {
|
|||||||
&self.sortable_attributes
|
&self.sortable_attributes
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn filterable_attributes(&self) -> &HashSet<String> {
|
pub fn filterable_attributes(&self) -> &[FilterableAttributesSettings] {
|
||||||
&self.filterable_attributes
|
&self.filterable_attributes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
75
crates/milli/src/filterable_fields.rs
Normal file
75
crates/milli/src/filterable_fields.rs
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
use deserr::{DeserializeError, Deserr, ValuePointerRef};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use utoipa::ToSchema;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
constants::RESERVED_GEO_FIELD_NAME, is_faceted_by, AttributePatterns, FieldId, FieldsIdsMap,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
pub enum FilterableAttributesSettings {
|
||||||
|
Field(String),
|
||||||
|
Pattern(FilterableAttributesPatterns),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FilterableAttributesSettings {
|
||||||
|
pub fn match_str(&self, field: &str) -> bool {
|
||||||
|
match self {
|
||||||
|
FilterableAttributesSettings::Field(field_name) => is_faceted_by(field, field_name),
|
||||||
|
FilterableAttributesSettings::Pattern(patterns) => patterns.patterns.match_str(field),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn has_geo(&self) -> bool {
|
||||||
|
/// TODO: This is a temporary solution to check if the geo field is activated.
|
||||||
|
matches!(self, FilterableAttributesSettings::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)]
|
||||||
|
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||||
|
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub struct FilterableAttributesPatterns {
|
||||||
|
pub patterns: AttributePatterns,
|
||||||
|
pub features: Option<FilterableAttributesFeatures>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)]
|
||||||
|
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||||
|
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub struct FilterableAttributesFeatures {
|
||||||
|
facet_search: Option<String>,
|
||||||
|
filter: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<E: DeserializeError> Deserr<E> for FilterableAttributesSettings {
|
||||||
|
fn deserialize_from_value<V: deserr::IntoValue>(
|
||||||
|
value: deserr::Value<V>,
|
||||||
|
location: ValuePointerRef,
|
||||||
|
) -> Result<Self, E> {
|
||||||
|
if value.kind() == deserr::ValueKind::Map {
|
||||||
|
Ok(Self::Pattern(FilterableAttributesPatterns::deserialize_from_value(
|
||||||
|
value, location,
|
||||||
|
)?))
|
||||||
|
} else {
|
||||||
|
Ok(Self::Field(String::deserialize_from_value(value, location)?))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn matching_field_ids(
|
||||||
|
filterable_attributes: &[FilterableAttributesSettings],
|
||||||
|
fields_ids_map: &FieldsIdsMap,
|
||||||
|
) -> HashSet<FieldId> {
|
||||||
|
let mut result = HashSet::new();
|
||||||
|
for (field_id, field_name) in fields_ids_map.iter() {
|
||||||
|
for filterable_attribute in filterable_attributes {
|
||||||
|
if filterable_attribute.match_str(field_name) {
|
||||||
|
result.insert(field_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
@ -10,10 +10,11 @@ use roaring::RoaringBitmap;
|
|||||||
use rstar::RTree;
|
use rstar::RTree;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::constants::RESERVED_VECTORS_FIELD_NAME;
|
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
||||||
use crate::documents::PrimaryKey;
|
use crate::documents::PrimaryKey;
|
||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
|
use crate::filterable_fields::matching_field_ids;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||||
FieldIdCodec, OrderedF64Codec,
|
FieldIdCodec, OrderedF64Codec,
|
||||||
@ -25,8 +26,9 @@ use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
|
|||||||
use crate::{
|
use crate::{
|
||||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||||
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
|
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
|
||||||
FieldidsWeightsMap, GeoPoint, LocalizedAttributesRule, ObkvCodec, Result, RoaringBitmapCodec,
|
FieldidsWeightsMap, FilterableAttributesSettings, GeoPoint, LocalizedAttributesRule, ObkvCodec,
|
||||||
RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
|
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32,
|
||||||
|
BEU64,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||||
@ -787,7 +789,7 @@ impl Index {
|
|||||||
pub(crate) fn put_filterable_fields(
|
pub(crate) fn put_filterable_fields(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn<'_>,
|
wtxn: &mut RwTxn<'_>,
|
||||||
fields: &HashSet<String>,
|
fields: &Vec<FilterableAttributesSettings>,
|
||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
self.main.remap_types::<Str, SerdeJson<_>>().put(
|
self.main.remap_types::<Str, SerdeJson<_>>().put(
|
||||||
wtxn,
|
wtxn,
|
||||||
@ -802,7 +804,10 @@ impl Index {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the filterable fields names.
|
/// Returns the filterable fields names.
|
||||||
pub fn filterable_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
|
pub fn filterable_fields(
|
||||||
|
&self,
|
||||||
|
rtxn: &RoTxn<'_>,
|
||||||
|
) -> heed::Result<Vec<FilterableAttributesSettings>> {
|
||||||
Ok(self
|
Ok(self
|
||||||
.main
|
.main
|
||||||
.remap_types::<Str, SerdeJson<_>>()
|
.remap_types::<Str, SerdeJson<_>>()
|
||||||
@ -815,14 +820,9 @@ impl Index {
|
|||||||
let fields = self.filterable_fields(rtxn)?;
|
let fields = self.filterable_fields(rtxn)?;
|
||||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||||
|
|
||||||
let mut fields_ids = HashSet::new();
|
let matching_field_ids = matching_field_ids(&fields, &fields_ids_map);
|
||||||
for name in fields {
|
|
||||||
if let Some(field_id) = fields_ids_map.id(&name) {
|
|
||||||
fields_ids.insert(field_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(fields_ids)
|
Ok(matching_field_ids)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* sortable fields */
|
/* sortable fields */
|
||||||
@ -876,6 +876,13 @@ impl Index {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if the geo feature is activated.
|
||||||
|
pub fn is_geo_activated(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
|
||||||
|
let geo_filter = self.filterable_fields(rtxn)?.iter().any(|field| field.has_geo());
|
||||||
|
let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
|
||||||
|
Ok(geo_filter || geo_sortable)
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the faceted fields names.
|
/// Returns the faceted fields names.
|
||||||
pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
|
pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
|
||||||
Ok(self
|
Ok(self
|
||||||
@ -906,7 +913,8 @@ impl Index {
|
|||||||
///
|
///
|
||||||
/// The user faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
|
/// The user faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
|
||||||
pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
|
pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
|
||||||
let filterable_fields = self.filterable_fields(rtxn)?;
|
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||||
|
let filterable_fields = self.filterable_fields_ids(rtxn)?;
|
||||||
let sortable_fields = self.sortable_fields(rtxn)?;
|
let sortable_fields = self.sortable_fields(rtxn)?;
|
||||||
let distinct_field = self.distinct_field(rtxn)?;
|
let distinct_field = self.distinct_field(rtxn)?;
|
||||||
let asc_desc_fields =
|
let asc_desc_fields =
|
||||||
@ -915,7 +923,14 @@ impl Index {
|
|||||||
_otherwise => None,
|
_otherwise => None,
|
||||||
});
|
});
|
||||||
|
|
||||||
let mut faceted_fields = filterable_fields;
|
let mut faceted_fields: HashSet<_> = filterable_fields
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|field_id| {
|
||||||
|
let field_name = fields_ids_map.name(field_id);
|
||||||
|
debug_assert!(field_name.is_some(), "field name not found for {field_id}");
|
||||||
|
field_name.map(|field| field.to_string())
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
faceted_fields.extend(sortable_fields);
|
faceted_fields.extend(sortable_fields);
|
||||||
faceted_fields.extend(asc_desc_fields);
|
faceted_fields.extend(asc_desc_fields);
|
||||||
if let Some(field) = distinct_field {
|
if let Some(field) = distinct_field {
|
||||||
@ -925,21 +940,6 @@ impl Index {
|
|||||||
Ok(faceted_fields)
|
Ok(faceted_fields)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Identical to `user_defined_faceted_fields`, but returns ids instead.
|
|
||||||
pub fn user_defined_faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
|
|
||||||
let fields = self.user_defined_faceted_fields(rtxn)?;
|
|
||||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
|
||||||
|
|
||||||
let mut fields_ids = HashSet::new();
|
|
||||||
for name in fields {
|
|
||||||
if let Some(field_id) = fields_ids_map.id(&name) {
|
|
||||||
fields_ids.insert(field_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(fields_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
/* faceted documents ids */
|
/* faceted documents ids */
|
||||||
|
|
||||||
/// Retrieve all the documents which contain this field id set as null
|
/// Retrieve all the documents which contain this field id set as null
|
||||||
|
@ -9,11 +9,13 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
|||||||
pub mod documents;
|
pub mod documents;
|
||||||
|
|
||||||
mod asc_desc;
|
mod asc_desc;
|
||||||
|
mod attribute_patterns;
|
||||||
mod criterion;
|
mod criterion;
|
||||||
mod error;
|
mod error;
|
||||||
mod external_documents_ids;
|
mod external_documents_ids;
|
||||||
pub mod facet;
|
pub mod facet;
|
||||||
mod fields_ids_map;
|
mod fields_ids_map;
|
||||||
|
mod filterable_fields;
|
||||||
pub mod heed_codec;
|
pub mod heed_codec;
|
||||||
pub mod index;
|
pub mod index;
|
||||||
mod localized_attributes_rules;
|
mod localized_attributes_rules;
|
||||||
@ -51,6 +53,7 @@ pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbor
|
|||||||
pub use {charabia as tokenizer, heed, rhai};
|
pub use {charabia as tokenizer, heed, rhai};
|
||||||
|
|
||||||
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
|
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
|
||||||
|
pub use self::attribute_patterns::AttributePatterns;
|
||||||
pub use self::criterion::{default_criteria, Criterion, CriterionError};
|
pub use self::criterion::{default_criteria, Criterion, CriterionError};
|
||||||
pub use self::error::{
|
pub use self::error::{
|
||||||
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
|
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
|
||||||
@ -58,6 +61,9 @@ pub use self::error::{
|
|||||||
pub use self::external_documents_ids::ExternalDocumentsIds;
|
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||||
pub use self::fieldids_weights_map::FieldidsWeightsMap;
|
pub use self::fieldids_weights_map::FieldidsWeightsMap;
|
||||||
pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap};
|
pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap};
|
||||||
|
pub use self::filterable_fields::{
|
||||||
|
FilterableAttributesFeatures, FilterableAttributesPatterns, FilterableAttributesSettings,
|
||||||
|
};
|
||||||
pub use self::heed_codec::{
|
pub use self::heed_codec::{
|
||||||
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
|
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
|
||||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
|
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
|
||||||
|
@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
use utoipa::ToSchema;
|
use utoipa::ToSchema;
|
||||||
|
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
use crate::FieldId;
|
use crate::{AttributePatterns, FieldId};
|
||||||
|
|
||||||
/// A rule that defines which locales are supported for a given attribute.
|
/// A rule that defines which locales are supported for a given attribute.
|
||||||
///
|
///
|
||||||
@ -17,18 +17,18 @@ use crate::FieldId;
|
|||||||
/// The pattern `*attribute_name*` matches any attribute name that contains `attribute_name`.
|
/// The pattern `*attribute_name*` matches any attribute name that contains `attribute_name`.
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||||
pub struct LocalizedAttributesRule {
|
pub struct LocalizedAttributesRule {
|
||||||
pub attribute_patterns: Vec<String>,
|
pub attribute_patterns: AttributePatterns,
|
||||||
#[schema(value_type = Vec<String>)]
|
#[schema(value_type = Vec<String>)]
|
||||||
pub locales: Vec<Language>,
|
pub locales: Vec<Language>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LocalizedAttributesRule {
|
impl LocalizedAttributesRule {
|
||||||
pub fn new(attribute_patterns: Vec<String>, locales: Vec<Language>) -> Self {
|
pub fn new(attribute_patterns: Vec<String>, locales: Vec<Language>) -> Self {
|
||||||
Self { attribute_patterns, locales }
|
Self { attribute_patterns: AttributePatterns::from(attribute_patterns), locales }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn match_str(&self, str: &str) -> bool {
|
pub fn match_str(&self, str: &str) -> bool {
|
||||||
self.attribute_patterns.iter().any(|pattern| match_pattern(pattern.as_str(), str))
|
self.attribute_patterns.match_str(str)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn locales(&self) -> &[Language] {
|
pub fn locales(&self) -> &[Language] {
|
||||||
@ -36,20 +36,6 @@ impl LocalizedAttributesRule {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn match_pattern(pattern: &str, str: &str) -> bool {
|
|
||||||
if pattern == "*" {
|
|
||||||
true
|
|
||||||
} else if pattern.starts_with('*') && pattern.ends_with('*') {
|
|
||||||
str.contains(&pattern[1..pattern.len() - 1])
|
|
||||||
} else if let Some(pattern) = pattern.strip_prefix('*') {
|
|
||||||
str.ends_with(pattern)
|
|
||||||
} else if let Some(pattern) = pattern.strip_suffix('*') {
|
|
||||||
str.starts_with(pattern)
|
|
||||||
} else {
|
|
||||||
pattern == str
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct LocalizedFieldIds {
|
pub struct LocalizedFieldIds {
|
||||||
field_id_to_locales: HashMap<FieldId, Vec<Language>>,
|
field_id_to_locales: HashMap<FieldId, Vec<Language>>,
|
||||||
@ -65,7 +51,7 @@ impl LocalizedFieldIds {
|
|||||||
|
|
||||||
if let Some(rules) = rules {
|
if let Some(rules) = rules {
|
||||||
let fields = fields_ids.filter_map(|field_id| {
|
let fields = fields_ids.filter_map(|field_id| {
|
||||||
fields_ids_map.name(field_id).map(|field_name| (field_id, field_name))
|
fields_ids_map.name(field_id).map(|field_name: &str| (field_id, field_name))
|
||||||
});
|
});
|
||||||
|
|
||||||
for (field_id, field_name) in fields {
|
for (field_id, field_name) in fields {
|
||||||
@ -108,24 +94,3 @@ impl LocalizedFieldIds {
|
|||||||
locales
|
locales
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_match_pattern() {
|
|
||||||
assert!(match_pattern("*", "test"));
|
|
||||||
assert!(match_pattern("test*", "test"));
|
|
||||||
assert!(match_pattern("test*", "testa"));
|
|
||||||
assert!(match_pattern("*test", "test"));
|
|
||||||
assert!(match_pattern("*test", "atest"));
|
|
||||||
assert!(match_pattern("*test*", "test"));
|
|
||||||
assert!(match_pattern("*test*", "atesta"));
|
|
||||||
assert!(match_pattern("*test*", "atest"));
|
|
||||||
assert!(match_pattern("*test*", "testa"));
|
|
||||||
assert!(!match_pattern("test*test", "test"));
|
|
||||||
assert!(!match_pattern("*test", "testa"));
|
|
||||||
assert!(!match_pattern("test*", "atest"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -95,12 +95,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
|||||||
// If the settings specifies that a _geo field must be used therefore we must check the
|
// If the settings specifies that a _geo field must be used therefore we must check the
|
||||||
// validity of it in all the documents of this batch and this is when we return `Some`.
|
// validity of it in all the documents of this batch and this is when we return `Some`.
|
||||||
let geo_field_id = match documents_batch_index.id(RESERVED_GEO_FIELD_NAME) {
|
let geo_field_id = match documents_batch_index.id(RESERVED_GEO_FIELD_NAME) {
|
||||||
Some(geo_field_id)
|
Some(geo_field_id) if index.is_geo_activated(rtxn)? => Some(geo_field_id),
|
||||||
if index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME)
|
|
||||||
|| index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME) =>
|
|
||||||
{
|
|
||||||
Some(geo_field_id)
|
|
||||||
}
|
|
||||||
_otherwise => None,
|
_otherwise => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -9,7 +9,6 @@ use heed::RoTxn;
|
|||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
|
||||||
use crate::error::GeoError;
|
use crate::error::GeoError;
|
||||||
use crate::update::new::document::Document;
|
use crate::update::new::document::Document;
|
||||||
use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor};
|
use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor};
|
||||||
@ -29,9 +28,7 @@ impl GeoExtractor {
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
) -> Result<Option<Self>> {
|
) -> Result<Option<Self>> {
|
||||||
let is_sortable = index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
|
if index.is_geo_activated(rtxn)? {
|
||||||
let is_filterable = index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
|
|
||||||
if is_sortable || is_filterable {
|
|
||||||
Ok(Some(GeoExtractor { grenad_parameters }))
|
Ok(Some(GeoExtractor { grenad_parameters }))
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user