This commit is contained in:
ManyTheFish 2025-02-04 16:25:08 +01:00
parent deea0c44ea
commit ebd9922cba
23 changed files with 456 additions and 370 deletions

View File

@ -226,8 +226,8 @@ pub(crate) mod test {
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self, FilterableAttributesRule};
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::tasks::{Details, Status};
use serde_json::{json, Map, Value};
@ -271,7 +271,10 @@ pub(crate) mod test {
let settings = Settings {
displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
filterable_attributes: Setting::Set(vec![
FilterableAttributesRule::Field(S("race")),
FilterableAttributesRule::Field(S("age")),
]),
sortable_attributes: Setting::Set(btreeset! { S("age") }),
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,

View File

@ -318,7 +318,16 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
v6::Settings {
displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
filterable_attributes: settings.filterable_attributes.into(),
filterable_attributes: match settings.filterable_attributes {
v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set(
filterable_attributes
.into_iter()
.map(|attr| v6::FilterableAttributesRule::Field(attr))
.collect(),
),
v5::settings::Setting::Reset => v6::Setting::Reset,
v5::settings::Setting::NotSet => v6::Setting::NotSet,
},
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: {
match settings.ranking_rules {

View File

@ -43,6 +43,8 @@ pub type ResponseError = meilisearch_types::error::ResponseError;
pub type Code = meilisearch_types::error::Code;
pub type RankingRuleView = meilisearch_types::settings::RankingRuleView;
pub type FilterableAttributesRule = meilisearch_types::milli::FilterableAttributesRule;
pub struct V6Reader {
dump: TempDir,
instance_uid: Option<Uuid>,

View File

@ -74,6 +74,21 @@ pub fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch {
}
}
/// Match a field against a distinct field.
pub fn match_distinct_field(distinct_field: Option<&str>, field: &str) -> PatternMatch {
if let Some(distinct_field) = distinct_field {
if field == distinct_field {
// If the field matches exactly the distinct field, return Match
return PatternMatch::Match;
} else if is_faceted_by(distinct_field, field) {
// If the field is a parent field of the distinct field, return Parent
return PatternMatch::Parent;
}
}
// If the field does not match the distinct field and is not a parent of a nested field that matches the distinct field, return NoMatch
PatternMatch::NoMatch
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatternMatch {
/// The field is a parent of the of a nested field that matches the pattern

View File

@ -6,6 +6,7 @@ use heed::RoTxn;
use super::FieldsIdsMap;
use crate::attribute_patterns::PatternMatch;
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::{
is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index,
LocalizedAttributesRule, Result,
@ -13,8 +14,11 @@ use crate::{
#[derive(Debug, Clone, Copy)]
pub struct Metadata {
pub searchable: bool,
pub searchable: Option<u16>,
pub sortable: bool,
pub distinct: bool,
pub asc_desc: bool,
pub geo: bool,
localized_attributes_rule_id: Option<NonZeroU16>,
filterable_attributes_rule_id: Option<NonZeroU16>,
}
@ -136,21 +140,33 @@ impl Metadata {
}
pub fn is_searchable(&self) -> bool {
self.searchable.is_some()
}
pub fn searchable_weight(&self) -> Option<u16> {
self.searchable
}
/// Returns `true` if the field is part of the facet databases. (sortable, filterable, or facet searchable)
pub fn is_distinct(&self) -> bool {
self.distinct
}
pub fn is_asc_desc(&self) -> bool {
self.asc_desc
}
pub fn is_geo(&self) -> bool {
self.geo
}
/// Returns `true` if the field is part of the facet databases. (sortable, distinct, asc_desc, filterable or facet searchable)
pub fn is_faceted(&self, rules: &[FilterableAttributesRule]) -> bool {
if self.is_sortable() {
if self.is_distinct() || self.is_sortable() || self.is_asc_desc() {
return true;
}
let features = self.filterable_attributes_features(&rules);
if features.is_filterable() {
return true;
}
if features.is_facet_searchable() {
if features.is_filterable() || features.is_facet_searchable() {
return true;
}
@ -164,6 +180,8 @@ pub struct MetadataBuilder {
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
distinct_attribute: Option<String>,
asc_desc_attributes: HashSet<String>,
}
impl MetadataBuilder {
@ -176,45 +194,98 @@ impl MetadataBuilder {
let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
let sortable_attributes = index.sortable_fields(rtxn)?;
let localized_attributes = index.localized_attributes_rules(rtxn)?;
let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
Ok(Self {
searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
distinct_attribute,
asc_desc_attributes,
})
}
// pub fn new(
// searchable_attributes: Option<Vec<String>>,
// filterable_attributes: Vec<FilterableAttributesRule>,
// sortable_attributes: HashSet<String>,
// localized_attributes: Option<Vec<LocalizedAttributesRule>>,
// ) -> Self {
// let searchable_attributes = match searchable_attributes {
// Some(fields) if fields.iter().any(|f| f == "*") => None,
// None => None,
// Some(fields) => Some(fields),
// };
#[cfg(test)]
/// Build a new `MetadataBuilder` from the given parameters.
///
/// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
pub fn new(
searchable_attributes: Option<Vec<String>>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
distinct_attribute: Option<String>,
asc_desc_attributes: HashSet<String>,
) -> Self {
let searchable_attributes = match searchable_attributes {
Some(fields) if fields.iter().any(|f| f == "*") => None,
None => None,
Some(fields) => Some(fields),
};
// Self {
// searchable_attributes,
// filterable_attributes,
// sortable_attributes,
// localized_attributes,
// }
// }
Self {
searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
distinct_attribute,
asc_desc_attributes,
}
}
pub fn metadata_for_field(&self, field: &str) -> Metadata {
let searchable = match &self.searchable_attributes {
// A field is searchable if it is faceted by a searchable attribute
Some(attributes) => attributes.iter().any(|pattern| is_faceted_by(field, pattern)),
None => true,
};
if is_faceted_by(field, RESERVED_VECTORS_FIELD_NAME) {
// Vectors fields are not searchable, filterable, distinct or asc_desc
return Metadata {
searchable: None,
sortable: false,
distinct: false,
asc_desc: false,
geo: false,
localized_attributes_rule_id: None,
filterable_attributes_rule_id: None,
};
}
// A field is sortable if it is faceted by a sortable attribute
let sortable = self.sortable_attributes.iter().any(|pattern| is_faceted_by(field, pattern));
let filterable_attributes_rule_id = self
.filterable_attributes
.iter()
.position(|attribute| attribute.match_str(field) == PatternMatch::Match)
// saturating_add(1): make `id` `NonZero`
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
if is_faceted_by(field, RESERVED_GEO_FIELD_NAME) {
// Geo fields are not searchable, distinct or asc_desc
return Metadata {
searchable: None,
sortable,
distinct: false,
asc_desc: false,
geo: true,
localized_attributes_rule_id: None,
filterable_attributes_rule_id,
};
}
let searchable = match &self.searchable_attributes {
// A field is searchable if it is faceted by a searchable attribute
Some(attributes) => attributes
.iter()
.enumerate()
.find(|(_i, pattern)| is_faceted_by(field, pattern))
.map(|(i, _)| i as u16),
None => None,
};
let distinct =
self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
let asc_desc = self.asc_desc_attributes.contains(field);
let localized_attributes_rule_id = self
.localized_attributes
.iter()
@ -223,16 +294,12 @@ impl MetadataBuilder {
// saturating_add(1): make `id` `NonZero`
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
let filterable_attributes_rule_id = self
.filterable_attributes
.iter()
.position(|attribute| attribute.match_str(field) == PatternMatch::Match)
// saturating_add(1): make `id` `NonZero`
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
Metadata {
searchable,
sortable,
distinct,
asc_desc,
geo: false,
localized_attributes_rule_id,
filterable_attributes_rule_id,
}

View File

@ -4,7 +4,7 @@ use std::collections::{BTreeSet, HashSet};
use utoipa::ToSchema;
use crate::{
attribute_patterns::{match_field_legacy, PatternMatch},
attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch},
constants::RESERVED_GEO_FIELD_NAME,
AttributePatterns, FieldId, FieldsIdsMap,
};
@ -210,3 +210,39 @@ pub fn match_pattern_by_features(
selection
}
/// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields.
pub fn match_faceted_field(
field_name: &str,
filterable_fields: &[FilterableAttributesRule],
sortable_fields: &HashSet<String>,
asc_desc_fields: &HashSet<String>,
distinct_field: &Option<String>,
) -> PatternMatch {
// Check if the field matches any filterable or facet searchable field
let mut selection = match_pattern_by_features(field_name, &filterable_fields, &|features| {
features.is_facet_searchable() || features.is_filterable()
});
// If the field matches the pattern, return Match
if selection == PatternMatch::Match {
return selection;
}
match match_distinct_field(distinct_field.as_deref(), field_name) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => selection = PatternMatch::Parent,
PatternMatch::NoMatch => (),
}
// Otherwise, check if the field matches any sortable/asc_desc field
for pattern in sortable_fields.iter().chain(asc_desc_fields.iter()) {
match match_field_legacy(&pattern, field_name) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => selection = PatternMatch::Parent,
PatternMatch::NoMatch => (),
}
}
selection
}

View File

@ -1,6 +1,5 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::path::Path;
@ -14,6 +13,7 @@ use crate::attribute_patterns::PatternMatch;
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::documents::PrimaryKey;
use crate::error::{InternalError, UserError};
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::fields_ids_map::FieldsIdsMap;
use crate::filterable_attributes_rules::match_pattern_by_features;
use crate::heed_codec::facet::{
@ -648,8 +648,7 @@ impl Index {
&self,
wtxn: &mut RwTxn<'_>,
user_fields: &[&str],
non_searchable_fields_ids: &[FieldId],
fields_ids_map: &FieldsIdsMap,
fields_ids_map: &FieldIdMapWithMetadata,
) -> Result<()> {
// We can write the user defined searchable fields as-is.
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
@ -657,29 +656,17 @@ impl Index {
let mut weights = FieldidsWeightsMap::default();
// Now we generate the real searchable fields:
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
// 2. Iterate over the user defined searchable fields.
// 3. If a user defined field is a subset of a field defined in the fields_ids_map
// (ie doggo.name is a subset of doggo) right after doggo and with the same weight.
let mut real_fields = Vec::new();
for (id, field_from_map) in fields_ids_map.iter() {
for (weight, user_field) in user_fields.iter().enumerate() {
if crate::is_faceted_by(field_from_map, user_field)
&& !real_fields.contains(&field_from_map)
&& !non_searchable_fields_ids.contains(&id)
{
real_fields.push(field_from_map);
let weight: u16 =
weight.try_into().map_err(|_| UserError::AttributeLimitReached)?;
weights.insert(id, weight);
}
for (id, field_from_map, metadata) in fields_ids_map.iter() {
if let Some(weight) = metadata.searchable_weight() {
real_fields.push(field_from_map);
weights.insert(id, weight);
}
}
self.put_searchable_fields(wtxn, &real_fields)?;
self.put_fieldids_weights_map(wtxn, &weights)?;
Ok(())
}
@ -978,6 +965,19 @@ impl Index {
// Ok(faceted_fields)
// }
pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
let asc_desc_fields = self
.criteria(rtxn)?
.into_iter()
.filter_map(|criterion| match criterion {
Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
_otherwise => None,
})
.collect();
Ok(asc_desc_fields)
}
/* faceted documents ids */
/// Retrieve all the documents which contain this field id set as null

View File

@ -7,14 +7,14 @@ use liquid::model::{
};
use liquid::{ObjectView, ValueView};
use super::{FieldMetadata, FieldsIdsMapWithMetadata};
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata};
use crate::GlobalFieldsIdsMap;
#[derive(Debug, Clone, Copy)]
pub struct FieldValue<'a, D: ObjectView> {
name: &'a str,
document: &'a D,
metadata: FieldMetadata,
metadata: Metadata,
}
impl<'a, D: ObjectView> ValueView for FieldValue<'a, D> {
@ -67,7 +67,10 @@ impl<'a, D: ObjectView> FieldValue<'a, D> {
}
pub fn is_searchable(&self) -> &bool {
&self.metadata.searchable
match self.metadata.is_searchable() {
true => &true,
false => &false,
}
}
pub fn is_empty(&self) -> bool {
@ -125,15 +128,11 @@ pub struct BorrowedFields<'a, 'map, D: ObjectView> {
}
impl<'a, D: ObjectView> OwnedFields<'a, D> {
pub fn new(document: &'a D, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
pub fn new(document: &'a D, field_id_map: &'a FieldIdMapWithMetadata) -> Self {
Self(
std::iter::repeat(document)
.zip(field_id_map.iter())
.map(|(document, (fid, name))| FieldValue {
document,
name,
metadata: field_id_map.metadata(fid).unwrap_or_default(),
})
.map(|(document, (_fid, name, metadata))| FieldValue { document, name, metadata })
.collect(),
)
}
@ -187,7 +186,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> {
let fv = self.doc_alloc.alloc(FieldValue {
name: self.doc_alloc.alloc_str(&k),
document: self.document,
metadata: FieldMetadata { searchable: metadata.searchable },
metadata,
});
fv as _
}))
@ -207,7 +206,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> {
let fv = self.doc_alloc.alloc(FieldValue {
name: self.doc_alloc.alloc_str(&key),
document: self.document,
metadata: FieldMetadata { searchable: metadata.searchable },
metadata,
});
Some(fv as _)
}

View File

@ -5,11 +5,9 @@ mod fields;
mod template_checker;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::convert::TryFrom;
use std::fmt::Debug;
use std::num::NonZeroUsize;
use std::ops::Deref;
use bumpalo::Bump;
use document::ParseableDocument;
@ -18,8 +16,9 @@ use fields::{BorrowedFields, OwnedFields};
use self::context::Context;
use self::document::Document;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::update::del_add::DelAdd;
use crate::{FieldId, FieldsIdsMap, GlobalFieldsIdsMap};
use crate::GlobalFieldsIdsMap;
pub struct Prompt {
template: liquid::Template,
@ -145,9 +144,9 @@ impl Prompt {
&self,
document: &obkv::KvReaderU16,
side: DelAdd,
field_id_map: &FieldsIdsMapWithMetadata,
field_id_map: &FieldIdMapWithMetadata,
) -> Result<String, RenderPromptError> {
let document = Document::new(document, side, field_id_map);
let document = Document::new(document, side, field_id_map.as_fields_ids_map());
let fields = OwnedFields::new(&document, field_id_map);
let context = Context::new(&document, &fields);
@ -172,40 +171,6 @@ fn truncate(s: &mut String, max_bytes: usize) {
}
}
pub struct FieldsIdsMapWithMetadata<'a> {
fields_ids_map: &'a FieldsIdsMap,
metadata: BTreeMap<FieldId, FieldMetadata>,
}
impl<'a> FieldsIdsMapWithMetadata<'a> {
pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self {
let mut metadata: BTreeMap<FieldId, FieldMetadata> =
fields_ids_map.ids().map(|id| (id, Default::default())).collect();
for searchable_field_id in searchable_fields_ids {
let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue };
metadata.searchable = true;
}
Self { fields_ids_map, metadata }
}
pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> {
self.metadata.get(&field_id).copied()
}
}
impl<'a> Deref for FieldsIdsMapWithMetadata<'a> {
type Target = FieldsIdsMap;
fn deref(&self) -> &Self::Target {
self.fields_ids_map
}
}
#[derive(Debug, Default, Clone, Copy)]
pub struct FieldMetadata {
pub searchable: bool,
}
#[cfg(test)]
mod test {
use super::Prompt;

View File

@ -159,7 +159,7 @@ impl<'i> FacetsUpdate<'i> {
let field_ids = self.index.facet_leveled_field_ids(wtxn)?;
let bulk_update = FacetsUpdateBulk::new(
self.index,
field_ids,
field_ids.into_iter().collect(),
self.facet_type,
self.delta_data,
self.group_size,

View File

@ -150,9 +150,12 @@ fn searchable_fields_changed(
obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
) -> bool {
let searchable_fields = &settings_diff.new.searchable_fields_ids;
for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.contains(&field_id) {
let Some(metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else {
debug_assert!(false, "Field id {} is not in the fields ids map", field_id);
continue;
};
if metadata.is_searchable() {
let del_add = KvReaderDelAdd::from_slice(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
@ -200,8 +203,12 @@ fn tokens_from_document<'a>(
buffers.obkv_buffer.clear();
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
for (field_id, field_bytes) in obkv.iter() {
let Some(metadata) = settings.fields_ids_map.metadata(field_id) else {
debug_assert!(false, "Field id {} is not in the fields ids map", field_id);
continue;
};
// if field is searchable.
if settings.searchable_fields_ids.contains(&field_id) {
if metadata.is_searchable() {
// extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) {
// parse json.
@ -216,7 +223,7 @@ fn tokens_from_document<'a>(
buffers.field_buffer.clear();
if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
// create an iterator of token with their positions.
let locales = settings.localized_searchable_fields_ids.locales(field_id);
let locales = metadata.locales(&settings.localized_attributes_rules);
let tokens = process_tokens(tokenizer.tokenize_with_allow_list(field, locales))
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);

View File

@ -12,12 +12,11 @@ use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::{
MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
/// Extracts the facet string and the documents ids where this facet string appear.
@ -33,13 +32,10 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
if settings_diff.settings_update_only() {
extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff)
} else {
let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids;
let facet_search = settings_diff.new.facet_search;
extract_facet_string_docids_document_update(
docid_fid_facet_string,
indexer,
localized_field_ids,
facet_search,
&settings_diff.new,
)
}
}
@ -52,8 +48,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,
localized_field_ids: &LocalizedFieldIds,
facet_search: bool,
settings: &InnerIndexSettings,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
let max_memory = indexer.max_memory_by_thread();
@ -92,6 +87,14 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
let field_id = FieldId::from_be_bytes(field_id_bytes);
let Some(metadata) = settings.fields_ids_map.metadata(field_id) else {
unreachable!("metadata not found for field_id: {}", field_id)
};
if !metadata.is_faceted(&settings.filterable_attributes_rules) {
continue;
}
let (document_id_bytes, normalized_value_bytes) =
try_split_array_at::<_, 4>(bytes).unwrap();
let document_id = u32::from_be_bytes(document_id_bytes);
@ -99,8 +102,10 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let normalized_value = str::from_utf8(normalized_value_bytes)?;
// Facet search normalization
if facet_search {
let locales = localized_field_ids.locales(field_id);
let features =
metadata.filterable_attributes_features(&settings.filterable_attributes_rules);
if features.is_facet_searchable() {
let locales = metadata.locales(&settings.localized_attributes_rules);
let hyper_normalized_value = normalize_facet_string(normalized_value, locales);
let set = BTreeSet::from_iter(std::iter::once(normalized_value));
@ -178,8 +183,15 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
let field_id = FieldId::from_be_bytes(field_id_bytes);
let old_locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id);
let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
let Some(old_metadata) = settings_diff.old.fields_ids_map.metadata(field_id) else {
unreachable!("old metadata not found for field_id: {}", field_id)
};
let Some(new_metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else {
unreachable!("new metadata not found for field_id: {}", field_id)
};
let old_locales = old_metadata.locales(&settings_diff.old.localized_attributes_rules);
let new_locales = new_metadata.locales(&settings_diff.new.localized_attributes_rules);
let are_same_locales = old_locales == new_locales;
let reindex_facet_search =

View File

@ -76,9 +76,9 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut strings_key_buffer = Vec::new();
let old_faceted_fids: BTreeSet<_> =
settings_diff.old.faceted_fields_ids.iter().copied().collect();
settings_diff.list_faceted_fields_from_fid_map(DelAdd::Deletion);
let new_faceted_fids: BTreeSet<_> =
settings_diff.new.faceted_fields_ids.iter().copied().collect();
settings_diff.list_faceted_fields_from_fid_map(DelAdd::Addition);
if !settings_diff.settings_update_only || settings_diff.reindex_facets() {
let mut cursor = obkv_documents.into_cursor()?;

View File

@ -15,8 +15,9 @@ use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::constants::RESERVED_VECTORS_FIELD_NAME;
use crate::error::FaultSource;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::index::IndexEmbeddingConfig;
use crate::prompt::{FieldsIdsMapWithMetadata, Prompt};
use crate::prompt::Prompt;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
@ -190,12 +191,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
let reindex_vectors = settings_diff.reindex_vectors();
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
let old_fields_ids_map =
FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids);
let new_fields_ids_map = &settings_diff.new.fields_ids_map;
let new_fields_ids_map =
FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids);
// the vector field id may have changed
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
@ -383,7 +380,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
);
continue;
}
regenerate_prompt(obkv, prompt, &new_fields_ids_map)?
regenerate_prompt(obkv, prompt, new_fields_ids_map)?
}
},
// prompt regeneration is only triggered for existing embedders
@ -400,7 +397,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
regenerate_if_prompt_changed(
obkv,
(old_prompt, prompt),
(&old_fields_ids_map, &new_fields_ids_map),
(old_fields_ids_map, new_fields_ids_map),
)?
} else {
// we can simply ignore user provided vectors as they are not regenerated and are
@ -416,7 +413,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
prompt,
(add_to_user_provided, remove_from_user_provided),
(old, new),
(&old_fields_ids_map, &new_fields_ids_map),
(old_fields_ids_map, new_fields_ids_map),
document_id,
embedder_name,
embedder_is_manual,
@ -486,10 +483,7 @@ fn extract_vector_document_diff(
prompt: &Prompt,
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
(old, new): (VectorState, VectorState),
(old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
&FieldsIdsMapWithMetadata,
),
(old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
document_id: impl Fn() -> Value,
embedder_name: &str,
embedder_is_manual: bool,
@ -611,10 +605,7 @@ fn extract_vector_document_diff(
fn regenerate_if_prompt_changed(
obkv: &obkv::KvReader<FieldId>,
(old_prompt, new_prompt): (&Prompt, &Prompt),
(old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
&FieldsIdsMapWithMetadata,
),
(old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
) -> Result<VectorStateDelta> {
let old_prompt = old_prompt
.render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map)
@ -630,7 +621,7 @@ fn regenerate_if_prompt_changed(
fn regenerate_prompt(
obkv: &obkv::KvReader<FieldId>,
prompt: &Prompt,
new_fields_ids_map: &FieldsIdsMapWithMetadata,
new_fields_ids_map: &FieldIdMapWithMetadata,
) -> Result<VectorStateDelta> {
let prompt = prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?;

View File

@ -217,7 +217,7 @@ where
// update the internal facet and searchable list,
// because they might have changed due to the nested documents flattening.
settings_diff.new.recompute_facets(self.wtxn, self.index)?;
// settings_diff.new.recompute_facets(self.wtxn, self.index)?;
settings_diff.new.recompute_searchables(self.wtxn, self.index)?;
let settings_diff = Arc::new(settings_diff);

View File

@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::collections::btree_map::Entry as BEntry;
use std::collections::hash_map::Entry as HEntry;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{Read, Seek};
@ -18,8 +18,10 @@ use super::helpers::{
ObkvsMergeAdditionsAndDeletions,
};
use super::{create_writer, IndexDocumentsMethod, IndexerConfig, KeepFirst};
use crate::attribute_patterns::PatternMatch;
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::index::{db_name, main_key};
use crate::update::del_add::{
into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation,
@ -31,9 +33,7 @@ use crate::update::{AvailableIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::WriteBackToDocuments;
use crate::vector::ArroyWrapper;
use crate::{
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
};
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result};
pub struct TransformOutput {
pub primary_key: String,
@ -52,7 +52,7 @@ pub struct TransformOutput {
/// containing all those documents.
pub struct Transform<'a, 'i> {
pub index: &'i Index,
fields_ids_map: FieldsIdsMap,
fields_ids_map: FieldIdMapWithMetadata,
indexer_settings: &'a IndexerConfig,
pub index_documents_method: IndexDocumentsMethod,
@ -84,7 +84,7 @@ pub enum Operation {
///
/// If new fields are present in the addition, they are added to the index field ids map.
fn create_fields_mapping(
index_field_map: &mut FieldsIdsMap,
index_field_map: &mut FieldIdMapWithMetadata,
batch_field_map: &DocumentsBatchIndex,
) -> Result<HashMap<FieldId, FieldId>> {
batch_field_map
@ -141,10 +141,13 @@ impl<'a, 'i> Transform<'a, 'i> {
true,
);
let documents_ids = index.documents_ids(wtxn)?;
let fields_ids_map = index.fields_ids_map(wtxn)?;
let builder = MetadataBuilder::from_index(index, wtxn)?;
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
Ok(Transform {
index,
fields_ids_map: index.fields_ids_map(wtxn)?,
fields_ids_map,
indexer_settings,
available_documents_ids: AvailableIds::new(&documents_ids),
original_sorter,
@ -354,7 +357,7 @@ impl<'a, 'i> Transform<'a, 'i> {
documents_seen: documents_count,
});
self.index.put_fields_ids_map(wtxn, &self.fields_ids_map)?;
self.index.put_fields_ids_map(wtxn, self.fields_ids_map.as_fields_ids_map())?;
self.index.put_primary_key(wtxn, &primary_key)?;
self.documents_count += documents_count;
// Now that we have a valid sorter that contains the user id and the obkv we
@ -371,7 +374,7 @@ impl<'a, 'i> Transform<'a, 'i> {
)]
fn flatten_from_fields_ids_map(
obkv: &KvReader<FieldId>,
fields_ids_map: &mut FieldsIdsMap,
fields_ids_map: &mut FieldIdMapWithMetadata,
) -> Result<Option<Vec<u8>>> {
if obkv
.iter()
@ -657,7 +660,6 @@ impl<'a, 'i> Transform<'a, 'i> {
fn rebind_existing_document(
old_obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
modified_faceted_fields: &HashSet<String>,
mut injected_vectors: serde_json::Map<String, serde_json::Value>,
old_vectors_fid: Option<FieldId>,
original_obkv_buffer: Option<&mut Vec<u8>>,
@ -667,23 +669,26 @@ impl<'a, 'i> Transform<'a, 'i> {
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
// If only a faceted field has been added, keep only this field.
let global_facet_settings_changed = settings_diff.global_facet_settings_changed();
let facet_fids_changed = settings_diff.facet_fids_changed();
let necessary_faceted_field =
|id: FieldId| -> bool {
let necessary_faceted_field = |id: FieldId| -> Option<DelAddOperation> {
if facet_fids_changed {
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
if global_facet_settings_changed {
settings_diff.new.user_defined_faceted_fields.iter().any(|long| {
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
})
} else if facet_fids_changed {
modified_faceted_fields.iter().any(|long| {
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
})
} else {
false
// if the faceted fields changed, we need to keep all the field that are
// faceted in the old or new settings.
match (
settings_diff.old.match_faceted_field(field_name),
settings_diff.new.match_faceted_field(field_name),
) {
(PatternMatch::NoMatch, PatternMatch::NoMatch) => None,
(PatternMatch::NoMatch, _) => Some(DelAddOperation::Addition),
(_, PatternMatch::NoMatch) => Some(DelAddOperation::Deletion),
(_, _) => Some(DelAddOperation::DeletionAndAddition),
}
};
} else {
None
}
};
// Alway provide all fields when vectors are involved because
// we need the fields for the prompt/templating.
@ -734,9 +739,12 @@ impl<'a, 'i> Transform<'a, 'i> {
}
}
if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors {
if is_primary_key(id) || reindex_vectors {
operations.insert(id, DelAddOperation::DeletionAndAddition);
obkv_writer.insert(id, val)?;
} else if let Some(operation) = necessary_faceted_field(id) {
operations.insert(id, operation);
obkv_writer.insert(id, val)?;
} else if let Some(operation) = settings_diff.reindex_searchable_id(id) {
operations.insert(id, operation);
obkv_writer.insert(id, val)?;
@ -856,7 +864,6 @@ impl<'a, 'i> Transform<'a, 'i> {
};
if original_sorter.is_some() || flattened_sorter.is_some() {
let modified_faceted_fields = settings_diff.modified_faceted_fields();
let mut original_obkv_buffer = Vec::new();
let mut flattened_obkv_buffer = Vec::new();
let mut document_sorter_key_buffer = Vec::new();
@ -897,7 +904,6 @@ impl<'a, 'i> Transform<'a, 'i> {
Self::rebind_existing_document(
old_obkv,
&settings_diff,
&modified_faceted_fields,
injected_vectors,
old_vectors_fid,
Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()),

View File

@ -9,8 +9,9 @@ use serde_json::Value;
use super::super::cache::BalancedCaches;
use super::facet_document::extract_document_facets;
use super::{match_faceted_field, FacetKind};
use super::FacetKind;
use crate::fields_ids_map::metadata::Metadata;
use crate::filterable_attributes_rules::match_faceted_field;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
@ -34,6 +35,8 @@ pub struct FacetedExtractorData<'a, 'b> {
buckets: usize,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_fields: HashSet<String>,
asc_desc_fields: HashSet<String>,
distinct_field: Option<String>,
}
impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
@ -58,6 +61,8 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b>
context,
&self.filterable_attributes,
&self.sortable_fields,
&self.asc_desc_fields,
&self.distinct_field,
change,
self.sender,
)?
@ -73,6 +78,8 @@ impl FacetedDocidsExtractor {
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
filterable_attributes: &[FilterableAttributesRule],
sortable_fields: &HashSet<String>,
asc_desc_fields: &HashSet<String>,
distinct_field: &Option<String>,
document_change: DocumentChange,
sender: &FieldIdDocidFacetSender,
) -> Result<()> {
@ -89,6 +96,8 @@ impl FacetedDocidsExtractor {
new_fields_ids_map.deref_mut(),
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
&mut |fid, meta, depth, value| {
let features = meta.filterable_attributes_features(filterable_attributes);
Self::facet_fn_with_options(
@ -109,7 +118,13 @@ impl FacetedDocidsExtractor {
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
&mut |field_name| {
match_faceted_field(field_name, filterable_attributes, sortable_fields)
match_faceted_field(
field_name,
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
)
},
rtxn,
index,
@ -124,6 +139,8 @@ impl FacetedDocidsExtractor {
new_fields_ids_map.deref_mut(),
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
&mut |fid, meta, depth, value| {
let features = meta.filterable_attributes_features(filterable_attributes);
Self::facet_fn_with_options(
@ -148,6 +165,8 @@ impl FacetedDocidsExtractor {
new_fields_ids_map.deref_mut(),
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
&mut |fid, meta, depth, value| {
let features = meta.filterable_attributes_features(filterable_attributes);
Self::facet_fn_with_options(
@ -172,6 +191,8 @@ impl FacetedDocidsExtractor {
new_fields_ids_map.deref_mut(),
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
&mut |fid, meta, depth, value| {
let features = meta.filterable_attributes_features(filterable_attributes);
Self::facet_fn_with_options(
@ -426,6 +447,8 @@ impl FacetedDocidsExtractor {
let rtxn = index.read_txn()?;
let filterable_attributes = index.filterable_attributes_rules(&rtxn)?;
let sortable_fields = index.sortable_fields(&rtxn)?;
let asc_desc_fields = index.asc_desc_fields(&rtxn)?;
let distinct_field = index.distinct_field(&rtxn)?.map(|s| s.to_string());
let datastore = ThreadLocal::new();
{
@ -439,6 +462,8 @@ impl FacetedDocidsExtractor {
sender,
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
};
extract(
document_changes,

View File

@ -8,22 +8,30 @@ use crate::update::new::document::Document;
use crate::update::new::extract::geo::extract_geo_coordinates;
use crate::update::new::extract::perm_json_p;
use crate::{
FieldId, FilterableAttributesSettings, GlobalFieldsIdsMap, InternalError, Result, UserError,
FieldId, FilterableAttributesRule, GlobalFieldsIdsMap, InternalError, Result, UserError,
};
use super::match_faceted_field;
use crate::filterable_attributes_rules::match_faceted_field;
pub fn extract_document_facets<'doc>(
document: impl Document<'doc>,
external_document_id: &str,
field_id_map: &mut GlobalFieldsIdsMap,
filterable_attributes: &[FilterableAttributesSettings],
filterable_attributes: &[FilterableAttributesRule],
sortable_fields: &HashSet<String>,
asc_desc_fields: &HashSet<String>,
distinct_field: &Option<String>,
facet_fn: &mut impl FnMut(FieldId, Metadata, perm_json_p::Depth, &Value) -> Result<()>,
) -> Result<()> {
// return the match result for the given field name.
let match_field = |field_name: &str| -> PatternMatch {
match_faceted_field(field_name, filterable_attributes, sortable_fields)
match_faceted_field(
field_name,
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
)
};
// extract the field if it is faceted (facet searchable, filterable, sortable)

View File

@ -1,16 +1,8 @@
mod extract_facets;
mod facet_document;
use std::collections::HashSet;
pub use extract_facets::FacetedDocidsExtractor;
use crate::{
attribute_patterns::{match_field_legacy, PatternMatch},
filterable_fields::match_pattern_by_features,
FilterableAttributesSettings,
};
#[repr(u8)]
#[derive(Debug, Clone, Copy)]
pub enum FacetKind {
@ -39,30 +31,3 @@ impl FacetKind {
(FacetKind::from(key[0]), &key[1..])
}
}
pub fn match_faceted_field(
field_name: &str,
filterable_fields: &[FilterableAttributesSettings],
sortable_fields: &HashSet<String>,
) -> PatternMatch {
// Check if the field matches any filterable or facet searchable field
let mut selection = match_pattern_by_features(field_name, &filterable_fields, &|features| {
features.is_facet_searchable() || features.is_filterable()
});
// If the field matches the pattern, return Match
if selection == PatternMatch::Match {
return selection;
}
// Otherwise, check if the field matches any sortable field
for pattern in sortable_fields {
match match_field_legacy(&pattern, field_name) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => selection = PatternMatch::Parent,
PatternMatch::NoMatch => (),
}
}
selection
}

View File

@ -29,7 +29,7 @@ impl GeoExtractor {
grenad_parameters: GrenadParameters,
) -> Result<Option<Self>> {
if index.is_geo_activated(rtxn)? {
Ok(Some(GeoExtractor {filterable_attributes_ruless }))
Ok(Some(GeoExtractor { grenad_parameters }))
} else {
Ok(None)
}

View File

@ -227,7 +227,14 @@ mod test {
let fields_ids_map = FieldIdMapWithMetadata::new(
fields_ids_map,
MetadataBuilder::new(Default::default(), Default::default(), Default::default(), None),
MetadataBuilder::new(
Default::default(),
Default::default(),
Default::default(),
None,
None,
Default::default(),
),
);
let fields_ids_map_lock = std::sync::RwLock::new(fields_ids_map);

View File

@ -114,7 +114,7 @@ pub(super) fn update_index(
index.put_primary_key(wtxn, new_primary_key.name())?;
}
let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?;
inner_index_settings.recompute_facets(wtxn, index)?;
// inner_index_settings.recompute_facets(wtxn, index)?;
inner_index_settings.recompute_searchables(wtxn, index)?;
index.put_field_distribution(wtxn, &field_distribution)?;
index.put_documents_ids(wtxn, &document_ids)?;

View File

@ -11,12 +11,15 @@ use roaring::RoaringBitmap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use time::OffsetDateTime;
use super::del_add::DelAddOperation;
use super::del_add::{DelAdd, DelAddOperation};
use super::index_documents::{IndexDocumentsConfig, Transform};
use super::IndexerConfig;
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::attribute_patterns::PatternMatch;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::criterion::Criterion;
use crate::error::UserError;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::filterable_attributes_rules::match_faceted_field;
use crate::index::{
IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO,
DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
@ -31,10 +34,7 @@ use crate::vector::settings::{
WriteBackToDocuments,
};
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
use crate::{
FieldId, FieldsIdsMap, FilterableAttributesSettings, Index, LocalizedAttributesRule,
LocalizedFieldIds, Result,
};
use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result};
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum Setting<T> {
@ -158,7 +158,7 @@ pub struct Settings<'a, 't, 'i> {
searchable_fields: Setting<Vec<String>>,
displayed_fields: Setting<Vec<String>>,
filterable_fields: Setting<Vec<FilterableAttributesSettings>>,
filterable_fields: Setting<Vec<FilterableAttributesRule>>,
sortable_fields: Setting<HashSet<String>>,
criteria: Setting<Vec<Criterion>>,
stop_words: Setting<BTreeSet<String>>,
@ -244,7 +244,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.filterable_fields = Setting::Reset;
}
pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesSettings>) {
pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesRule>) {
self.filterable_fields = Setting::Set(rules);
}
@ -519,7 +519,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
}
/// Updates the index's searchable attributes.
fn update_searchable(&mut self) -> Result<bool> {
fn update_user_defined_searchable_attributes(&mut self) -> Result<bool> {
match self.searchable_fields {
Setting::Set(ref fields) => {
// Check to see if the searchable fields changed before doing anything else
@ -532,26 +532,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
return Ok(false);
}
// Since we're updating the settings we can only add new fields at the end of the field id map
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
// fields are deduplicated, only the first occurrence is taken into account
let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
// Add all the searchable attributes to the field map, and then add the
// remaining fields from the old field map to the new one
for name in names.iter() {
// The fields ids map won't change the field id of already present elements thus only the
// new fields will be inserted.
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
}
self.index.put_all_searchable_fields_from_fields_ids_map(
self.wtxn,
&names,
&fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME),
&fields_ids_map,
)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
self.index.put_user_defined_searchable_fields(self.wtxn, &names)?;
Ok(true)
}
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
@ -763,10 +747,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
fn update_filterable(&mut self) -> Result<()> {
match self.filterable_fields {
Setting::Set(ref fields) => {
self.index.put_filterable_fields(self.wtxn, fields)?;
self.index.put_filterable_attributes_rules(self.wtxn, fields)?;
}
Setting::Reset => {
self.index.delete_filterable_fields(self.wtxn)?;
self.index.delete_filterable_attributes_rules(self.wtxn)?;
}
Setting::NotSet => (),
}
@ -1256,7 +1240,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.update_separator_tokens()?;
self.update_dictionary()?;
self.update_synonyms()?;
self.update_searchable()?;
self.update_user_defined_searchable_attributes()?;
self.update_exact_attributes()?;
self.update_proximity_precision()?;
self.update_prefix_search()?;
@ -1266,7 +1250,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
let embedding_config_updates = self.update_embedding_configs()?;
let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
new_inner_settings.recompute_facets(self.wtxn, self.index)?;
// new_inner_settings.recompute_facets(self.wtxn, self.index)?;
new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
let primary_key_id = self
.index
@ -1318,8 +1303,8 @@ impl InnerIndexSettingsDiff {
settings_update_only: bool,
) -> Self {
let only_additional_fields = match (
&old_settings.user_defined_searchable_fields,
&new_settings.user_defined_searchable_fields,
&old_settings.user_defined_searchable_attributes,
&new_settings.user_defined_searchable_attributes,
) {
(None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
(Some(old), Some(new)) => {
@ -1341,14 +1326,14 @@ impl InnerIndexSettingsDiff {
|| old_settings.dictionary != new_settings.dictionary
|| old_settings.proximity_precision != new_settings.proximity_precision
|| old_settings.prefix_search != new_settings.prefix_search
|| old_settings.localized_searchable_fields_ids
!= new_settings.localized_searchable_fields_ids
|| old_settings.localized_attributes_rules
!= new_settings.localized_attributes_rules
};
let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
!= new_settings.user_defined_searchable_fields;
let cache_user_defined_searchables = old_settings.user_defined_searchable_attributes
!= new_settings.user_defined_searchable_attributes;
// if the user-defined searchables changed, then we need to reindex prompts.
if cache_user_defined_searchables {
@ -1431,30 +1416,39 @@ impl InnerIndexSettingsDiff {
}
}
/// List the faceted fields from the inner fid map.
/// This is used to list the faceted fields when we are reindexing,
/// but it can't be used in document addition because the field id map must be exhaustive.
pub fn list_faceted_fields_from_fid_map(&self, del_add: DelAdd) -> BTreeSet<FieldId> {
let settings = match del_add {
DelAdd::Deletion => &self.old,
DelAdd::Addition => &self.new,
};
settings
.fields_ids_map
.iter_id_metadata()
.filter(|(_, metadata)| metadata.is_faceted(&settings.filterable_attributes_rules))
.map(|(id, _)| id)
.collect()
}
pub fn facet_fids_changed(&self) -> bool {
let existing_fields = &self.new.existing_fields;
if existing_fields.iter().any(|field| field.contains('.')) {
return true;
}
let new_faceted_fields: HashSet<_> = existing_fields
.iter()
.filter(|field| self.new.match_faceted_field(field) == PatternMatch::Match)
.collect();
let old_faceted_fields: HashSet<_> = existing_fields
.iter()
.filter(|field| self.old.match_faceted_field(field) == PatternMatch::Match)
.collect();
let old_faceted_fields = &self.old.user_defined_faceted_fields;
if old_faceted_fields.iter().any(|field| field.contains('.')) {
return true;
}
// If there is new faceted fields we indicate that we must reindex as we must
// index new fields as facets. It means that the distinct attribute,
// an Asc/Desc criterion or a filtered attribute as be added or removed.
let new_faceted_fields = &self.new.user_defined_faceted_fields;
if new_faceted_fields.iter().any(|field| field.contains('.')) {
return true;
}
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
old_faceted_fields != new_faceted_fields
}
pub fn global_facet_settings_changed(&self) -> bool {
self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
self.old.localized_attributes_rules != self.new.localized_attributes_rules
|| self.old.facet_search != self.new.facet_search
}
@ -1475,9 +1469,9 @@ impl InnerIndexSettingsDiff {
|| (!self.settings_update_only && self.new.geo_fields_ids.is_some())
}
pub fn modified_faceted_fields(&self) -> HashSet<String> {
&self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields
}
// pub fn modified_faceted_fields(&self) -> HashSet<String> {
// &self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields
// }
}
#[derive(Clone)]
@ -1485,20 +1479,18 @@ pub(crate) struct InnerIndexSettings {
pub stop_words: Option<fst::Set<Vec<u8>>>,
pub allowed_separators: Option<BTreeSet<String>>,
pub dictionary: Option<BTreeSet<String>>,
pub fields_ids_map: FieldsIdsMap,
pub user_defined_faceted_fields: HashSet<String>,
pub user_defined_searchable_fields: Option<Vec<String>>,
pub faceted_fields_ids: HashSet<FieldId>,
pub searchable_fields_ids: Vec<FieldId>,
pub fields_ids_map: FieldIdMapWithMetadata,
pub localized_attributes_rules: Vec<LocalizedAttributesRule>,
pub filterable_attributes_rules: Vec<FilterableAttributesRule>,
pub asc_desc_fields: HashSet<String>,
pub distinct_field: Option<String>,
pub user_defined_searchable_attributes: Option<Vec<String>>,
pub sortable_fields: HashSet<String>,
pub exact_attributes: HashSet<FieldId>,
pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs,
pub existing_fields: HashSet<String>,
pub geo_fields_ids: Option<(FieldId, FieldId)>,
pub non_searchable_fields_ids: Vec<FieldId>,
pub non_faceted_fields_ids: Vec<FieldId>,
pub localized_searchable_fields_ids: LocalizedFieldIds,
pub localized_faceted_fields_ids: LocalizedFieldIds,
pub prefix_search: PrefixSearch,
pub facet_search: bool,
}
@ -1514,12 +1506,6 @@ impl InnerIndexSettings {
let allowed_separators = index.allowed_separators(rtxn)?;
let dictionary = index.dictionary(rtxn)?;
let mut fields_ids_map = index.fields_ids_map(rtxn)?;
let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
let user_defined_searchable_fields =
user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
let user_defined_faceted_fields = index.user_defined_faceted_fields(rtxn)?;
let mut searchable_fields_ids = index.searchable_fields_ids(rtxn)?;
let mut faceted_fields_ids = index.faceted_fields_ids(rtxn)?;
let exact_attributes = index.exact_attributes_ids(rtxn)?;
let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default();
let embedding_configs = match embedding_configs {
@ -1535,82 +1521,73 @@ impl InnerIndexSettings {
.collect();
// index.fields_ids_map($a)? ==>> fields_ids_map
let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) {
Some(gfid) => {
let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
Some(_) if index.is_geo_activated(rtxn)? => {
// if `_geo` is faceted then we get the `lat` and `lng`
if is_sortable || is_filterable {
let field_ids = fields_ids_map
.insert("_geo.lat")
.zip(fields_ids_map.insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
Some(field_ids)
} else {
None
}
let field_ids = fields_ids_map
.insert("_geo.lat")
.zip(fields_ids_map.insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
Some(field_ids)
}
None => None,
_ => None,
};
let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
let localized_searchable_fields_ids = LocalizedFieldIds::new(
&localized_attributes_rules,
&fields_ids_map,
searchable_fields_ids.iter().cloned(),
);
let localized_faceted_fields_ids = LocalizedFieldIds::new(
&localized_attributes_rules,
&fields_ids_map,
faceted_fields_ids.iter().cloned(),
);
let vectors_fids = fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME);
searchable_fields_ids.retain(|id| !vectors_fids.contains(id));
faceted_fields_ids.retain(|id| !vectors_fids.contains(id));
let localized_attributes_rules =
index.localized_attributes_rules(rtxn)?.unwrap_or_default();
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
let sortable_fields = index.sortable_fields(rtxn)?;
let asc_desc_fields = index.asc_desc_fields(rtxn)?;
let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
let user_defined_searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
let builder = MetadataBuilder::from_index(index, rtxn)?;
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
Ok(Self {
stop_words,
allowed_separators,
dictionary,
fields_ids_map,
user_defined_faceted_fields,
user_defined_searchable_fields,
faceted_fields_ids,
searchable_fields_ids,
localized_attributes_rules,
filterable_attributes_rules,
asc_desc_fields,
distinct_field,
user_defined_searchable_attributes,
sortable_fields,
exact_attributes,
proximity_precision,
embedding_configs,
existing_fields,
geo_fields_ids,
non_searchable_fields_ids: vectors_fids.clone(),
non_faceted_fields_ids: vectors_fids.clone(),
localized_searchable_fields_ids,
localized_faceted_fields_ids,
prefix_search,
facet_search,
})
}
// find and insert the new field ids
pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn<'_>, index: &Index) -> Result<()> {
let new_facets = self
.fields_ids_map
.iter()
.filter(|(fid, _field)| !self.non_faceted_fields_ids.contains(fid))
.filter(|(_fid, field)| crate::is_faceted(field, &self.user_defined_faceted_fields))
.map(|(_fid, field)| field.to_string())
.collect();
index.put_faceted_fields(wtxn, &new_facets)?;
self.faceted_fields_ids = index.faceted_fields_ids(wtxn)?;
let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
self.localized_faceted_fields_ids = LocalizedFieldIds::new(
&localized_attributes_rules,
&self.fields_ids_map,
self.faceted_fields_ids.iter().cloned(),
);
Ok(())
pub fn match_faceted_field(&self, field: &str) -> PatternMatch {
match_faceted_field(
field,
&self.filterable_attributes_rules,
&self.sortable_fields,
&self.asc_desc_fields,
&self.distinct_field,
)
}
// find and insert the new field ids
// pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn<'_>, index: &Index) -> Result<()> {
// let new_facets = self
// .fields_ids_map
// .iter()
// .filter(|(_fid, field, metadata)| {
// metadata.is_faceted(&self.filterable_attributes_rules)
// })
// .map(|(_fid, field, _metadata)| field.to_string())
// .collect();
// index.put_faceted_fields(wtxn, &new_facets)?;
// Ok(())
// }
// find and insert the new field ids
pub fn recompute_searchables(
&mut self,
@ -1618,7 +1595,7 @@ impl InnerIndexSettings {
index: &Index,
) -> Result<()> {
let searchable_fields = self
.user_defined_searchable_fields
.user_defined_searchable_attributes
.as_ref()
.map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
@ -1627,17 +1604,9 @@ impl InnerIndexSettings {
index.put_all_searchable_fields_from_fields_ids_map(
wtxn,
&searchable_fields,
&self.non_searchable_fields_ids,
&self.fields_ids_map,
)?;
}
self.searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
self.localized_searchable_fields_ids = LocalizedFieldIds::new(
&localized_attributes_rules,
&self.fields_ids_map,
self.searchable_fields_ids.iter().cloned(),
);
Ok(())
}
@ -2104,7 +2073,7 @@ mod tests {
// Set the filterable fields to be the age.
index
.update_settings(|settings| {
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"age".to_string(),
)]);
})
@ -2121,8 +2090,8 @@ mod tests {
// Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap();
let fields_ids = index.filterable_fields(&rtxn).unwrap();
assert_eq!(fields_ids, vec![FilterableAttributesSettings::Field("age".to_string(),)]);
let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap();
assert_eq!(fields_ids, vec![FilterableAttributesRule::Field("age".to_string(),)]);
// Only count the field_id 0 and level 0 facet values.
// TODO we must support typed CSVs for numbers to be understood.
let fidmap = index.fields_ids_map(&rtxn).unwrap();
@ -2165,20 +2134,20 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(vec![
FilterableAttributesSettings::Field("age".to_string()),
FilterableAttributesSettings::Field("name".to_string()),
FilterableAttributesRule::Field("age".to_string()),
FilterableAttributesRule::Field("name".to_string()),
]);
})
.unwrap();
// Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap();
let fields_ids = index.filterable_fields(&rtxn).unwrap();
let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap();
assert_eq!(
fields_ids,
vec![
FilterableAttributesSettings::Field("age".to_string()),
FilterableAttributesSettings::Field("name".to_string()),
FilterableAttributesRule::Field("age".to_string()),
FilterableAttributesRule::Field("name".to_string()),
]
);
@ -2205,7 +2174,7 @@ mod tests {
// Remove the age from the filterable fields.
index
.update_settings(|settings| {
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"name".to_string(),
)]);
})
@ -2213,8 +2182,8 @@ mod tests {
// Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap();
let fields_ids = index.filterable_fields(&rtxn).unwrap();
assert_eq!(fields_ids, vec![FilterableAttributesSettings::Field("name".to_string())]);
let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap();
assert_eq!(fields_ids, vec![FilterableAttributesRule::Field("name".to_string())]);
let rtxn = index.read_txn().unwrap();
// Only count the field_id 2 and level 0 facet values.
@ -2545,8 +2514,8 @@ mod tests {
.update_settings(|settings| {
settings.set_displayed_fields(vec!["hello".to_string()]);
settings.set_filterable_fields(vec![
FilterableAttributesSettings::Field("age".to_string()),
FilterableAttributesSettings::Field("toto".to_string()),
FilterableAttributesRule::Field("age".to_string()),
FilterableAttributesRule::Field("toto".to_string()),
]);
settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
})
@ -2664,7 +2633,7 @@ mod tests {
// Set the genres setting
index
.update_settings(|settings| {
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"genres".to_string(),
)]);
})