mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-02-04 17:43:28 +01:00
wip
This commit is contained in:
parent
deea0c44ea
commit
ebd9922cba
@ -226,8 +226,8 @@ pub(crate) mod test {
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::{Action, Key};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::{self, FilterableAttributesRule};
|
||||
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
|
||||
use meilisearch_types::tasks::{Details, Status};
|
||||
use serde_json::{json, Map, Value};
|
||||
@ -271,7 +271,10 @@ pub(crate) mod test {
|
||||
let settings = Settings {
|
||||
displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
|
||||
searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
|
||||
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
|
||||
filterable_attributes: Setting::Set(vec![
|
||||
FilterableAttributesRule::Field(S("race")),
|
||||
FilterableAttributesRule::Field(S("age")),
|
||||
]),
|
||||
sortable_attributes: Setting::Set(btreeset! { S("age") }),
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
|
@ -318,7 +318,16 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
v6::Settings {
|
||||
displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
|
||||
searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
|
||||
filterable_attributes: settings.filterable_attributes.into(),
|
||||
filterable_attributes: match settings.filterable_attributes {
|
||||
v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set(
|
||||
filterable_attributes
|
||||
.into_iter()
|
||||
.map(|attr| v6::FilterableAttributesRule::Field(attr))
|
||||
.collect(),
|
||||
),
|
||||
v5::settings::Setting::Reset => v6::Setting::Reset,
|
||||
v5::settings::Setting::NotSet => v6::Setting::NotSet,
|
||||
},
|
||||
sortable_attributes: settings.sortable_attributes.into(),
|
||||
ranking_rules: {
|
||||
match settings.ranking_rules {
|
||||
|
@ -43,6 +43,8 @@ pub type ResponseError = meilisearch_types::error::ResponseError;
|
||||
pub type Code = meilisearch_types::error::Code;
|
||||
pub type RankingRuleView = meilisearch_types::settings::RankingRuleView;
|
||||
|
||||
pub type FilterableAttributesRule = meilisearch_types::milli::FilterableAttributesRule;
|
||||
|
||||
pub struct V6Reader {
|
||||
dump: TempDir,
|
||||
instance_uid: Option<Uuid>,
|
||||
|
@ -74,6 +74,21 @@ pub fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch {
|
||||
}
|
||||
}
|
||||
|
||||
/// Match a field against a distinct field.
|
||||
pub fn match_distinct_field(distinct_field: Option<&str>, field: &str) -> PatternMatch {
|
||||
if let Some(distinct_field) = distinct_field {
|
||||
if field == distinct_field {
|
||||
// If the field matches exactly the distinct field, return Match
|
||||
return PatternMatch::Match;
|
||||
} else if is_faceted_by(distinct_field, field) {
|
||||
// If the field is a parent field of the distinct field, return Parent
|
||||
return PatternMatch::Parent;
|
||||
}
|
||||
}
|
||||
// If the field does not match the distinct field and is not a parent of a nested field that matches the distinct field, return NoMatch
|
||||
PatternMatch::NoMatch
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum PatternMatch {
|
||||
/// The field is a parent of the of a nested field that matches the pattern
|
||||
|
@ -6,6 +6,7 @@ use heed::RoTxn;
|
||||
|
||||
use super::FieldsIdsMap;
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::{
|
||||
is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index,
|
||||
LocalizedAttributesRule, Result,
|
||||
@ -13,8 +14,11 @@ use crate::{
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct Metadata {
|
||||
pub searchable: bool,
|
||||
pub searchable: Option<u16>,
|
||||
pub sortable: bool,
|
||||
pub distinct: bool,
|
||||
pub asc_desc: bool,
|
||||
pub geo: bool,
|
||||
localized_attributes_rule_id: Option<NonZeroU16>,
|
||||
filterable_attributes_rule_id: Option<NonZeroU16>,
|
||||
}
|
||||
@ -136,21 +140,33 @@ impl Metadata {
|
||||
}
|
||||
|
||||
pub fn is_searchable(&self) -> bool {
|
||||
self.searchable.is_some()
|
||||
}
|
||||
|
||||
pub fn searchable_weight(&self) -> Option<u16> {
|
||||
self.searchable
|
||||
}
|
||||
|
||||
/// Returns `true` if the field is part of the facet databases. (sortable, filterable, or facet searchable)
|
||||
pub fn is_distinct(&self) -> bool {
|
||||
self.distinct
|
||||
}
|
||||
|
||||
pub fn is_asc_desc(&self) -> bool {
|
||||
self.asc_desc
|
||||
}
|
||||
|
||||
pub fn is_geo(&self) -> bool {
|
||||
self.geo
|
||||
}
|
||||
|
||||
/// Returns `true` if the field is part of the facet databases. (sortable, distinct, asc_desc, filterable or facet searchable)
|
||||
pub fn is_faceted(&self, rules: &[FilterableAttributesRule]) -> bool {
|
||||
if self.is_sortable() {
|
||||
if self.is_distinct() || self.is_sortable() || self.is_asc_desc() {
|
||||
return true;
|
||||
}
|
||||
|
||||
let features = self.filterable_attributes_features(&rules);
|
||||
if features.is_filterable() {
|
||||
return true;
|
||||
}
|
||||
|
||||
if features.is_facet_searchable() {
|
||||
if features.is_filterable() || features.is_facet_searchable() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -164,6 +180,8 @@ pub struct MetadataBuilder {
|
||||
filterable_attributes: Vec<FilterableAttributesRule>,
|
||||
sortable_attributes: HashSet<String>,
|
||||
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
||||
distinct_attribute: Option<String>,
|
||||
asc_desc_attributes: HashSet<String>,
|
||||
}
|
||||
|
||||
impl MetadataBuilder {
|
||||
@ -176,45 +194,98 @@ impl MetadataBuilder {
|
||||
let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
|
||||
let sortable_attributes = index.sortable_fields(rtxn)?;
|
||||
let localized_attributes = index.localized_attributes_rules(rtxn)?;
|
||||
let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
|
||||
let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
|
||||
|
||||
Ok(Self {
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
localized_attributes,
|
||||
distinct_attribute,
|
||||
asc_desc_attributes,
|
||||
})
|
||||
}
|
||||
|
||||
// pub fn new(
|
||||
// searchable_attributes: Option<Vec<String>>,
|
||||
// filterable_attributes: Vec<FilterableAttributesRule>,
|
||||
// sortable_attributes: HashSet<String>,
|
||||
// localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
||||
// ) -> Self {
|
||||
// let searchable_attributes = match searchable_attributes {
|
||||
// Some(fields) if fields.iter().any(|f| f == "*") => None,
|
||||
// None => None,
|
||||
// Some(fields) => Some(fields),
|
||||
// };
|
||||
#[cfg(test)]
|
||||
/// Build a new `MetadataBuilder` from the given parameters.
|
||||
///
|
||||
/// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
|
||||
pub fn new(
|
||||
searchable_attributes: Option<Vec<String>>,
|
||||
filterable_attributes: Vec<FilterableAttributesRule>,
|
||||
sortable_attributes: HashSet<String>,
|
||||
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
||||
distinct_attribute: Option<String>,
|
||||
asc_desc_attributes: HashSet<String>,
|
||||
) -> Self {
|
||||
let searchable_attributes = match searchable_attributes {
|
||||
Some(fields) if fields.iter().any(|f| f == "*") => None,
|
||||
None => None,
|
||||
Some(fields) => Some(fields),
|
||||
};
|
||||
|
||||
// Self {
|
||||
// searchable_attributes,
|
||||
// filterable_attributes,
|
||||
// sortable_attributes,
|
||||
// localized_attributes,
|
||||
// }
|
||||
// }
|
||||
Self {
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
localized_attributes,
|
||||
distinct_attribute,
|
||||
asc_desc_attributes,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn metadata_for_field(&self, field: &str) -> Metadata {
|
||||
let searchable = match &self.searchable_attributes {
|
||||
// A field is searchable if it is faceted by a searchable attribute
|
||||
Some(attributes) => attributes.iter().any(|pattern| is_faceted_by(field, pattern)),
|
||||
None => true,
|
||||
};
|
||||
if is_faceted_by(field, RESERVED_VECTORS_FIELD_NAME) {
|
||||
// Vectors fields are not searchable, filterable, distinct or asc_desc
|
||||
return Metadata {
|
||||
searchable: None,
|
||||
sortable: false,
|
||||
distinct: false,
|
||||
asc_desc: false,
|
||||
geo: false,
|
||||
localized_attributes_rule_id: None,
|
||||
filterable_attributes_rule_id: None,
|
||||
};
|
||||
}
|
||||
|
||||
// A field is sortable if it is faceted by a sortable attribute
|
||||
let sortable = self.sortable_attributes.iter().any(|pattern| is_faceted_by(field, pattern));
|
||||
|
||||
let filterable_attributes_rule_id = self
|
||||
.filterable_attributes
|
||||
.iter()
|
||||
.position(|attribute| attribute.match_str(field) == PatternMatch::Match)
|
||||
// saturating_add(1): make `id` `NonZero`
|
||||
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
|
||||
|
||||
if is_faceted_by(field, RESERVED_GEO_FIELD_NAME) {
|
||||
// Geo fields are not searchable, distinct or asc_desc
|
||||
return Metadata {
|
||||
searchable: None,
|
||||
sortable,
|
||||
distinct: false,
|
||||
asc_desc: false,
|
||||
geo: true,
|
||||
localized_attributes_rule_id: None,
|
||||
filterable_attributes_rule_id,
|
||||
};
|
||||
}
|
||||
|
||||
let searchable = match &self.searchable_attributes {
|
||||
// A field is searchable if it is faceted by a searchable attribute
|
||||
Some(attributes) => attributes
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_i, pattern)| is_faceted_by(field, pattern))
|
||||
.map(|(i, _)| i as u16),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let distinct =
|
||||
self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
|
||||
let asc_desc = self.asc_desc_attributes.contains(field);
|
||||
|
||||
let localized_attributes_rule_id = self
|
||||
.localized_attributes
|
||||
.iter()
|
||||
@ -223,16 +294,12 @@ impl MetadataBuilder {
|
||||
// saturating_add(1): make `id` `NonZero`
|
||||
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
|
||||
|
||||
let filterable_attributes_rule_id = self
|
||||
.filterable_attributes
|
||||
.iter()
|
||||
.position(|attribute| attribute.match_str(field) == PatternMatch::Match)
|
||||
// saturating_add(1): make `id` `NonZero`
|
||||
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
|
||||
|
||||
Metadata {
|
||||
searchable,
|
||||
sortable,
|
||||
distinct,
|
||||
asc_desc,
|
||||
geo: false,
|
||||
localized_attributes_rule_id,
|
||||
filterable_attributes_rule_id,
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ use std::collections::{BTreeSet, HashSet};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::{
|
||||
attribute_patterns::{match_field_legacy, PatternMatch},
|
||||
attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch},
|
||||
constants::RESERVED_GEO_FIELD_NAME,
|
||||
AttributePatterns, FieldId, FieldsIdsMap,
|
||||
};
|
||||
@ -210,3 +210,39 @@ pub fn match_pattern_by_features(
|
||||
|
||||
selection
|
||||
}
|
||||
|
||||
/// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields.
|
||||
pub fn match_faceted_field(
|
||||
field_name: &str,
|
||||
filterable_fields: &[FilterableAttributesRule],
|
||||
sortable_fields: &HashSet<String>,
|
||||
asc_desc_fields: &HashSet<String>,
|
||||
distinct_field: &Option<String>,
|
||||
) -> PatternMatch {
|
||||
// Check if the field matches any filterable or facet searchable field
|
||||
let mut selection = match_pattern_by_features(field_name, &filterable_fields, &|features| {
|
||||
features.is_facet_searchable() || features.is_filterable()
|
||||
});
|
||||
|
||||
// If the field matches the pattern, return Match
|
||||
if selection == PatternMatch::Match {
|
||||
return selection;
|
||||
}
|
||||
|
||||
match match_distinct_field(distinct_field.as_deref(), field_name) {
|
||||
PatternMatch::Match => return PatternMatch::Match,
|
||||
PatternMatch::Parent => selection = PatternMatch::Parent,
|
||||
PatternMatch::NoMatch => (),
|
||||
}
|
||||
|
||||
// Otherwise, check if the field matches any sortable/asc_desc field
|
||||
for pattern in sortable_fields.iter().chain(asc_desc_fields.iter()) {
|
||||
match match_field_legacy(&pattern, field_name) {
|
||||
PatternMatch::Match => return PatternMatch::Match,
|
||||
PatternMatch::Parent => selection = PatternMatch::Parent,
|
||||
PatternMatch::NoMatch => (),
|
||||
}
|
||||
}
|
||||
|
||||
selection
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
use std::convert::TryInto;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
|
||||
@ -14,6 +13,7 @@ use crate::attribute_patterns::PatternMatch;
|
||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::documents::PrimaryKey;
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
use crate::filterable_attributes_rules::match_pattern_by_features;
|
||||
use crate::heed_codec::facet::{
|
||||
@ -648,8 +648,7 @@ impl Index {
|
||||
&self,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
user_fields: &[&str],
|
||||
non_searchable_fields_ids: &[FieldId],
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
fields_ids_map: &FieldIdMapWithMetadata,
|
||||
) -> Result<()> {
|
||||
// We can write the user defined searchable fields as-is.
|
||||
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
|
||||
@ -657,29 +656,17 @@ impl Index {
|
||||
let mut weights = FieldidsWeightsMap::default();
|
||||
|
||||
// Now we generate the real searchable fields:
|
||||
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
|
||||
// 2. Iterate over the user defined searchable fields.
|
||||
// 3. If a user defined field is a subset of a field defined in the fields_ids_map
|
||||
// (ie doggo.name is a subset of doggo) right after doggo and with the same weight.
|
||||
let mut real_fields = Vec::new();
|
||||
|
||||
for (id, field_from_map) in fields_ids_map.iter() {
|
||||
for (weight, user_field) in user_fields.iter().enumerate() {
|
||||
if crate::is_faceted_by(field_from_map, user_field)
|
||||
&& !real_fields.contains(&field_from_map)
|
||||
&& !non_searchable_fields_ids.contains(&id)
|
||||
{
|
||||
real_fields.push(field_from_map);
|
||||
|
||||
let weight: u16 =
|
||||
weight.try_into().map_err(|_| UserError::AttributeLimitReached)?;
|
||||
weights.insert(id, weight);
|
||||
}
|
||||
for (id, field_from_map, metadata) in fields_ids_map.iter() {
|
||||
if let Some(weight) = metadata.searchable_weight() {
|
||||
real_fields.push(field_from_map);
|
||||
weights.insert(id, weight);
|
||||
}
|
||||
}
|
||||
|
||||
self.put_searchable_fields(wtxn, &real_fields)?;
|
||||
self.put_fieldids_weights_map(wtxn, &weights)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -978,6 +965,19 @@ impl Index {
|
||||
// Ok(faceted_fields)
|
||||
// }
|
||||
|
||||
pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
|
||||
let asc_desc_fields = self
|
||||
.criteria(rtxn)?
|
||||
.into_iter()
|
||||
.filter_map(|criterion| match criterion {
|
||||
Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
|
||||
_otherwise => None,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(asc_desc_fields)
|
||||
}
|
||||
|
||||
/* faceted documents ids */
|
||||
|
||||
/// Retrieve all the documents which contain this field id set as null
|
||||
|
@ -7,14 +7,14 @@ use liquid::model::{
|
||||
};
|
||||
use liquid::{ObjectView, ValueView};
|
||||
|
||||
use super::{FieldMetadata, FieldsIdsMapWithMetadata};
|
||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata};
|
||||
use crate::GlobalFieldsIdsMap;
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct FieldValue<'a, D: ObjectView> {
|
||||
name: &'a str,
|
||||
document: &'a D,
|
||||
metadata: FieldMetadata,
|
||||
metadata: Metadata,
|
||||
}
|
||||
|
||||
impl<'a, D: ObjectView> ValueView for FieldValue<'a, D> {
|
||||
@ -67,7 +67,10 @@ impl<'a, D: ObjectView> FieldValue<'a, D> {
|
||||
}
|
||||
|
||||
pub fn is_searchable(&self) -> &bool {
|
||||
&self.metadata.searchable
|
||||
match self.metadata.is_searchable() {
|
||||
true => &true,
|
||||
false => &false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
@ -125,15 +128,11 @@ pub struct BorrowedFields<'a, 'map, D: ObjectView> {
|
||||
}
|
||||
|
||||
impl<'a, D: ObjectView> OwnedFields<'a, D> {
|
||||
pub fn new(document: &'a D, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
|
||||
pub fn new(document: &'a D, field_id_map: &'a FieldIdMapWithMetadata) -> Self {
|
||||
Self(
|
||||
std::iter::repeat(document)
|
||||
.zip(field_id_map.iter())
|
||||
.map(|(document, (fid, name))| FieldValue {
|
||||
document,
|
||||
name,
|
||||
metadata: field_id_map.metadata(fid).unwrap_or_default(),
|
||||
})
|
||||
.map(|(document, (_fid, name, metadata))| FieldValue { document, name, metadata })
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
@ -187,7 +186,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> {
|
||||
let fv = self.doc_alloc.alloc(FieldValue {
|
||||
name: self.doc_alloc.alloc_str(&k),
|
||||
document: self.document,
|
||||
metadata: FieldMetadata { searchable: metadata.searchable },
|
||||
metadata,
|
||||
});
|
||||
fv as _
|
||||
}))
|
||||
@ -207,7 +206,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> {
|
||||
let fv = self.doc_alloc.alloc(FieldValue {
|
||||
name: self.doc_alloc.alloc_str(&key),
|
||||
document: self.document,
|
||||
metadata: FieldMetadata { searchable: metadata.searchable },
|
||||
metadata,
|
||||
});
|
||||
Some(fv as _)
|
||||
}
|
||||
|
@ -5,11 +5,9 @@ mod fields;
|
||||
mod template_checker;
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::BTreeMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::fmt::Debug;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::ops::Deref;
|
||||
|
||||
use bumpalo::Bump;
|
||||
use document::ParseableDocument;
|
||||
@ -18,8 +16,9 @@ use fields::{BorrowedFields, OwnedFields};
|
||||
|
||||
use self::context::Context;
|
||||
use self::document::Document;
|
||||
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::{FieldId, FieldsIdsMap, GlobalFieldsIdsMap};
|
||||
use crate::GlobalFieldsIdsMap;
|
||||
|
||||
pub struct Prompt {
|
||||
template: liquid::Template,
|
||||
@ -145,9 +144,9 @@ impl Prompt {
|
||||
&self,
|
||||
document: &obkv::KvReaderU16,
|
||||
side: DelAdd,
|
||||
field_id_map: &FieldsIdsMapWithMetadata,
|
||||
field_id_map: &FieldIdMapWithMetadata,
|
||||
) -> Result<String, RenderPromptError> {
|
||||
let document = Document::new(document, side, field_id_map);
|
||||
let document = Document::new(document, side, field_id_map.as_fields_ids_map());
|
||||
let fields = OwnedFields::new(&document, field_id_map);
|
||||
let context = Context::new(&document, &fields);
|
||||
|
||||
@ -172,40 +171,6 @@ fn truncate(s: &mut String, max_bytes: usize) {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FieldsIdsMapWithMetadata<'a> {
|
||||
fields_ids_map: &'a FieldsIdsMap,
|
||||
metadata: BTreeMap<FieldId, FieldMetadata>,
|
||||
}
|
||||
|
||||
impl<'a> FieldsIdsMapWithMetadata<'a> {
|
||||
pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self {
|
||||
let mut metadata: BTreeMap<FieldId, FieldMetadata> =
|
||||
fields_ids_map.ids().map(|id| (id, Default::default())).collect();
|
||||
for searchable_field_id in searchable_fields_ids {
|
||||
let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue };
|
||||
metadata.searchable = true;
|
||||
}
|
||||
Self { fields_ids_map, metadata }
|
||||
}
|
||||
|
||||
pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> {
|
||||
self.metadata.get(&field_id).copied()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Deref for FieldsIdsMapWithMetadata<'a> {
|
||||
type Target = FieldsIdsMap;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.fields_ids_map
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy)]
|
||||
pub struct FieldMetadata {
|
||||
pub searchable: bool,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::Prompt;
|
||||
|
@ -159,7 +159,7 @@ impl<'i> FacetsUpdate<'i> {
|
||||
let field_ids = self.index.facet_leveled_field_ids(wtxn)?;
|
||||
let bulk_update = FacetsUpdateBulk::new(
|
||||
self.index,
|
||||
field_ids,
|
||||
field_ids.into_iter().collect(),
|
||||
self.facet_type,
|
||||
self.delta_data,
|
||||
self.group_size,
|
||||
|
@ -150,9 +150,12 @@ fn searchable_fields_changed(
|
||||
obkv: &KvReader<FieldId>,
|
||||
settings_diff: &InnerIndexSettingsDiff,
|
||||
) -> bool {
|
||||
let searchable_fields = &settings_diff.new.searchable_fields_ids;
|
||||
for (field_id, field_bytes) in obkv.iter() {
|
||||
if searchable_fields.contains(&field_id) {
|
||||
let Some(metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else {
|
||||
debug_assert!(false, "Field id {} is not in the fields ids map", field_id);
|
||||
continue;
|
||||
};
|
||||
if metadata.is_searchable() {
|
||||
let del_add = KvReaderDelAdd::from_slice(field_bytes);
|
||||
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
|
||||
// if both fields are None, check the next field.
|
||||
@ -200,8 +203,12 @@ fn tokens_from_document<'a>(
|
||||
buffers.obkv_buffer.clear();
|
||||
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
|
||||
for (field_id, field_bytes) in obkv.iter() {
|
||||
let Some(metadata) = settings.fields_ids_map.metadata(field_id) else {
|
||||
debug_assert!(false, "Field id {} is not in the fields ids map", field_id);
|
||||
continue;
|
||||
};
|
||||
// if field is searchable.
|
||||
if settings.searchable_fields_ids.contains(&field_id) {
|
||||
if metadata.is_searchable() {
|
||||
// extract deletion or addition only.
|
||||
if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) {
|
||||
// parse json.
|
||||
@ -216,7 +223,7 @@ fn tokens_from_document<'a>(
|
||||
buffers.field_buffer.clear();
|
||||
if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
|
||||
// create an iterator of token with their positions.
|
||||
let locales = settings.localized_searchable_fields_ids.locales(field_id);
|
||||
let locales = metadata.locales(&settings.localized_attributes_rules);
|
||||
let tokens = process_tokens(tokenizer.tokenize_with_allow_list(field, locales))
|
||||
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
|
||||
|
||||
|
@ -12,12 +12,11 @@ use heed::BytesEncode;
|
||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
|
||||
use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
|
||||
use crate::localized_attributes_rules::LocalizedFieldIds;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::index_documents::helpers::{
|
||||
MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
|
||||
};
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
|
||||
use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
|
||||
|
||||
/// Extracts the facet string and the documents ids where this facet string appear.
|
||||
@ -33,13 +32,10 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
if settings_diff.settings_update_only() {
|
||||
extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff)
|
||||
} else {
|
||||
let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids;
|
||||
let facet_search = settings_diff.new.facet_search;
|
||||
extract_facet_string_docids_document_update(
|
||||
docid_fid_facet_string,
|
||||
indexer,
|
||||
localized_field_ids,
|
||||
facet_search,
|
||||
&settings_diff.new,
|
||||
)
|
||||
}
|
||||
}
|
||||
@ -52,8 +48,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_string: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
localized_field_ids: &LocalizedFieldIds,
|
||||
facet_search: bool,
|
||||
settings: &InnerIndexSettings,
|
||||
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@ -92,6 +87,14 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
|
||||
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
|
||||
let field_id = FieldId::from_be_bytes(field_id_bytes);
|
||||
|
||||
let Some(metadata) = settings.fields_ids_map.metadata(field_id) else {
|
||||
unreachable!("metadata not found for field_id: {}", field_id)
|
||||
};
|
||||
|
||||
if !metadata.is_faceted(&settings.filterable_attributes_rules) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (document_id_bytes, normalized_value_bytes) =
|
||||
try_split_array_at::<_, 4>(bytes).unwrap();
|
||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||
@ -99,8 +102,10 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
|
||||
let normalized_value = str::from_utf8(normalized_value_bytes)?;
|
||||
|
||||
// Facet search normalization
|
||||
if facet_search {
|
||||
let locales = localized_field_ids.locales(field_id);
|
||||
let features =
|
||||
metadata.filterable_attributes_features(&settings.filterable_attributes_rules);
|
||||
if features.is_facet_searchable() {
|
||||
let locales = metadata.locales(&settings.localized_attributes_rules);
|
||||
let hyper_normalized_value = normalize_facet_string(normalized_value, locales);
|
||||
|
||||
let set = BTreeSet::from_iter(std::iter::once(normalized_value));
|
||||
@ -178,8 +183,15 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
|
||||
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
|
||||
let field_id = FieldId::from_be_bytes(field_id_bytes);
|
||||
|
||||
let old_locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id);
|
||||
let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
|
||||
let Some(old_metadata) = settings_diff.old.fields_ids_map.metadata(field_id) else {
|
||||
unreachable!("old metadata not found for field_id: {}", field_id)
|
||||
};
|
||||
let Some(new_metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else {
|
||||
unreachable!("new metadata not found for field_id: {}", field_id)
|
||||
};
|
||||
|
||||
let old_locales = old_metadata.locales(&settings_diff.old.localized_attributes_rules);
|
||||
let new_locales = new_metadata.locales(&settings_diff.new.localized_attributes_rules);
|
||||
|
||||
let are_same_locales = old_locales == new_locales;
|
||||
let reindex_facet_search =
|
||||
|
@ -76,9 +76,9 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
let mut strings_key_buffer = Vec::new();
|
||||
|
||||
let old_faceted_fids: BTreeSet<_> =
|
||||
settings_diff.old.faceted_fields_ids.iter().copied().collect();
|
||||
settings_diff.list_faceted_fields_from_fid_map(DelAdd::Deletion);
|
||||
let new_faceted_fids: BTreeSet<_> =
|
||||
settings_diff.new.faceted_fields_ids.iter().copied().collect();
|
||||
settings_diff.list_faceted_fields_from_fid_map(DelAdd::Addition);
|
||||
|
||||
if !settings_diff.settings_update_only || settings_diff.reindex_facets() {
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
|
@ -15,8 +15,9 @@ use serde_json::Value;
|
||||
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
||||
use crate::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use crate::error::FaultSource;
|
||||
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::prompt::{FieldsIdsMapWithMetadata, Prompt};
|
||||
use crate::prompt::Prompt;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
|
||||
@ -190,12 +191,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
let reindex_vectors = settings_diff.reindex_vectors();
|
||||
|
||||
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
|
||||
let old_fields_ids_map =
|
||||
FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids);
|
||||
|
||||
let new_fields_ids_map = &settings_diff.new.fields_ids_map;
|
||||
let new_fields_ids_map =
|
||||
FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids);
|
||||
|
||||
// the vector field id may have changed
|
||||
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
|
||||
@ -383,7 +380,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
);
|
||||
continue;
|
||||
}
|
||||
regenerate_prompt(obkv, prompt, &new_fields_ids_map)?
|
||||
regenerate_prompt(obkv, prompt, new_fields_ids_map)?
|
||||
}
|
||||
},
|
||||
// prompt regeneration is only triggered for existing embedders
|
||||
@ -400,7 +397,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
regenerate_if_prompt_changed(
|
||||
obkv,
|
||||
(old_prompt, prompt),
|
||||
(&old_fields_ids_map, &new_fields_ids_map),
|
||||
(old_fields_ids_map, new_fields_ids_map),
|
||||
)?
|
||||
} else {
|
||||
// we can simply ignore user provided vectors as they are not regenerated and are
|
||||
@ -416,7 +413,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
prompt,
|
||||
(add_to_user_provided, remove_from_user_provided),
|
||||
(old, new),
|
||||
(&old_fields_ids_map, &new_fields_ids_map),
|
||||
(old_fields_ids_map, new_fields_ids_map),
|
||||
document_id,
|
||||
embedder_name,
|
||||
embedder_is_manual,
|
||||
@ -486,10 +483,7 @@ fn extract_vector_document_diff(
|
||||
prompt: &Prompt,
|
||||
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
|
||||
(old, new): (VectorState, VectorState),
|
||||
(old_fields_ids_map, new_fields_ids_map): (
|
||||
&FieldsIdsMapWithMetadata,
|
||||
&FieldsIdsMapWithMetadata,
|
||||
),
|
||||
(old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
|
||||
document_id: impl Fn() -> Value,
|
||||
embedder_name: &str,
|
||||
embedder_is_manual: bool,
|
||||
@ -611,10 +605,7 @@ fn extract_vector_document_diff(
|
||||
fn regenerate_if_prompt_changed(
|
||||
obkv: &obkv::KvReader<FieldId>,
|
||||
(old_prompt, new_prompt): (&Prompt, &Prompt),
|
||||
(old_fields_ids_map, new_fields_ids_map): (
|
||||
&FieldsIdsMapWithMetadata,
|
||||
&FieldsIdsMapWithMetadata,
|
||||
),
|
||||
(old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
|
||||
) -> Result<VectorStateDelta> {
|
||||
let old_prompt = old_prompt
|
||||
.render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map)
|
||||
@ -630,7 +621,7 @@ fn regenerate_if_prompt_changed(
|
||||
fn regenerate_prompt(
|
||||
obkv: &obkv::KvReader<FieldId>,
|
||||
prompt: &Prompt,
|
||||
new_fields_ids_map: &FieldsIdsMapWithMetadata,
|
||||
new_fields_ids_map: &FieldIdMapWithMetadata,
|
||||
) -> Result<VectorStateDelta> {
|
||||
let prompt = prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?;
|
||||
|
||||
|
@ -217,7 +217,7 @@ where
|
||||
|
||||
// update the internal facet and searchable list,
|
||||
// because they might have changed due to the nested documents flattening.
|
||||
settings_diff.new.recompute_facets(self.wtxn, self.index)?;
|
||||
// settings_diff.new.recompute_facets(self.wtxn, self.index)?;
|
||||
settings_diff.new.recompute_searchables(self.wtxn, self.index)?;
|
||||
|
||||
let settings_diff = Arc::new(settings_diff);
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::btree_map::Entry as BEntry;
|
||||
use std::collections::hash_map::Entry as HEntry;
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Seek};
|
||||
|
||||
@ -18,8 +18,10 @@ use super::helpers::{
|
||||
ObkvsMergeAdditionsAndDeletions,
|
||||
};
|
||||
use super::{create_writer, IndexDocumentsMethod, IndexerConfig, KeepFirst};
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
|
||||
use crate::error::{Error, InternalError, UserError};
|
||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||
use crate::index::{db_name, main_key};
|
||||
use crate::update::del_add::{
|
||||
into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation,
|
||||
@ -31,9 +33,7 @@ use crate::update::{AvailableIds, UpdateIndexingStep};
|
||||
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use crate::vector::settings::WriteBackToDocuments;
|
||||
use crate::vector::ArroyWrapper;
|
||||
use crate::{
|
||||
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
||||
};
|
||||
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result};
|
||||
|
||||
pub struct TransformOutput {
|
||||
pub primary_key: String,
|
||||
@ -52,7 +52,7 @@ pub struct TransformOutput {
|
||||
/// containing all those documents.
|
||||
pub struct Transform<'a, 'i> {
|
||||
pub index: &'i Index,
|
||||
fields_ids_map: FieldsIdsMap,
|
||||
fields_ids_map: FieldIdMapWithMetadata,
|
||||
|
||||
indexer_settings: &'a IndexerConfig,
|
||||
pub index_documents_method: IndexDocumentsMethod,
|
||||
@ -84,7 +84,7 @@ pub enum Operation {
|
||||
///
|
||||
/// If new fields are present in the addition, they are added to the index field ids map.
|
||||
fn create_fields_mapping(
|
||||
index_field_map: &mut FieldsIdsMap,
|
||||
index_field_map: &mut FieldIdMapWithMetadata,
|
||||
batch_field_map: &DocumentsBatchIndex,
|
||||
) -> Result<HashMap<FieldId, FieldId>> {
|
||||
batch_field_map
|
||||
@ -141,10 +141,13 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
true,
|
||||
);
|
||||
let documents_ids = index.documents_ids(wtxn)?;
|
||||
let fields_ids_map = index.fields_ids_map(wtxn)?;
|
||||
let builder = MetadataBuilder::from_index(index, wtxn)?;
|
||||
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
|
||||
|
||||
Ok(Transform {
|
||||
index,
|
||||
fields_ids_map: index.fields_ids_map(wtxn)?,
|
||||
fields_ids_map,
|
||||
indexer_settings,
|
||||
available_documents_ids: AvailableIds::new(&documents_ids),
|
||||
original_sorter,
|
||||
@ -354,7 +357,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
documents_seen: documents_count,
|
||||
});
|
||||
|
||||
self.index.put_fields_ids_map(wtxn, &self.fields_ids_map)?;
|
||||
self.index.put_fields_ids_map(wtxn, self.fields_ids_map.as_fields_ids_map())?;
|
||||
self.index.put_primary_key(wtxn, &primary_key)?;
|
||||
self.documents_count += documents_count;
|
||||
// Now that we have a valid sorter that contains the user id and the obkv we
|
||||
@ -371,7 +374,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
)]
|
||||
fn flatten_from_fields_ids_map(
|
||||
obkv: &KvReader<FieldId>,
|
||||
fields_ids_map: &mut FieldsIdsMap,
|
||||
fields_ids_map: &mut FieldIdMapWithMetadata,
|
||||
) -> Result<Option<Vec<u8>>> {
|
||||
if obkv
|
||||
.iter()
|
||||
@ -657,7 +660,6 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
fn rebind_existing_document(
|
||||
old_obkv: &KvReader<FieldId>,
|
||||
settings_diff: &InnerIndexSettingsDiff,
|
||||
modified_faceted_fields: &HashSet<String>,
|
||||
mut injected_vectors: serde_json::Map<String, serde_json::Value>,
|
||||
old_vectors_fid: Option<FieldId>,
|
||||
original_obkv_buffer: Option<&mut Vec<u8>>,
|
||||
@ -667,23 +669,26 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
|
||||
|
||||
// If only a faceted field has been added, keep only this field.
|
||||
let global_facet_settings_changed = settings_diff.global_facet_settings_changed();
|
||||
let facet_fids_changed = settings_diff.facet_fids_changed();
|
||||
let necessary_faceted_field =
|
||||
|id: FieldId| -> bool {
|
||||
|
||||
let necessary_faceted_field = |id: FieldId| -> Option<DelAddOperation> {
|
||||
if facet_fids_changed {
|
||||
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
|
||||
if global_facet_settings_changed {
|
||||
settings_diff.new.user_defined_faceted_fields.iter().any(|long| {
|
||||
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
|
||||
})
|
||||
} else if facet_fids_changed {
|
||||
modified_faceted_fields.iter().any(|long| {
|
||||
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
|
||||
})
|
||||
} else {
|
||||
false
|
||||
// if the faceted fields changed, we need to keep all the field that are
|
||||
// faceted in the old or new settings.
|
||||
match (
|
||||
settings_diff.old.match_faceted_field(field_name),
|
||||
settings_diff.new.match_faceted_field(field_name),
|
||||
) {
|
||||
(PatternMatch::NoMatch, PatternMatch::NoMatch) => None,
|
||||
(PatternMatch::NoMatch, _) => Some(DelAddOperation::Addition),
|
||||
(_, PatternMatch::NoMatch) => Some(DelAddOperation::Deletion),
|
||||
(_, _) => Some(DelAddOperation::DeletionAndAddition),
|
||||
}
|
||||
};
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
// Alway provide all fields when vectors are involved because
|
||||
// we need the fields for the prompt/templating.
|
||||
@ -734,9 +739,12 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors {
|
||||
if is_primary_key(id) || reindex_vectors {
|
||||
operations.insert(id, DelAddOperation::DeletionAndAddition);
|
||||
obkv_writer.insert(id, val)?;
|
||||
} else if let Some(operation) = necessary_faceted_field(id) {
|
||||
operations.insert(id, operation);
|
||||
obkv_writer.insert(id, val)?;
|
||||
} else if let Some(operation) = settings_diff.reindex_searchable_id(id) {
|
||||
operations.insert(id, operation);
|
||||
obkv_writer.insert(id, val)?;
|
||||
@ -856,7 +864,6 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
};
|
||||
|
||||
if original_sorter.is_some() || flattened_sorter.is_some() {
|
||||
let modified_faceted_fields = settings_diff.modified_faceted_fields();
|
||||
let mut original_obkv_buffer = Vec::new();
|
||||
let mut flattened_obkv_buffer = Vec::new();
|
||||
let mut document_sorter_key_buffer = Vec::new();
|
||||
@ -897,7 +904,6 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
Self::rebind_existing_document(
|
||||
old_obkv,
|
||||
&settings_diff,
|
||||
&modified_faceted_fields,
|
||||
injected_vectors,
|
||||
old_vectors_fid,
|
||||
Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()),
|
||||
|
@ -9,8 +9,9 @@ use serde_json::Value;
|
||||
|
||||
use super::super::cache::BalancedCaches;
|
||||
use super::facet_document::extract_document_facets;
|
||||
use super::{match_faceted_field, FacetKind};
|
||||
use super::FacetKind;
|
||||
use crate::fields_ids_map::metadata::Metadata;
|
||||
use crate::filterable_attributes_rules::match_faceted_field;
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::update::new::channel::FieldIdDocidFacetSender;
|
||||
@ -34,6 +35,8 @@ pub struct FacetedExtractorData<'a, 'b> {
|
||||
buckets: usize,
|
||||
filterable_attributes: Vec<FilterableAttributesRule>,
|
||||
sortable_fields: HashSet<String>,
|
||||
asc_desc_fields: HashSet<String>,
|
||||
distinct_field: Option<String>,
|
||||
}
|
||||
|
||||
impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
|
||||
@ -58,6 +61,8 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b>
|
||||
context,
|
||||
&self.filterable_attributes,
|
||||
&self.sortable_fields,
|
||||
&self.asc_desc_fields,
|
||||
&self.distinct_field,
|
||||
change,
|
||||
self.sender,
|
||||
)?
|
||||
@ -73,6 +78,8 @@ impl FacetedDocidsExtractor {
|
||||
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
|
||||
filterable_attributes: &[FilterableAttributesRule],
|
||||
sortable_fields: &HashSet<String>,
|
||||
asc_desc_fields: &HashSet<String>,
|
||||
distinct_field: &Option<String>,
|
||||
document_change: DocumentChange,
|
||||
sender: &FieldIdDocidFacetSender,
|
||||
) -> Result<()> {
|
||||
@ -89,6 +96,8 @@ impl FacetedDocidsExtractor {
|
||||
new_fields_ids_map.deref_mut(),
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
asc_desc_fields,
|
||||
distinct_field,
|
||||
&mut |fid, meta, depth, value| {
|
||||
let features = meta.filterable_attributes_features(filterable_attributes);
|
||||
Self::facet_fn_with_options(
|
||||
@ -109,7 +118,13 @@ impl FacetedDocidsExtractor {
|
||||
DocumentChange::Update(inner) => {
|
||||
if !inner.has_changed_for_fields(
|
||||
&mut |field_name| {
|
||||
match_faceted_field(field_name, filterable_attributes, sortable_fields)
|
||||
match_faceted_field(
|
||||
field_name,
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
asc_desc_fields,
|
||||
distinct_field,
|
||||
)
|
||||
},
|
||||
rtxn,
|
||||
index,
|
||||
@ -124,6 +139,8 @@ impl FacetedDocidsExtractor {
|
||||
new_fields_ids_map.deref_mut(),
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
asc_desc_fields,
|
||||
distinct_field,
|
||||
&mut |fid, meta, depth, value| {
|
||||
let features = meta.filterable_attributes_features(filterable_attributes);
|
||||
Self::facet_fn_with_options(
|
||||
@ -148,6 +165,8 @@ impl FacetedDocidsExtractor {
|
||||
new_fields_ids_map.deref_mut(),
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
asc_desc_fields,
|
||||
distinct_field,
|
||||
&mut |fid, meta, depth, value| {
|
||||
let features = meta.filterable_attributes_features(filterable_attributes);
|
||||
Self::facet_fn_with_options(
|
||||
@ -172,6 +191,8 @@ impl FacetedDocidsExtractor {
|
||||
new_fields_ids_map.deref_mut(),
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
asc_desc_fields,
|
||||
distinct_field,
|
||||
&mut |fid, meta, depth, value| {
|
||||
let features = meta.filterable_attributes_features(filterable_attributes);
|
||||
Self::facet_fn_with_options(
|
||||
@ -426,6 +447,8 @@ impl FacetedDocidsExtractor {
|
||||
let rtxn = index.read_txn()?;
|
||||
let filterable_attributes = index.filterable_attributes_rules(&rtxn)?;
|
||||
let sortable_fields = index.sortable_fields(&rtxn)?;
|
||||
let asc_desc_fields = index.asc_desc_fields(&rtxn)?;
|
||||
let distinct_field = index.distinct_field(&rtxn)?.map(|s| s.to_string());
|
||||
let datastore = ThreadLocal::new();
|
||||
|
||||
{
|
||||
@ -439,6 +462,8 @@ impl FacetedDocidsExtractor {
|
||||
sender,
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
asc_desc_fields,
|
||||
distinct_field,
|
||||
};
|
||||
extract(
|
||||
document_changes,
|
||||
|
@ -8,22 +8,30 @@ use crate::update::new::document::Document;
|
||||
use crate::update::new::extract::geo::extract_geo_coordinates;
|
||||
use crate::update::new::extract::perm_json_p;
|
||||
use crate::{
|
||||
FieldId, FilterableAttributesSettings, GlobalFieldsIdsMap, InternalError, Result, UserError,
|
||||
FieldId, FilterableAttributesRule, GlobalFieldsIdsMap, InternalError, Result, UserError,
|
||||
};
|
||||
|
||||
use super::match_faceted_field;
|
||||
use crate::filterable_attributes_rules::match_faceted_field;
|
||||
|
||||
pub fn extract_document_facets<'doc>(
|
||||
document: impl Document<'doc>,
|
||||
external_document_id: &str,
|
||||
field_id_map: &mut GlobalFieldsIdsMap,
|
||||
filterable_attributes: &[FilterableAttributesSettings],
|
||||
filterable_attributes: &[FilterableAttributesRule],
|
||||
sortable_fields: &HashSet<String>,
|
||||
asc_desc_fields: &HashSet<String>,
|
||||
distinct_field: &Option<String>,
|
||||
facet_fn: &mut impl FnMut(FieldId, Metadata, perm_json_p::Depth, &Value) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
// return the match result for the given field name.
|
||||
let match_field = |field_name: &str| -> PatternMatch {
|
||||
match_faceted_field(field_name, filterable_attributes, sortable_fields)
|
||||
match_faceted_field(
|
||||
field_name,
|
||||
filterable_attributes,
|
||||
sortable_fields,
|
||||
asc_desc_fields,
|
||||
distinct_field,
|
||||
)
|
||||
};
|
||||
|
||||
// extract the field if it is faceted (facet searchable, filterable, sortable)
|
||||
|
@ -1,16 +1,8 @@
|
||||
mod extract_facets;
|
||||
mod facet_document;
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
pub use extract_facets::FacetedDocidsExtractor;
|
||||
|
||||
use crate::{
|
||||
attribute_patterns::{match_field_legacy, PatternMatch},
|
||||
filterable_fields::match_pattern_by_features,
|
||||
FilterableAttributesSettings,
|
||||
};
|
||||
|
||||
#[repr(u8)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum FacetKind {
|
||||
@ -39,30 +31,3 @@ impl FacetKind {
|
||||
(FacetKind::from(key[0]), &key[1..])
|
||||
}
|
||||
}
|
||||
|
||||
pub fn match_faceted_field(
|
||||
field_name: &str,
|
||||
filterable_fields: &[FilterableAttributesSettings],
|
||||
sortable_fields: &HashSet<String>,
|
||||
) -> PatternMatch {
|
||||
// Check if the field matches any filterable or facet searchable field
|
||||
let mut selection = match_pattern_by_features(field_name, &filterable_fields, &|features| {
|
||||
features.is_facet_searchable() || features.is_filterable()
|
||||
});
|
||||
|
||||
// If the field matches the pattern, return Match
|
||||
if selection == PatternMatch::Match {
|
||||
return selection;
|
||||
}
|
||||
|
||||
// Otherwise, check if the field matches any sortable field
|
||||
for pattern in sortable_fields {
|
||||
match match_field_legacy(&pattern, field_name) {
|
||||
PatternMatch::Match => return PatternMatch::Match,
|
||||
PatternMatch::Parent => selection = PatternMatch::Parent,
|
||||
PatternMatch::NoMatch => (),
|
||||
}
|
||||
}
|
||||
|
||||
selection
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ impl GeoExtractor {
|
||||
grenad_parameters: GrenadParameters,
|
||||
) -> Result<Option<Self>> {
|
||||
if index.is_geo_activated(rtxn)? {
|
||||
Ok(Some(GeoExtractor {filterable_attributes_ruless }))
|
||||
Ok(Some(GeoExtractor { grenad_parameters }))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
|
@ -227,7 +227,14 @@ mod test {
|
||||
|
||||
let fields_ids_map = FieldIdMapWithMetadata::new(
|
||||
fields_ids_map,
|
||||
MetadataBuilder::new(Default::default(), Default::default(), Default::default(), None),
|
||||
MetadataBuilder::new(
|
||||
Default::default(),
|
||||
Default::default(),
|
||||
Default::default(),
|
||||
None,
|
||||
None,
|
||||
Default::default(),
|
||||
),
|
||||
);
|
||||
|
||||
let fields_ids_map_lock = std::sync::RwLock::new(fields_ids_map);
|
||||
|
@ -114,7 +114,7 @@ pub(super) fn update_index(
|
||||
index.put_primary_key(wtxn, new_primary_key.name())?;
|
||||
}
|
||||
let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?;
|
||||
inner_index_settings.recompute_facets(wtxn, index)?;
|
||||
// inner_index_settings.recompute_facets(wtxn, index)?;
|
||||
inner_index_settings.recompute_searchables(wtxn, index)?;
|
||||
index.put_field_distribution(wtxn, &field_distribution)?;
|
||||
index.put_documents_ids(wtxn, &document_ids)?;
|
||||
|
@ -11,12 +11,15 @@ use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::del_add::DelAddOperation;
|
||||
use super::del_add::{DelAdd, DelAddOperation};
|
||||
use super::index_documents::{IndexDocumentsConfig, Transform};
|
||||
use super::IndexerConfig;
|
||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use crate::criterion::Criterion;
|
||||
use crate::error::UserError;
|
||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||
use crate::filterable_attributes_rules::match_faceted_field;
|
||||
use crate::index::{
|
||||
IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO,
|
||||
DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
|
||||
@ -31,10 +34,7 @@ use crate::vector::settings::{
|
||||
WriteBackToDocuments,
|
||||
};
|
||||
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
|
||||
use crate::{
|
||||
FieldId, FieldsIdsMap, FilterableAttributesSettings, Index, LocalizedAttributesRule,
|
||||
LocalizedFieldIds, Result,
|
||||
};
|
||||
use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
||||
pub enum Setting<T> {
|
||||
@ -158,7 +158,7 @@ pub struct Settings<'a, 't, 'i> {
|
||||
|
||||
searchable_fields: Setting<Vec<String>>,
|
||||
displayed_fields: Setting<Vec<String>>,
|
||||
filterable_fields: Setting<Vec<FilterableAttributesSettings>>,
|
||||
filterable_fields: Setting<Vec<FilterableAttributesRule>>,
|
||||
sortable_fields: Setting<HashSet<String>>,
|
||||
criteria: Setting<Vec<Criterion>>,
|
||||
stop_words: Setting<BTreeSet<String>>,
|
||||
@ -244,7 +244,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
self.filterable_fields = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesSettings>) {
|
||||
pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesRule>) {
|
||||
self.filterable_fields = Setting::Set(rules);
|
||||
}
|
||||
|
||||
@ -519,7 +519,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
}
|
||||
|
||||
/// Updates the index's searchable attributes.
|
||||
fn update_searchable(&mut self) -> Result<bool> {
|
||||
fn update_user_defined_searchable_attributes(&mut self) -> Result<bool> {
|
||||
match self.searchable_fields {
|
||||
Setting::Set(ref fields) => {
|
||||
// Check to see if the searchable fields changed before doing anything else
|
||||
@ -532,26 +532,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// Since we're updating the settings we can only add new fields at the end of the field id map
|
||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
// fields are deduplicated, only the first occurrence is taken into account
|
||||
let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
|
||||
|
||||
// Add all the searchable attributes to the field map, and then add the
|
||||
// remaining fields from the old field map to the new one
|
||||
for name in names.iter() {
|
||||
// The fields ids map won't change the field id of already present elements thus only the
|
||||
// new fields will be inserted.
|
||||
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
|
||||
}
|
||||
|
||||
self.index.put_all_searchable_fields_from_fields_ids_map(
|
||||
self.wtxn,
|
||||
&names,
|
||||
&fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME),
|
||||
&fields_ids_map,
|
||||
)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||
self.index.put_user_defined_searchable_fields(self.wtxn, &names)?;
|
||||
Ok(true)
|
||||
}
|
||||
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
|
||||
@ -763,10 +747,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
fn update_filterable(&mut self) -> Result<()> {
|
||||
match self.filterable_fields {
|
||||
Setting::Set(ref fields) => {
|
||||
self.index.put_filterable_fields(self.wtxn, fields)?;
|
||||
self.index.put_filterable_attributes_rules(self.wtxn, fields)?;
|
||||
}
|
||||
Setting::Reset => {
|
||||
self.index.delete_filterable_fields(self.wtxn)?;
|
||||
self.index.delete_filterable_attributes_rules(self.wtxn)?;
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
@ -1256,7 +1240,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
self.update_separator_tokens()?;
|
||||
self.update_dictionary()?;
|
||||
self.update_synonyms()?;
|
||||
self.update_searchable()?;
|
||||
self.update_user_defined_searchable_attributes()?;
|
||||
self.update_exact_attributes()?;
|
||||
self.update_proximity_precision()?;
|
||||
self.update_prefix_search()?;
|
||||
@ -1266,7 +1250,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
let embedding_config_updates = self.update_embedding_configs()?;
|
||||
|
||||
let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
|
||||
new_inner_settings.recompute_facets(self.wtxn, self.index)?;
|
||||
// new_inner_settings.recompute_facets(self.wtxn, self.index)?;
|
||||
new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
|
||||
|
||||
let primary_key_id = self
|
||||
.index
|
||||
@ -1318,8 +1303,8 @@ impl InnerIndexSettingsDiff {
|
||||
settings_update_only: bool,
|
||||
) -> Self {
|
||||
let only_additional_fields = match (
|
||||
&old_settings.user_defined_searchable_fields,
|
||||
&new_settings.user_defined_searchable_fields,
|
||||
&old_settings.user_defined_searchable_attributes,
|
||||
&new_settings.user_defined_searchable_attributes,
|
||||
) {
|
||||
(None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
|
||||
(Some(old), Some(new)) => {
|
||||
@ -1341,14 +1326,14 @@ impl InnerIndexSettingsDiff {
|
||||
|| old_settings.dictionary != new_settings.dictionary
|
||||
|| old_settings.proximity_precision != new_settings.proximity_precision
|
||||
|| old_settings.prefix_search != new_settings.prefix_search
|
||||
|| old_settings.localized_searchable_fields_ids
|
||||
!= new_settings.localized_searchable_fields_ids
|
||||
|| old_settings.localized_attributes_rules
|
||||
!= new_settings.localized_attributes_rules
|
||||
};
|
||||
|
||||
let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
|
||||
|
||||
let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
|
||||
!= new_settings.user_defined_searchable_fields;
|
||||
let cache_user_defined_searchables = old_settings.user_defined_searchable_attributes
|
||||
!= new_settings.user_defined_searchable_attributes;
|
||||
|
||||
// if the user-defined searchables changed, then we need to reindex prompts.
|
||||
if cache_user_defined_searchables {
|
||||
@ -1431,30 +1416,39 @@ impl InnerIndexSettingsDiff {
|
||||
}
|
||||
}
|
||||
|
||||
/// List the faceted fields from the inner fid map.
|
||||
/// This is used to list the faceted fields when we are reindexing,
|
||||
/// but it can't be used in document addition because the field id map must be exhaustive.
|
||||
pub fn list_faceted_fields_from_fid_map(&self, del_add: DelAdd) -> BTreeSet<FieldId> {
|
||||
let settings = match del_add {
|
||||
DelAdd::Deletion => &self.old,
|
||||
DelAdd::Addition => &self.new,
|
||||
};
|
||||
|
||||
settings
|
||||
.fields_ids_map
|
||||
.iter_id_metadata()
|
||||
.filter(|(_, metadata)| metadata.is_faceted(&settings.filterable_attributes_rules))
|
||||
.map(|(id, _)| id)
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn facet_fids_changed(&self) -> bool {
|
||||
let existing_fields = &self.new.existing_fields;
|
||||
if existing_fields.iter().any(|field| field.contains('.')) {
|
||||
return true;
|
||||
}
|
||||
let new_faceted_fields: HashSet<_> = existing_fields
|
||||
.iter()
|
||||
.filter(|field| self.new.match_faceted_field(field) == PatternMatch::Match)
|
||||
.collect();
|
||||
let old_faceted_fields: HashSet<_> = existing_fields
|
||||
.iter()
|
||||
.filter(|field| self.old.match_faceted_field(field) == PatternMatch::Match)
|
||||
.collect();
|
||||
|
||||
let old_faceted_fields = &self.old.user_defined_faceted_fields;
|
||||
if old_faceted_fields.iter().any(|field| field.contains('.')) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// If there is new faceted fields we indicate that we must reindex as we must
|
||||
// index new fields as facets. It means that the distinct attribute,
|
||||
// an Asc/Desc criterion or a filtered attribute as be added or removed.
|
||||
let new_faceted_fields = &self.new.user_defined_faceted_fields;
|
||||
if new_faceted_fields.iter().any(|field| field.contains('.')) {
|
||||
return true;
|
||||
}
|
||||
|
||||
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
|
||||
old_faceted_fields != new_faceted_fields
|
||||
}
|
||||
|
||||
pub fn global_facet_settings_changed(&self) -> bool {
|
||||
self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
|
||||
self.old.localized_attributes_rules != self.new.localized_attributes_rules
|
||||
|| self.old.facet_search != self.new.facet_search
|
||||
}
|
||||
|
||||
@ -1475,9 +1469,9 @@ impl InnerIndexSettingsDiff {
|
||||
|| (!self.settings_update_only && self.new.geo_fields_ids.is_some())
|
||||
}
|
||||
|
||||
pub fn modified_faceted_fields(&self) -> HashSet<String> {
|
||||
&self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields
|
||||
}
|
||||
// pub fn modified_faceted_fields(&self) -> HashSet<String> {
|
||||
// &self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields
|
||||
// }
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -1485,20 +1479,18 @@ pub(crate) struct InnerIndexSettings {
|
||||
pub stop_words: Option<fst::Set<Vec<u8>>>,
|
||||
pub allowed_separators: Option<BTreeSet<String>>,
|
||||
pub dictionary: Option<BTreeSet<String>>,
|
||||
pub fields_ids_map: FieldsIdsMap,
|
||||
pub user_defined_faceted_fields: HashSet<String>,
|
||||
pub user_defined_searchable_fields: Option<Vec<String>>,
|
||||
pub faceted_fields_ids: HashSet<FieldId>,
|
||||
pub searchable_fields_ids: Vec<FieldId>,
|
||||
pub fields_ids_map: FieldIdMapWithMetadata,
|
||||
pub localized_attributes_rules: Vec<LocalizedAttributesRule>,
|
||||
pub filterable_attributes_rules: Vec<FilterableAttributesRule>,
|
||||
pub asc_desc_fields: HashSet<String>,
|
||||
pub distinct_field: Option<String>,
|
||||
pub user_defined_searchable_attributes: Option<Vec<String>>,
|
||||
pub sortable_fields: HashSet<String>,
|
||||
pub exact_attributes: HashSet<FieldId>,
|
||||
pub proximity_precision: ProximityPrecision,
|
||||
pub embedding_configs: EmbeddingConfigs,
|
||||
pub existing_fields: HashSet<String>,
|
||||
pub geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
pub non_searchable_fields_ids: Vec<FieldId>,
|
||||
pub non_faceted_fields_ids: Vec<FieldId>,
|
||||
pub localized_searchable_fields_ids: LocalizedFieldIds,
|
||||
pub localized_faceted_fields_ids: LocalizedFieldIds,
|
||||
pub prefix_search: PrefixSearch,
|
||||
pub facet_search: bool,
|
||||
}
|
||||
@ -1514,12 +1506,6 @@ impl InnerIndexSettings {
|
||||
let allowed_separators = index.allowed_separators(rtxn)?;
|
||||
let dictionary = index.dictionary(rtxn)?;
|
||||
let mut fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
|
||||
let user_defined_searchable_fields =
|
||||
user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
|
||||
let user_defined_faceted_fields = index.user_defined_faceted_fields(rtxn)?;
|
||||
let mut searchable_fields_ids = index.searchable_fields_ids(rtxn)?;
|
||||
let mut faceted_fields_ids = index.faceted_fields_ids(rtxn)?;
|
||||
let exact_attributes = index.exact_attributes_ids(rtxn)?;
|
||||
let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default();
|
||||
let embedding_configs = match embedding_configs {
|
||||
@ -1535,82 +1521,73 @@ impl InnerIndexSettings {
|
||||
.collect();
|
||||
// index.fields_ids_map($a)? ==>> fields_ids_map
|
||||
let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) {
|
||||
Some(gfid) => {
|
||||
let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
|
||||
let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
|
||||
Some(_) if index.is_geo_activated(rtxn)? => {
|
||||
// if `_geo` is faceted then we get the `lat` and `lng`
|
||||
if is_sortable || is_filterable {
|
||||
let field_ids = fields_ids_map
|
||||
.insert("_geo.lat")
|
||||
.zip(fields_ids_map.insert("_geo.lng"))
|
||||
.ok_or(UserError::AttributeLimitReached)?;
|
||||
Some(field_ids)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
let field_ids = fields_ids_map
|
||||
.insert("_geo.lat")
|
||||
.zip(fields_ids_map.insert("_geo.lng"))
|
||||
.ok_or(UserError::AttributeLimitReached)?;
|
||||
Some(field_ids)
|
||||
}
|
||||
None => None,
|
||||
_ => None,
|
||||
};
|
||||
let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
|
||||
let localized_searchable_fields_ids = LocalizedFieldIds::new(
|
||||
&localized_attributes_rules,
|
||||
&fields_ids_map,
|
||||
searchable_fields_ids.iter().cloned(),
|
||||
);
|
||||
let localized_faceted_fields_ids = LocalizedFieldIds::new(
|
||||
&localized_attributes_rules,
|
||||
&fields_ids_map,
|
||||
faceted_fields_ids.iter().cloned(),
|
||||
);
|
||||
|
||||
let vectors_fids = fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME);
|
||||
searchable_fields_ids.retain(|id| !vectors_fids.contains(id));
|
||||
faceted_fields_ids.retain(|id| !vectors_fids.contains(id));
|
||||
let localized_attributes_rules =
|
||||
index.localized_attributes_rules(rtxn)?.unwrap_or_default();
|
||||
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
|
||||
let sortable_fields = index.sortable_fields(rtxn)?;
|
||||
let asc_desc_fields = index.asc_desc_fields(rtxn)?;
|
||||
let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
|
||||
let user_defined_searchable_attributes = index
|
||||
.user_defined_searchable_fields(rtxn)?
|
||||
.map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
|
||||
let builder = MetadataBuilder::from_index(index, rtxn)?;
|
||||
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
|
||||
|
||||
Ok(Self {
|
||||
stop_words,
|
||||
allowed_separators,
|
||||
dictionary,
|
||||
fields_ids_map,
|
||||
user_defined_faceted_fields,
|
||||
user_defined_searchable_fields,
|
||||
faceted_fields_ids,
|
||||
searchable_fields_ids,
|
||||
localized_attributes_rules,
|
||||
filterable_attributes_rules,
|
||||
asc_desc_fields,
|
||||
distinct_field,
|
||||
user_defined_searchable_attributes,
|
||||
sortable_fields,
|
||||
exact_attributes,
|
||||
proximity_precision,
|
||||
embedding_configs,
|
||||
existing_fields,
|
||||
geo_fields_ids,
|
||||
non_searchable_fields_ids: vectors_fids.clone(),
|
||||
non_faceted_fields_ids: vectors_fids.clone(),
|
||||
localized_searchable_fields_ids,
|
||||
localized_faceted_fields_ids,
|
||||
prefix_search,
|
||||
facet_search,
|
||||
})
|
||||
}
|
||||
|
||||
// find and insert the new field ids
|
||||
pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn<'_>, index: &Index) -> Result<()> {
|
||||
let new_facets = self
|
||||
.fields_ids_map
|
||||
.iter()
|
||||
.filter(|(fid, _field)| !self.non_faceted_fields_ids.contains(fid))
|
||||
.filter(|(_fid, field)| crate::is_faceted(field, &self.user_defined_faceted_fields))
|
||||
.map(|(_fid, field)| field.to_string())
|
||||
.collect();
|
||||
index.put_faceted_fields(wtxn, &new_facets)?;
|
||||
|
||||
self.faceted_fields_ids = index.faceted_fields_ids(wtxn)?;
|
||||
let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
|
||||
self.localized_faceted_fields_ids = LocalizedFieldIds::new(
|
||||
&localized_attributes_rules,
|
||||
&self.fields_ids_map,
|
||||
self.faceted_fields_ids.iter().cloned(),
|
||||
);
|
||||
Ok(())
|
||||
pub fn match_faceted_field(&self, field: &str) -> PatternMatch {
|
||||
match_faceted_field(
|
||||
field,
|
||||
&self.filterable_attributes_rules,
|
||||
&self.sortable_fields,
|
||||
&self.asc_desc_fields,
|
||||
&self.distinct_field,
|
||||
)
|
||||
}
|
||||
|
||||
// find and insert the new field ids
|
||||
// pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn<'_>, index: &Index) -> Result<()> {
|
||||
// let new_facets = self
|
||||
// .fields_ids_map
|
||||
// .iter()
|
||||
// .filter(|(_fid, field, metadata)| {
|
||||
// metadata.is_faceted(&self.filterable_attributes_rules)
|
||||
// })
|
||||
// .map(|(_fid, field, _metadata)| field.to_string())
|
||||
// .collect();
|
||||
// index.put_faceted_fields(wtxn, &new_facets)?;
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
// find and insert the new field ids
|
||||
pub fn recompute_searchables(
|
||||
&mut self,
|
||||
@ -1618,7 +1595,7 @@ impl InnerIndexSettings {
|
||||
index: &Index,
|
||||
) -> Result<()> {
|
||||
let searchable_fields = self
|
||||
.user_defined_searchable_fields
|
||||
.user_defined_searchable_attributes
|
||||
.as_ref()
|
||||
.map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
|
||||
|
||||
@ -1627,17 +1604,9 @@ impl InnerIndexSettings {
|
||||
index.put_all_searchable_fields_from_fields_ids_map(
|
||||
wtxn,
|
||||
&searchable_fields,
|
||||
&self.non_searchable_fields_ids,
|
||||
&self.fields_ids_map,
|
||||
)?;
|
||||
}
|
||||
self.searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
|
||||
let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
|
||||
self.localized_searchable_fields_ids = LocalizedFieldIds::new(
|
||||
&localized_attributes_rules,
|
||||
&self.fields_ids_map,
|
||||
self.searchable_fields_ids.iter().cloned(),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -2104,7 +2073,7 @@ mod tests {
|
||||
// Set the filterable fields to be the age.
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
|
||||
"age".to_string(),
|
||||
)]);
|
||||
})
|
||||
@ -2121,8 +2090,8 @@ mod tests {
|
||||
|
||||
// Check that the displayed fields are correctly set.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids = index.filterable_fields(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids, vec![FilterableAttributesSettings::Field("age".to_string(),)]);
|
||||
let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids, vec![FilterableAttributesRule::Field("age".to_string(),)]);
|
||||
// Only count the field_id 0 and level 0 facet values.
|
||||
// TODO we must support typed CSVs for numbers to be understood.
|
||||
let fidmap = index.fields_ids_map(&rtxn).unwrap();
|
||||
@ -2165,20 +2134,20 @@ mod tests {
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field("age".to_string()),
|
||||
FilterableAttributesSettings::Field("name".to_string()),
|
||||
FilterableAttributesRule::Field("age".to_string()),
|
||||
FilterableAttributesRule::Field("name".to_string()),
|
||||
]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// Check that the displayed fields are correctly set.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids = index.filterable_fields(&rtxn).unwrap();
|
||||
let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap();
|
||||
assert_eq!(
|
||||
fields_ids,
|
||||
vec![
|
||||
FilterableAttributesSettings::Field("age".to_string()),
|
||||
FilterableAttributesSettings::Field("name".to_string()),
|
||||
FilterableAttributesRule::Field("age".to_string()),
|
||||
FilterableAttributesRule::Field("name".to_string()),
|
||||
]
|
||||
);
|
||||
|
||||
@ -2205,7 +2174,7 @@ mod tests {
|
||||
// Remove the age from the filterable fields.
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
|
||||
"name".to_string(),
|
||||
)]);
|
||||
})
|
||||
@ -2213,8 +2182,8 @@ mod tests {
|
||||
|
||||
// Check that the displayed fields are correctly set.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids = index.filterable_fields(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids, vec![FilterableAttributesSettings::Field("name".to_string())]);
|
||||
let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids, vec![FilterableAttributesRule::Field("name".to_string())]);
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
// Only count the field_id 2 and level 0 facet values.
|
||||
@ -2545,8 +2514,8 @@ mod tests {
|
||||
.update_settings(|settings| {
|
||||
settings.set_displayed_fields(vec!["hello".to_string()]);
|
||||
settings.set_filterable_fields(vec![
|
||||
FilterableAttributesSettings::Field("age".to_string()),
|
||||
FilterableAttributesSettings::Field("toto".to_string()),
|
||||
FilterableAttributesRule::Field("age".to_string()),
|
||||
FilterableAttributesRule::Field("toto".to_string()),
|
||||
]);
|
||||
settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
|
||||
})
|
||||
@ -2664,7 +2633,7 @@ mod tests {
|
||||
// Set the genres setting
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(
|
||||
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
|
||||
"genres".to_string(),
|
||||
)]);
|
||||
})
|
||||
|
Loading…
x
Reference in New Issue
Block a user