Refactor the FieldIdMapWithMetadata

**Changes:**
The FieldIdMapWithMetadata structure now stores more information about fields.
The metadata_for_field function computes all the needed information from the user-provided data instead of the enriched data (searchable/sortable),
which may solve an indexing bug where sortable attributes were not matching nested fields.

The FieldIdMapWithMetadata structure was duplicated in the embeddings as FieldsIdsMapWithMetadata,
so the FieldsIdsMapWithMetadata has been removed in favor of FieldIdMapWithMetadata.

The facet distribution now relies on the FieldIdMapWithMetadata to determine whether a field can be faceted.

**Impact:**
- searchable attributes matching
- searchable attributes weight computation
- sortable attributes matching
- faceted fields matching
- prompt computing
- facet distribution
This commit is contained in:
ManyTheFish 2025-03-03 10:29:33 +01:00
parent 967033579d
commit 4f7ece2411
6 changed files with 281 additions and 143 deletions

View file

@ -7,14 +7,14 @@ use liquid::model::{
};
use liquid::{ObjectView, ValueView};
use super::{FieldMetadata, FieldsIdsMapWithMetadata};
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata};
use crate::GlobalFieldsIdsMap;
#[derive(Debug, Clone, Copy)]
pub struct FieldValue<'a, D: ObjectView> {
name: &'a str,
document: &'a D,
metadata: FieldMetadata,
metadata: Metadata,
}
impl<'a, D: ObjectView> ValueView for FieldValue<'a, D> {
@ -67,7 +67,10 @@ impl<'a, D: ObjectView> FieldValue<'a, D> {
}
pub fn is_searchable(&self) -> &bool {
&self.metadata.searchable
match self.metadata.is_searchable() {
true => &true,
false => &false,
}
}
pub fn is_empty(&self) -> bool {
@ -125,15 +128,11 @@ pub struct BorrowedFields<'a, 'map, D: ObjectView> {
}
impl<'a, D: ObjectView> OwnedFields<'a, D> {
pub fn new(document: &'a D, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
pub fn new(document: &'a D, field_id_map: &'a FieldIdMapWithMetadata) -> Self {
Self(
std::iter::repeat(document)
.zip(field_id_map.iter())
.map(|(document, (fid, name))| FieldValue {
document,
name,
metadata: field_id_map.metadata(fid).unwrap_or_default(),
})
.map(|(document, (_fid, name, metadata))| FieldValue { document, name, metadata })
.collect(),
)
}
@ -187,7 +186,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> {
let fv = self.doc_alloc.alloc(FieldValue {
name: self.doc_alloc.alloc_str(&k),
document: self.document,
metadata: FieldMetadata { searchable: metadata.searchable },
metadata,
});
fv as _
}))
@ -207,7 +206,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> {
let fv = self.doc_alloc.alloc(FieldValue {
name: self.doc_alloc.alloc_str(&key),
document: self.document,
metadata: FieldMetadata { searchable: metadata.searchable },
metadata,
});
Some(fv as _)
}

View file

@ -5,11 +5,9 @@ mod fields;
mod template_checker;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::convert::TryFrom;
use std::fmt::Debug;
use std::num::NonZeroUsize;
use std::ops::Deref;
use bumpalo::Bump;
use document::ParseableDocument;
@ -18,8 +16,9 @@ use fields::{BorrowedFields, OwnedFields};
use self::context::Context;
use self::document::Document;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::update::del_add::DelAdd;
use crate::{FieldId, FieldsIdsMap, GlobalFieldsIdsMap};
use crate::GlobalFieldsIdsMap;
pub struct Prompt {
template: liquid::Template,
@ -145,9 +144,9 @@ impl Prompt {
&self,
document: &obkv::KvReaderU16,
side: DelAdd,
field_id_map: &FieldsIdsMapWithMetadata,
field_id_map: &FieldIdMapWithMetadata,
) -> Result<String, RenderPromptError> {
let document = Document::new(document, side, field_id_map);
let document = Document::new(document, side, field_id_map.as_fields_ids_map());
let fields = OwnedFields::new(&document, field_id_map);
let context = Context::new(&document, &fields);
@ -172,40 +171,6 @@ fn truncate(s: &mut String, max_bytes: usize) {
}
}
pub struct FieldsIdsMapWithMetadata<'a> {
fields_ids_map: &'a FieldsIdsMap,
metadata: BTreeMap<FieldId, FieldMetadata>,
}
impl<'a> FieldsIdsMapWithMetadata<'a> {
pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self {
let mut metadata: BTreeMap<FieldId, FieldMetadata> =
fields_ids_map.ids().map(|id| (id, Default::default())).collect();
for searchable_field_id in searchable_fields_ids {
let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue };
metadata.searchable = true;
}
Self { fields_ids_map, metadata }
}
pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> {
self.metadata.get(&field_id).copied()
}
}
impl<'a> Deref for FieldsIdsMapWithMetadata<'a> {
type Target = FieldsIdsMap;
fn deref(&self) -> &Self::Target {
self.fields_ids_map
}
}
#[derive(Debug, Default, Clone, Copy)]
pub struct FieldMetadata {
pub searchable: bool,
}
#[cfg(test)]
mod test {
use super::Prompt;