mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 19:57:30 +01:00
Move the facet extraction to dedicated modules
This commit is contained in:
parent
34f11e3380
commit
0fc02f7351
137
milli/src/update/new/extract/faceted/extract_facets.rs
Normal file
137
milli/src/update/new/extract/faceted/extract_facets.rs
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
use heed::RoTxn;
|
||||||
|
use serde_json::Value;
|
||||||
|
|
||||||
|
use super::FacetedExtractor;
|
||||||
|
use crate::facet::value_encoding::f64_into_bytes;
|
||||||
|
use crate::{normalize_facet, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
|
||||||
|
|
||||||
|
pub struct FieldIdFacetNumberDocidsExtractor;
|
||||||
|
|
||||||
|
impl FacetedExtractor for FieldIdFacetNumberDocidsExtractor {
|
||||||
|
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||||
|
index.user_defined_faceted_fields(rtxn)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_key<'b>(
|
||||||
|
field_id: FieldId,
|
||||||
|
value: &Value,
|
||||||
|
output: &'b mut Vec<u8>,
|
||||||
|
) -> Option<&'b [u8]> {
|
||||||
|
let number = value.as_number()?;
|
||||||
|
let n = number.as_f64()?;
|
||||||
|
let ordered = f64_into_bytes(n)?;
|
||||||
|
|
||||||
|
// fid - level - orderedf64 - orignalf64
|
||||||
|
output.extend_from_slice(&field_id.to_be_bytes());
|
||||||
|
output.push(1); // level 0
|
||||||
|
output.extend_from_slice(&ordered);
|
||||||
|
output.extend_from_slice(&n.to_be_bytes());
|
||||||
|
|
||||||
|
Some(&*output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FieldIdFacetStringDocidsExtractor;
|
||||||
|
|
||||||
|
impl FacetedExtractor for FieldIdFacetStringDocidsExtractor {
|
||||||
|
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||||
|
index.user_defined_faceted_fields(rtxn)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_key<'b>(
|
||||||
|
field_id: FieldId,
|
||||||
|
value: &Value,
|
||||||
|
output: &'b mut Vec<u8>,
|
||||||
|
) -> Option<&'b [u8]> {
|
||||||
|
let string = value.as_str()?;
|
||||||
|
let normalize = normalize_facet(string);
|
||||||
|
let truncated = truncate_str(&normalize);
|
||||||
|
|
||||||
|
// fid - level - normalized string
|
||||||
|
output.extend_from_slice(&field_id.to_be_bytes());
|
||||||
|
output.push(1); // level 0
|
||||||
|
output.extend_from_slice(truncated.as_bytes());
|
||||||
|
|
||||||
|
Some(&*output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Truncates a string to the biggest valid LMDB key size.
|
||||||
|
fn truncate_str(s: &str) -> &str {
|
||||||
|
let index = s
|
||||||
|
.char_indices()
|
||||||
|
.map(|(idx, _)| idx)
|
||||||
|
.chain(std::iter::once(s.len()))
|
||||||
|
.take_while(|idx| idx <= &MAX_FACET_VALUE_LENGTH)
|
||||||
|
.last();
|
||||||
|
|
||||||
|
&s[..index.unwrap_or(0)]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FieldIdFacetIsNullDocidsExtractor;
|
||||||
|
|
||||||
|
impl FacetedExtractor for FieldIdFacetIsNullDocidsExtractor {
|
||||||
|
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||||
|
index.user_defined_faceted_fields(rtxn)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_key<'b>(
|
||||||
|
field_id: FieldId,
|
||||||
|
value: &Value,
|
||||||
|
output: &'b mut Vec<u8>,
|
||||||
|
) -> Option<&'b [u8]> {
|
||||||
|
if value.is_null() {
|
||||||
|
output.extend_from_slice(&field_id.to_be_bytes());
|
||||||
|
Some(&*output)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FieldIdFacetExistsDocidsExtractor;
|
||||||
|
|
||||||
|
impl FacetedExtractor for FieldIdFacetExistsDocidsExtractor {
|
||||||
|
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||||
|
index.user_defined_faceted_fields(rtxn)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_key<'b>(
|
||||||
|
field_id: FieldId,
|
||||||
|
_value: &Value,
|
||||||
|
output: &'b mut Vec<u8>,
|
||||||
|
) -> Option<&'b [u8]> {
|
||||||
|
output.extend_from_slice(&field_id.to_be_bytes());
|
||||||
|
Some(&*output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FieldIdFacetIsEmptyDocidsExtractor;
|
||||||
|
|
||||||
|
impl FacetedExtractor for FieldIdFacetIsEmptyDocidsExtractor {
|
||||||
|
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||||
|
index.user_defined_faceted_fields(rtxn)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_key<'b>(
|
||||||
|
field_id: FieldId,
|
||||||
|
value: &Value,
|
||||||
|
output: &'b mut Vec<u8>,
|
||||||
|
) -> Option<&'b [u8]> {
|
||||||
|
let is_empty = match value {
|
||||||
|
Value::Null | Value::Bool(_) | Value::Number(_) => false,
|
||||||
|
Value::String(s) => s.is_empty(),
|
||||||
|
Value::Array(a) => a.is_empty(),
|
||||||
|
Value::Object(o) => o.is_empty(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if is_empty {
|
||||||
|
output.extend_from_slice(&field_id.to_be_bytes());
|
||||||
|
Some(&*output)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
51
milli/src/update/new/extract/faceted/facet_document.rs
Normal file
51
milli/src/update/new/extract/faceted/facet_document.rs
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
use serde_json::Value;

use crate::update::new::extract::perm_json_p;
use crate::update::new::KvReaderFieldId;
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
|
||||||
|
|
||||||
|
pub fn extract_document_facets(
|
||||||
|
attributes_to_extract: &[&str],
|
||||||
|
obkv: &KvReaderFieldId,
|
||||||
|
field_id_map: &mut GlobalFieldsIdsMap,
|
||||||
|
facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut field_name = String::new();
|
||||||
|
for (field_id, field_bytes) in obkv {
|
||||||
|
let Some(field_name) = field_id_map.name(field_id).map(|s| {
|
||||||
|
field_name.clear();
|
||||||
|
field_name.push_str(s);
|
||||||
|
&field_name
|
||||||
|
}) else {
|
||||||
|
unreachable!("field id not found in field id map");
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) {
|
||||||
|
Some(field_id) => facet_fn(field_id, value),
|
||||||
|
None => Err(UserError::AttributeLimitReached.into()),
|
||||||
|
};
|
||||||
|
|
||||||
|
// if the current field is searchable or contains a searchable attribute
|
||||||
|
if perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]) {
|
||||||
|
// parse json.
|
||||||
|
match serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)? {
|
||||||
|
Value::Object(object) => perm_json_p::seek_leaf_values_in_object(
|
||||||
|
&object,
|
||||||
|
Some(attributes_to_extract),
|
||||||
|
&[], // skip no attributes
|
||||||
|
field_name,
|
||||||
|
&mut tokenize_field,
|
||||||
|
)?,
|
||||||
|
Value::Array(array) => perm_json_p::seek_leaf_values_in_array(
|
||||||
|
&array,
|
||||||
|
Some(attributes_to_extract),
|
||||||
|
&[], // skip no attributes
|
||||||
|
field_name,
|
||||||
|
&mut tokenize_field,
|
||||||
|
)?,
|
||||||
|
value => tokenize_field(field_name, &value)?,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
@ -1,20 +1,19 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
use std::fmt::Debug;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
use grenad::Merger;
|
use grenad::{MergeFunction, Merger};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use super::cache::CboCachedSorter;
|
use super::cache::CboCachedSorter;
|
||||||
use super::perm_json_p;
|
use crate::update::new::{DocumentChange, ItemsPool};
|
||||||
use crate::facet::value_encoding::f64_into_bytes;
|
|
||||||
use crate::update::new::{DocumentChange, ItemsPool, KvReaderFieldId};
|
|
||||||
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||||
use crate::{
|
use crate::{DocumentId, FieldId, GlobalFieldsIdsMap, Index, Result};
|
||||||
normalize_facet, FieldId, GlobalFieldsIdsMap, Index, InternalError, Result, UserError,
|
|
||||||
MAX_FACET_VALUE_LENGTH,
|
mod extract_facets;
|
||||||
};
|
mod facet_document;
|
||||||
|
|
||||||
pub trait FacetedExtractor {
|
pub trait FacetedExtractor {
|
||||||
fn run_extraction(
|
fn run_extraction(
|
||||||
@ -74,6 +73,27 @@ pub trait FacetedExtractor {
|
|||||||
Ok(builder.build())
|
Ok(builder.build())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO Shorten this
|
||||||
|
fn facet_fn_with_options<MF>(
|
||||||
|
buffer: &mut Vec<u8>,
|
||||||
|
cached_sorter: &mut CboCachedSorter<MF>,
|
||||||
|
cache_fn: impl Fn(&mut CboCachedSorter<MF>, &[u8], u32) -> grenad::Result<(), MF::Error>,
|
||||||
|
docid: DocumentId,
|
||||||
|
fid: FieldId,
|
||||||
|
value: &Value,
|
||||||
|
) -> Result<()>
|
||||||
|
where
|
||||||
|
MF: MergeFunction,
|
||||||
|
MF::Error: Debug,
|
||||||
|
{
|
||||||
|
buffer.clear();
|
||||||
|
match Self::build_key(fid, value, buffer) {
|
||||||
|
// TODO manage errors
|
||||||
|
Some(key) => Ok(cache_fn(cached_sorter, &key, docid).unwrap()),
|
||||||
|
None => Ok(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn extract_document_change(
|
fn extract_document_change(
|
||||||
rtxn: &RoTxn,
|
rtxn: &RoTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
@ -84,73 +104,69 @@ pub trait FacetedExtractor {
|
|||||||
document_change: DocumentChange,
|
document_change: DocumentChange,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
match document_change {
|
match document_change {
|
||||||
DocumentChange::Deletion(inner) => {
|
DocumentChange::Deletion(inner) => facet_document::extract_document_facets(
|
||||||
let mut facet_del_fn = |fid, value: &Value| -> Result<()> {
|
|
||||||
buffer.clear();
|
|
||||||
match Self::build_key(fid, value, buffer) {
|
|
||||||
// TODO manage errors
|
|
||||||
Some(key) => Ok(cached_sorter.insert_del_u32(&key, inner.docid()).unwrap()),
|
|
||||||
None => Ok(()),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
extract_document_facets(
|
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.current(rtxn, index)?.unwrap(),
|
inner.current(rtxn, index)?.unwrap(),
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
&mut facet_del_fn,
|
&mut |fid, value| {
|
||||||
|
Self::facet_fn_with_options(
|
||||||
|
buffer,
|
||||||
|
cached_sorter,
|
||||||
|
CboCachedSorter::insert_del_u32,
|
||||||
|
inner.docid(),
|
||||||
|
fid,
|
||||||
|
value,
|
||||||
)
|
)
|
||||||
}
|
},
|
||||||
|
),
|
||||||
DocumentChange::Update(inner) => {
|
DocumentChange::Update(inner) => {
|
||||||
let mut facet_del_fn = |fid, value: &Value| -> Result<()> {
|
facet_document::extract_document_facets(
|
||||||
buffer.clear();
|
|
||||||
match Self::build_key(fid, value, buffer) {
|
|
||||||
// TODO manage errors
|
|
||||||
Some(key) => Ok(cached_sorter.insert_del_u32(&key, inner.docid()).unwrap()),
|
|
||||||
None => Ok(()),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
extract_document_facets(
|
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.current(rtxn, index)?.unwrap(),
|
inner.current(rtxn, index)?.unwrap(),
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
&mut facet_del_fn,
|
&mut |fid, value| {
|
||||||
|
Self::facet_fn_with_options(
|
||||||
|
buffer,
|
||||||
|
cached_sorter,
|
||||||
|
CboCachedSorter::insert_del_u32,
|
||||||
|
inner.docid(),
|
||||||
|
fid,
|
||||||
|
value,
|
||||||
|
)
|
||||||
|
},
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let mut facet_add_fn = |fid, value: &Value| -> Result<()> {
|
facet_document::extract_document_facets(
|
||||||
buffer.clear();
|
|
||||||
match Self::build_key(fid, value, buffer) {
|
|
||||||
// TODO manage errors
|
|
||||||
Some(key) => Ok(cached_sorter.insert_add_u32(&key, inner.docid()).unwrap()),
|
|
||||||
None => Ok(()),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
extract_document_facets(
|
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.new(),
|
inner.new(),
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
&mut facet_add_fn,
|
&mut |fid, value| {
|
||||||
|
Self::facet_fn_with_options(
|
||||||
|
buffer,
|
||||||
|
cached_sorter,
|
||||||
|
CboCachedSorter::insert_add_u32,
|
||||||
|
inner.docid(),
|
||||||
|
fid,
|
||||||
|
value,
|
||||||
|
)
|
||||||
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
DocumentChange::Insertion(inner) => {
|
DocumentChange::Insertion(inner) => facet_document::extract_document_facets(
|
||||||
let mut facet_add_fn = |fid, value: &Value| -> Result<()> {
|
|
||||||
buffer.clear();
|
|
||||||
match Self::build_key(fid, value, buffer) {
|
|
||||||
// TODO manage errors
|
|
||||||
Some(key) => Ok(cached_sorter.insert_add_u32(&key, inner.docid()).unwrap()),
|
|
||||||
None => Ok(()),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
extract_document_facets(
|
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.new(),
|
inner.new(),
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
&mut facet_add_fn,
|
&mut |fid, value| {
|
||||||
|
Self::facet_fn_with_options(
|
||||||
|
buffer,
|
||||||
|
cached_sorter,
|
||||||
|
CboCachedSorter::insert_add_u32,
|
||||||
|
inner.docid(),
|
||||||
|
fid,
|
||||||
|
value,
|
||||||
)
|
)
|
||||||
}
|
},
|
||||||
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,174 +176,3 @@ pub trait FacetedExtractor {
|
|||||||
fn build_key<'b>(field_id: FieldId, value: &Value, output: &'b mut Vec<u8>)
|
fn build_key<'b>(field_id: FieldId, value: &Value, output: &'b mut Vec<u8>)
|
||||||
-> Option<&'b [u8]>;
|
-> Option<&'b [u8]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct FieldIdFacetNumberDocidsExtractor;
|
|
||||||
impl FacetedExtractor for FieldIdFacetNumberDocidsExtractor {
|
|
||||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
|
||||||
index.user_defined_faceted_fields(rtxn)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_key<'b>(
|
|
||||||
field_id: FieldId,
|
|
||||||
value: &Value,
|
|
||||||
output: &'b mut Vec<u8>,
|
|
||||||
) -> Option<&'b [u8]> {
|
|
||||||
let number = value.as_number()?;
|
|
||||||
let n = number.as_f64()?;
|
|
||||||
let ordered = f64_into_bytes(n)?;
|
|
||||||
|
|
||||||
// fid - level - orderedf64 - orignalf64
|
|
||||||
output.extend_from_slice(&field_id.to_be_bytes());
|
|
||||||
output.push(1); // level 0
|
|
||||||
output.extend_from_slice(&ordered);
|
|
||||||
output.extend_from_slice(&n.to_be_bytes());
|
|
||||||
|
|
||||||
Some(&*output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FieldIdFacetStringDocidsExtractor;
|
|
||||||
impl FacetedExtractor for FieldIdFacetStringDocidsExtractor {
|
|
||||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
|
||||||
index.user_defined_faceted_fields(rtxn)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_key<'b>(
|
|
||||||
field_id: FieldId,
|
|
||||||
value: &Value,
|
|
||||||
output: &'b mut Vec<u8>,
|
|
||||||
) -> Option<&'b [u8]> {
|
|
||||||
let string = value.as_str()?;
|
|
||||||
let normalize = normalize_facet(string);
|
|
||||||
let truncated = truncate_str(&normalize);
|
|
||||||
|
|
||||||
// fid - level - normalized string
|
|
||||||
output.extend_from_slice(&field_id.to_be_bytes());
|
|
||||||
output.push(1); // level 0
|
|
||||||
output.extend_from_slice(truncated.as_bytes());
|
|
||||||
|
|
||||||
Some(&*output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FieldIdFacetIsNullDocidsExtractor;
|
|
||||||
impl FacetedExtractor for FieldIdFacetIsNullDocidsExtractor {
|
|
||||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
|
||||||
index.user_defined_faceted_fields(rtxn)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_key<'b>(
|
|
||||||
field_id: FieldId,
|
|
||||||
value: &Value,
|
|
||||||
output: &'b mut Vec<u8>,
|
|
||||||
) -> Option<&'b [u8]> {
|
|
||||||
if value.is_null() {
|
|
||||||
output.extend_from_slice(&field_id.to_be_bytes());
|
|
||||||
Some(&*output)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FieldIdFacetExistsDocidsExtractor;
|
|
||||||
impl FacetedExtractor for FieldIdFacetExistsDocidsExtractor {
|
|
||||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
|
||||||
index.user_defined_faceted_fields(rtxn)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_key<'b>(
|
|
||||||
field_id: FieldId,
|
|
||||||
_value: &Value,
|
|
||||||
output: &'b mut Vec<u8>,
|
|
||||||
) -> Option<&'b [u8]> {
|
|
||||||
output.extend_from_slice(&field_id.to_be_bytes());
|
|
||||||
Some(&*output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FieldIdFacetIsEmptyDocidsExtractor;
|
|
||||||
impl FacetedExtractor for FieldIdFacetIsEmptyDocidsExtractor {
|
|
||||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
|
||||||
index.user_defined_faceted_fields(rtxn)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_key<'b>(
|
|
||||||
field_id: FieldId,
|
|
||||||
value: &Value,
|
|
||||||
output: &'b mut Vec<u8>,
|
|
||||||
) -> Option<&'b [u8]> {
|
|
||||||
let is_empty = match value {
|
|
||||||
Value::Null | Value::Bool(_) | Value::Number(_) => false,
|
|
||||||
Value::String(s) => s.is_empty(),
|
|
||||||
Value::Array(a) => a.is_empty(),
|
|
||||||
Value::Object(o) => o.is_empty(),
|
|
||||||
};
|
|
||||||
|
|
||||||
if is_empty {
|
|
||||||
output.extend_from_slice(&field_id.to_be_bytes());
|
|
||||||
Some(&*output)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn extract_document_facets(
|
|
||||||
attributes_to_extract: &[&str],
|
|
||||||
obkv: &KvReaderFieldId,
|
|
||||||
field_id_map: &mut GlobalFieldsIdsMap,
|
|
||||||
facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>,
|
|
||||||
) -> Result<()> {
|
|
||||||
let mut field_name = String::new();
|
|
||||||
for (field_id, field_bytes) in obkv {
|
|
||||||
let Some(field_name) = field_id_map.name(field_id).map(|s| {
|
|
||||||
field_name.clear();
|
|
||||||
field_name.push_str(s);
|
|
||||||
&field_name
|
|
||||||
}) else {
|
|
||||||
unreachable!("field id not found in field id map");
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) {
|
|
||||||
Some(field_id) => facet_fn(field_id, value),
|
|
||||||
None => Err(UserError::AttributeLimitReached.into()),
|
|
||||||
};
|
|
||||||
|
|
||||||
// if the current field is searchable or contains a searchable attribute
|
|
||||||
if perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]) {
|
|
||||||
// parse json.
|
|
||||||
match serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)? {
|
|
||||||
Value::Object(object) => perm_json_p::seek_leaf_values_in_object(
|
|
||||||
&object,
|
|
||||||
Some(attributes_to_extract),
|
|
||||||
&[], // skip no attributes
|
|
||||||
field_name,
|
|
||||||
&mut tokenize_field,
|
|
||||||
)?,
|
|
||||||
Value::Array(array) => perm_json_p::seek_leaf_values_in_array(
|
|
||||||
&array,
|
|
||||||
Some(attributes_to_extract),
|
|
||||||
&[], // skip no attributes
|
|
||||||
field_name,
|
|
||||||
&mut tokenize_field,
|
|
||||||
)?,
|
|
||||||
value => tokenize_field(field_name, &value)?,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Truncates a string to the biggest valid LMDB key size.
|
|
||||||
fn truncate_str(s: &str) -> &str {
|
|
||||||
let index = s
|
|
||||||
.char_indices()
|
|
||||||
.map(|(idx, _)| idx)
|
|
||||||
.chain(std::iter::once(s.len()))
|
|
||||||
.take_while(|idx| idx <= &MAX_FACET_VALUE_LENGTH)
|
|
||||||
.last();
|
|
||||||
|
|
||||||
&s[..index.unwrap_or(0)]
|
|
||||||
}
|
|
||||||
|
@ -2,11 +2,12 @@ mod cache;
|
|||||||
mod faceted;
|
mod faceted;
|
||||||
mod searchable;
|
mod searchable;
|
||||||
|
|
||||||
pub use faceted::{
|
pub use faceted::modname::{
|
||||||
FacetedExtractor, FieldIdFacetExistsDocidsExtractor, FieldIdFacetIsEmptyDocidsExtractor,
|
FieldIdFacetExistsDocidsExtractor, FieldIdFacetIsEmptyDocidsExtractor,
|
||||||
FieldIdFacetIsNullDocidsExtractor, FieldIdFacetNumberDocidsExtractor,
|
FieldIdFacetIsNullDocidsExtractor, FieldIdFacetNumberDocidsExtractor,
|
||||||
FieldIdFacetStringDocidsExtractor,
|
FieldIdFacetStringDocidsExtractor,
|
||||||
};
|
};
|
||||||
|
pub use faceted::FacetedExtractor;
|
||||||
pub use searchable::{
|
pub use searchable::{
|
||||||
ExactWordDocidsExtractor, SearchableExtractor, WordDocidsExtractor, WordFidDocidsExtractor,
|
ExactWordDocidsExtractor, SearchableExtractor, WordDocidsExtractor, WordFidDocidsExtractor,
|
||||||
WordPositionDocidsExtractor,
|
WordPositionDocidsExtractor,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user