mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Add depth to facet extraction so that null inside an array doesn't mark the entire field as null
This commit is contained in:
parent
50d1bd01df
commit
8049df125b
@ -14,6 +14,7 @@ use super::FacetKind;
|
|||||||
use crate::heed_codec::facet::OrderedF64Codec;
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
use crate::update::del_add::DelAdd;
|
use crate::update::del_add::DelAdd;
|
||||||
use crate::update::new::channel::FieldIdDocidFacetSender;
|
use crate::update::new::channel::FieldIdDocidFacetSender;
|
||||||
|
use crate::update::new::extract::perm_json_p;
|
||||||
use crate::update::new::indexer::document_changes::{
|
use crate::update::new::indexer::document_changes::{
|
||||||
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
|
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
|
||||||
};
|
};
|
||||||
@ -81,7 +82,7 @@ impl FacetedDocidsExtractor {
|
|||||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||||
inner.external_document_id(),
|
inner.external_document_id(),
|
||||||
new_fields_ids_map.deref_mut(),
|
new_fields_ids_map.deref_mut(),
|
||||||
&mut |fid, value| {
|
&mut |fid, depth, value| {
|
||||||
Self::facet_fn_with_options(
|
Self::facet_fn_with_options(
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
cached_sorter.deref_mut(),
|
cached_sorter.deref_mut(),
|
||||||
@ -90,6 +91,7 @@ impl FacetedDocidsExtractor {
|
|||||||
DelAddFacetValue::insert_del,
|
DelAddFacetValue::insert_del,
|
||||||
docid,
|
docid,
|
||||||
fid,
|
fid,
|
||||||
|
depth,
|
||||||
value,
|
value,
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
@ -100,7 +102,7 @@ impl FacetedDocidsExtractor {
|
|||||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||||
inner.external_document_id(),
|
inner.external_document_id(),
|
||||||
new_fields_ids_map.deref_mut(),
|
new_fields_ids_map.deref_mut(),
|
||||||
&mut |fid, value| {
|
&mut |fid, depth, value| {
|
||||||
Self::facet_fn_with_options(
|
Self::facet_fn_with_options(
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
cached_sorter.deref_mut(),
|
cached_sorter.deref_mut(),
|
||||||
@ -109,6 +111,7 @@ impl FacetedDocidsExtractor {
|
|||||||
DelAddFacetValue::insert_del,
|
DelAddFacetValue::insert_del,
|
||||||
docid,
|
docid,
|
||||||
fid,
|
fid,
|
||||||
|
depth,
|
||||||
value,
|
value,
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
@ -119,7 +122,7 @@ impl FacetedDocidsExtractor {
|
|||||||
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
||||||
inner.external_document_id(),
|
inner.external_document_id(),
|
||||||
new_fields_ids_map.deref_mut(),
|
new_fields_ids_map.deref_mut(),
|
||||||
&mut |fid, value| {
|
&mut |fid, depth, value| {
|
||||||
Self::facet_fn_with_options(
|
Self::facet_fn_with_options(
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
cached_sorter.deref_mut(),
|
cached_sorter.deref_mut(),
|
||||||
@ -128,6 +131,7 @@ impl FacetedDocidsExtractor {
|
|||||||
DelAddFacetValue::insert_add,
|
DelAddFacetValue::insert_add,
|
||||||
docid,
|
docid,
|
||||||
fid,
|
fid,
|
||||||
|
depth,
|
||||||
value,
|
value,
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
@ -138,7 +142,7 @@ impl FacetedDocidsExtractor {
|
|||||||
inner.inserted(),
|
inner.inserted(),
|
||||||
inner.external_document_id(),
|
inner.external_document_id(),
|
||||||
new_fields_ids_map.deref_mut(),
|
new_fields_ids_map.deref_mut(),
|
||||||
&mut |fid, value| {
|
&mut |fid, depth, value| {
|
||||||
Self::facet_fn_with_options(
|
Self::facet_fn_with_options(
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
cached_sorter.deref_mut(),
|
cached_sorter.deref_mut(),
|
||||||
@ -147,6 +151,7 @@ impl FacetedDocidsExtractor {
|
|||||||
DelAddFacetValue::insert_add,
|
DelAddFacetValue::insert_add,
|
||||||
docid,
|
docid,
|
||||||
fid,
|
fid,
|
||||||
|
depth,
|
||||||
value,
|
value,
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
@ -166,6 +171,7 @@ impl FacetedDocidsExtractor {
|
|||||||
facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind),
|
facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind),
|
||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
fid: FieldId,
|
fid: FieldId,
|
||||||
|
depth: perm_json_p::Depth,
|
||||||
value: &Value,
|
value: &Value,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut buffer = BVec::new_in(doc_alloc);
|
let mut buffer = BVec::new_in(doc_alloc);
|
||||||
@ -217,7 +223,7 @@ impl FacetedDocidsExtractor {
|
|||||||
}
|
}
|
||||||
// Null
|
// Null
|
||||||
// key: fid
|
// key: fid
|
||||||
Value::Null => {
|
Value::Null if depth == perm_json_p::Depth::OnBaseKey => {
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
buffer.push(FacetKind::Null as u8);
|
buffer.push(FacetKind::Null as u8);
|
||||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||||
@ -225,13 +231,13 @@ impl FacetedDocidsExtractor {
|
|||||||
}
|
}
|
||||||
// Empty
|
// Empty
|
||||||
// key: fid
|
// key: fid
|
||||||
Value::Array(a) if a.is_empty() => {
|
Value::Array(a) if a.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
buffer.push(FacetKind::Empty as u8);
|
buffer.push(FacetKind::Empty as u8);
|
||||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||||
cache_fn(cached_sorter, &buffer, docid)
|
cache_fn(cached_sorter, &buffer, docid)
|
||||||
}
|
}
|
||||||
Value::Object(o) if o.is_empty() => {
|
Value::Object(o) if o.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
buffer.push(FacetKind::Empty as u8);
|
buffer.push(FacetKind::Empty as u8);
|
||||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||||
|
@ -10,13 +10,16 @@ pub fn extract_document_facets<'doc>(
|
|||||||
document: impl Document<'doc>,
|
document: impl Document<'doc>,
|
||||||
external_document_id: &str,
|
external_document_id: &str,
|
||||||
field_id_map: &mut GlobalFieldsIdsMap,
|
field_id_map: &mut GlobalFieldsIdsMap,
|
||||||
facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>,
|
facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for res in document.iter_top_level_fields() {
|
for res in document.iter_top_level_fields() {
|
||||||
let (field_name, value) = res?;
|
let (field_name, value) = res?;
|
||||||
|
|
||||||
let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) {
|
let mut tokenize_field =
|
||||||
Some(field_id) => facet_fn(field_id, value),
|
|name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map
|
||||||
|
.id_or_insert(name)
|
||||||
|
{
|
||||||
|
Some(field_id) => facet_fn(field_id, depth, value),
|
||||||
None => Err(UserError::AttributeLimitReached.into()),
|
None => Err(UserError::AttributeLimitReached.into()),
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -29,6 +32,7 @@ pub fn extract_document_facets<'doc>(
|
|||||||
Some(attributes_to_extract),
|
Some(attributes_to_extract),
|
||||||
&[], // skip no attributes
|
&[], // skip no attributes
|
||||||
field_name,
|
field_name,
|
||||||
|
perm_json_p::Depth::OnBaseKey,
|
||||||
&mut tokenize_field,
|
&mut tokenize_field,
|
||||||
)?,
|
)?,
|
||||||
Value::Array(array) => perm_json_p::seek_leaf_values_in_array(
|
Value::Array(array) => perm_json_p::seek_leaf_values_in_array(
|
||||||
@ -36,9 +40,10 @@ pub fn extract_document_facets<'doc>(
|
|||||||
Some(attributes_to_extract),
|
Some(attributes_to_extract),
|
||||||
&[], // skip no attributes
|
&[], // skip no attributes
|
||||||
field_name,
|
field_name,
|
||||||
|
perm_json_p::Depth::OnBaseKey,
|
||||||
&mut tokenize_field,
|
&mut tokenize_field,
|
||||||
)?,
|
)?,
|
||||||
value => tokenize_field(field_name, &value)?,
|
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -51,8 +56,8 @@ pub fn extract_document_facets<'doc>(
|
|||||||
.zip(field_id_map.id_or_insert("_geo.lng"))
|
.zip(field_id_map.id_or_insert("_geo.lng"))
|
||||||
.ok_or(UserError::AttributeLimitReached)?;
|
.ok_or(UserError::AttributeLimitReached)?;
|
||||||
|
|
||||||
facet_fn(lat_fid, &lat.into())?;
|
facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?;
|
||||||
facet_fn(lng_fid, &lng.into())?;
|
facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,15 +59,24 @@ pub mod perm_json_p {
|
|||||||
&& selector[key.len()..].chars().next().map(|c| c == SPLIT_SYMBOL).unwrap_or(true)
|
&& selector[key.len()..].chars().next().map(|c| c == SPLIT_SYMBOL).unwrap_or(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum Depth {
|
||||||
|
/// The perm json ptr is currently on the field of an object
|
||||||
|
OnBaseKey,
|
||||||
|
/// The perm json ptr is currently inside of an array
|
||||||
|
InsideArray,
|
||||||
|
}
|
||||||
|
|
||||||
pub fn seek_leaf_values_in_object(
|
pub fn seek_leaf_values_in_object(
|
||||||
value: &Map<String, Value>,
|
value: &Map<String, Value>,
|
||||||
selectors: Option<&[&str]>,
|
selectors: Option<&[&str]>,
|
||||||
skip_selectors: &[&str],
|
skip_selectors: &[&str],
|
||||||
base_key: &str,
|
base_key: &str,
|
||||||
seeker: &mut impl FnMut(&str, &Value) -> Result<()>,
|
base_depth: Depth,
|
||||||
|
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
if value.is_empty() {
|
if value.is_empty() {
|
||||||
seeker(base_key, &Value::Object(Map::with_capacity(0)))?;
|
seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (key, value) in value.iter() {
|
for (key, value) in value.iter() {
|
||||||
@ -87,6 +96,7 @@ pub mod perm_json_p {
|
|||||||
selectors,
|
selectors,
|
||||||
skip_selectors,
|
skip_selectors,
|
||||||
&base_key,
|
&base_key,
|
||||||
|
Depth::OnBaseKey,
|
||||||
seeker,
|
seeker,
|
||||||
),
|
),
|
||||||
Value::Array(array) => seek_leaf_values_in_array(
|
Value::Array(array) => seek_leaf_values_in_array(
|
||||||
@ -94,9 +104,10 @@ pub mod perm_json_p {
|
|||||||
selectors,
|
selectors,
|
||||||
skip_selectors,
|
skip_selectors,
|
||||||
&base_key,
|
&base_key,
|
||||||
|
Depth::OnBaseKey,
|
||||||
seeker,
|
seeker,
|
||||||
),
|
),
|
||||||
value => seeker(&base_key, value),
|
value => seeker(&base_key, Depth::OnBaseKey, value),
|
||||||
}?;
|
}?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -109,21 +120,32 @@ pub mod perm_json_p {
|
|||||||
selectors: Option<&[&str]>,
|
selectors: Option<&[&str]>,
|
||||||
skip_selectors: &[&str],
|
skip_selectors: &[&str],
|
||||||
base_key: &str,
|
base_key: &str,
|
||||||
seeker: &mut impl FnMut(&str, &Value) -> Result<()>,
|
base_depth: Depth,
|
||||||
|
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
if values.is_empty() {
|
if values.is_empty() {
|
||||||
seeker(base_key, &Value::Array(vec![]))?;
|
seeker(base_key, base_depth, &Value::Array(vec![]))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
for value in values {
|
for value in values {
|
||||||
match value {
|
match value {
|
||||||
Value::Object(object) => {
|
Value::Object(object) => seek_leaf_values_in_object(
|
||||||
seek_leaf_values_in_object(object, selectors, skip_selectors, base_key, seeker)
|
object,
|
||||||
}
|
selectors,
|
||||||
Value::Array(array) => {
|
skip_selectors,
|
||||||
seek_leaf_values_in_array(array, selectors, skip_selectors, base_key, seeker)
|
base_key,
|
||||||
}
|
Depth::InsideArray,
|
||||||
value => seeker(base_key, value),
|
seeker,
|
||||||
|
),
|
||||||
|
Value::Array(array) => seek_leaf_values_in_array(
|
||||||
|
array,
|
||||||
|
selectors,
|
||||||
|
skip_selectors,
|
||||||
|
base_key,
|
||||||
|
Depth::InsideArray,
|
||||||
|
seeker,
|
||||||
|
),
|
||||||
|
value => seeker(base_key, Depth::InsideArray, value),
|
||||||
}?;
|
}?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ use serde_json::Value;
|
|||||||
|
|
||||||
use crate::update::new::document::Document;
|
use crate::update::new::document::Document;
|
||||||
use crate::update::new::extract::perm_json_p::{
|
use crate::update::new::extract::perm_json_p::{
|
||||||
seek_leaf_values_in_array, seek_leaf_values_in_object, select_field,
|
seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth,
|
||||||
};
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
|
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
|
||||||
@ -35,7 +35,7 @@ impl<'a> DocumentTokenizer<'a> {
|
|||||||
for entry in document.iter_top_level_fields() {
|
for entry in document.iter_top_level_fields() {
|
||||||
let (field_name, value) = entry?;
|
let (field_name, value) = entry?;
|
||||||
|
|
||||||
let mut tokenize_field = |field_name: &str, value: &Value| {
|
let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
|
||||||
let Some(field_id) = field_id_map.id_or_insert(field_name) else {
|
let Some(field_id) = field_id_map.id_or_insert(field_name) else {
|
||||||
return Err(UserError::AttributeLimitReached.into());
|
return Err(UserError::AttributeLimitReached.into());
|
||||||
};
|
};
|
||||||
@ -96,6 +96,7 @@ impl<'a> DocumentTokenizer<'a> {
|
|||||||
self.attribute_to_extract,
|
self.attribute_to_extract,
|
||||||
self.attribute_to_skip,
|
self.attribute_to_skip,
|
||||||
field_name,
|
field_name,
|
||||||
|
Depth::OnBaseKey,
|
||||||
&mut tokenize_field,
|
&mut tokenize_field,
|
||||||
)?,
|
)?,
|
||||||
Value::Array(array) => seek_leaf_values_in_array(
|
Value::Array(array) => seek_leaf_values_in_array(
|
||||||
@ -103,9 +104,10 @@ impl<'a> DocumentTokenizer<'a> {
|
|||||||
self.attribute_to_extract,
|
self.attribute_to_extract,
|
||||||
self.attribute_to_skip,
|
self.attribute_to_skip,
|
||||||
field_name,
|
field_name,
|
||||||
|
Depth::OnBaseKey,
|
||||||
&mut tokenize_field,
|
&mut tokenize_field,
|
||||||
)?,
|
)?,
|
||||||
value => tokenize_field(field_name, &value)?,
|
value => tokenize_field(field_name, Depth::OnBaseKey, &value)?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user