2024-08-28 18:45:16 +02:00
|
|
|
mod cache;
|
2024-09-04 17:03:09 +02:00
|
|
|
mod faceted;
|
2024-09-25 14:54:56 +02:00
|
|
|
mod lru;
|
2024-09-04 10:20:18 +02:00
|
|
|
mod searchable;
|
2024-09-03 11:02:39 +02:00
|
|
|
|
2024-09-16 09:34:10 +02:00
|
|
|
use std::fs::File;
|
|
|
|
|
2024-09-05 10:56:22 +02:00
|
|
|
pub use faceted::*;
|
2024-09-16 09:34:10 +02:00
|
|
|
use grenad::Merger;
|
|
|
|
use rayon::iter::IntoParallelIterator;
|
2024-09-05 10:56:22 +02:00
|
|
|
pub use searchable::*;
|
2024-09-04 17:03:09 +02:00
|
|
|
|
2024-09-16 09:34:10 +02:00
|
|
|
use super::DocumentChange;
|
2024-09-25 14:54:56 +02:00
|
|
|
use crate::update::{GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
|
|
|
use crate::{GlobalFieldsIdsMap, Index, Result};
|
2024-09-16 09:34:10 +02:00
|
|
|
|
|
|
|
pub trait DocidsExtractor {
|
|
|
|
fn run_extraction(
|
|
|
|
index: &Index,
|
|
|
|
fields_ids_map: &GlobalFieldsIdsMap,
|
|
|
|
indexer: GrenadParameters,
|
|
|
|
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
|
|
|
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>>;
|
|
|
|
}
|
|
|
|
|
2024-09-04 17:03:09 +02:00
|
|
|
/// TODO: move this module into permissive json pointer
|
|
|
|
pub mod perm_json_p {
|
|
|
|
use serde_json::{Map, Value};
|
|
|
|
|
|
|
|
use crate::Result;
|
|
|
|
/// Separator between two segments of a nested field path.
const SPLIT_SYMBOL: char = '.';

/// Returns `true` if `selector` is equal to `key` or designates something nested under `key`.
///
/// The comparison works on whole path segments: `key` must be a prefix of `selector`,
/// and whatever follows that prefix must either be empty or start with the `.` separator.
///
/// ```text
/// Example:
/// `animaux`           match `animaux`
/// `animaux.chien`     match `animaux`
/// `animaux.chien.nom` match `animaux`
/// `animaux.chien.nom` match `animaux.chien`
/// `animaux.`          match `animaux`
/// -----------------------------------------
/// `animaux`       doesn't match `animaux.chien`
/// `animaux.chien` doesn't match `animaux.`
/// `animaux.ch`    doesn't match `animaux.chien`
/// `animau`        doesn't match `animaux`
/// ```
pub fn contained_in(selector: &str, key: &str) -> bool {
    // `strip_prefix` yields the remainder only when `key` is a prefix, which avoids
    // slicing `selector` by hand at `key.len()`.
    selector
        .strip_prefix(key)
        .map_or(false, |rest| rest.is_empty() || rest.starts_with(SPLIT_SYMBOL))
}
|
|
|
|
|
|
|
|
pub fn seek_leaf_values_in_object(
|
|
|
|
value: &Map<String, Value>,
|
|
|
|
selectors: Option<&[&str]>,
|
|
|
|
skip_selectors: &[&str],
|
|
|
|
base_key: &str,
|
|
|
|
seeker: &mut impl FnMut(&str, &Value) -> Result<()>,
|
|
|
|
) -> Result<()> {
|
2024-09-16 09:34:10 +02:00
|
|
|
if value.is_empty() {
|
|
|
|
seeker(&base_key, &Value::Object(Map::with_capacity(0)))?;
|
|
|
|
}
|
|
|
|
|
2024-09-04 17:03:09 +02:00
|
|
|
for (key, value) in value.iter() {
|
|
|
|
let base_key = if base_key.is_empty() {
|
|
|
|
key.to_string()
|
|
|
|
} else {
|
|
|
|
format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
|
|
|
|
};
|
|
|
|
|
|
|
|
// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
|
|
|
|
// so we check the contained_in on both side
|
|
|
|
let should_continue = select_field(&base_key, selectors, skip_selectors);
|
|
|
|
if should_continue {
|
|
|
|
match value {
|
|
|
|
Value::Object(object) => seek_leaf_values_in_object(
|
|
|
|
object,
|
|
|
|
selectors,
|
|
|
|
skip_selectors,
|
|
|
|
&base_key,
|
|
|
|
seeker,
|
|
|
|
),
|
|
|
|
Value::Array(array) => seek_leaf_values_in_array(
|
|
|
|
array,
|
|
|
|
selectors,
|
|
|
|
skip_selectors,
|
|
|
|
&base_key,
|
|
|
|
seeker,
|
|
|
|
),
|
|
|
|
value => seeker(&base_key, value),
|
|
|
|
}?;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn seek_leaf_values_in_array(
|
|
|
|
values: &[Value],
|
|
|
|
selectors: Option<&[&str]>,
|
|
|
|
skip_selectors: &[&str],
|
|
|
|
base_key: &str,
|
|
|
|
seeker: &mut impl FnMut(&str, &Value) -> Result<()>,
|
|
|
|
) -> Result<()> {
|
2024-09-16 09:34:10 +02:00
|
|
|
if values.is_empty() {
|
|
|
|
seeker(&base_key, &Value::Array(vec![]))?;
|
|
|
|
}
|
|
|
|
|
2024-09-04 17:03:09 +02:00
|
|
|
for value in values {
|
|
|
|
match value {
|
|
|
|
Value::Object(object) => {
|
|
|
|
seek_leaf_values_in_object(object, selectors, skip_selectors, base_key, seeker)
|
|
|
|
}
|
|
|
|
Value::Array(array) => {
|
|
|
|
seek_leaf_values_in_array(array, selectors, skip_selectors, base_key, seeker)
|
|
|
|
}
|
|
|
|
value => seeker(base_key, value),
|
|
|
|
}?;
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn select_field(
|
|
|
|
field_name: &str,
|
|
|
|
selectors: Option<&[&str]>,
|
|
|
|
skip_selectors: &[&str],
|
|
|
|
) -> bool {
|
|
|
|
selectors.map_or(true, |selectors| {
|
|
|
|
selectors.iter().any(|selector| {
|
|
|
|
contained_in(selector, &field_name) || contained_in(&field_name, selector)
|
|
|
|
})
|
|
|
|
}) && !skip_selectors.iter().any(|skip_selector| {
|
|
|
|
contained_in(skip_selector, &field_name) || contained_in(&field_name, skip_selector)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|