2336: Move permissive-json-pointer in the meilisearch repository r=Kerollmops a=irevoire

Move the permissive-json-pointer crate in the meilisearch repository.

Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
bors[bot] 2022-04-20 17:25:44 +00:00 committed by GitHub
commit 04381011b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 941 additions and 3 deletions

9
Cargo.lock generated
View File

@ -402,6 +402,12 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
[[package]]
name = "big_s"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "199edb7b90631283b10c2422e6a0bc8b7d987bf732995ba1de53b576c97e51a8"
[[package]]
name = "bimap"
version = "0.6.2"
@ -2486,9 +2492,8 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]]
name = "permissive-json-pointer"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2125f5fc44a45ffd265ce6ab343842f71df469d173f923f234e3a8df7a8f1ba6"
dependencies = [
"big_s",
"serde_json",
]

View File

@ -5,4 +5,5 @@ members = [
"meilisearch-error",
"meilisearch-lib",
"meilisearch-auth",
"permissive-json-pointer",
]

View File

@ -36,7 +36,7 @@ num_cpus = "1.13.1"
obkv = "0.2.0"
once_cell = "1.10.0"
parking_lot = "0.12.0"
permissive-json-pointer = "0.2.0"
permissive-json-pointer = { path = "../permissive-json-pointer" }
rand = "0.8.5"
rayon = "1.5.1"
regex = "1.5.5"

View File

@ -0,0 +1,12 @@
[package]
name = "permissive-json-pointer"
version = "0.2.0"
edition = "2021"
description = "A permissive json pointer"
readme = "README.md"
[dependencies]
serde_json = "1.0"
[dev-dependencies]
big_s = "1.0"

View File

@ -0,0 +1,134 @@
# Permissive json pointer
This crate provide an interface a little bit similar to what you know as “json pointer”.
But its actually doing something quite different.
## The API
The crate provide only one function called [`select_values`].
It takes one object in parameter and a list of selectors.
It then returns a new object containing only the fields you selected.
## The selectors
The syntax for the selector is easier than with other API.
There is only ONE special symbol, its the `.`.
If you write `dog` and provide the following object;
```json
{
"dog": "bob",
"cat": "michel"
}
```
Youll get back;
```json
{
"dog": "bob",
}
```
Easy right?
Now the dot can either be used as a field name, or as a nested object.
For example, if you have the following json;
```json
{
"dog.name": "jean",
"dog": {
"name": "bob",
"age": 6
}
}
```
What a crappy json! But never underestimate your users, they [_WILL_](https://xkcd.com/1172/)
somehow base their entire workflow on this kind of json.
Here with the `dog.name` selector both fields will be
selected and the following json will be returned;
```json
{
"dog.name": "jean",
"dog": {
"name": "bob",
}
}
```
And as you can guess, this crate is as permissive as possible.
Itll match everything it can!
Consider this even more crappy json;
```json
{
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob"
},
"pet": {
"dog.name": "michel"
},
"pet": {
"dog": {
"name": "milan"
}
}
}
```
If you write `pet.dog.name` everything will be selected.
## Matching arrays
With this kind of selectors you cant match a specific element in an array.
Your selector will be applied to all the element _in_ the array.
Consider the following json;
```json
{
"pets": [
{
"animal": "dog",
"race": "bernese mountain",
},
{
"animal": "dog",
"race": "golden retriever",
},
{
"animal": "cat",
"age": 8,
}
]
}
```
With the filter `pets.animal` youll get;
```json
{
"pets": [
{
"animal": "dog",
},
{
"animal": "dog",
},
{
"animal": "cat",
}
]
}
```
The empty element in an array gets removed. So if you were to look
for `pets.age` you would only get;
```json
{
"pets": [
{
"age": 8,
}
]
}
```
And I think thats all you need to know 🎉

View File

@ -0,0 +1,786 @@
#![doc = include_str!("../README.md")]
use std::collections::HashSet;
use serde_json::*;
type Document = Map<String, Value>;
const SPLIT_SYMBOL: char = '.';
/// Returns `true` if the `selector` match the `key`.
///
/// ```text
/// Example:
/// `animaux` match `animaux`
/// `animaux.chien` match `animaux`
/// `animaux.chien` match `animaux`
/// `animaux.chien.nom` match `animaux`
/// `animaux.chien.nom` match `animaux.chien`
/// -----------------------------------------
/// `animaux` doesn't match `animaux.chien`
/// `animaux.` doesn't match `animaux`
/// `animaux.ch` doesn't match `animaux.chien`
/// `animau` doesn't match `animaux`
/// ```
fn contained_in(selector: &str, key: &str) -> bool {
selector.starts_with(key)
&& selector[key.len()..]
.chars()
.next()
.map(|c| c == SPLIT_SYMBOL)
.unwrap_or(true)
}
/// Map the selected leaf values of a json allowing you to update only the fields that were selected.
/// ```
/// use serde_json::{Value, json};
/// use permissive_json_pointer::map_leaf_values;
///
/// let mut value: Value = json!({
/// "jean": {
/// "age": 8,
/// "race": {
/// "name": "bernese mountain",
/// "size": "80cm",
/// }
/// }
/// });
/// map_leaf_values(
/// value.as_object_mut().unwrap(),
/// ["jean.race.name"],
/// |key, value| match (value, dbg!(key)) {
/// (Value::String(name), "jean.race.name") => *name = "patou".to_string(),
/// _ => unreachable!(),
/// },
/// );
/// assert_eq!(
/// value,
/// json!({
/// "jean": {
/// "age": 8,
/// "race": {
/// "name": "patou",
/// "size": "80cm",
/// }
/// }
/// })
/// );
/// ```
pub fn map_leaf_values<'a>(
value: &mut Map<String, Value>,
selectors: impl IntoIterator<Item = &'a str>,
mut mapper: impl FnMut(&str, &mut Value),
) {
let selectors: Vec<_> = selectors.into_iter().collect();
map_leaf_values_in_object(value, &selectors, "", &mut mapper);
}
pub fn map_leaf_values_in_object<'a>(
value: &mut Map<String, Value>,
selectors: &[&'a str],
base_key: &str,
mapper: &mut impl FnMut(&str, &mut Value),
) {
for (key, value) in value.iter_mut() {
let base_key = if base_key.is_empty() {
key.to_string()
} else {
format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
};
// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
// so we check the contained_in on both side
let should_continue = selectors
.iter()
.any(|selector| contained_in(selector, &base_key) || contained_in(&base_key, selector));
if should_continue {
match value {
Value::Object(object) => {
map_leaf_values_in_object(object, selectors, &base_key, mapper)
}
Value::Array(array) => {
map_leaf_values_in_array(array, selectors, &base_key, mapper)
}
value => mapper(&base_key, value),
}
}
}
}
pub fn map_leaf_values_in_array(
values: &mut [Value],
selectors: &[&str],
base_key: &str,
mapper: &mut impl FnMut(&str, &mut Value),
) {
for value in values.iter_mut() {
match value {
Value::Object(object) => map_leaf_values_in_object(object, selectors, base_key, mapper),
Value::Array(array) => map_leaf_values_in_array(array, selectors, base_key, mapper),
value => mapper(base_key, value),
}
}
}
/// Permissively selects values in a json with a list of selectors.
/// Returns a new json containing all the selected fields.
/// ```
/// use serde_json::*;
/// use permissive_json_pointer::select_values;
///
/// let value: Value = json!({
/// "name": "peanut",
/// "age": 8,
/// "race": {
/// "name": "bernese mountain",
/// "avg_age": 12,
/// "size": "80cm",
/// },
/// });
/// let value: &Map<String, Value> = value.as_object().unwrap();
///
/// let res: Value = select_values(value, vec!["name", "race.name"]).into();
/// assert_eq!(
/// res,
/// json!({
/// "name": "peanut",
/// "race": {
/// "name": "bernese mountain",
/// },
/// })
/// );
/// ```
pub fn select_values<'a>(
value: &Map<String, Value>,
selectors: impl IntoIterator<Item = &'a str>,
) -> Map<String, Value> {
let selectors = selectors.into_iter().collect();
create_value(value, selectors)
}
fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document {
let mut new_value: Document = Map::new();
for (key, value) in value.iter() {
// first we insert all the key at the root level
if selectors.contains(key as &str) {
new_value.insert(key.to_string(), value.clone());
// if the key was simple we can delete it and move to
// the next key
if is_simple(key) {
selectors.remove(key as &str);
continue;
}
}
// we extract all the sub selectors matching the current field
// if there was [person.name, person.age] and if we are on the field
// `person`. Then we generate the following sub selectors: [name, age].
let sub_selectors: HashSet<&str> = selectors
.iter()
.filter(|s| contained_in(s, key))
.filter_map(|s| s.trim_start_matches(key).get(SPLIT_SYMBOL.len_utf8()..))
.collect();
if !sub_selectors.is_empty() {
match value {
Value::Array(array) => {
let array = create_array(array, &sub_selectors);
if !array.is_empty() {
new_value.insert(key.to_string(), array.into());
}
}
Value::Object(object) => {
let object = create_value(object, sub_selectors);
if !object.is_empty() {
new_value.insert(key.to_string(), object.into());
}
}
_ => (),
}
}
}
new_value
}
fn create_array(array: &Vec<Value>, selectors: &HashSet<&str>) -> Vec<Value> {
let mut res = Vec::new();
for value in array {
match value {
Value::Array(array) => {
let array = create_array(array, selectors);
if !array.is_empty() {
res.push(array.into());
}
}
Value::Object(object) => {
let object = create_value(object, selectors.clone());
if !object.is_empty() {
res.push(object.into());
}
}
_ => (),
}
}
res
}
fn is_simple(key: impl AsRef<str>) -> bool {
!key.as_ref().contains(SPLIT_SYMBOL)
}
#[cfg(test)]
mod tests {
use big_s::S;
use super::*;
#[test]
fn test_contained_in() {
assert!(contained_in("animaux", "animaux"));
assert!(contained_in("animaux.chien", "animaux"));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien.race.bouvier bernois"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien.race.bouvier bernois.fourrure"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien.race.bouvier bernois.fourrure.couleur"
));
// -- the wrongs
assert!(!contained_in("chien", "chat"));
assert!(!contained_in("animaux", "animaux.chien"));
assert!(!contained_in("animaux.chien", "animaux.chat"));
// -- the strange edge cases
assert!(!contained_in("animaux.chien", "anima"));
assert!(!contained_in("animaux.chien", "animau"));
assert!(!contained_in("animaux.chien", "animaux."));
assert!(!contained_in("animaux.chien", "animaux.c"));
assert!(!contained_in("animaux.chien", "animaux.ch"));
assert!(!contained_in("animaux.chien", "animaux.chi"));
assert!(!contained_in("animaux.chien", "animaux.chie"));
}
#[test]
fn simple_key() {
let value: Value = json!({
"name": "peanut",
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["name"]).into();
assert_eq!(
res,
json!({
"name": "peanut",
})
);
let res: Value = select_values(value, vec!["age"]).into();
assert_eq!(
res,
json!({
"age": 8,
})
);
let res: Value = select_values(value, vec!["name", "age"]).into();
assert_eq!(
res,
json!({
"name": "peanut",
"age": 8,
})
);
let res: Value = select_values(value, vec!["race"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
let res: Value = select_values(value, vec!["name", "age", "race"]).into();
assert_eq!(
res,
json!({
"name": "peanut",
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
}
#[test]
fn complex_key() {
let value: Value = json!({
"name": "peanut",
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["race"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
println!("RIGHTBEFORE");
let res: Value = select_values(value, vec!["race.name"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
}
})
);
let res: Value = select_values(value, vec!["race.name", "race.size"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"size": "80cm",
}
})
);
let res: Value = select_values(
value,
vec!["race.name", "race.size", "race.avg_age", "race.size", "age"],
)
.into();
assert_eq!(
res,
json!({
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
let res: Value = select_values(value, vec!["race.name", "race"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
let res: Value = select_values(value, vec!["race", "race.name"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
}
#[test]
fn multi_level_nested() {
let value: Value = json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["jean"]).into();
assert_eq!(
res,
json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
})
);
let res: Value = select_values(value, vec!["jean.age"]).into();
assert_eq!(
res,
json!({
"jean": {
"age": 8,
}
})
);
let res: Value = select_values(value, vec!["jean.race.size"]).into();
assert_eq!(
res,
json!({
"jean": {
"race": {
"size": "80cm",
}
}
})
);
let res: Value = select_values(value, vec!["jean.race.name", "jean.age"]).into();
assert_eq!(
res,
json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
}
}
})
);
let res: Value = select_values(value, vec!["jean.race"]).into();
assert_eq!(
res,
json!({
"jean": {
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
})
);
}
#[test]
fn array_and_deep_nested() {
let value: Value = json!({
"doggos": [
{
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
},
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
"size": "60cm",
}
}
},
]
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["doggos.jean"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
}
]
})
);
let res: Value = select_values(value, vec!["doggos.marc"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
"size": "60cm",
}
}
}
]
})
);
let res: Value = select_values(value, vec!["doggos.marc.race"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"marc": {
"race": {
"name": "golden retriever",
"size": "60cm",
}
}
}
]
})
);
let res: Value =
select_values(value, vec!["doggos.marc.race.name", "doggos.marc.age"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
}
}
}
]
})
);
let res: Value = select_values(
value,
vec![
"doggos.marc.race.name",
"doggos.marc.age",
"doggos.jean.race.name",
"other.field",
],
)
.into();
assert_eq!(
res,
json!({
"doggos": [
{
"jean": {
"race": {
"name": "bernese mountain",
}
}
},
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
}
}
}
]
})
);
}
#[test]
fn all_conflict_variation() {
let value: Value = json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob"
},
"pet": {
"dog.name": "michel"
},
"pet": {
"dog": {
"name": "milan"
}
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["pet.dog.name"]).into();
assert_eq!(
res,
json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob"
},
"pet": {
"dog.name": "michel"
},
"pet": {
"dog": {
"name": "milan"
}
}
})
);
let value: Value = json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob",
},
"pet": {
"dog.name": "michel",
"dog": {
"name": "milan",
}
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["pet.dog.name", "pet.dog", "pet"]).into();
assert_eq!(
res,
json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob",
},
"pet": {
"dog.name": "michel",
"dog": {
"name": "milan",
}
}
})
);
}
#[test]
fn map_object() {
let mut value: Value = json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
});
map_leaf_values(
value.as_object_mut().unwrap(),
["jean.race.name"],
|key, value| match (value, dbg!(key)) {
(Value::String(name), "jean.race.name") => *name = S("patou"),
_ => unreachable!(),
},
);
assert_eq!(
value,
json!({
"jean": {
"age": 8,
"race": {
"name": "patou",
"size": "80cm",
}
}
})
);
let mut value: Value = json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
},
"bob": "lolpied",
});
let mut calls = 0;
map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, value| {
calls += 1;
match (value, key) {
(Value::String(name), "jean.race.name") => *name = S("patou"),
_ => println!("Called with {key}"),
}
});
assert_eq!(calls, 3);
assert_eq!(
value,
json!({
"jean": {
"age": 8,
"race": {
"name": "patou",
"size": "80cm",
}
},
"bob": "lolpied",
})
);
}
}