diff --git a/Cargo.lock b/Cargo.lock index d4eb6d3d5..ff5c0b5ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -402,6 +402,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "big_s" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199edb7b90631283b10c2422e6a0bc8b7d987bf732995ba1de53b576c97e51a8" + [[package]] name = "bimap" version = "0.6.2" @@ -2486,9 +2492,8 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "permissive-json-pointer" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2125f5fc44a45ffd265ce6ab343842f71df469d173f923f234e3a8df7a8f1ba6" dependencies = [ + "big_s", "serde_json", ] diff --git a/Cargo.toml b/Cargo.toml index a27a29634..03f4f5597 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,5 @@ members = [ "meilisearch-error", "meilisearch-lib", "meilisearch-auth", + "permissive-json-pointer", ] diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 9316253d5..aa18eb97d 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -36,7 +36,7 @@ num_cpus = "1.13.1" obkv = "0.2.0" once_cell = "1.10.0" parking_lot = "0.12.0" -permissive-json-pointer = "0.2.0" +permissive-json-pointer = { path = "../permissive-json-pointer" } rand = "0.8.5" rayon = "1.5.1" regex = "1.5.5" diff --git a/permissive-json-pointer/Cargo.toml b/permissive-json-pointer/Cargo.toml new file mode 100644 index 000000000..b50f30f19 --- /dev/null +++ b/permissive-json-pointer/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "permissive-json-pointer" +version = "0.2.0" +edition = "2021" +description = "A permissive json pointer" +readme = "README.md" + +[dependencies] +serde_json = "1.0" + +[dev-dependencies] +big_s = "1.0" diff --git a/permissive-json-pointer/README.md 
b/permissive-json-pointer/README.md new file mode 100644 index 000000000..6a94cf00d --- /dev/null +++ b/permissive-json-pointer/README.md @@ -0,0 +1,134 @@ +# Permissive json pointer + +This crate provides an interface a little bit similar to what you know as “json pointer”. +But it’s actually doing something quite different. + +## The API + +The main function provided by the crate is [`select_values`]. +It takes an object and a list of selectors as parameters. +It then returns a new object containing only the fields you selected. + +## The selectors + +The syntax for the selectors is simpler than in other APIs. +There is only ONE special symbol: the `.`. + +If you write `dog` and provide the following object: +```json +{ + "dog": "bob", + "cat": "michel" +} +``` +You’ll get back: +```json +{ + "dog": "bob" +} +``` + +Easy, right? + +Now, the dot can either be part of a field name or denote a nested object. + +For example, if you have the following json: +```json +{ + "dog.name": "jean", + "dog": { + "name": "bob", + "age": 6 + } +} +``` + +What a crappy json! But never underestimate your users, they [_WILL_](https://xkcd.com/1172/) +somehow base their entire workflow on this kind of json. +Here with the `dog.name` selector both fields will be +selected and the following json will be returned: +```json +{ + "dog.name": "jean", + "dog": { + "name": "bob" + } +} +``` + +And as you can guess, this crate is as permissive as possible. +It’ll match everything it can! +Consider this even more crappy json: +```json +{ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob" + }, + "pet": { + "dog.name": "michel" + }, + "pet": { + "dog": { + "name": "milan" + } + } +} +``` +If you write `pet.dog.name` everything will be selected. + +## Matching arrays + +With this kind of selector you can’t match a specific element in an array. +Your selector will be applied to all the elements _in_ the array.
+ +Consider the following json; +```json +{ + "pets": [ + { + "animal": "dog", + "race": "bernese mountain", + }, + { + "animal": "dog", + "race": "golden retriever", + }, + { + "animal": "cat", + "age": 8, + } + ] +} +``` + +With the filter `pets.animal` you’ll get; +```json +{ + "pets": [ + { + "animal": "dog", + }, + { + "animal": "dog", + }, + { + "animal": "cat", + } + ] +} +``` + +The empty element in an array gets removed. So if you were to look +for `pets.age` you would only get; +```json +{ + "pets": [ + { + "age": 8, + } + ] +} +``` + +And I think that’s all you need to know 🎉 \ No newline at end of file diff --git a/permissive-json-pointer/src/lib.rs b/permissive-json-pointer/src/lib.rs new file mode 100644 index 000000000..56382beae --- /dev/null +++ b/permissive-json-pointer/src/lib.rs @@ -0,0 +1,786 @@ +#![doc = include_str!("../README.md")] + +use std::collections::HashSet; + +use serde_json::*; + +type Document = Map; + +const SPLIT_SYMBOL: char = '.'; + +/// Returns `true` if the `selector` match the `key`. +/// +/// ```text +/// Example: +/// `animaux` match `animaux` +/// `animaux.chien` match `animaux` +/// `animaux.chien` match `animaux` +/// `animaux.chien.nom` match `animaux` +/// `animaux.chien.nom` match `animaux.chien` +/// ----------------------------------------- +/// `animaux` doesn't match `animaux.chien` +/// `animaux.` doesn't match `animaux` +/// `animaux.ch` doesn't match `animaux.chien` +/// `animau` doesn't match `animaux` +/// ``` +fn contained_in(selector: &str, key: &str) -> bool { + selector.starts_with(key) + && selector[key.len()..] + .chars() + .next() + .map(|c| c == SPLIT_SYMBOL) + .unwrap_or(true) +} + +/// Map the selected leaf values of a json allowing you to update only the fields that were selected. 
+/// ``` +/// use serde_json::{Value, json}; +/// use permissive_json_pointer::map_leaf_values; +/// +/// let mut value: Value = json!({ +/// "jean": { +/// "age": 8, +/// "race": { +/// "name": "bernese mountain", +/// "size": "80cm", +/// } +/// } +/// }); +/// map_leaf_values( +/// value.as_object_mut().unwrap(), +/// ["jean.race.name"], +/// |key, value| match (value, dbg!(key)) { +/// (Value::String(name), "jean.race.name") => *name = "patou".to_string(), +/// _ => unreachable!(), +/// }, +/// ); +/// assert_eq!( +/// value, +/// json!({ +/// "jean": { +/// "age": 8, +/// "race": { +/// "name": "patou", +/// "size": "80cm", +/// } +/// } +/// }) +/// ); +/// ``` +pub fn map_leaf_values<'a>( + value: &mut Map, + selectors: impl IntoIterator, + mut mapper: impl FnMut(&str, &mut Value), +) { + let selectors: Vec<_> = selectors.into_iter().collect(); + map_leaf_values_in_object(value, &selectors, "", &mut mapper); +} + +pub fn map_leaf_values_in_object<'a>( + value: &mut Map, + selectors: &[&'a str], + base_key: &str, + mapper: &mut impl FnMut(&str, &mut Value), +) { + for (key, value) in value.iter_mut() { + let base_key = if base_key.is_empty() { + key.to_string() + } else { + format!("{}{}{}", base_key, SPLIT_SYMBOL, key) + }; + + // here if the user only specified `doggo` we need to iterate in all the fields of `doggo` + // so we check the contained_in on both side + let should_continue = selectors + .iter() + .any(|selector| contained_in(selector, &base_key) || contained_in(&base_key, selector)); + + if should_continue { + match value { + Value::Object(object) => { + map_leaf_values_in_object(object, selectors, &base_key, mapper) + } + Value::Array(array) => { + map_leaf_values_in_array(array, selectors, &base_key, mapper) + } + value => mapper(&base_key, value), + } + } + } +} + +pub fn map_leaf_values_in_array( + values: &mut [Value], + selectors: &[&str], + base_key: &str, + mapper: &mut impl FnMut(&str, &mut Value), +) { + for value in values.iter_mut() { 
+ match value { + Value::Object(object) => map_leaf_values_in_object(object, selectors, base_key, mapper), + Value::Array(array) => map_leaf_values_in_array(array, selectors, base_key, mapper), + value => mapper(base_key, value), + } + } +} + +/// Permissively selects values in a json with a list of selectors. +/// Returns a new json containing all the selected fields. +/// ``` +/// use serde_json::*; +/// use permissive_json_pointer::select_values; +/// +/// let value: Value = json!({ +/// "name": "peanut", +/// "age": 8, +/// "race": { +/// "name": "bernese mountain", +/// "avg_age": 12, +/// "size": "80cm", +/// }, +/// }); +/// let value: &Map = value.as_object().unwrap(); +/// +/// let res: Value = select_values(value, vec!["name", "race.name"]).into(); +/// assert_eq!( +/// res, +/// json!({ +/// "name": "peanut", +/// "race": { +/// "name": "bernese mountain", +/// }, +/// }) +/// ); +/// ``` +pub fn select_values<'a>( + value: &Map, + selectors: impl IntoIterator, +) -> Map { + let selectors = selectors.into_iter().collect(); + create_value(value, selectors) +} + +fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document { + let mut new_value: Document = Map::new(); + + for (key, value) in value.iter() { + // first we insert all the key at the root level + if selectors.contains(key as &str) { + new_value.insert(key.to_string(), value.clone()); + // if the key was simple we can delete it and move to + // the next key + if is_simple(key) { + selectors.remove(key as &str); + continue; + } + } + + // we extract all the sub selectors matching the current field + // if there was [person.name, person.age] and if we are on the field + // `person`. Then we generate the following sub selectors: [name, age]. 
+ let sub_selectors: HashSet<&str> = selectors + .iter() + .filter(|s| contained_in(s, key)) + .filter_map(|s| s.trim_start_matches(key).get(SPLIT_SYMBOL.len_utf8()..)) + .collect(); + + if !sub_selectors.is_empty() { + match value { + Value::Array(array) => { + let array = create_array(array, &sub_selectors); + if !array.is_empty() { + new_value.insert(key.to_string(), array.into()); + } + } + Value::Object(object) => { + let object = create_value(object, sub_selectors); + if !object.is_empty() { + new_value.insert(key.to_string(), object.into()); + } + } + _ => (), + } + } + } + + new_value +} + +fn create_array(array: &Vec, selectors: &HashSet<&str>) -> Vec { + let mut res = Vec::new(); + + for value in array { + match value { + Value::Array(array) => { + let array = create_array(array, selectors); + if !array.is_empty() { + res.push(array.into()); + } + } + Value::Object(object) => { + let object = create_value(object, selectors.clone()); + if !object.is_empty() { + res.push(object.into()); + } + } + _ => (), + } + } + + res +} + +fn is_simple(key: impl AsRef) -> bool { + !key.as_ref().contains(SPLIT_SYMBOL) +} + +#[cfg(test)] +mod tests { + use big_s::S; + + use super::*; + + #[test] + fn test_contained_in() { + assert!(contained_in("animaux", "animaux")); + assert!(contained_in("animaux.chien", "animaux")); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien.race.bouvier bernois" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien.race.bouvier bernois.fourrure" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien.race.bouvier bernois.fourrure.couleur" + )); + + // -- the wrongs + assert!(!contained_in("chien", 
"chat")); + assert!(!contained_in("animaux", "animaux.chien")); + assert!(!contained_in("animaux.chien", "animaux.chat")); + + // -- the strange edge cases + assert!(!contained_in("animaux.chien", "anima")); + assert!(!contained_in("animaux.chien", "animau")); + assert!(!contained_in("animaux.chien", "animaux.")); + assert!(!contained_in("animaux.chien", "animaux.c")); + assert!(!contained_in("animaux.chien", "animaux.ch")); + assert!(!contained_in("animaux.chien", "animaux.chi")); + assert!(!contained_in("animaux.chien", "animaux.chie")); + } + + #[test] + fn simple_key() { + let value: Value = json!({ + "name": "peanut", + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["name"]).into(); + assert_eq!( + res, + json!({ + "name": "peanut", + }) + ); + + let res: Value = select_values(value, vec!["age"]).into(); + assert_eq!( + res, + json!({ + "age": 8, + }) + ); + + let res: Value = select_values(value, vec!["name", "age"]).into(); + assert_eq!( + res, + json!({ + "name": "peanut", + "age": 8, + }) + ); + + let res: Value = select_values(value, vec!["race"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + + let res: Value = select_values(value, vec!["name", "age", "race"]).into(); + assert_eq!( + res, + json!({ + "name": "peanut", + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + } + + #[test] + fn complex_key() { + let value: Value = json!({ + "name": "peanut", + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["race"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + 
/// Dotted selectors pick nested fields, and selecting a parent together with one of its
/// children returns the whole parent regardless of the selector order.
#[test]
fn complex_key() {
    let value: Value = json!({
        "name": "peanut",
        "age": 8,
        "race": {
            "name": "bernese mountain",
            "avg_age": 12,
            "size": "80cm",
        }
    });
    let value: &Document = value.as_object().unwrap();

    let res: Value = select_values(value, vec!["race"]).into();
    assert_eq!(
        res,
        json!({
            "race": {
                "name": "bernese mountain",
                "avg_age": 12,
                "size": "80cm",
            }
        })
    );

    let res: Value = select_values(value, vec!["race.name"]).into();
    assert_eq!(
        res,
        json!({
            "race": {
                "name": "bernese mountain",
            }
        })
    );

    let res: Value = select_values(value, vec!["race.name", "race.size"]).into();
    assert_eq!(
        res,
        json!({
            "race": {
                "name": "bernese mountain",
                "size": "80cm",
            }
        })
    );

    // Duplicated selectors must not change the outcome.
    let res: Value = select_values(
        value,
        vec!["race.name", "race.size", "race.avg_age", "race.size", "age"],
    )
    .into();
    assert_eq!(
        res,
        json!({
            "age": 8,
            "race": {
                "name": "bernese mountain",
                "avg_age": 12,
                "size": "80cm",
            }
        })
    );

    let res: Value = select_values(value, vec!["race.name", "race"]).into();
    assert_eq!(
        res,
        json!({
            "race": {
                "name": "bernese mountain",
                "avg_age": 12,
                "size": "80cm",
            }
        })
    );

    let res: Value = select_values(value, vec!["race", "race.name"]).into();
    assert_eq!(
        res,
        json!({
            "race": {
                "name": "bernese mountain",
                "avg_age": 12,
                "size": "80cm",
            }
        })
    );
}
select_values(value, vec!["jean.race"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }) + ); + } + + #[test] + fn array_and_deep_nested() { + let value: Value = json!({ + "doggos": [ + { + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }, + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + "size": "60cm", + } + } + }, + ] + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["doggos.jean"]).into(); + assert_eq!( + res, + json!({ + "doggos": [ + { + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + } + ] + }) + ); + + let res: Value = select_values(value, vec!["doggos.marc"]).into(); + assert_eq!( + res, + json!({ + "doggos": [ + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + "size": "60cm", + } + } + } + ] + }) + ); + + let res: Value = select_values(value, vec!["doggos.marc.race"]).into(); + assert_eq!( + res, + json!({ + "doggos": [ + { + "marc": { + "race": { + "name": "golden retriever", + "size": "60cm", + } + } + } + ] + }) + ); + + let res: Value = + select_values(value, vec!["doggos.marc.race.name", "doggos.marc.age"]).into(); + + assert_eq!( + res, + json!({ + "doggos": [ + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + } + } + } + ] + }) + ); + + let res: Value = select_values( + value, + vec![ + "doggos.marc.race.name", + "doggos.marc.age", + "doggos.jean.race.name", + "other.field", + ], + ) + .into(); + + assert_eq!( + res, + json!({ + "doggos": [ + { + "jean": { + "race": { + "name": "bernese mountain", + } + } + }, + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + } + } + } + ] + }) + ); + } + + #[test] + fn all_conflict_variation() { + let value: Value = json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob" + }, + "pet": { + "dog.name": 
"michel" + }, + "pet": { + "dog": { + "name": "milan" + } + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["pet.dog.name"]).into(); + assert_eq!( + res, + json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob" + }, + "pet": { + "dog.name": "michel" + }, + "pet": { + "dog": { + "name": "milan" + } + } + }) + ); + + let value: Value = json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob", + }, + "pet": { + "dog.name": "michel", + "dog": { + "name": "milan", + } + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["pet.dog.name", "pet.dog", "pet"]).into(); + + assert_eq!( + res, + json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob", + }, + "pet": { + "dog.name": "michel", + "dog": { + "name": "milan", + } + } + }) + ); + } + + #[test] + fn map_object() { + let mut value: Value = json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }); + + map_leaf_values( + value.as_object_mut().unwrap(), + ["jean.race.name"], + |key, value| match (value, dbg!(key)) { + (Value::String(name), "jean.race.name") => *name = S("patou"), + _ => unreachable!(), + }, + ); + + assert_eq!( + value, + json!({ + "jean": { + "age": 8, + "race": { + "name": "patou", + "size": "80cm", + } + } + }) + ); + + let mut value: Value = json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + }, + "bob": "lolpied", + }); + + let mut calls = 0; + map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, value| { + calls += 1; + match (value, key) { + (Value::String(name), "jean.race.name") => *name = S("patou"), + _ => println!("Called with {key}"), + } + }); + + assert_eq!(calls, 3); + assert_eq!( + value, + json!({ + "jean": { + "age": 8, + "race": { + "name": "patou", + "size": "80cm", + } + }, + "bob": "lolpied", + }) + ); + } +}