mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-22 12:40:04 +01:00
move the flatten-serde-json crate inside of milli
This commit is contained in:
parent
ab458d8840
commit
bab898ce86
@ -1,6 +1,6 @@
|
||||
[workspace]
|
||||
resolver = "2"
|
||||
members = ["milli", "filter-parser", "http-ui", "benchmarks", "infos", "helpers", "cli"]
|
||||
members = ["milli", "filter-parser", "flatten-serde-json", "http-ui", "benchmarks", "infos", "helpers", "cli"]
|
||||
default-members = ["milli"]
|
||||
|
||||
[profile.dev]
|
||||
|
15
flatten-serde-json/Cargo.toml
Normal file
15
flatten-serde-json/Cargo.toml
Normal file
@ -0,0 +1,15 @@
|
||||
[package]
|
||||
name = "flatten-serde-json"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
description = "Flatten serde-json objects like elastic search"
|
||||
readme = "README.md"
|
||||
authors = ["Tamo tamo@meilisearch.com"]
|
||||
repository = "https://github.com/irevoire/flatten-serde-json"
|
||||
keywords = ["json", "flatten"]
|
||||
categories = ["command-line-utilities"]
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
serde_json = "1.0"
|
153
flatten-serde-json/README.md
Normal file
153
flatten-serde-json/README.md
Normal file
@ -0,0 +1,153 @@
|
||||
# Flatten serde Json
|
||||
|
||||
This crate flattens [`serde_json`](https://docs.rs/serde_json/latest/serde_json/) `Object`s into a format
|
||||
similar to [elastic search](https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html).
|
||||
|
||||
## Examples
|
||||
|
||||
### There is nothing to do
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "287947",
|
||||
"title": "Shazam!",
|
||||
"release_date": 1553299200,
|
||||
"genres": [
|
||||
"Action",
|
||||
"Comedy",
|
||||
"Fantasy"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"id": "287947",
|
||||
"title": "Shazam!",
|
||||
"release_date": 1553299200,
|
||||
"genres": [
|
||||
"Action",
|
||||
"Comedy",
|
||||
"Fantasy"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Objects
|
||||
|
||||
```json
|
||||
{
|
||||
"a": {
|
||||
"b": "c",
|
||||
"d": "e",
|
||||
"f": "g"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"a.b": "c",
|
||||
"a.d": "e",
|
||||
"a.f": "g"
|
||||
}
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Array of objects
|
||||
|
||||
```json
|
||||
{
|
||||
"a": [
|
||||
{ "b": "c" },
|
||||
{ "b": "d" },
|
||||
{ "b": "e" }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"a.b": ["c", "d", "e"]
|
||||
}
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Array of objects with normal value in the array
|
||||
|
||||
```json
|
||||
{
|
||||
"a": [
|
||||
42,
|
||||
{ "b": "c" },
|
||||
{ "b": "d" },
|
||||
{ "b": "e" }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"a": 42,
|
||||
"a.b": ["c", "d", "e"]
|
||||
}
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Array of objects of array of objects of ...
|
||||
|
||||
```json
|
||||
{
|
||||
"a": [
|
||||
"b",
|
||||
["c", "d"],
|
||||
{ "e": ["f", "g"] },
|
||||
[
|
||||
{ "h": "i" },
|
||||
{ "e": ["j", { "z": "y" }] }
|
||||
],
|
||||
["l"],
|
||||
"m"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"a": ["b", "c", "d", "l", "m"],
|
||||
"a.e": ["f", "g", "j"],
|
||||
"a.h": "i",
|
||||
"a.e.z": "y"
|
||||
}
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Collision between a generated field name and an already existing field
|
||||
|
||||
```json
|
||||
{
|
||||
"a": {
|
||||
"b": "c"
|
||||
},
|
||||
"a.b": "d"
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"a.b": ["c", "d"]
|
||||
}
|
||||
```
|
||||
|
26
flatten-serde-json/fuzz/Cargo.toml
Normal file
26
flatten-serde-json/fuzz/Cargo.toml
Normal file
@ -0,0 +1,26 @@
|
||||
[package]
|
||||
name = "flatten_serde_json-fuzz"
|
||||
version = "0.0.0"
|
||||
authors = ["Automatically generated"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
libfuzzer-sys = "0.4"
|
||||
arbitrary-json = "0.1.1"
|
||||
|
||||
[dependencies.flatten_serde_json]
|
||||
path = ".."
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[[bin]]
|
||||
name = "flatten"
|
||||
path = "fuzz_targets/flatten.rs"
|
||||
test = false
|
||||
doc = false
|
8
flatten-serde-json/fuzz/fuzz_targets/flatten.rs
Normal file
8
flatten-serde-json/fuzz/fuzz_targets/flatten.rs
Normal file
@ -0,0 +1,8 @@
|
||||
#![no_main]
|
||||
use arbitrary_json::ArbitraryObject;
|
||||
use flatten_serde_json::flatten;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
|
||||
// Fuzz entry point: every generated `ArbitraryObject` is passed by reference to
// `flatten` and the returned map is discarded, so the target only exercises the
// call itself (presumably to surface panics — the result is never inspected).
fuzz_target!(|object: ArbitraryObject| {
    let _ = flatten(&object);
});
|
264
flatten-serde-json/src/lib.rs
Normal file
264
flatten-serde-json/src/lib.rs
Normal file
@ -0,0 +1,264 @@
|
||||
#![doc = include_str!("../README.md")]
|
||||
|
||||
use serde_json::{json, Map, Value};
|
||||
|
||||
pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> {
|
||||
let mut obj = Map::new();
|
||||
insert_object(&mut obj, None, json);
|
||||
obj
|
||||
}
|
||||
|
||||
fn insert_object(
|
||||
base_json: &mut Map<String, Value>,
|
||||
base_key: Option<&str>,
|
||||
object: &Map<String, Value>,
|
||||
) {
|
||||
for (key, value) in object {
|
||||
let new_key = base_key.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
|
||||
|
||||
if let Some(array) = value.as_array() {
|
||||
insert_array(base_json, &new_key, array);
|
||||
} else if let Some(object) = value.as_object() {
|
||||
insert_object(base_json, Some(&new_key), object);
|
||||
} else {
|
||||
insert_value(base_json, &new_key, value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_array(base_json: &mut Map<String, Value>, base_key: &str, array: &Vec<Value>) {
|
||||
for value in array {
|
||||
if let Some(object) = value.as_object() {
|
||||
insert_object(base_json, Some(base_key), object);
|
||||
} else if let Some(sub_array) = value.as_array() {
|
||||
insert_array(base_json, base_key, sub_array);
|
||||
} else {
|
||||
insert_value(base_json, base_key, value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value) {
|
||||
debug_assert!(!to_insert.is_object());
|
||||
debug_assert!(!to_insert.is_array());
|
||||
|
||||
// does the field aleardy exists?
|
||||
if let Some(value) = base_json.get_mut(key) {
|
||||
// is it already an array
|
||||
if let Some(array) = value.as_array_mut() {
|
||||
array.push(to_insert);
|
||||
// or is there a collision
|
||||
} else {
|
||||
let value = std::mem::take(value);
|
||||
base_json[key] = json!([value, to_insert]);
|
||||
}
|
||||
// if it does not exist we can push the value untouched
|
||||
} else {
|
||||
base_json.insert(key.to_string(), json!(to_insert));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Moves the map out of a `json!` literal that is known to be an object.
    fn object(mut value: Value) -> Map<String, Value> {
        value.as_object_mut().map(std::mem::take).unwrap()
    }

    /// A document without nested objects must come out unchanged.
    #[test]
    fn no_flattening() {
        let json = object(json!({
            "id": "287947",
            "title": "Shazam!",
            "release_date": 1553299200,
            "genres": [
                "Action",
                "Comedy",
                "Fantasy"
            ]
        }));
        let flat = flatten(&json);

        println!(
            "got:\n{}\nexpected:\n{}\n",
            serde_json::to_string_pretty(&flat).unwrap(),
            serde_json::to_string_pretty(&json).unwrap()
        );

        assert_eq!(flat, json);
    }

    /// Keys of a nested object are prefixed with the parent key.
    #[test]
    fn flatten_object() {
        let json = object(json!({
            "a": {
                "b": "c",
                "d": "e",
                "f": "g"
            }
        }));
        let expected = object(json!({
            "a.b": "c",
            "a.d": "e",
            "a.f": "g"
        }));

        assert_eq!(flatten(&json), expected);
    }

    /// Objects inside an array are merged field by field under the array's key.
    #[test]
    fn flatten_array() {
        let json = object(json!({
            "a": [
                { "b": "c" },
                { "b": "d" },
                { "b": "e" },
            ]
        }));
        let expected = object(json!({
            "a.b": ["c", "d", "e"],
        }));

        assert_eq!(flatten(&json), expected);

        // here we must keep 42 in "a"
        let json = object(json!({
            "a": [
                42,
                { "b": "c" },
                { "b": "d" },
                { "b": "e" },
            ]
        }));
        let expected = object(json!({
            "a": 42,
            "a.b": ["c", "d", "e"],
        }));

        assert_eq!(flatten(&json), expected);
    }

    /// A generated key that collides with an existing field merges both values.
    #[test]
    fn collision_with_object() {
        let json = object(json!({
            "a": {
                "b": "c",
            },
            "a.b": "d",
        }));
        let expected = object(json!({
            "a.b": ["c", "d"],
        }));

        assert_eq!(flatten(&json), expected);
    }

    /// Same collision, but the generated key already holds an array.
    #[test]
    fn collision_with_array() {
        let json = object(json!({
            "a": [
                { "b": "c" },
                { "b": "d", "c": "e" },
                [35],
            ],
            "a.b": "f",
        }));
        let expected = object(json!({
            "a.b": ["c", "d", "f"],
            "a.c": "e",
            "a": 35,
        }));

        assert_eq!(flatten(&json), expected);
    }

    /// Arrays nested in arrays are flattened into their parent.
    #[test]
    fn flatten_nested_arrays() {
        let json = object(json!({
            "a": [
                ["b", "c"],
                { "d": "e" },
                ["f", "g"],
                [
                    { "h": "i" },
                    { "d": "j" },
                ],
                ["k", "l"],
            ]
        }));
        let expected = object(json!({
            "a": ["b", "c", "f", "g", "k", "l"],
            "a.d": ["e", "j"],
            "a.h": "i",
        }));

        assert_eq!(flatten(&json), expected);
    }

    /// Arbitrary mixes of arrays and objects are flattened recursively.
    #[test]
    fn flatten_nested_arrays_and_objects() {
        let json = object(json!({
            "a": [
                "b",
                ["c", "d"],
                { "e": ["f", "g"] },
                [
                    { "h": "i" },
                    { "e": ["j", { "z": "y" }] },
                ],
                ["l"],
                "m",
            ]
        }));
        let flat = flatten(&json);

        println!("{}", serde_json::to_string_pretty(&flat).unwrap());

        let expected = object(json!({
            "a": ["b", "c", "d", "l", "m"],
            "a.e": ["f", "g", "j"],
            "a.h": "i",
            "a.e.z": "y",
        }));

        assert_eq!(flat, expected);
    }
}
|
11
flatten-serde-json/src/main.rs
Normal file
11
flatten-serde-json/src/main.rs
Normal file
@ -0,0 +1,11 @@
|
||||
use std::io::stdin;
|
||||
|
||||
use flatten_serde_json::flatten;
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
fn main() {
|
||||
let json: Map<String, Value> = serde_json::from_reader(stdin()).unwrap();
|
||||
|
||||
let result = flatten(&json);
|
||||
println!("{}", serde_json::to_string_pretty(&result).unwrap());
|
||||
}
|
@ -14,7 +14,7 @@ crossbeam-channel = "0.5.2"
|
||||
either = "1.6.1"
|
||||
fst = "0.4.7"
|
||||
fxhash = "0.2.1"
|
||||
flatten-serde-json = "0.1.0"
|
||||
flatten-serde-json = { path = "../flatten-serde-json" }
|
||||
grenad = { version = "0.4.1", default-features = false, features = ["tempfile"] }
|
||||
geoutils = "0.4.1"
|
||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||
|
Loading…
x
Reference in New Issue
Block a user