2383: v0.27.0: bring `stable` into `main` r=Kerollmops a=curquiza

Bring `stable` into `main`

Co-authored-by: ad hoc <postma.marin@protonmail.com>
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Paul Sanders <psanders1@gmail.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>
Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
Commit b9b9cba154 by bors[bot], 2022-05-16 08:35:25 +00:00, committed by GitHub.
19 changed files with 1523 additions and 93 deletions.

Cargo.lock (generated)

@@ -402,6 +402,12 @@ version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
 
+[[package]]
+name = "big_s"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "199edb7b90631283b10c2422e6a0bc8b7d987bf732995ba1de53b576c97e51a8"
+
 [[package]]
 name = "bimap"
 version = "0.6.2"
@@ -1086,8 +1092,8 @@ dependencies = [
 
 [[package]]
 name = "filter-parser"
-version = "0.1.0"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841"
+version = "0.26.4"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310"
 dependencies = [
  "nom",
  "nom_locate",
@@ -1113,8 +1119,8 @@ dependencies = [
 
 [[package]]
 name = "flatten-serde-json"
-version = "0.1.0"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841"
+version = "0.26.4"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310"
 dependencies = [
  "serde_json",
 ]
@@ -1614,6 +1620,14 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "json-depth-checker"
+version = "0.26.4"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310"
+dependencies = [
+ "serde_json",
+]
+
 [[package]]
 name = "jsonwebtoken"
 version = "8.0.1"
@@ -1936,7 +1950,7 @@ checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f"
 
 [[package]]
 name = "meilisearch-auth"
-version = "0.26.0"
+version = "0.27.0"
 dependencies = [
  "enum-iterator",
  "meilisearch-error",
@@ -1951,7 +1965,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-error"
-version = "0.26.0"
+version = "0.27.0"
 dependencies = [
  "actix-web",
  "proptest",
@@ -1962,7 +1976,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-http"
-version = "0.26.0"
+version = "0.27.0"
 dependencies = [
  "actix-cors",
  "actix-rt",
@@ -2034,7 +2048,7 @@ dependencies = [
 
 [[package]]
 name = "meilisearch-lib"
-version = "0.26.0"
+version = "0.27.0"
 dependencies = [
  "actix-rt",
  "actix-web",
@@ -2137,8 +2151,8 @@ dependencies = [
 
 [[package]]
 name = "milli"
-version = "0.26.0"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841"
+version = "0.26.4"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310"
 dependencies = [
  "bimap",
  "bincode",
@@ -2156,6 +2170,7 @@ dependencies = [
  "grenad",
  "heed",
  "itertools",
+ "json-depth-checker",
  "levenshtein_automata",
  "log",
  "logging_timer",
@@ -2172,6 +2187,7 @@ dependencies = [
  "slice-group-by",
  "smallstr",
  "smallvec",
+ "smartstring",
  "tempfile",
  "time 0.3.9",
  "uuid",
@@ -2476,9 +2492,8 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
 
 [[package]]
 name = "permissive-json-pointer"
 version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2125f5fc44a45ffd265ce6ab343842f71df469d173f923f234e3a8df7a8f1ba6"
 dependencies = [
+ "big_s",
  "serde_json",
 ]
@@ -3174,6 +3189,17 @@ version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
 
+[[package]]
+name = "smartstring"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29"
+dependencies = [
+ "autocfg",
+ "static_assertions",
+ "version_check",
+]
+
 [[package]]
 name = "socket2"
 version = "0.4.4"
@@ -3216,6 +3242,12 @@ dependencies = [
  "path-slash",
 ]
 
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
 [[package]]
 name = "strsim"
 version = "0.10.0"


@@ -5,4 +5,5 @@ members = [
     "meilisearch-error",
     "meilisearch-lib",
     "meilisearch-auth",
+    "permissive-json-pointer",
 ]


@@ -1,12 +1,12 @@
 [package]
 name = "meilisearch-auth"
-version = "0.26.0"
+version = "0.27.0"
 edition = "2021"
 
 [dependencies]
 enum-iterator = "0.7.0"
 meilisearch-error = { path = "../meilisearch-error" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" }
 rand = "0.8.4"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = { version = "1.0.79", features = ["preserve_order"] }


@@ -1,6 +1,6 @@
 [package]
 name = "meilisearch-error"
-version = "0.26.0"
+version = "0.27.0"
 authors = ["marin <postma.marin@protonmail.com>"]
 edition = "2021"


@@ -4,7 +4,7 @@ description = "Meilisearch HTTP server"
 edition = "2021"
 license = "MIT"
 name = "meilisearch-http"
-version = "0.26.0"
+version = "0.27.0"
 
 [[bin]]
 name = "meilisearch"
@@ -103,5 +103,5 @@ mini-dashboard = [
 tikv-jemallocator = "0.4.3"
 
 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.9/build.zip"
-sha1 = "b1833c3e5dc6b5d9d519ae4834935ae6c8a47024"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.10/build.zip"
+sha1 = "1adf96592c267425c110bfefc36b7fc6bfb0f93d"


@@ -8,7 +8,10 @@ use actix_web::http::header::USER_AGENT;
 use actix_web::HttpRequest;
 use http::header::CONTENT_TYPE;
 use meilisearch_auth::SearchRules;
-use meilisearch_lib::index::{SearchQuery, SearchResult};
+use meilisearch_lib::index::{
+    SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
+    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+};
 use meilisearch_lib::index_controller::Stats;
 use meilisearch_lib::MeiliSearch;
 use once_cell::sync::Lazy;
@@ -355,6 +358,13 @@ pub struct SearchAggregator {
     // pagination
     max_limit: usize,
     max_offset: usize,
+
+    // formatting
+    highlight_pre_tag: bool,
+    highlight_post_tag: bool,
+    crop_marker: bool,
+    matches: bool,
+    crop_length: bool,
 }
 
 impl SearchAggregator {
@@ -405,6 +415,12 @@ impl SearchAggregator {
         ret.max_limit = query.limit;
         ret.max_offset = query.offset.unwrap_or_default();
 
+        ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG;
+        ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG;
+        ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER;
+        ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH;
+        ret.matches = query.matches;
+
         ret
     }
@@ -452,6 +468,12 @@ impl SearchAggregator {
         // pagination
         self.max_limit = self.max_limit.max(other.max_limit);
         self.max_offset = self.max_offset.max(other.max_offset);
+
+        self.highlight_pre_tag |= other.highlight_pre_tag;
+        self.highlight_post_tag |= other.highlight_post_tag;
+        self.crop_marker |= other.crop_marker;
+        self.matches |= other.matches;
+        self.crop_length |= other.crop_length;
     }
 
     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -489,6 +511,13 @@ impl SearchAggregator {
                 "max_limit": self.max_limit,
                 "max_offset": self.max_offset,
             },
+            "formatting": {
+                "highlight_pre_tag": self.highlight_pre_tag,
+                "highlight_post_tag": self.highlight_post_tag,
+                "crop_marker": self.crop_marker,
+                "matches": self.matches,
+                "crop_length": self.crop_length,
+            },
         });
 
         Some(Track {


@@ -19,6 +19,7 @@ use serde::Serialize;
 const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
 
 #[derive(Debug, Clone, Parser, Serialize)]
+#[clap(version)]
 pub struct Opt {
     /// The destination where the database must be created.
     #[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]


@@ -21,11 +21,11 @@ macro_rules! make_setting_route {
         use meilisearch_lib::milli::update::Setting;
         use meilisearch_lib::{index::Settings, index_controller::Update, MeiliSearch};
 
-        use crate::analytics::Analytics;
-        use crate::extractors::authentication::{policies::*, GuardedData};
-        use crate::extractors::sequential_extractor::SeqHandler;
-        use crate::task::SummarizedTaskView;
         use meilisearch_error::ResponseError;
+        use $crate::analytics::Analytics;
+        use $crate::extractors::authentication::{policies::*, GuardedData};
+        use $crate::extractors::sequential_extractor::SeqHandler;
+        use $crate::task::SummarizedTaskView;
 
         pub async fn delete(
             meilisearch: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, MeiliSearch>,
@@ -145,8 +145,8 @@ make_setting_route!(
     "SortableAttributes Updated".to_string(),
     json!({
         "sortable_attributes": {
-            "total": setting.as_ref().map(|sort| sort.len()).unwrap_or(0),
-            "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false),
+            "total": setting.as_ref().map(|sort| sort.len()),
+            "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")),
         },
     }),
     Some(req),
@@ -162,10 +162,44 @@ make_setting_route!(
 );
 
 make_setting_route!(
-    "/typo",
+    "/typo-tolerance",
     meilisearch_lib::index::updates::TypoSettings,
-    typo,
-    "typo"
+    typo_tolerance,
+    "typoTolerance",
+    analytics,
+    |setting: &Option<meilisearch_lib::index::updates::TypoSettings>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "TypoTolerance Updated".to_string(),
+            json!({
+                "typo_tolerance": {
+                    "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
+                    "disable_on_attributes": setting
+                        .as_ref()
+                        .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
+                    "disable_on_words": setting
+                        .as_ref()
+                        .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
+                    "min_word_size_for_one_typo": setting
+                        .as_ref()
+                        .and_then(|s| s.min_word_size_for_typos
+                            .as_ref()
+                            .set()
+                            .map(|s| s.one_typo.set()))
+                        .flatten(),
+                    "min_word_size_for_two_typos": setting
+                        .as_ref()
+                        .and_then(|s| s.min_word_size_for_typos
+                            .as_ref()
+                            .set()
+                            .map(|s| s.two_typos.set()))
+                        .flatten(),
+                },
+            }),
+            Some(req),
+        );
+    }
 );
 
 make_setting_route!(
@@ -181,7 +215,7 @@ make_setting_route!(
     "SearchableAttributes Updated".to_string(),
     json!({
         "searchable_attributes": {
-            "total": setting.as_ref().map(|searchable| searchable.len()).unwrap_or(0),
+            "total": setting.as_ref().map(|searchable| searchable.len()),
         },
     }),
     Some(req),
@@ -254,7 +288,7 @@ generate_configure!(
     stop_words,
     synonyms,
     ranking_rules,
-    typo
+    typo_tolerance
 );
 
 pub async fn update_all(
@@ -273,15 +307,46 @@ pub async fn update_all(
                 "sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().position(|s| s == "sort")),
             },
             "searchable_attributes": {
-                "total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()).unwrap_or(0),
+                "total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()),
             },
             "sortable_attributes": {
-                "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()).unwrap_or(0),
-                "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")).unwrap_or(false),
+                "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()),
+                "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")),
             },
             "filterable_attributes": {
-                "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()).unwrap_or(0),
-                "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")).unwrap_or(false),
+                "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()),
+                "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")),
+            },
+            "typo_tolerance": {
+                "enabled": settings.typo_tolerance
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.enabled.as_ref().set())
+                    .copied(),
+                "disable_on_attributes": settings.typo_tolerance
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
+                "disable_on_words": settings.typo_tolerance
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
+                "min_word_size_for_one_typo": settings.typo_tolerance
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.min_word_size_for_typos
+                        .as_ref()
+                        .set()
+                        .map(|s| s.one_typo.set()))
+                    .flatten(),
+                "min_word_size_for_two_typos": settings.typo_tolerance
+                    .as_ref()
+                    .set()
+                    .and_then(|s| s.min_word_size_for_typos
+                        .as_ref()
+                        .set()
+                        .map(|s| s.two_typos.set()))
+                    .flatten(),
             },
         }),
         Some(&req),


@@ -0,0 +1,376 @@
use super::*;
use crate::common::Server;
use serde_json::json;
#[actix_rt::test]
async fn formatted_contain_wildcard() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({ "displayedAttributes": ["id", "cattos"] }))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
"cattos": "<em>pesti</em>",
}
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": ["*"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pesti",
})
);
let (response, code) = index
.search_post(
json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"] }),
)
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pesti",
"_formatted": {
"id": "852",
"cattos": "pesti",
}
})
);
let (response, code) = index
.search_post(
json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }),
)
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pesti",
"_formatted": {
"id": "852",
"cattos": "pesti",
}
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToCrop": ["*"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pesti",
"_formatted": {
"id": "852",
"cattos": "pesti",
}
})
);
}
#[actix_rt::test]
async fn format_nested() {
let server = Server::new().await;
let index = server.index("test");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
"age": 2,
},
{
"name": "buddy",
"age": 4,
},
],
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
},
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
},
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2",
},
{
"name": "buddy",
"age": "4",
},
],
},
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2",
},
{
"name": "buddy",
"age": "4",
},
],
},
})
);
}
#[actix_rt::test]
async fn displayedattr_2_smol() {
let server = Server::new().await;
let index = server.index("test");
// not enough displayed for the other settings
index
.update_settings(json!({ "displayedAttributes": ["id"] }))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = index
.search_post(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
let (response, code) = index
.search_post(json!({ "attributesToRetrieve": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
let (response, code) = index
.search_post(json!({ "attributesToHighlight": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
let (response, code) = index
.search_post(json!({ "attributesToCrop": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
let (response, code) = index
.search_post(json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
let (response, code) = index
.search_post(json!({ "attributesToHighlight": ["cattos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
let (response, code) = index
.search_post(json!({ "attributesToCrop": ["cattos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
let (response, code) = index
.search_post(json!({ "attributesToRetrieve": ["cattos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"][0], json!({}));
let (response, code) = index
.search_post(
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }),
)
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"][0], json!({}));
let (response, code) = index
.search_post(json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
}
})
);
let (response, code) = index
.search_post(json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
}
})
);
}


@@ -2,12 +2,13 @@
 // should be tested in its own module to isolate tests and keep the tests readable.
 
 mod errors;
+mod formatted;
 
 use crate::common::Server;
 use once_cell::sync::Lazy;
 use serde_json::{json, Value};
 
-static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     json!([
         {
             "title": "Shazam!",
@@ -32,7 +33,7 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     ])
 });
 
-static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     json!([
         {
             "id": 852,
@@ -496,7 +497,7 @@ async fn search_facet_distribution() {
     assert_eq!(code, 200, "{}", response);
     let dist = response["facetsDistribution"].as_object().unwrap();
     dbg!(&dist);
-    assert_eq!(dist.len(), 2);
+    assert_eq!(dist.len(), 3);
     assert_eq!(
         dist["doggos.name"],
         json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})


@@ -1,6 +1,6 @@
 [package]
 name = "meilisearch-lib"
-version = "0.26.0"
+version = "0.27.0"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -30,13 +30,13 @@ lazy_static = "1.4.0"
 log = "0.4.14"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-error = { path = "../meilisearch-error" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" }
 mime = "0.3.16"
 num_cpus = "1.13.1"
 obkv = "0.2.0"
 once_cell = "1.10.0"
 parking_lot = "0.12.0"
-permissive-json-pointer = "0.2.0"
+permissive-json-pointer = { path = "../permissive-json-pointer" }
 rand = "0.8.5"
 rayon = "1.5.1"
 regex = "1.5.5"


@@ -18,7 +18,7 @@ use crate::EnvSizer;
 use super::error::IndexError;
 use super::error::Result;
-use super::updates::{MinWordLengthTypoSetting, TypoSettings};
+use super::updates::{MinWordSizeTyposSetting, TypoSettings};
 use super::{Checked, Settings};
 
 pub type Document = Map<String, Value>;
@@ -170,7 +170,7 @@ impl Index {
             })
             .collect();
 
-        let min_typo_word_len = MinWordLengthTypoSetting {
+        let min_typo_word_len = MinWordSizeTyposSetting {
             one_typo: Setting::Set(self.min_word_len_one_typo(txn)?),
             two_typos: Setting::Set(self.min_word_len_two_typos(txn)?),
         };
@@ -190,7 +190,7 @@ impl Index {
         let typo_tolerance = TypoSettings {
             enabled: Setting::Set(self.authorize_typos(txn)?),
-            min_word_length_for_typo: Setting::Set(min_typo_word_len),
+            min_word_size_for_typos: Setting::Set(min_typo_word_len),
             disable_on_words: Setting::Set(disabled_words),
             disable_on_attributes: Setting::Set(disabled_attributes),
         };
@@ -213,7 +213,7 @@ impl Index {
                 None => Setting::Reset,
             },
             synonyms: Setting::Set(synonyms),
-            typo: Setting::Set(typo_tolerance),
+            typo_tolerance: Setting::Set(typo_tolerance),
             _kind: PhantomData,
         })
     }


@@ -1,6 +1,7 @@
 pub use search::{
     default_crop_length, default_crop_marker, default_highlight_post_tag,
-    default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT,
+    default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
+    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
 };
 pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};


@@ -4,7 +4,6 @@ use std::str::FromStr;
 use std::time::Instant;
 
 use either::Either;
-use indexmap::IndexMap;
 use milli::tokenizer::{Analyzer, AnalyzerConfig, Token};
 use milli::{AscDesc, FieldId, FieldsIdsMap, Filter, MatchingWords, SortError};
 use regex::Regex;
@@ -16,7 +15,7 @@ use crate::index::error::FacetError;
 use super::error::{IndexError, Result};
 use super::index::Index;
 
-pub type Document = IndexMap<String, Value>;
+pub type Document = serde_json::Map<String, Value>;
 type MatchesInfo = BTreeMap<String, Vec<MatchInfo>>;
 
 #[derive(Serialize, Debug, Clone, PartialEq)]
@@ -35,17 +34,17 @@ pub const fn default_crop_length() -> usize {
     DEFAULT_CROP_LENGTH
 }
 
-const DEFAULT_CROP_MARKER: &str = "";
+pub const DEFAULT_CROP_MARKER: &str = "";
 pub fn default_crop_marker() -> String {
     DEFAULT_CROP_MARKER.to_string()
 }
 
-const DEFAULT_HIGHLIGHT_PRE_TAG: &str = "<em>";
+pub const DEFAULT_HIGHLIGHT_PRE_TAG: &str = "<em>";
 pub fn default_highlight_pre_tag() -> String {
     DEFAULT_HIGHLIGHT_PRE_TAG.to_string()
 }
 
-const DEFAULT_HIGHLIGHT_POST_TAG: &str = "</em>";
+pub const DEFAULT_HIGHLIGHT_POST_TAG: &str = "</em>";
 pub fn default_highlight_post_tag() -> String {
     DEFAULT_HIGHLIGHT_POST_TAG.to_string()
 }
@@ -233,14 +232,22 @@ impl Index {
         let documents_iter = self.documents(&rtxn, documents_ids)?;
 
         for (_id, obkv) in documents_iter {
-            let mut document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?;
+            // First generate a document with all the displayed fields
+            let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
+
+            // select the attributes to retrieve
+            let attributes_to_retrieve = to_retrieve_ids
+                .iter()
+                .map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
+            let mut document =
+                permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
 
             let matches_info = query
                 .matches
                 .then(|| compute_matches(&matching_words, &document, &analyzer));
 
             let formatted = format_fields(
-                &document,
+                &displayed_document,
                 &fields_ids_map,
                 &formatter,
                 &matching_words,
@@ -476,7 +483,7 @@ fn add_non_formatted_ids_to_formatted_options(
 }
 
 fn make_document(
-    attributes_to_retrieve: &BTreeSet<FieldId>,
+    displayed_attributes: &BTreeSet<FieldId>,
     field_ids_map: &FieldsIdsMap,
     obkv: obkv::KvReaderU16,
 ) -> Result<Document> {
@@ -494,15 +501,11 @@ fn make_document(
     }
 
     // select the attributes to retrieve
-    let attributes_to_retrieve = attributes_to_retrieve
+    let displayed_attributes = displayed_attributes
         .iter()
         .map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
 
-    let document = permissive_json_pointer::select_values(&document, attributes_to_retrieve);
-
-    // then we need to convert the `serde_json::Map` into an `IndexMap`.
-    let document = document.into_iter().collect();
+    let document = permissive_json_pointer::select_values(&document, displayed_attributes);
 
     Ok(document)
 }
@@ -513,20 +516,13 @@ fn format_fields<A: AsRef<[u8]>>(
     matching_words: &impl Matcher,
     formatted_options: &BTreeMap<FieldId, FormatOptions>,
 ) -> Result<Document> {
-    // Convert the `IndexMap` into a `serde_json::Map`.
-    let document = document
-        .iter()
-        .map(|(k, v)| (k.clone(), v.clone()))
-        .collect();
-
     let selectors: Vec<_> = formatted_options
         .keys()
         // This unwrap must be safe since we got the ids from the fields_ids_map just
         // before.
         .map(|&fid| field_ids_map.name(fid).unwrap())
        .collect();
-
-    let mut document = permissive_json_pointer::select_values(&document, selectors.iter().copied());
+    let mut document = permissive_json_pointer::select_values(document, selectors.iter().copied());
 
     permissive_json_pointer::map_leaf_values(&mut document, selectors, |key, value| {
         // To get the formatting option of each key we need to see all the rules that applies
@@ -542,13 +538,9 @@ fn format_fields<A: AsRef<[u8]>>(
             .fold(FormatOptions::default(), |acc, (_, option)| {
                 acc.merge(*option)
             });
-        // TODO: remove this useless clone
-        *value = formatter.format_value(value.clone(), matching_words, format);
+        *value = formatter.format_value(std::mem::take(value), matching_words, format);
     });
 
-    // we need to convert back the `serde_json::Map` into an `IndexMap`.
-    let document = document.into_iter().collect();
-
     Ok(document)
 }


@@ -41,7 +41,7 @@ pub struct Unchecked;
 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
-pub struct MinWordLengthTypoSetting {
+pub struct MinWordSizeTyposSetting {
     #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     pub one_typo: Setting<u8>,
@@ -60,7 +60,7 @@ pub struct TypoSettings {
     pub enabled: Setting<bool>,
     #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    pub min_word_length_for_typo: Setting<MinWordLengthTypoSetting>,
+    pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
     #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     pub disable_on_words: Setting<BTreeSet<String>>,
@@ -113,7 +113,7 @@ pub struct Settings<T> {
     pub distinct_attribute: Setting<String>,
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
-    pub typo: Setting<TypoSettings>,
+    pub typo_tolerance: Setting<TypoSettings>,
 
     #[serde(skip)]
     pub _kind: PhantomData<T>,
@@ -130,7 +130,7 @@ impl Settings<Checked> {
             stop_words: Setting::Reset,
             synonyms: Setting::Reset,
             distinct_attribute: Setting::Reset,
-            typo: Setting::Reset,
+            typo_tolerance: Setting::Reset,
             _kind: PhantomData,
         }
    }
@@ -145,7 +145,7 @@ impl Settings<Checked> {
             stop_words,
             synonyms,
             distinct_attribute,
-            typo: typo_tolerance,
+            typo_tolerance,
             ..
         } = self;
@@ -158,7 +158,7 @@ impl Settings<Checked> {
             stop_words,
             synonyms,
             distinct_attribute,
-            typo: typo_tolerance,
+            typo_tolerance,
             _kind: PhantomData,
         }
     }
@@ -197,7 +197,7 @@ impl Settings<Unchecked> {
             stop_words: self.stop_words,
             synonyms: self.synonyms,
             distinct_attribute: self.distinct_attribute,
-            typo: self.typo,
+            typo_tolerance: self.typo_tolerance,
             _kind: PhantomData,
         }
     }
@@ -373,7 +373,7 @@ pub fn apply_settings_to_builder(
         Setting::NotSet => (),
     }
 
-    match settings.typo {
+    match settings.typo_tolerance {
         Setting::Set(ref value) => {
             match value.enabled {
                 Setting::Set(val) => builder.set_autorize_typos(val),
@@ -381,7 +381,7 @@ pub fn apply_settings_to_builder(
                 Setting::NotSet => (),
             }
 
-            match value.min_word_length_for_typo {
+            match value.min_word_size_for_typos {
                 Setting::Set(ref setting) => {
                     match setting.one_typo {
                         Setting::Set(val) => builder.set_min_word_len_one_typo(val),
@@ -455,7 +455,7 @@ pub(crate) mod test {
             stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
-            typo: Setting::NotSet,
+            typo_tolerance: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };
@@ -477,7 +477,7 @@ pub(crate) mod test {
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
-            typo: Setting::NotSet,
+            typo_tolerance: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };
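
Putting the renames above together: the setting is now exposed as `typoTolerance` (sub-route `/typo-tolerance`), and `#[serde(rename_all = "camelCase")]` applies to the nested structs as well. A small sketch of the implied payload shape, with made-up values; only the key names follow from the serde attributes on `TypoSettings` and `MinWordSizeTyposSetting` shown above:

```rust
// Sketch only: key names follow from the `rename_all = "camelCase"` attributes
// on `TypoSettings` / `MinWordSizeTyposSetting`; the values are illustrative.
use serde_json::json;

fn main() {
    let typo_tolerance = json!({
        "enabled": true,
        "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 },
        "disableOnWords": ["skyscraper"],
        "disableOnAttributes": ["serial_number"]
    });
    println!("{}", serde_json::to_string_pretty(&typo_tolerance).unwrap());
}
```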


@@ -178,15 +178,6 @@ impl IndexControllerBuilder {
             .max_task_store_size
             .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
 
-        let db_exists = db_path.as_ref().exists();
-        if db_exists {
-            // Directory could be pre-created without any database in.
-            let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
-            if !db_is_empty {
-                versioning::check_version_file(db_path.as_ref())?;
-            }
-        }
-
         if let Some(ref path) = self.import_snapshot {
             log::info!("Loading from snapshot {:?}", path);
             load_snapshot(
@@ -207,6 +198,15 @@ impl IndexControllerBuilder {
             )?;
         }
 
+        let db_exists = db_path.as_ref().exists();
+        if db_exists {
+            // Directory could be pre-created without any database in.
+            let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
+            if !db_is_empty {
+                versioning::check_version_file(db_path.as_ref())?;
+            }
+        }
+
         std::fs::create_dir_all(db_path.as_ref())?;
 
         let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?);


@@ -0,0 +1,12 @@
[package]
name = "permissive-json-pointer"
version = "0.2.0"
edition = "2021"
description = "A permissive json pointer"
readme = "README.md"
[dependencies]
serde_json = "1.0"
[dev-dependencies]
big_s = "1.0"


@@ -0,0 +1,134 @@
# Permissive json pointer
This crate provides an interface a little bit similar to what you know as a “json pointer”.
But it's actually doing something quite different.
## The API
The crate provides only one function, called [`select_values`].
It takes an object and a list of selectors as parameters.
It then returns a new object containing only the fields you selected.
## The selectors
The syntax for the selectors is simpler than with other APIs.
There is only ONE special symbol, and it's the `.`.
If you write `dog` and provide the following object;
```json
{
"dog": "bob",
"cat": "michel"
}
```
You'll get back;
```json
{
"dog": "bob",
}
```
Easy right?
Now the dot can either be part of a field name, or mark a nested object.
For example, if you have the following json;
```json
{
"dog.name": "jean",
"dog": {
"name": "bob",
"age": 6
}
}
```
What a crappy json! But never underestimate your users, they [_WILL_](https://xkcd.com/1172/)
somehow base their entire workflow on this kind of json.
Here, with the `dog.name` selector, both fields will be
selected and the following json will be returned;
```json
{
"dog.name": "jean",
"dog": {
"name": "bob",
}
}
```
And as you can guess, this crate is as permissive as possible.
It'll match everything it can!
Consider this even more crappy json;
```json
{
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob"
},
"pet": {
"dog.name": "michel"
},
"pet": {
"dog": {
"name": "milan"
}
}
}
```
If you write `pet.dog.name` everything will be selected.
## Matching arrays
With this kind of selector you can't match a specific element in an array.
Your selector will be applied to all the elements _in_ the array.
Consider the following json;
```json
{
"pets": [
{
"animal": "dog",
"race": "bernese mountain",
},
{
"animal": "dog",
"race": "golden retriever",
},
{
"animal": "cat",
"age": 8,
}
]
}
```
With the filter `pets.animal` you'll get;
```json
{
"pets": [
{
"animal": "dog",
},
{
"animal": "dog",
},
{
"animal": "cat",
}
]
}
```
Elements of an array that end up empty are removed. So if you were to look
for `pets.age` you would only get;
```json
{
"pets": [
{
"age": 8,
}
]
}
```
And I think that's all you need to know 🎉
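
For a quick feel of the behaviour described above, here is a minimal usage sketch of `select_values` (together with the companion `map_leaf_values` that the crate also exports); the input JSON is made up and mirrors the `dog.name` example earlier in this README:

```rust
use permissive_json_pointer::{map_leaf_values, select_values};
use serde_json::{json, Value};

fn main() {
    let mut value: Value = json!({
        "dog.name": "jean",
        "dog": { "name": "bob", "age": 6 }
    });

    // `dog.name` permissively selects both the flat key and the nested field.
    let selected = select_values(value.as_object().unwrap(), ["dog.name"]);
    assert_eq!(
        Value::from(selected),
        json!({ "dog.name": "jean", "dog": { "name": "bob" } })
    );

    // Rewrite only the leaves reached by the selector, in place.
    map_leaf_values(value.as_object_mut().unwrap(), ["dog.name"], |key, leaf| {
        if let Value::String(s) = leaf {
            *s = format!("{s} (matched by {key})");
        }
    });
}
```

Both calls behave exactly as the selector rules above describe: anything the selector can reach is kept (or mapped), everything else is left out (or untouched).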


@@ -0,0 +1,786 @@
#![doc = include_str!("../README.md")]
use std::collections::HashSet;
use serde_json::*;
type Document = Map<String, Value>;
const SPLIT_SYMBOL: char = '.';
/// Returns `true` if the `selector` match the `key`.
///
/// ```text
/// Example:
/// `animaux` match `animaux`
/// `animaux.chien` match `animaux`
/// `animaux.chien` match `animaux`
/// `animaux.chien.nom` match `animaux`
/// `animaux.chien.nom` match `animaux.chien`
/// -----------------------------------------
/// `animaux` doesn't match `animaux.chien`
/// `animaux.` doesn't match `animaux`
/// `animaux.ch` doesn't match `animaux.chien`
/// `animau` doesn't match `animaux`
/// ```
fn contained_in(selector: &str, key: &str) -> bool {
selector.starts_with(key)
&& selector[key.len()..]
.chars()
.next()
.map(|c| c == SPLIT_SYMBOL)
.unwrap_or(true)
}
/// Map the selected leaf values of a json allowing you to update only the fields that were selected.
/// ```
/// use serde_json::{Value, json};
/// use permissive_json_pointer::map_leaf_values;
///
/// let mut value: Value = json!({
/// "jean": {
/// "age": 8,
/// "race": {
/// "name": "bernese mountain",
/// "size": "80cm",
/// }
/// }
/// });
/// map_leaf_values(
/// value.as_object_mut().unwrap(),
/// ["jean.race.name"],
/// |key, value| match (value, dbg!(key)) {
/// (Value::String(name), "jean.race.name") => *name = "patou".to_string(),
/// _ => unreachable!(),
/// },
/// );
/// assert_eq!(
/// value,
/// json!({
/// "jean": {
/// "age": 8,
/// "race": {
/// "name": "patou",
/// "size": "80cm",
/// }
/// }
/// })
/// );
/// ```
pub fn map_leaf_values<'a>(
value: &mut Map<String, Value>,
selectors: impl IntoIterator<Item = &'a str>,
mut mapper: impl FnMut(&str, &mut Value),
) {
let selectors: Vec<_> = selectors.into_iter().collect();
map_leaf_values_in_object(value, &selectors, "", &mut mapper);
}
pub fn map_leaf_values_in_object<'a>(
value: &mut Map<String, Value>,
selectors: &[&'a str],
base_key: &str,
mapper: &mut impl FnMut(&str, &mut Value),
) {
for (key, value) in value.iter_mut() {
let base_key = if base_key.is_empty() {
key.to_string()
} else {
format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
};
// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
// so we check the contained_in on both side
let should_continue = selectors
.iter()
.any(|selector| contained_in(selector, &base_key) || contained_in(&base_key, selector));
if should_continue {
match value {
Value::Object(object) => {
map_leaf_values_in_object(object, selectors, &base_key, mapper)
}
Value::Array(array) => {
map_leaf_values_in_array(array, selectors, &base_key, mapper)
}
value => mapper(&base_key, value),
}
}
}
}
pub fn map_leaf_values_in_array(
values: &mut [Value],
selectors: &[&str],
base_key: &str,
mapper: &mut impl FnMut(&str, &mut Value),
) {
for value in values.iter_mut() {
match value {
Value::Object(object) => map_leaf_values_in_object(object, selectors, base_key, mapper),
Value::Array(array) => map_leaf_values_in_array(array, selectors, base_key, mapper),
value => mapper(base_key, value),
}
}
}
/// Permissively selects values in a json with a list of selectors.
/// Returns a new json containing all the selected fields.
/// ```
/// use serde_json::*;
/// use permissive_json_pointer::select_values;
///
/// let value: Value = json!({
/// "name": "peanut",
/// "age": 8,
/// "race": {
/// "name": "bernese mountain",
/// "avg_age": 12,
/// "size": "80cm",
/// },
/// });
/// let value: &Map<String, Value> = value.as_object().unwrap();
///
/// let res: Value = select_values(value, vec!["name", "race.name"]).into();
/// assert_eq!(
/// res,
/// json!({
/// "name": "peanut",
/// "race": {
/// "name": "bernese mountain",
/// },
/// })
/// );
/// ```
pub fn select_values<'a>(
value: &Map<String, Value>,
selectors: impl IntoIterator<Item = &'a str>,
) -> Map<String, Value> {
let selectors = selectors.into_iter().collect();
create_value(value, selectors)
}
fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document {
let mut new_value: Document = Map::new();
for (key, value) in value.iter() {
// first we insert all the key at the root level
if selectors.contains(key as &str) {
new_value.insert(key.to_string(), value.clone());
// if the key was simple we can delete it and move to
// the next key
if is_simple(key) {
selectors.remove(key as &str);
continue;
}
}
// we extract all the sub selectors matching the current field
// if there was [person.name, person.age] and if we are on the field
// `person`. Then we generate the following sub selectors: [name, age].
let sub_selectors: HashSet<&str> = selectors
.iter()
.filter(|s| contained_in(s, key))
.filter_map(|s| s.trim_start_matches(key).get(SPLIT_SYMBOL.len_utf8()..))
.collect();
if !sub_selectors.is_empty() {
match value {
Value::Array(array) => {
let array = create_array(array, &sub_selectors);
if !array.is_empty() {
new_value.insert(key.to_string(), array.into());
}
}
Value::Object(object) => {
let object = create_value(object, sub_selectors);
if !object.is_empty() {
new_value.insert(key.to_string(), object.into());
}
}
_ => (),
}
}
}
new_value
}
fn create_array(array: &Vec<Value>, selectors: &HashSet<&str>) -> Vec<Value> {
let mut res = Vec::new();
for value in array {
match value {
Value::Array(array) => {
let array = create_array(array, selectors);
if !array.is_empty() {
res.push(array.into());
}
}
Value::Object(object) => {
let object = create_value(object, selectors.clone());
if !object.is_empty() {
res.push(object.into());
}
}
_ => (),
}
}
res
}
fn is_simple(key: impl AsRef<str>) -> bool {
!key.as_ref().contains(SPLIT_SYMBOL)
}
#[cfg(test)]
mod tests {
use big_s::S;
use super::*;
#[test]
fn test_contained_in() {
assert!(contained_in("animaux", "animaux"));
assert!(contained_in("animaux.chien", "animaux"));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien.race.bouvier bernois"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien.race.bouvier bernois.fourrure"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien.race.bouvier bernois.fourrure.couleur"
));
// -- the wrongs
assert!(!contained_in("chien", "chat"));
assert!(!contained_in("animaux", "animaux.chien"));
assert!(!contained_in("animaux.chien", "animaux.chat"));
// -- the strange edge cases
assert!(!contained_in("animaux.chien", "anima"));
assert!(!contained_in("animaux.chien", "animau"));
assert!(!contained_in("animaux.chien", "animaux."));
assert!(!contained_in("animaux.chien", "animaux.c"));
assert!(!contained_in("animaux.chien", "animaux.ch"));
assert!(!contained_in("animaux.chien", "animaux.chi"));
assert!(!contained_in("animaux.chien", "animaux.chie"));
}
#[test]
fn simple_key() {
let value: Value = json!({
"name": "peanut",
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["name"]).into();
assert_eq!(
res,
json!({
"name": "peanut",
})
);
let res: Value = select_values(value, vec!["age"]).into();
assert_eq!(
res,
json!({
"age": 8,
})
);
let res: Value = select_values(value, vec!["name", "age"]).into();
assert_eq!(
res,
json!({
"name": "peanut",
"age": 8,
})
);
let res: Value = select_values(value, vec!["race"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
let res: Value = select_values(value, vec!["name", "age", "race"]).into();
assert_eq!(
res,
json!({
"name": "peanut",
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
}
#[test]
fn complex_key() {
let value: Value = json!({
"name": "peanut",
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["race"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
println!("RIGHTBEFORE");
let res: Value = select_values(value, vec!["race.name"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
}
})
);
let res: Value = select_values(value, vec!["race.name", "race.size"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"size": "80cm",
}
})
);
let res: Value = select_values(
value,
vec!["race.name", "race.size", "race.avg_age", "race.size", "age"],
)
.into();
assert_eq!(
res,
json!({
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
let res: Value = select_values(value, vec!["race.name", "race"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
let res: Value = select_values(value, vec!["race", "race.name"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
}
#[test]
fn multi_level_nested() {
let value: Value = json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["jean"]).into();
assert_eq!(
res,
json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
})
);
let res: Value = select_values(value, vec!["jean.age"]).into();
assert_eq!(
res,
json!({
"jean": {
"age": 8,
}
})
);
let res: Value = select_values(value, vec!["jean.race.size"]).into();
assert_eq!(
res,
json!({
"jean": {
"race": {
"size": "80cm",
}
}
})
);
let res: Value = select_values(value, vec!["jean.race.name", "jean.age"]).into();
assert_eq!(
res,
json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
}
}
})
);
let res: Value = select_values(value, vec!["jean.race"]).into();
assert_eq!(
res,
json!({
"jean": {
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
})
);
}
#[test]
fn array_and_deep_nested() {
let value: Value = json!({
"doggos": [
{
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
},
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
"size": "60cm",
}
}
},
]
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["doggos.jean"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
}
]
})
);
let res: Value = select_values(value, vec!["doggos.marc"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
"size": "60cm",
}
}
}
]
})
);
let res: Value = select_values(value, vec!["doggos.marc.race"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"marc": {
"race": {
"name": "golden retriever",
"size": "60cm",
}
}
}
]
})
);
let res: Value =
select_values(value, vec!["doggos.marc.race.name", "doggos.marc.age"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
}
}
}
]
})
);
let res: Value = select_values(
value,
vec![
"doggos.marc.race.name",
"doggos.marc.age",
"doggos.jean.race.name",
"other.field",
],
)
.into();
assert_eq!(
res,
json!({
"doggos": [
{
"jean": {
"race": {
"name": "bernese mountain",
}
}
},
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
}
}
}
]
})
);
}
#[test]
fn all_conflict_variation() {
let value: Value = json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob"
},
"pet": {
"dog.name": "michel"
},
"pet": {
"dog": {
"name": "milan"
}
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["pet.dog.name"]).into();
assert_eq!(
res,
json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob"
},
"pet": {
"dog.name": "michel"
},
"pet": {
"dog": {
"name": "milan"
}
}
})
);
let value: Value = json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob",
},
"pet": {
"dog.name": "michel",
"dog": {
"name": "milan",
}
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["pet.dog.name", "pet.dog", "pet"]).into();
assert_eq!(
res,
json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob",
},
"pet": {
"dog.name": "michel",
"dog": {
"name": "milan",
}
}
})
);
}
#[test]
fn map_object() {
let mut value: Value = json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
});
map_leaf_values(
value.as_object_mut().unwrap(),
["jean.race.name"],
|key, value| match (value, dbg!(key)) {
(Value::String(name), "jean.race.name") => *name = S("patou"),
_ => unreachable!(),
},
);
assert_eq!(
value,
json!({
"jean": {
"age": 8,
"race": {
"name": "patou",
"size": "80cm",
}
}
})
);
let mut value: Value = json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
},
"bob": "lolpied",
});
let mut calls = 0;
map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, value| {
calls += 1;
match (value, key) {
(Value::String(name), "jean.race.name") => *name = S("patou"),
_ => println!("Called with {key}"),
}
});
assert_eq!(calls, 3);
assert_eq!(
value,
json!({
"jean": {
"age": 8,
"race": {
"name": "patou",
"size": "80cm",
}
},
"bob": "lolpied",
})
);
}
}