diff --git a/Cargo.lock b/Cargo.lock index ea045b696..2187020e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -402,6 +402,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "big_s" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199edb7b90631283b10c2422e6a0bc8b7d987bf732995ba1de53b576c97e51a8" + [[package]] name = "bimap" version = "0.6.2" @@ -1086,8 +1092,8 @@ dependencies = [ [[package]] name = "filter-parser" -version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" dependencies = [ "nom", "nom_locate", @@ -1113,8 +1119,8 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" dependencies = [ "serde_json", ] @@ -1614,6 +1620,14 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json-depth-checker" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" +dependencies = [ + "serde_json", +] + [[package]] name = "jsonwebtoken" version = "8.0.1" @@ -1936,7 +1950,7 @@ checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "meilisearch-auth" -version = "0.26.0" +version = "0.27.0" dependencies = [ "enum-iterator", "meilisearch-error", @@ -1951,7 +1965,7 @@ dependencies = [ [[package]] name = "meilisearch-error" -version = "0.26.0" +version = "0.27.0" dependencies = [ "actix-web", "proptest", @@ -1962,7 
+1976,7 @@ dependencies = [ [[package]] name = "meilisearch-http" -version = "0.26.0" +version = "0.27.0" dependencies = [ "actix-cors", "actix-rt", @@ -2034,7 +2048,7 @@ dependencies = [ [[package]] name = "meilisearch-lib" -version = "0.26.0" +version = "0.27.0" dependencies = [ "actix-rt", "actix-web", @@ -2137,8 +2151,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.26.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" dependencies = [ "bimap", "bincode", @@ -2156,6 +2170,7 @@ dependencies = [ "grenad", "heed", "itertools", + "json-depth-checker", "levenshtein_automata", "log", "logging_timer", @@ -2172,6 +2187,7 @@ dependencies = [ "slice-group-by", "smallstr", "smallvec", + "smartstring", "tempfile", "time 0.3.9", "uuid", @@ -2476,9 +2492,8 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "permissive-json-pointer" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2125f5fc44a45ffd265ce6ab343842f71df469d173f923f234e3a8df7a8f1ba6" dependencies = [ + "big_s", "serde_json", ] @@ -3174,6 +3189,17 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +[[package]] +name = "smartstring" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" +dependencies = [ + "autocfg", + "static_assertions", + "version_check", +] + [[package]] name = "socket2" version = "0.4.4" @@ -3216,6 +3242,12 @@ dependencies = [ "path-slash", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.10.0" diff --git a/Cargo.toml b/Cargo.toml index a27a29634..03f4f5597 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,5 @@ members = [ "meilisearch-error", "meilisearch-lib", "meilisearch-auth", + "permissive-json-pointer", ] diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index 0d0d2a0f2..193959b05 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -1,12 +1,12 @@ [package] name = "meilisearch-auth" -version = "0.26.0" +version = "0.27.0" edition = "2021" [dependencies] enum-iterator = "0.7.0" meilisearch-error = { path = "../meilisearch-error" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" } rand = "0.8.4" serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.79", features = ["preserve_order"] } diff --git a/meilisearch-error/Cargo.toml b/meilisearch-error/Cargo.toml index ac1a4bddd..77e24fe9a 100644 --- a/meilisearch-error/Cargo.toml +++ b/meilisearch-error/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-error" -version = "0.26.0" +version = "0.27.0" authors = ["marin "] edition = "2021" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 546e414ce..86068e7a5 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -4,7 +4,7 @@ description = "Meilisearch HTTP server" edition = "2021" license = "MIT" name = "meilisearch-http" -version = "0.26.0" +version = "0.27.0" [[bin]] name = "meilisearch" @@ -103,5 +103,5 @@ mini-dashboard = [ tikv-jemallocator = "0.4.3" [package.metadata.mini-dashboard] -assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.9/build.zip" -sha1 = "b1833c3e5dc6b5d9d519ae4834935ae6c8a47024" +assets-url = 
"https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.10/build.zip" +sha1 = "1adf96592c267425c110bfefc36b7fc6bfb0f93d" diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index 693d63015..3d3b23d70 100644 --- a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -8,7 +8,10 @@ use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; use http::header::CONTENT_TYPE; use meilisearch_auth::SearchRules; -use meilisearch_lib::index::{SearchQuery, SearchResult}; +use meilisearch_lib::index::{ + SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, +}; use meilisearch_lib::index_controller::Stats; use meilisearch_lib::MeiliSearch; use once_cell::sync::Lazy; @@ -355,6 +358,13 @@ pub struct SearchAggregator { // pagination max_limit: usize, max_offset: usize, + + // formatting + highlight_pre_tag: bool, + highlight_post_tag: bool, + crop_marker: bool, + matches: bool, + crop_length: bool, } impl SearchAggregator { @@ -405,6 +415,12 @@ impl SearchAggregator { ret.max_limit = query.limit; ret.max_offset = query.offset.unwrap_or_default(); + ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG; + ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG; + ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER; + ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH; + ret.matches = query.matches; + ret } @@ -452,6 +468,12 @@ impl SearchAggregator { // pagination self.max_limit = self.max_limit.max(other.max_limit); self.max_offset = self.max_offset.max(other.max_offset); + + self.highlight_pre_tag |= other.highlight_pre_tag; + self.highlight_post_tag |= other.highlight_post_tag; + self.crop_marker |= other.crop_marker; + self.matches |= other.matches; + self.crop_length |= other.crop_length; } pub fn 
into_event(self, user: &User, event_name: &str) -> Option { @@ -489,6 +511,13 @@ impl SearchAggregator { "max_limit": self.max_limit, "max_offset": self.max_offset, }, + "formatting": { + "highlight_pre_tag": self.highlight_pre_tag, + "highlight_post_tag": self.highlight_post_tag, + "crop_marker": self.crop_marker, + "matches": self.matches, + "crop_length": self.crop_length, + }, }); Some(Track { diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 9fefb4231..04b61f74e 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -19,6 +19,7 @@ use serde::Serialize; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; #[derive(Debug, Clone, Parser, Serialize)] +#[clap(version)] pub struct Opt { /// The destination where the database must be created. #[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")] diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 601f0d833..222aca580 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -21,11 +21,11 @@ macro_rules! 
make_setting_route { use meilisearch_lib::milli::update::Setting; use meilisearch_lib::{index::Settings, index_controller::Update, MeiliSearch}; - use crate::analytics::Analytics; - use crate::extractors::authentication::{policies::*, GuardedData}; - use crate::extractors::sequential_extractor::SeqHandler; - use crate::task::SummarizedTaskView; use meilisearch_error::ResponseError; + use $crate::analytics::Analytics; + use $crate::extractors::authentication::{policies::*, GuardedData}; + use $crate::extractors::sequential_extractor::SeqHandler; + use $crate::task::SummarizedTaskView; pub async fn delete( meilisearch: GuardedData, MeiliSearch>, @@ -145,8 +145,8 @@ make_setting_route!( "SortableAttributes Updated".to_string(), json!({ "sortable_attributes": { - "total": setting.as_ref().map(|sort| sort.len()).unwrap_or(0), - "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false), + "total": setting.as_ref().map(|sort| sort.len()), + "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")), }, }), Some(req), @@ -162,10 +162,44 @@ make_setting_route!( ); make_setting_route!( - "/typo", + "/typo-tolerance", meilisearch_lib::index::updates::TypoSettings, - typo, - "typo" + typo_tolerance, + "typoTolerance", + analytics, + |setting: &Option, req: &HttpRequest| { + use serde_json::json; + + analytics.publish( + "TypoTolerance Updated".to_string(), + json!({ + "typo_tolerance": { + "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), + "disable_on_attributes": setting + .as_ref() + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), + "disable_on_words": setting + .as_ref() + .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), + "min_word_size_for_one_typo": setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.one_typo.set())) + .flatten(), + "min_word_size_for_two_typos": setting + .as_ref() + .and_then(|s| 
s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.two_typos.set())) + .flatten(), + }, + }), + Some(req), + ); + } ); make_setting_route!( @@ -181,7 +215,7 @@ make_setting_route!( "SearchableAttributes Updated".to_string(), json!({ "searchable_attributes": { - "total": setting.as_ref().map(|searchable| searchable.len()).unwrap_or(0), + "total": setting.as_ref().map(|searchable| searchable.len()), }, }), Some(req), @@ -254,7 +288,7 @@ generate_configure!( stop_words, synonyms, ranking_rules, - typo + typo_tolerance ); pub async fn update_all( @@ -273,15 +307,46 @@ pub async fn update_all( "sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().position(|s| s == "sort")), }, "searchable_attributes": { - "total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()).unwrap_or(0), + "total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()), }, "sortable_attributes": { - "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()).unwrap_or(0), - "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")).unwrap_or(false), + "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), + "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")), }, "filterable_attributes": { - "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()).unwrap_or(0), - "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")).unwrap_or(false), + "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), + "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")), + }, + "typo_tolerance": { + "enabled": settings.typo_tolerance + .as_ref() + .set() + .and_then(|s| s.enabled.as_ref().set()) + .copied(), + "disable_on_attributes": 
settings.typo_tolerance + .as_ref() + .set() + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), + "disable_on_words": settings.typo_tolerance + .as_ref() + .set() + .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), + "min_word_size_for_one_typo": settings.typo_tolerance + .as_ref() + .set() + .and_then(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.one_typo.set())) + .flatten(), + "min_word_size_for_two_typos": settings.typo_tolerance + .as_ref() + .set() + .and_then(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.two_typos.set())) + .flatten(), }, }), Some(&req), diff --git a/meilisearch-http/tests/search/formatted.rs b/meilisearch-http/tests/search/formatted.rs new file mode 100644 index 000000000..13b8a07d8 --- /dev/null +++ b/meilisearch-http/tests/search/formatted.rs @@ -0,0 +1,376 @@ +use super::*; +use crate::common::Server; +use serde_json::json; + +#[actix_rt::test] +async fn formatted_contain_wildcard() { + let server = Server::new().await; + let index = server.index("test"); + + index + .update_settings(json!({ "displayedAttributes": ["id", "cattos"] })) + .await; + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["*"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + }) + ); + + let (response, code) = index + .search_post( + json!({ "q": "pesti", 
"attributesToRetrieve": ["*"], "attributesToHighlight": ["id"] }), + ) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); + + let (response, code) = index + .search_post( + json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }), + ) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToCrop": ["*"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); +} + +#[actix_rt::test] +async fn format_nested() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + "age": 2, + }, + { + "name": "buddy", + "age": 4, + }, + ], + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + 
json!({ + "_formatted": { + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + }, + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + }, + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + "_formatted": { + "doggos": [ + { + "name": "bobby", + "age": "2", + }, + { + "name": "buddy", + "age": "4", + }, + ], + }, + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "doggos": [ + { + "name": "bobby", + "age": "2", + }, + { + "name": "buddy", + "age": "4", + }, + ], + }, + }) + ); +} + +#[actix_rt::test] +async fn displayedattr_2_smol() { + let server = Server::new().await; + let index = server.index("test"); + + // not enough displayed for the other settings + index + .update_settings(json!({ "displayedAttributes": ["id"] })) + .await; + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, 
+ }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToHighlight": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToCrop": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToHighlight": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToCrop": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"][0], json!({})); + + let (response, code) = index + .search_post( + json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }), + ) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"][0], json!({})); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["cattos"], 
"attributesToHighlight": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "id": "852", + } + }) + ); +} diff --git a/meilisearch-http/tests/search/mod.rs b/meilisearch-http/tests/search/mod.rs index 65ea67a70..d9b36e85d 100644 --- a/meilisearch-http/tests/search/mod.rs +++ b/meilisearch-http/tests/search/mod.rs @@ -2,12 +2,13 @@ // should be tested in its own module to isolate tests and keep the tests readable. mod errors; +mod formatted; use crate::common::Server; use once_cell::sync::Lazy; use serde_json::{json, Value}; -static DOCUMENTS: Lazy = Lazy::new(|| { +pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { json!([ { "title": "Shazam!", @@ -32,7 +33,7 @@ static DOCUMENTS: Lazy = Lazy::new(|| { ]) }); -static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { +pub(self) static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { json!([ { "id": 852, @@ -496,7 +497,7 @@ async fn search_facet_distribution() { assert_eq!(code, 200, "{}", response); let dist = response["facetsDistribution"].as_object().unwrap(); dbg!(&dist); - assert_eq!(dist.len(), 2); + assert_eq!(dist.len(), 3); assert_eq!( dist["doggos.name"], json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1}) diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 6594230e8..30e67646e 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-lib" -version = "0.26.0" +version = "0.27.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -30,13 +30,13 @@ lazy_static = "1.4.0" log = "0.4.14" meilisearch-auth = { path = 
"../meilisearch-auth" } meilisearch-error = { path = "../meilisearch-error" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" once_cell = "1.10.0" parking_lot = "0.12.0" -permissive-json-pointer = "0.2.0" +permissive-json-pointer = { path = "../permissive-json-pointer" } rand = "0.8.5" rayon = "1.5.1" regex = "1.5.5" diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs index 81dedbeba..03b4ca7dd 100644 --- a/meilisearch-lib/src/index/index.rs +++ b/meilisearch-lib/src/index/index.rs @@ -18,7 +18,7 @@ use crate::EnvSizer; use super::error::IndexError; use super::error::Result; -use super::updates::{MinWordLengthTypoSetting, TypoSettings}; +use super::updates::{MinWordSizeTyposSetting, TypoSettings}; use super::{Checked, Settings}; pub type Document = Map; @@ -170,7 +170,7 @@ impl Index { }) .collect(); - let min_typo_word_len = MinWordLengthTypoSetting { + let min_typo_word_len = MinWordSizeTyposSetting { one_typo: Setting::Set(self.min_word_len_one_typo(txn)?), two_typos: Setting::Set(self.min_word_len_two_typos(txn)?), }; @@ -190,7 +190,7 @@ impl Index { let typo_tolerance = TypoSettings { enabled: Setting::Set(self.authorize_typos(txn)?), - min_word_length_for_typo: Setting::Set(min_typo_word_len), + min_word_size_for_typos: Setting::Set(min_typo_word_len), disable_on_words: Setting::Set(disabled_words), disable_on_attributes: Setting::Set(disabled_attributes), }; @@ -213,7 +213,7 @@ impl Index { None => Setting::Reset, }, synonyms: Setting::Set(synonyms), - typo: Setting::Set(typo_tolerance), + typo_tolerance: Setting::Set(typo_tolerance), _kind: PhantomData, }) } diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs index cbeeffdfd..3a42b2617 100644 --- a/meilisearch-lib/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -1,6 +1,7 @@ pub 
use search::{ default_crop_length, default_crop_marker, default_highlight_post_tag, - default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT, + default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, }; pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked}; diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index c63be6aab..7c12f985e 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -4,7 +4,6 @@ use std::str::FromStr; use std::time::Instant; use either::Either; -use indexmap::IndexMap; use milli::tokenizer::{Analyzer, AnalyzerConfig, Token}; use milli::{AscDesc, FieldId, FieldsIdsMap, Filter, MatchingWords, SortError}; use regex::Regex; @@ -16,7 +15,7 @@ use crate::index::error::FacetError; use super::error::{IndexError, Result}; use super::index::Index; -pub type Document = IndexMap; +pub type Document = serde_json::Map; type MatchesInfo = BTreeMap>; #[derive(Serialize, Debug, Clone, PartialEq)] @@ -35,17 +34,17 @@ pub const fn default_crop_length() -> usize { DEFAULT_CROP_LENGTH } -const DEFAULT_CROP_MARKER: &str = "…"; +pub const DEFAULT_CROP_MARKER: &str = "…"; pub fn default_crop_marker() -> String { DEFAULT_CROP_MARKER.to_string() } -const DEFAULT_HIGHLIGHT_PRE_TAG: &str = ""; +pub const DEFAULT_HIGHLIGHT_PRE_TAG: &str = ""; pub fn default_highlight_pre_tag() -> String { DEFAULT_HIGHLIGHT_PRE_TAG.to_string() } -const DEFAULT_HIGHLIGHT_POST_TAG: &str = ""; +pub const DEFAULT_HIGHLIGHT_POST_TAG: &str = ""; pub fn default_highlight_post_tag() -> String { DEFAULT_HIGHLIGHT_POST_TAG.to_string() } @@ -233,14 +232,22 @@ impl Index { let documents_iter = self.documents(&rtxn, documents_ids)?; for (_id, obkv) in documents_iter { - let mut document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?; + // First generate 
a document with all the displayed fields + let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; + + // select the attributes to retrieve + let attributes_to_retrieve = to_retrieve_ids + .iter() + .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); + let mut document = + permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); let matches_info = query .matches .then(|| compute_matches(&matching_words, &document, &analyzer)); let formatted = format_fields( - &document, + &displayed_document, &fields_ids_map, &formatter, &matching_words, @@ -476,7 +483,7 @@ fn add_non_formatted_ids_to_formatted_options( } fn make_document( - attributes_to_retrieve: &BTreeSet, + displayed_attributes: &BTreeSet, field_ids_map: &FieldsIdsMap, obkv: obkv::KvReaderU16, ) -> Result { @@ -494,15 +501,11 @@ fn make_document( } // select the attributes to retrieve - let attributes_to_retrieve = attributes_to_retrieve + let displayed_attributes = displayed_attributes .iter() .map(|&fid| field_ids_map.name(fid).expect("Missing field name")); - let document = permissive_json_pointer::select_values(&document, attributes_to_retrieve); - - // then we need to convert the `serde_json::Map` into an `IndexMap`. - let document = document.into_iter().collect(); - + let document = permissive_json_pointer::select_values(&document, displayed_attributes); Ok(document) } @@ -513,20 +516,13 @@ fn format_fields>( matching_words: &impl Matcher, formatted_options: &BTreeMap, ) -> Result { - // Convert the `IndexMap` into a `serde_json::Map`. - let document = document - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect(); - let selectors: Vec<_> = formatted_options .keys() // This unwrap must be safe since we got the ids from the fields_ids_map just // before. 
.map(|&fid| field_ids_map.name(fid).unwrap()) .collect(); - - let mut document = permissive_json_pointer::select_values(&document, selectors.iter().copied()); + let mut document = permissive_json_pointer::select_values(document, selectors.iter().copied()); permissive_json_pointer::map_leaf_values(&mut document, selectors, |key, value| { // To get the formatting option of each key we need to see all the rules that applies @@ -542,13 +538,9 @@ fn format_fields>( .fold(FormatOptions::default(), |acc, (_, option)| { acc.merge(*option) }); - // TODO: remove this useless clone - *value = formatter.format_value(value.clone(), matching_words, format); + *value = formatter.format_value(std::mem::take(value), matching_words, format); }); - // we need to convert back the `serde_json::Map` into an `IndexMap`. - let document = document.into_iter().collect(); - Ok(document) } diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 0019c226a..3aefa1f5e 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -41,7 +41,7 @@ pub struct Unchecked; #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] -pub struct MinWordLengthTypoSetting { +pub struct MinWordSizeTyposSetting { #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[serde(default, skip_serializing_if = "Setting::is_not_set")] pub one_typo: Setting, @@ -60,7 +60,7 @@ pub struct TypoSettings { pub enabled: Setting, #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[serde(default, skip_serializing_if = "Setting::is_not_set")] - pub min_word_length_for_typo: Setting, + pub min_word_size_for_typos: Setting, #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[serde(default, skip_serializing_if = "Setting::is_not_set")] pub disable_on_words: Setting>, @@ -113,7 +113,7 @@ pub struct Settings { pub distinct_attribute: 
Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub typo: Setting, + pub typo_tolerance: Setting, #[serde(skip)] pub _kind: PhantomData, @@ -130,7 +130,7 @@ impl Settings { stop_words: Setting::Reset, synonyms: Setting::Reset, distinct_attribute: Setting::Reset, - typo: Setting::Reset, + typo_tolerance: Setting::Reset, _kind: PhantomData, } } @@ -145,7 +145,7 @@ impl Settings { stop_words, synonyms, distinct_attribute, - typo: typo_tolerance, + typo_tolerance, .. } = self; @@ -158,7 +158,7 @@ impl Settings { stop_words, synonyms, distinct_attribute, - typo: typo_tolerance, + typo_tolerance, _kind: PhantomData, } } @@ -197,7 +197,7 @@ impl Settings { stop_words: self.stop_words, synonyms: self.synonyms, distinct_attribute: self.distinct_attribute, - typo: self.typo, + typo_tolerance: self.typo_tolerance, _kind: PhantomData, } } @@ -373,7 +373,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.typo { + match settings.typo_tolerance { Setting::Set(ref value) => { match value.enabled { Setting::Set(val) => builder.set_autorize_typos(val), @@ -381,7 +381,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match value.min_word_length_for_typo { + match value.min_word_size_for_typos { Setting::Set(ref setting) => { match setting.one_typo { Setting::Set(val) => builder.set_min_word_len_one_typo(val), @@ -455,7 +455,7 @@ pub(crate) mod test { stop_words: Setting::NotSet, synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, - typo: Setting::NotSet, + typo_tolerance: Setting::NotSet, _kind: PhantomData::, }; @@ -477,7 +477,7 @@ pub(crate) mod test { stop_words: Setting::NotSet, synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, - typo: Setting::NotSet, + typo_tolerance: Setting::NotSet, _kind: PhantomData::, }; diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 
ae15e8abb..4cbba1e42 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -178,15 +178,6 @@ impl IndexControllerBuilder { .max_task_store_size .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; - let db_exists = db_path.as_ref().exists(); - if db_exists { - // Directory could be pre-created without any database in. - let db_is_empty = db_path.as_ref().read_dir()?.next().is_none(); - if !db_is_empty { - versioning::check_version_file(db_path.as_ref())?; - } - } - if let Some(ref path) = self.import_snapshot { log::info!("Loading from snapshot {:?}", path); load_snapshot( @@ -207,6 +198,15 @@ impl IndexControllerBuilder { )?; } + let db_exists = db_path.as_ref().exists(); + if db_exists { + // Directory could be pre-created without any database in. + let db_is_empty = db_path.as_ref().read_dir()?.next().is_none(); + if !db_is_empty { + versioning::check_version_file(db_path.as_ref())?; + } + } + std::fs::create_dir_all(db_path.as_ref())?; let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?); diff --git a/permissive-json-pointer/Cargo.toml b/permissive-json-pointer/Cargo.toml new file mode 100644 index 000000000..b50f30f19 --- /dev/null +++ b/permissive-json-pointer/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "permissive-json-pointer" +version = "0.2.0" +edition = "2021" +description = "A permissive json pointer" +readme = "README.md" + +[dependencies] +serde_json = "1.0" + +[dev-dependencies] +big_s = "1.0" diff --git a/permissive-json-pointer/README.md b/permissive-json-pointer/README.md new file mode 100644 index 000000000..6a94cf00d --- /dev/null +++ b/permissive-json-pointer/README.md @@ -0,0 +1,134 @@ +# Permissive json pointer + +This crate provide an interface a little bit similar to what you know as “json pointer”. +But it’s actually doing something quite different. + +## The API + +The crate provide only one function called [`select_values`]. 
+It takes one object as a parameter and a list of selectors. +It then returns a new object containing only the fields you selected. + +## The selectors + +The selector syntax is simpler than in other APIs. +There is only ONE special symbol: the `.`. + +If you write `dog` and provide the following object: +```json +{ + "dog": "bob", + "cat": "michel" +} +``` +You’ll get back: +```json +{ + "dog": "bob" +} +``` + +Easy, right? + +Now the dot can either be part of a field name, or separate the levels of a nested object. + +For example, if you have the following json: +```json +{ + "dog.name": "jean", + "dog": { + "name": "bob", + "age": 6 + } +} +``` + +What a crappy json! But never underestimate your users, they [_WILL_](https://xkcd.com/1172/) +somehow base their entire workflow on this kind of json. +Here with the `dog.name` selector both fields will be +selected and the following json will be returned: +```json +{ + "dog.name": "jean", + "dog": { + "name": "bob" + } +} +``` + +And as you can guess, this crate is as permissive as possible. +It’ll match everything it can! +Consider this even more crappy json: +```json +{ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob" + }, + "pet": { + "dog.name": "michel" + }, + "pet": { + "dog": { + "name": "milan" + } + } +} +``` +If you write `pet.dog.name` everything will be selected. + +## Matching arrays + +With this kind of selector you can’t match a specific element in an array. +Your selector will be applied to all the elements _in_ the array. + +Consider the following json: +```json +{ + "pets": [ + { + "animal": "dog", + "race": "bernese mountain" + }, + { + "animal": "dog", + "race": "golden retriever" + }, + { + "animal": "cat", + "age": 8 + } + ] +} +``` + +With the filter `pets.animal` you’ll get: +```json +{ + "pets": [ + { + "animal": "dog" + }, + { + "animal": "dog" + }, + { + "animal": "cat" + } + ] +} +``` + +Empty elements in an array get removed.
/// Separator between the levels of a selector, e.g. the `.` in `animaux.chien`.
const SPLIT_SYMBOL: char = '.';

/// Returns `true` if the `selector` matches the `key`.
///
/// A selector matches a key when it starts with the key and the character that
/// follows that prefix, if there is one, is [`SPLIT_SYMBOL`]. In other words the
/// selector is either exactly the key, or the key followed by `.` and anything.
///
/// ```text
/// selector              key               result
/// ---------------------------------------------------
/// `animaux`             `animaux`         match
/// `animaux.chien`       `animaux`         match
/// `animaux.chien.nom`   `animaux`         match
/// `animaux.chien.nom`   `animaux.chien`   match
/// `animaux.`            `animaux`         match
/// ---------------------------------------------------
/// `animaux`             `animaux.chien`   doesn't match
/// `animaux.chien`       `animaux.`        doesn't match
/// `animaux.ch`          `animaux.chien`   doesn't match
/// `animaux.chien`       `animau`          doesn't match
/// ```
fn contained_in(selector: &str, key: &str) -> bool {
    match selector.strip_prefix(key) {
        // Whatever is left after the key must be empty or start a new level;
        // this rejects `animaux.chien` against `animau` or `animaux.`.
        Some(rest) => rest.is_empty() || rest.starts_with(SPLIT_SYMBOL),
        None => false,
    }
}
+/// ``` +/// use serde_json::{Value, json}; +/// use permissive_json_pointer::map_leaf_values; +/// +/// let mut value: Value = json!({ +/// "jean": { +/// "age": 8, +/// "race": { +/// "name": "bernese mountain", +/// "size": "80cm", +/// } +/// } +/// }); +/// map_leaf_values( +/// value.as_object_mut().unwrap(), +/// ["jean.race.name"], +/// |key, value| match (value, dbg!(key)) { +/// (Value::String(name), "jean.race.name") => *name = "patou".to_string(), +/// _ => unreachable!(), +/// }, +/// ); +/// assert_eq!( +/// value, +/// json!({ +/// "jean": { +/// "age": 8, +/// "race": { +/// "name": "patou", +/// "size": "80cm", +/// } +/// } +/// }) +/// ); +/// ``` +pub fn map_leaf_values<'a>( + value: &mut Map, + selectors: impl IntoIterator, + mut mapper: impl FnMut(&str, &mut Value), +) { + let selectors: Vec<_> = selectors.into_iter().collect(); + map_leaf_values_in_object(value, &selectors, "", &mut mapper); +} + +pub fn map_leaf_values_in_object<'a>( + value: &mut Map, + selectors: &[&'a str], + base_key: &str, + mapper: &mut impl FnMut(&str, &mut Value), +) { + for (key, value) in value.iter_mut() { + let base_key = if base_key.is_empty() { + key.to_string() + } else { + format!("{}{}{}", base_key, SPLIT_SYMBOL, key) + }; + + // here if the user only specified `doggo` we need to iterate in all the fields of `doggo` + // so we check the contained_in on both side + let should_continue = selectors + .iter() + .any(|selector| contained_in(selector, &base_key) || contained_in(&base_key, selector)); + + if should_continue { + match value { + Value::Object(object) => { + map_leaf_values_in_object(object, selectors, &base_key, mapper) + } + Value::Array(array) => { + map_leaf_values_in_array(array, selectors, &base_key, mapper) + } + value => mapper(&base_key, value), + } + } + } +} + +pub fn map_leaf_values_in_array( + values: &mut [Value], + selectors: &[&str], + base_key: &str, + mapper: &mut impl FnMut(&str, &mut Value), +) { + for value in values.iter_mut() { 
+ match value { + Value::Object(object) => map_leaf_values_in_object(object, selectors, base_key, mapper), + Value::Array(array) => map_leaf_values_in_array(array, selectors, base_key, mapper), + value => mapper(base_key, value), + } + } +} + +/// Permissively selects values in a json with a list of selectors. +/// Returns a new json containing all the selected fields. +/// ``` +/// use serde_json::*; +/// use permissive_json_pointer::select_values; +/// +/// let value: Value = json!({ +/// "name": "peanut", +/// "age": 8, +/// "race": { +/// "name": "bernese mountain", +/// "avg_age": 12, +/// "size": "80cm", +/// }, +/// }); +/// let value: &Map = value.as_object().unwrap(); +/// +/// let res: Value = select_values(value, vec!["name", "race.name"]).into(); +/// assert_eq!( +/// res, +/// json!({ +/// "name": "peanut", +/// "race": { +/// "name": "bernese mountain", +/// }, +/// }) +/// ); +/// ``` +pub fn select_values<'a>( + value: &Map, + selectors: impl IntoIterator, +) -> Map { + let selectors = selectors.into_iter().collect(); + create_value(value, selectors) +} + +fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document { + let mut new_value: Document = Map::new(); + + for (key, value) in value.iter() { + // first we insert all the key at the root level + if selectors.contains(key as &str) { + new_value.insert(key.to_string(), value.clone()); + // if the key was simple we can delete it and move to + // the next key + if is_simple(key) { + selectors.remove(key as &str); + continue; + } + } + + // we extract all the sub selectors matching the current field + // if there was [person.name, person.age] and if we are on the field + // `person`. Then we generate the following sub selectors: [name, age]. 
+ let sub_selectors: HashSet<&str> = selectors + .iter() + .filter(|s| contained_in(s, key)) + .filter_map(|s| s.trim_start_matches(key).get(SPLIT_SYMBOL.len_utf8()..)) + .collect(); + + if !sub_selectors.is_empty() { + match value { + Value::Array(array) => { + let array = create_array(array, &sub_selectors); + if !array.is_empty() { + new_value.insert(key.to_string(), array.into()); + } + } + Value::Object(object) => { + let object = create_value(object, sub_selectors); + if !object.is_empty() { + new_value.insert(key.to_string(), object.into()); + } + } + _ => (), + } + } + } + + new_value +} + +fn create_array(array: &Vec, selectors: &HashSet<&str>) -> Vec { + let mut res = Vec::new(); + + for value in array { + match value { + Value::Array(array) => { + let array = create_array(array, selectors); + if !array.is_empty() { + res.push(array.into()); + } + } + Value::Object(object) => { + let object = create_value(object, selectors.clone()); + if !object.is_empty() { + res.push(object.into()); + } + } + _ => (), + } + } + + res +} + +fn is_simple(key: impl AsRef) -> bool { + !key.as_ref().contains(SPLIT_SYMBOL) +} + +#[cfg(test)] +mod tests { + use big_s::S; + + use super::*; + + #[test] + fn test_contained_in() { + assert!(contained_in("animaux", "animaux")); + assert!(contained_in("animaux.chien", "animaux")); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien.race.bouvier bernois" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien.race.bouvier bernois.fourrure" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien.race.bouvier bernois.fourrure.couleur" + )); + + // -- the wrongs + assert!(!contained_in("chien", 
"chat")); + assert!(!contained_in("animaux", "animaux.chien")); + assert!(!contained_in("animaux.chien", "animaux.chat")); + + // -- the strange edge cases + assert!(!contained_in("animaux.chien", "anima")); + assert!(!contained_in("animaux.chien", "animau")); + assert!(!contained_in("animaux.chien", "animaux.")); + assert!(!contained_in("animaux.chien", "animaux.c")); + assert!(!contained_in("animaux.chien", "animaux.ch")); + assert!(!contained_in("animaux.chien", "animaux.chi")); + assert!(!contained_in("animaux.chien", "animaux.chie")); + } + + #[test] + fn simple_key() { + let value: Value = json!({ + "name": "peanut", + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["name"]).into(); + assert_eq!( + res, + json!({ + "name": "peanut", + }) + ); + + let res: Value = select_values(value, vec!["age"]).into(); + assert_eq!( + res, + json!({ + "age": 8, + }) + ); + + let res: Value = select_values(value, vec!["name", "age"]).into(); + assert_eq!( + res, + json!({ + "name": "peanut", + "age": 8, + }) + ); + + let res: Value = select_values(value, vec!["race"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + + let res: Value = select_values(value, vec!["name", "age", "race"]).into(); + assert_eq!( + res, + json!({ + "name": "peanut", + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + } + + #[test] + fn complex_key() { + let value: Value = json!({ + "name": "peanut", + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["race"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + 
} + }) + ); + + println!("RIGHT BEFORE"); + + let res: Value = select_values(value, vec!["race.name"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + } + }) + ); + + let res: Value = select_values(value, vec!["race.name", "race.size"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "size": "80cm", + } + }) + ); + + let res: Value = select_values( + value, + vec!["race.name", "race.size", "race.avg_age", "race.size", "age"], + ) + .into(); + assert_eq!( + res, + json!({ + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + + let res: Value = select_values(value, vec!["race.name", "race"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + + let res: Value = select_values(value, vec!["race", "race.name"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + } + + #[test] + fn multi_level_nested() { + let value: Value = json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["jean"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }) + ); + + let res: Value = select_values(value, vec!["jean.age"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "age": 8, + } + }) + ); + + let res: Value = select_values(value, vec!["jean.race.size"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "race": { + "size": "80cm", + } + } + }) + ); + + let res: Value = select_values(value, vec!["jean.race.name", "jean.age"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + } + } + }) + ); + + let res: Value = 
select_values(value, vec!["jean.race"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }) + ); + } + + #[test] + fn array_and_deep_nested() { + let value: Value = json!({ + "doggos": [ + { + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }, + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + "size": "60cm", + } + } + }, + ] + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["doggos.jean"]).into(); + assert_eq!( + res, + json!({ + "doggos": [ + { + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + } + ] + }) + ); + + let res: Value = select_values(value, vec!["doggos.marc"]).into(); + assert_eq!( + res, + json!({ + "doggos": [ + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + "size": "60cm", + } + } + } + ] + }) + ); + + let res: Value = select_values(value, vec!["doggos.marc.race"]).into(); + assert_eq!( + res, + json!({ + "doggos": [ + { + "marc": { + "race": { + "name": "golden retriever", + "size": "60cm", + } + } + } + ] + }) + ); + + let res: Value = + select_values(value, vec!["doggos.marc.race.name", "doggos.marc.age"]).into(); + + assert_eq!( + res, + json!({ + "doggos": [ + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + } + } + } + ] + }) + ); + + let res: Value = select_values( + value, + vec![ + "doggos.marc.race.name", + "doggos.marc.age", + "doggos.jean.race.name", + "other.field", + ], + ) + .into(); + + assert_eq!( + res, + json!({ + "doggos": [ + { + "jean": { + "race": { + "name": "bernese mountain", + } + } + }, + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + } + } + } + ] + }) + ); + } + + #[test] + fn all_conflict_variation() { + let value: Value = json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob" + }, + "pet": { + "dog.name": 
"michel" + }, + "pet": { + "dog": { + "name": "milan" + } + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["pet.dog.name"]).into(); + assert_eq!( + res, + json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob" + }, + "pet": { + "dog.name": "michel" + }, + "pet": { + "dog": { + "name": "milan" + } + } + }) + ); + + let value: Value = json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob", + }, + "pet": { + "dog.name": "michel", + "dog": { + "name": "milan", + } + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["pet.dog.name", "pet.dog", "pet"]).into(); + + assert_eq!( + res, + json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob", + }, + "pet": { + "dog.name": "michel", + "dog": { + "name": "milan", + } + } + }) + ); + } + + #[test] + fn map_object() { + let mut value: Value = json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }); + + map_leaf_values( + value.as_object_mut().unwrap(), + ["jean.race.name"], + |key, value| match (value, dbg!(key)) { + (Value::String(name), "jean.race.name") => *name = S("patou"), + _ => unreachable!(), + }, + ); + + assert_eq!( + value, + json!({ + "jean": { + "age": 8, + "race": { + "name": "patou", + "size": "80cm", + } + } + }) + ); + + let mut value: Value = json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + }, + "bob": "lolpied", + }); + + let mut calls = 0; + map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, value| { + calls += 1; + match (value, key) { + (Value::String(name), "jean.race.name") => *name = S("patou"), + _ => println!("Called with {key}"), + } + }); + + assert_eq!(calls, 3); + assert_eq!( + value, + json!({ + "jean": { + "age": 8, + "race": { + "name": "patou", + "size": "80cm", + } + }, + "bob": "lolpied", + }) + ); + } +}