From a87faa0db7423a9c873c6642276449d5f6d610c0 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 16 Mar 2022 13:32:59 +0100 Subject: [PATCH 01/20] bug(http): fix panic on startup --- meilisearch-lib/src/options.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index 54a411250..195576799 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -52,20 +52,20 @@ pub struct SchedulerConfig { // The maximum number of updates of the same type that can be batched together. // If unspecified, this is unlimited. A value of 0 is interpreted as 1. - #[clap(long, requires = "enable-autobatching", hide = true)] + #[clap(long, requires = "enable-auto-batching", hide = true)] pub max_batch_size: Option, // The maximum number of documents in a document batch. Since batches must contain at least one // update for the scheduler to make progress, the number of documents in a batch will be at // least the number of documents of its first update. - #[clap(long, requires = "enable-autobatching", hide = true)] + #[clap(long, requires = "enable-auto-batching", hide = true)] pub max_documents_per_batch: Option, /// Debounce duration in seconds /// /// When a new task is enqueued, the scheduler waits for `debounce_duration_sec` seconds for new updates before /// starting to process a batch of updates. 
- #[clap(long, requires = "enable-autobatching", hide = true)] + #[clap(long, requires = "enable-auto-batching", hide = true)] pub debounce_duration_sec: Option, } From 7468a5e96c631584077ae7797fe8fb76bef26e62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 16 Mar 2022 18:03:54 +0100 Subject: [PATCH 02/20] Update the version (from v0.26.0 to v0.26.1) --- Cargo.lock | 8 ++++---- meilisearch-auth/Cargo.toml | 2 +- meilisearch-error/Cargo.toml | 2 +- meilisearch-http/Cargo.toml | 2 +- meilisearch-lib/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 815fdf3b4..3022dc517 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1713,7 +1713,7 @@ checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "meilisearch-auth" -version = "0.26.0" +version = "0.26.1" dependencies = [ "enum-iterator", "heed", @@ -1728,7 +1728,7 @@ dependencies = [ [[package]] name = "meilisearch-error" -version = "0.26.0" +version = "0.26.1" dependencies = [ "actix-web", "proptest", @@ -1739,7 +1739,7 @@ dependencies = [ [[package]] name = "meilisearch-http" -version = "0.26.0" +version = "0.26.1" dependencies = [ "actix-cors", "actix-rt", @@ -1814,7 +1814,7 @@ dependencies = [ [[package]] name = "meilisearch-lib" -version = "0.26.0" +version = "0.26.1" dependencies = [ "actix-rt", "actix-web", diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index 6fdbe0a46..e29f8cec3 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-auth" -version = "0.26.0" +version = "0.26.1" edition = "2021" [dependencies] diff --git a/meilisearch-error/Cargo.toml b/meilisearch-error/Cargo.toml index 0b75e4151..2bda89a3c 100644 --- a/meilisearch-error/Cargo.toml +++ b/meilisearch-error/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-error" -version = "0.26.0" +version = "0.26.1" authors = ["marin "] 
edition = "2021" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index da7d9e61a..aeb13275b 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -4,7 +4,7 @@ description = "Meilisearch HTTP server" edition = "2021" license = "MIT" name = "meilisearch-http" -version = "0.26.0" +version = "0.26.1" [[bin]] name = "meilisearch" diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index b64104219..7af40257d 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-lib" -version = "0.26.0" +version = "0.26.1" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html From 0990e95830a43dd7b0b4f513f86f442efedc42d5 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 11 Apr 2022 14:18:47 +0200 Subject: [PATCH 03/20] Feat(Analytics): Add analytics for search format options --- .../src/analytics/segment_analytics.rs | 31 ++++++++++++++++++- meilisearch-lib/src/index/mod.rs | 3 +- meilisearch-lib/src/index/search.rs | 6 ++-- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index 693d63015..3d3b23d70 100644 --- a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -8,7 +8,10 @@ use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; use http::header::CONTENT_TYPE; use meilisearch_auth::SearchRules; -use meilisearch_lib::index::{SearchQuery, SearchResult}; +use meilisearch_lib::index::{ + SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, +}; use meilisearch_lib::index_controller::Stats; use meilisearch_lib::MeiliSearch; use once_cell::sync::Lazy; @@ -355,6 +358,13 @@ pub struct SearchAggregator { // pagination max_limit: usize, 
max_offset: usize, + + // formatting + highlight_pre_tag: bool, + highlight_post_tag: bool, + crop_marker: bool, + matches: bool, + crop_length: bool, } impl SearchAggregator { @@ -405,6 +415,12 @@ impl SearchAggregator { ret.max_limit = query.limit; ret.max_offset = query.offset.unwrap_or_default(); + ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG; + ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG; + ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER; + ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH; + ret.matches = query.matches; + ret } @@ -452,6 +468,12 @@ impl SearchAggregator { // pagination self.max_limit = self.max_limit.max(other.max_limit); self.max_offset = self.max_offset.max(other.max_offset); + + self.highlight_pre_tag |= other.highlight_pre_tag; + self.highlight_post_tag |= other.highlight_post_tag; + self.crop_marker |= other.crop_marker; + self.matches |= other.matches; + self.crop_length |= other.crop_length; } pub fn into_event(self, user: &User, event_name: &str) -> Option { @@ -489,6 +511,13 @@ impl SearchAggregator { "max_limit": self.max_limit, "max_offset": self.max_offset, }, + "formatting": { + "highlight_pre_tag": self.highlight_pre_tag, + "highlight_post_tag": self.highlight_post_tag, + "crop_marker": self.crop_marker, + "matches": self.matches, + "crop_length": self.crop_length, + }, }); Some(Track { diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs index cbeeffdfd..3a42b2617 100644 --- a/meilisearch-lib/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -1,6 +1,7 @@ pub use search::{ default_crop_length, default_crop_marker, default_highlight_post_tag, - default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT, + default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, }; pub use 
updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked}; diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index c63be6aab..c628980d2 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -35,17 +35,17 @@ pub const fn default_crop_length() -> usize { DEFAULT_CROP_LENGTH } -const DEFAULT_CROP_MARKER: &str = "…"; +pub const DEFAULT_CROP_MARKER: &str = "…"; pub fn default_crop_marker() -> String { DEFAULT_CROP_MARKER.to_string() } -const DEFAULT_HIGHLIGHT_PRE_TAG: &str = ""; +pub const DEFAULT_HIGHLIGHT_PRE_TAG: &str = ""; pub fn default_highlight_pre_tag() -> String { DEFAULT_HIGHLIGHT_PRE_TAG.to_string() } -const DEFAULT_HIGHLIGHT_POST_TAG: &str = ""; +pub const DEFAULT_HIGHLIGHT_POST_TAG: &str = ""; pub fn default_highlight_post_tag() -> String { DEFAULT_HIGHLIGHT_POST_TAG.to_string() } From b3661bf8eceb027c92e473462a1a45a4037da564 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Mon, 11 Apr 2022 16:25:15 +0200 Subject: [PATCH 04/20] Change version for the next release (v0.27.0) --- Cargo.lock | 8 ++++---- meilisearch-auth/Cargo.toml | 2 +- meilisearch-error/Cargo.toml | 2 +- meilisearch-http/Cargo.toml | 2 +- meilisearch-lib/Cargo.toml | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 68bb172aa..e2c77604c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1936,7 +1936,7 @@ checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "meilisearch-auth" -version = "0.26.0" +version = "0.27.0" dependencies = [ "enum-iterator", "meilisearch-error", @@ -1951,7 +1951,7 @@ dependencies = [ [[package]] name = "meilisearch-error" -version = "0.26.0" +version = "0.27.0" dependencies = [ "actix-web", "proptest", @@ -1962,7 +1962,7 @@ dependencies = [ [[package]] name = "meilisearch-http" -version = "0.26.0" +version = "0.27.0" dependencies = [ "actix-cors", "actix-rt", @@ 
-2034,7 +2034,7 @@ dependencies = [ [[package]] name = "meilisearch-lib" -version = "0.26.0" +version = "0.27.0" dependencies = [ "actix-rt", "actix-web", diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index 0d0d2a0f2..db100ba45 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-auth" -version = "0.26.0" +version = "0.27.0" edition = "2021" [dependencies] diff --git a/meilisearch-error/Cargo.toml b/meilisearch-error/Cargo.toml index ac1a4bddd..77e24fe9a 100644 --- a/meilisearch-error/Cargo.toml +++ b/meilisearch-error/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-error" -version = "0.26.0" +version = "0.27.0" authors = ["marin "] edition = "2021" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 546e414ce..6c7d04f83 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -4,7 +4,7 @@ description = "Meilisearch HTTP server" edition = "2021" license = "MIT" name = "meilisearch-http" -version = "0.26.0" +version = "0.27.0" [[bin]] name = "meilisearch" diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 37c6af488..524d094bf 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "meilisearch-lib" -version = "0.26.0" +version = "0.27.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html From 2ee210483f2ead933f6c12c4d7deeee2ce09af9f Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 12 Apr 2022 15:22:47 +0200 Subject: [PATCH 05/20] fix(search): remove the back and forth between the IndexMap and the serde_json::Map This is ok because we're using the preserve_order feature in serde_json which is already internally using an IndexMap. 
--- .../src/routes/indexes/settings.rs | 8 +++---- meilisearch-lib/src/index/search.rs | 21 +++---------------- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 601f0d833..ab7d42e6f 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -21,11 +21,11 @@ macro_rules! make_setting_route { use meilisearch_lib::milli::update::Setting; use meilisearch_lib::{index::Settings, index_controller::Update, MeiliSearch}; - use crate::analytics::Analytics; - use crate::extractors::authentication::{policies::*, GuardedData}; - use crate::extractors::sequential_extractor::SeqHandler; - use crate::task::SummarizedTaskView; use meilisearch_error::ResponseError; + use $crate::analytics::Analytics; + use $crate::extractors::authentication::{policies::*, GuardedData}; + use $crate::extractors::sequential_extractor::SeqHandler; + use $crate::task::SummarizedTaskView; pub async fn delete( meilisearch: GuardedData, MeiliSearch>, diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index c63be6aab..8d7b900f5 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -4,7 +4,6 @@ use std::str::FromStr; use std::time::Instant; use either::Either; -use indexmap::IndexMap; use milli::tokenizer::{Analyzer, AnalyzerConfig, Token}; use milli::{AscDesc, FieldId, FieldsIdsMap, Filter, MatchingWords, SortError}; use regex::Regex; @@ -16,7 +15,7 @@ use crate::index::error::FacetError; use super::error::{IndexError, Result}; use super::index::Index; -pub type Document = IndexMap; +pub type Document = serde_json::Map; type MatchesInfo = BTreeMap>; #[derive(Serialize, Debug, Clone, PartialEq)] @@ -499,10 +498,6 @@ fn make_document( .map(|&fid| field_ids_map.name(fid).expect("Missing field name")); let document = 
permissive_json_pointer::select_values(&document, attributes_to_retrieve); - - // then we need to convert the `serde_json::Map` into an `IndexMap`. - let document = document.into_iter().collect(); - Ok(document) } @@ -513,12 +508,6 @@ fn format_fields>( matching_words: &impl Matcher, formatted_options: &BTreeMap, ) -> Result { - // Convert the `IndexMap` into a `serde_json::Map`. - let document = document - .iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect(); - let selectors: Vec<_> = formatted_options .keys() // This unwrap must be safe since we got the ids from the fields_ids_map just @@ -526,7 +515,7 @@ fn format_fields>( .map(|&fid| field_ids_map.name(fid).unwrap()) .collect(); - let mut document = permissive_json_pointer::select_values(&document, selectors.iter().copied()); + let mut document = permissive_json_pointer::select_values(document, selectors.iter().copied()); permissive_json_pointer::map_leaf_values(&mut document, selectors, |key, value| { // To get the formatting option of each key we need to see all the rules that applies @@ -542,13 +531,9 @@ fn format_fields>( .fold(FormatOptions::default(), |acc, (_, option)| { acc.merge(*option) }); - // TODO: remove this useless clone - *value = formatter.format_value(value.clone(), matching_words, format); + *value = formatter.format_value(std::mem::take(value), matching_words, format); }); - // we need to convert back the `serde_json::Map` into an `IndexMap`. 
- let document = document.into_iter().collect(); - Ok(document) } From 41249be27423b25cbc4a447dd0262bc9d27972df Mon Sep 17 00:00:00 2001 From: Paul Sanders Date: Tue, 12 Apr 2022 15:22:36 -0400 Subject: [PATCH 06/20] Add version flag --- meilisearch-http/src/option.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 9fefb4231..04b61f74e 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -19,6 +19,7 @@ use serde::Serialize; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; #[derive(Debug, Clone, Parser, Serialize)] +#[clap(version)] pub struct Opt { /// The destination where the database must be created. #[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")] From 276dc6043a54a078092630e2ec629b53e9a11395 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 14 Apr 2022 10:42:06 +0200 Subject: [PATCH 07/20] chore(http, lib): rename typo to typo_tolerance --- meilisearch-http/src/routes/indexes/settings.rs | 8 ++++---- meilisearch-lib/src/index/index.rs | 2 +- meilisearch-lib/src/index/updates.rs | 16 ++++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 601f0d833..5cca8e68f 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -162,10 +162,10 @@ make_setting_route!( ); make_setting_route!( - "/typo", + "/typo-tolerance", meilisearch_lib::index::updates::TypoSettings, - typo, - "typo" + typo_tolerance, + "typoTolerance" ); make_setting_route!( @@ -254,7 +254,7 @@ generate_configure!( stop_words, synonyms, ranking_rules, - typo + typo_tolerance ); pub async fn update_all( diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs index 81dedbeba..25727236a 100644 --- a/meilisearch-lib/src/index/index.rs +++ 
b/meilisearch-lib/src/index/index.rs @@ -213,7 +213,7 @@ impl Index { None => Setting::Reset, }, synonyms: Setting::Set(synonyms), - typo: Setting::Set(typo_tolerance), + typo_tolerance: Setting::Set(typo_tolerance), _kind: PhantomData, }) } diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 0019c226a..265f80762 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -113,7 +113,7 @@ pub struct Settings { pub distinct_attribute: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub typo: Setting, + pub typo_tolerance: Setting, #[serde(skip)] pub _kind: PhantomData, @@ -130,7 +130,7 @@ impl Settings { stop_words: Setting::Reset, synonyms: Setting::Reset, distinct_attribute: Setting::Reset, - typo: Setting::Reset, + typo_tolerance: Setting::Reset, _kind: PhantomData, } } @@ -145,7 +145,7 @@ impl Settings { stop_words, synonyms, distinct_attribute, - typo: typo_tolerance, + typo_tolerance, .. 
} = self; @@ -158,7 +158,7 @@ impl Settings { stop_words, synonyms, distinct_attribute, - typo: typo_tolerance, + typo_tolerance, _kind: PhantomData, } } @@ -197,7 +197,7 @@ impl Settings { stop_words: self.stop_words, synonyms: self.synonyms, distinct_attribute: self.distinct_attribute, - typo: self.typo, + typo_tolerance: self.typo_tolerance, _kind: PhantomData, } } @@ -373,7 +373,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.typo { + match settings.typo_tolerance { Setting::Set(ref value) => { match value.enabled { Setting::Set(val) => builder.set_autorize_typos(val), @@ -455,7 +455,7 @@ pub(crate) mod test { stop_words: Setting::NotSet, synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, - typo: Setting::NotSet, + typo_tolerance: Setting::NotSet, _kind: PhantomData::, }; @@ -477,7 +477,7 @@ pub(crate) mod test { stop_words: Setting::NotSet, synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, - typo: Setting::NotSet, + typo_tolerance: Setting::NotSet, _kind: PhantomData::, }; From 64b0a50a58cf5331e359749d340327030420ca0d Mon Sep 17 00:00:00 2001 From: Irevoire Date: Thu, 14 Apr 2022 12:12:54 +0200 Subject: [PATCH 08/20] chore: bump milli --- Cargo.lock | 35 +++++++++++++++++++++++++++++++---- meilisearch-auth/Cargo.toml | 2 +- meilisearch-lib/Cargo.toml | 2 +- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e2c77604c..965328ab7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1087,7 +1087,7 @@ dependencies = [ [[package]] name = "filter-parser" version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.1#a68e3a79fbdbb366ab162428884e5879a9af9d5c" dependencies = [ "nom", "nom_locate", @@ -1114,7 +1114,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" version = "0.1.0" -source = 
"git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.1#a68e3a79fbdbb366ab162428884e5879a9af9d5c" dependencies = [ "serde_json", ] @@ -1614,6 +1614,14 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json-depth-checker" +version = "0.1.0" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.1#a68e3a79fbdbb366ab162428884e5879a9af9d5c" +dependencies = [ + "serde_json", +] + [[package]] name = "jsonwebtoken" version = "8.0.1" @@ -2137,8 +2145,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.26.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.0#9ac2fd1c379d5b91c80471c23079dbba57b9a841" +version = "0.26.1" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.1#a68e3a79fbdbb366ab162428884e5879a9af9d5c" dependencies = [ "bimap", "bincode", @@ -2156,6 +2164,7 @@ dependencies = [ "grenad", "heed", "itertools", + "json-depth-checker", "levenshtein_automata", "log", "logging_timer", @@ -2172,6 +2181,7 @@ dependencies = [ "slice-group-by", "smallstr", "smallvec", + "smartstring", "tempfile", "time 0.3.9", "uuid", @@ -3174,6 +3184,17 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +[[package]] +name = "smartstring" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" +dependencies = [ + "autocfg", + "static_assertions", + "version_check", +] + [[package]] name = "socket2" version = "0.4.4" @@ -3216,6 +3237,12 @@ dependencies = [ "path-slash", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" 
version = "0.10.0" diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index db100ba45..92d3fdbe1 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] enum-iterator = "0.7.0" meilisearch-error = { path = "../meilisearch-error" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.1" } rand = "0.8.4" serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.79", features = ["preserve_order"] } diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 524d094bf..392cff95e 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -30,7 +30,7 @@ lazy_static = "1.4.0" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-error = { path = "../meilisearch-error" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.1" } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" From 9b064e53e78a5bfe2f68fb916bc4b8e04fd60729 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Sat, 16 Apr 2022 15:35:33 +0200 Subject: [PATCH 09/20] fix(http, lib): rename_min_word_length_for_typo into rename_min_word_size_for_typo --- meilisearch-lib/src/index/index.rs | 6 +++--- meilisearch-lib/src/index/updates.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs index 25727236a..03b4ca7dd 100644 --- a/meilisearch-lib/src/index/index.rs +++ b/meilisearch-lib/src/index/index.rs @@ -18,7 +18,7 @@ use crate::EnvSizer; use super::error::IndexError; use super::error::Result; -use super::updates::{MinWordLengthTypoSetting, TypoSettings}; +use super::updates::{MinWordSizeTyposSetting, TypoSettings}; use super::{Checked, Settings}; pub type Document = 
Map; @@ -170,7 +170,7 @@ impl Index { }) .collect(); - let min_typo_word_len = MinWordLengthTypoSetting { + let min_typo_word_len = MinWordSizeTyposSetting { one_typo: Setting::Set(self.min_word_len_one_typo(txn)?), two_typos: Setting::Set(self.min_word_len_two_typos(txn)?), }; @@ -190,7 +190,7 @@ impl Index { let typo_tolerance = TypoSettings { enabled: Setting::Set(self.authorize_typos(txn)?), - min_word_length_for_typo: Setting::Set(min_typo_word_len), + min_word_size_for_typos: Setting::Set(min_typo_word_len), disable_on_words: Setting::Set(disabled_words), disable_on_attributes: Setting::Set(disabled_attributes), }; diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 265f80762..3aefa1f5e 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -41,7 +41,7 @@ pub struct Unchecked; #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] -pub struct MinWordLengthTypoSetting { +pub struct MinWordSizeTyposSetting { #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[serde(default, skip_serializing_if = "Setting::is_not_set")] pub one_typo: Setting, @@ -60,7 +60,7 @@ pub struct TypoSettings { pub enabled: Setting, #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[serde(default, skip_serializing_if = "Setting::is_not_set")] - pub min_word_length_for_typo: Setting, + pub min_word_size_for_typos: Setting, #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[serde(default, skip_serializing_if = "Setting::is_not_set")] pub disable_on_words: Setting>, @@ -381,7 +381,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match value.min_word_length_for_typo { + match value.min_word_size_for_typos { Setting::Set(ref setting) => { match setting.one_typo { Setting::Set(val) => builder.set_min_word_len_one_typo(val), From 
b57ad15a242d498412083b5fc7f32b29d3434a8b Mon Sep 17 00:00:00 2001 From: Morgane Dubus <30866152+mdubus@users.noreply.github.com> Date: Wed, 20 Apr 2022 11:14:42 +0200 Subject: [PATCH 10/20] Update dashboard to v.0.1.10 --- meilisearch-http/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 6c7d04f83..86068e7a5 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -103,5 +103,5 @@ mini-dashboard = [ tikv-jemallocator = "0.4.3" [package.metadata.mini-dashboard] -assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.9/build.zip" -sha1 = "b1833c3e5dc6b5d9d519ae4834935ae6c8a47024" +assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.10/build.zip" +sha1 = "1adf96592c267425c110bfefc36b7fc6bfb0f93d" From 58a1124e9a496c3072d55bc19312620e755f5fa7 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 19 Apr 2022 16:49:38 +0200 Subject: [PATCH 11/20] fix(search): formatted field --- meilisearch-http/tests/search/formatted.rs | 376 +++++++++++++++++++++ meilisearch-http/tests/search/mod.rs | 5 +- meilisearch-lib/src/index/search.rs | 19 +- 3 files changed, 392 insertions(+), 8 deletions(-) create mode 100644 meilisearch-http/tests/search/formatted.rs diff --git a/meilisearch-http/tests/search/formatted.rs b/meilisearch-http/tests/search/formatted.rs new file mode 100644 index 000000000..13b8a07d8 --- /dev/null +++ b/meilisearch-http/tests/search/formatted.rs @@ -0,0 +1,376 @@ +use super::*; +use crate::common::Server; +use serde_json::json; + +#[actix_rt::test] +async fn formatted_contain_wildcard() { + let server = Server::new().await; + let index = server.index("test"); + + index + .update_settings(json!({ "displayedAttributes": ["id", "cattos"] })) + .await; + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = 
index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["*"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + }) + ); + + let (response, code) = index + .search_post( + json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"] }), + ) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); + + let (response, code) = index + .search_post( + json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }), + ) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToCrop": ["*"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); +} + +#[actix_rt::test] +async fn format_nested() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + 
response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + "age": 2, + }, + { + "name": "buddy", + "age": 4, + }, + ], + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + }, + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + }, + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + "_formatted": { + "doggos": [ + { + "name": "bobby", + "age": "2", + }, + { + "name": "buddy", + "age": "4", + }, + ], + }, + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "doggos": [ + { + "name": "bobby", + "age": "2", + }, + { + "name": "buddy", + "age": "4", + }, + ], + }, + 
}) + ); +} + +#[actix_rt::test] +async fn displayedattr_2_smol() { + let server = Server::new().await; + let index = server.index("test"); + + // not enough displayed for the other settings + index + .update_settings(json!({ "displayedAttributes": ["id"] })) + .await; + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToHighlight": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToCrop": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToHighlight": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToCrop": ["cattos"] })) + .await; + 
assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"][0], json!({})); + + let (response, code) = index + .search_post( + json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }), + ) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"][0], json!({})); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "id": "852", + } + }) + ); +} diff --git a/meilisearch-http/tests/search/mod.rs b/meilisearch-http/tests/search/mod.rs index 65ea67a70..3353f16c7 100644 --- a/meilisearch-http/tests/search/mod.rs +++ b/meilisearch-http/tests/search/mod.rs @@ -2,12 +2,13 @@ // should be tested in its own module to isolate tests and keep the tests readable. 
mod errors; +mod formatted; use crate::common::Server; use once_cell::sync::Lazy; use serde_json::{json, Value}; -static DOCUMENTS: Lazy = Lazy::new(|| { +pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { json!([ { "title": "Shazam!", @@ -32,7 +33,7 @@ static DOCUMENTS: Lazy = Lazy::new(|| { ]) }); -static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { +pub(self) static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { json!([ { "id": 852, diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index ea542ec10..7c12f985e 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -232,14 +232,22 @@ impl Index { let documents_iter = self.documents(&rtxn, documents_ids)?; for (_id, obkv) in documents_iter { - let mut document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?; + // First generate a document with all the displayed fields + let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; + + // select the attributes to retrieve + let attributes_to_retrieve = to_retrieve_ids + .iter() + .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); + let mut document = + permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); let matches_info = query .matches .then(|| compute_matches(&matching_words, &document, &analyzer)); let formatted = format_fields( - &document, + &displayed_document, &fields_ids_map, &formatter, &matching_words, @@ -475,7 +483,7 @@ fn add_non_formatted_ids_to_formatted_options( } fn make_document( - attributes_to_retrieve: &BTreeSet, + displayed_attributes: &BTreeSet, field_ids_map: &FieldsIdsMap, obkv: obkv::KvReaderU16, ) -> Result { @@ -493,11 +501,11 @@ fn make_document( } // select the attributes to retrieve - let attributes_to_retrieve = attributes_to_retrieve + let displayed_attributes = displayed_attributes .iter() .map(|&fid| field_ids_map.name(fid).expect("Missing field name")); - let document = 
permissive_json_pointer::select_values(&document, attributes_to_retrieve); + let document = permissive_json_pointer::select_values(&document, displayed_attributes); Ok(document) } @@ -514,7 +522,6 @@ fn format_fields>( // before. .map(|&fid| field_ids_map.name(fid).unwrap()) .collect(); - let mut document = permissive_json_pointer::select_values(document, selectors.iter().copied()); permissive_json_pointer::map_leaf_values(&mut document, selectors, |key, value| { From a7fd199ded931450e35c3a6f2c7ee32c05b39f41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 20 Apr 2022 12:24:46 +0200 Subject: [PATCH 12/20] Fix typo reseting by upgrading milli to v0.26.2 --- Cargo.lock | 10 +++++----- meilisearch-auth/Cargo.toml | 2 +- meilisearch-lib/Cargo.toml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 965328ab7..d4eb6d3d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1087,7 +1087,7 @@ dependencies = [ [[package]] name = "filter-parser" version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.1#a68e3a79fbdbb366ab162428884e5879a9af9d5c" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.2#b0c4789301acb70e740f971ec0996c550cac8540" dependencies = [ "nom", "nom_locate", @@ -1114,7 +1114,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.1#a68e3a79fbdbb366ab162428884e5879a9af9d5c" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.2#b0c4789301acb70e740f971ec0996c550cac8540" dependencies = [ "serde_json", ] @@ -1617,7 +1617,7 @@ dependencies = [ [[package]] name = "json-depth-checker" version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.1#a68e3a79fbdbb366ab162428884e5879a9af9d5c" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.2#b0c4789301acb70e740f971ec0996c550cac8540" dependencies = [ "serde_json", ] @@ 
-2145,8 +2145,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.26.1" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.1#a68e3a79fbdbb366ab162428884e5879a9af9d5c" +version = "0.26.2" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.2#b0c4789301acb70e740f971ec0996c550cac8540" dependencies = [ "bimap", "bincode", diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index 92d3fdbe1..c1e047da9 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] enum-iterator = "0.7.0" meilisearch-error = { path = "../meilisearch-error" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.1" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.2" } rand = "0.8.4" serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.79", features = ["preserve_order"] } diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 392cff95e..9316253d5 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -30,7 +30,7 @@ lazy_static = "1.4.0" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-error = { path = "../meilisearch-error" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.1" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.2" } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" From 1ef87cc6d0a80d8f044c5bc23b361dd6cce590e2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 20 Apr 2022 17:44:08 +0200 Subject: [PATCH 13/20] chore: move permissive-json-pointer in the meilisearch repository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update permissive-json-pointer/src/lib.rs Co-authored-by: Clément Renault --- Cargo.lock | 9 +- Cargo.toml | 1 + meilisearch-lib/Cargo.toml | 2 +- permissive-json-pointer/Cargo.toml | 12 + 
permissive-json-pointer/README.md | 134 +++++ permissive-json-pointer/src/lib.rs | 786 +++++++++++++++++++++++++++++ 6 files changed, 941 insertions(+), 3 deletions(-) create mode 100644 permissive-json-pointer/Cargo.toml create mode 100644 permissive-json-pointer/README.md create mode 100644 permissive-json-pointer/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index d4eb6d3d5..ff5c0b5ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -402,6 +402,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "big_s" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199edb7b90631283b10c2422e6a0bc8b7d987bf732995ba1de53b576c97e51a8" + [[package]] name = "bimap" version = "0.6.2" @@ -2486,9 +2492,8 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "permissive-json-pointer" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2125f5fc44a45ffd265ce6ab343842f71df469d173f923f234e3a8df7a8f1ba6" dependencies = [ + "big_s", "serde_json", ] diff --git a/Cargo.toml b/Cargo.toml index a27a29634..03f4f5597 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,5 @@ members = [ "meilisearch-error", "meilisearch-lib", "meilisearch-auth", + "permissive-json-pointer", ] diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 9316253d5..aa18eb97d 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -36,7 +36,7 @@ num_cpus = "1.13.1" obkv = "0.2.0" once_cell = "1.10.0" parking_lot = "0.12.0" -permissive-json-pointer = "0.2.0" +permissive-json-pointer = { path = "../permissive-json-pointer" } rand = "0.8.5" rayon = "1.5.1" regex = "1.5.5" diff --git a/permissive-json-pointer/Cargo.toml b/permissive-json-pointer/Cargo.toml new file mode 100644 index 000000000..b50f30f19 --- /dev/null +++ 
b/permissive-json-pointer/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "permissive-json-pointer" +version = "0.2.0" +edition = "2021" +description = "A permissive json pointer" +readme = "README.md" + +[dependencies] +serde_json = "1.0" + +[dev-dependencies] +big_s = "1.0" diff --git a/permissive-json-pointer/README.md b/permissive-json-pointer/README.md new file mode 100644 index 000000000..6a94cf00d --- /dev/null +++ b/permissive-json-pointer/README.md @@ -0,0 +1,134 @@ +# Permissive json pointer + +This crate provides an interface a little bit similar to what you know as “json pointer”. +But it’s actually doing something quite different. + +## The API + +The main function of this crate is [`select_values`]. +It takes one object as a parameter and a list of selectors. +It then returns a new object containing only the fields you selected. + +## The selectors + +The syntax for the selector is easier than with other APIs. +There is only ONE special symbol, it’s the `.`. + +If you write `dog` and provide the following object; +```json +{ + "dog": "bob", + "cat": "michel" +} +``` +You’ll get back; +```json +{ + "dog": "bob", +} +``` + +Easy right? + +Now the dot can either be part of a field name, or separate the levels of a nested object. + +For example, if you have the following json; +```json +{ + "dog.name": "jean", + "dog": { + "name": "bob", + "age": 6 + } +} +``` + +What a crappy json! But never underestimate your users, they [_WILL_](https://xkcd.com/1172/) +somehow base their entire workflow on this kind of json. +Here with the `dog.name` selector both fields will be +selected and the following json will be returned; +```json +{ + "dog.name": "jean", + "dog": { + "name": "bob", + } +} +``` + +And as you can guess, this crate is as permissive as possible. +It’ll match everything it can! 
+Consider this even more crappy json; +```json +{ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob" + }, + "pet": { + "dog.name": "michel" + }, + "pet": { + "dog": { + "name": "milan" + } + } +} +``` +If you write `pet.dog.name` everything will be selected. + +## Matching arrays + +With this kind of selector you can’t match a specific element in an array. +Your selector will be applied to all the elements _in_ the array. + +Consider the following json; +```json +{ + "pets": [ + { + "animal": "dog", + "race": "bernese mountain", + }, + { + "animal": "dog", + "race": "golden retriever", + }, + { + "animal": "cat", + "age": 8, + } + ] +} +``` + +With the filter `pets.animal` you’ll get; +```json +{ + "pets": [ + { + "animal": "dog", + }, + { + "animal": "dog", + }, + { + "animal": "cat", + } + ] +} +``` + +Empty elements in an array get removed. So if you were to look +for `pets.age` you would only get; +```json +{ + "pets": [ + { + "age": 8, + } + ] +} +``` + +And I think that’s all you need to know 🎉 \ No newline at end of file diff --git a/permissive-json-pointer/src/lib.rs b/permissive-json-pointer/src/lib.rs new file mode 100644 index 000000000..56382beae --- /dev/null +++ b/permissive-json-pointer/src/lib.rs @@ -0,0 +1,786 @@ +#![doc = include_str!("../README.md")] + +use std::collections::HashSet; + +use serde_json::*; + +type Document = Map; + +const SPLIT_SYMBOL: char = '.'; + +/// Returns `true` if the `selector` match the `key`. 
+/// +/// ```text +/// Example: +/// `animaux` match `animaux` +/// `animaux.chien` match `animaux` +/// `animaux.chien` match `animaux` +/// `animaux.chien.nom` match `animaux` +/// `animaux.chien.nom` match `animaux.chien` +/// ----------------------------------------- +/// `animaux` doesn't match `animaux.chien` +/// `animaux.` doesn't match `animaux` +/// `animaux.ch` doesn't match `animaux.chien` +/// `animau` doesn't match `animaux` +/// ``` +fn contained_in(selector: &str, key: &str) -> bool { + selector.starts_with(key) + && selector[key.len()..] + .chars() + .next() + .map(|c| c == SPLIT_SYMBOL) + .unwrap_or(true) +} + +/// Map the selected leaf values of a json allowing you to update only the fields that were selected. +/// ``` +/// use serde_json::{Value, json}; +/// use permissive_json_pointer::map_leaf_values; +/// +/// let mut value: Value = json!({ +/// "jean": { +/// "age": 8, +/// "race": { +/// "name": "bernese mountain", +/// "size": "80cm", +/// } +/// } +/// }); +/// map_leaf_values( +/// value.as_object_mut().unwrap(), +/// ["jean.race.name"], +/// |key, value| match (value, dbg!(key)) { +/// (Value::String(name), "jean.race.name") => *name = "patou".to_string(), +/// _ => unreachable!(), +/// }, +/// ); +/// assert_eq!( +/// value, +/// json!({ +/// "jean": { +/// "age": 8, +/// "race": { +/// "name": "patou", +/// "size": "80cm", +/// } +/// } +/// }) +/// ); +/// ``` +pub fn map_leaf_values<'a>( + value: &mut Map, + selectors: impl IntoIterator, + mut mapper: impl FnMut(&str, &mut Value), +) { + let selectors: Vec<_> = selectors.into_iter().collect(); + map_leaf_values_in_object(value, &selectors, "", &mut mapper); +} + +pub fn map_leaf_values_in_object<'a>( + value: &mut Map, + selectors: &[&'a str], + base_key: &str, + mapper: &mut impl FnMut(&str, &mut Value), +) { + for (key, value) in value.iter_mut() { + let base_key = if base_key.is_empty() { + key.to_string() + } else { + format!("{}{}{}", base_key, SPLIT_SYMBOL, key) + }; + + // 
here if the user only specified `doggo` we need to iterate in all the fields of `doggo` + // so we check the contained_in on both side + let should_continue = selectors + .iter() + .any(|selector| contained_in(selector, &base_key) || contained_in(&base_key, selector)); + + if should_continue { + match value { + Value::Object(object) => { + map_leaf_values_in_object(object, selectors, &base_key, mapper) + } + Value::Array(array) => { + map_leaf_values_in_array(array, selectors, &base_key, mapper) + } + value => mapper(&base_key, value), + } + } + } +} + +pub fn map_leaf_values_in_array( + values: &mut [Value], + selectors: &[&str], + base_key: &str, + mapper: &mut impl FnMut(&str, &mut Value), +) { + for value in values.iter_mut() { + match value { + Value::Object(object) => map_leaf_values_in_object(object, selectors, base_key, mapper), + Value::Array(array) => map_leaf_values_in_array(array, selectors, base_key, mapper), + value => mapper(base_key, value), + } + } +} + +/// Permissively selects values in a json with a list of selectors. +/// Returns a new json containing all the selected fields. 
+/// ``` +/// use serde_json::*; +/// use permissive_json_pointer::select_values; +/// +/// let value: Value = json!({ +/// "name": "peanut", +/// "age": 8, +/// "race": { +/// "name": "bernese mountain", +/// "avg_age": 12, +/// "size": "80cm", +/// }, +/// }); +/// let value: &Map = value.as_object().unwrap(); +/// +/// let res: Value = select_values(value, vec!["name", "race.name"]).into(); +/// assert_eq!( +/// res, +/// json!({ +/// "name": "peanut", +/// "race": { +/// "name": "bernese mountain", +/// }, +/// }) +/// ); +/// ``` +pub fn select_values<'a>( + value: &Map, + selectors: impl IntoIterator, +) -> Map { + let selectors = selectors.into_iter().collect(); + create_value(value, selectors) +} + +fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document { + let mut new_value: Document = Map::new(); + + for (key, value) in value.iter() { + // first we insert all the key at the root level + if selectors.contains(key as &str) { + new_value.insert(key.to_string(), value.clone()); + // if the key was simple we can delete it and move to + // the next key + if is_simple(key) { + selectors.remove(key as &str); + continue; + } + } + + // we extract all the sub selectors matching the current field + // if there was [person.name, person.age] and if we are on the field + // `person`. Then we generate the following sub selectors: [name, age]. 
+ let sub_selectors: HashSet<&str> = selectors + .iter() + .filter(|s| contained_in(s, key)) + .filter_map(|s| s.trim_start_matches(key).get(SPLIT_SYMBOL.len_utf8()..)) + .collect(); + + if !sub_selectors.is_empty() { + match value { + Value::Array(array) => { + let array = create_array(array, &sub_selectors); + if !array.is_empty() { + new_value.insert(key.to_string(), array.into()); + } + } + Value::Object(object) => { + let object = create_value(object, sub_selectors); + if !object.is_empty() { + new_value.insert(key.to_string(), object.into()); + } + } + _ => (), + } + } + } + + new_value +} + +fn create_array(array: &Vec, selectors: &HashSet<&str>) -> Vec { + let mut res = Vec::new(); + + for value in array { + match value { + Value::Array(array) => { + let array = create_array(array, selectors); + if !array.is_empty() { + res.push(array.into()); + } + } + Value::Object(object) => { + let object = create_value(object, selectors.clone()); + if !object.is_empty() { + res.push(object.into()); + } + } + _ => (), + } + } + + res +} + +fn is_simple(key: impl AsRef) -> bool { + !key.as_ref().contains(SPLIT_SYMBOL) +} + +#[cfg(test)] +mod tests { + use big_s::S; + + use super::*; + + #[test] + fn test_contained_in() { + assert!(contained_in("animaux", "animaux")); + assert!(contained_in("animaux.chien", "animaux")); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien.race.bouvier bernois" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien.race.bouvier bernois.fourrure" + )); + assert!(contained_in( + "animaux.chien.race.bouvier bernois.fourrure.couleur", + "animaux.chien.race.bouvier bernois.fourrure.couleur" + )); + + // -- the wrongs + assert!(!contained_in("chien", 
"chat")); + assert!(!contained_in("animaux", "animaux.chien")); + assert!(!contained_in("animaux.chien", "animaux.chat")); + + // -- the strange edge cases + assert!(!contained_in("animaux.chien", "anima")); + assert!(!contained_in("animaux.chien", "animau")); + assert!(!contained_in("animaux.chien", "animaux.")); + assert!(!contained_in("animaux.chien", "animaux.c")); + assert!(!contained_in("animaux.chien", "animaux.ch")); + assert!(!contained_in("animaux.chien", "animaux.chi")); + assert!(!contained_in("animaux.chien", "animaux.chie")); + } + + #[test] + fn simple_key() { + let value: Value = json!({ + "name": "peanut", + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["name"]).into(); + assert_eq!( + res, + json!({ + "name": "peanut", + }) + ); + + let res: Value = select_values(value, vec!["age"]).into(); + assert_eq!( + res, + json!({ + "age": 8, + }) + ); + + let res: Value = select_values(value, vec!["name", "age"]).into(); + assert_eq!( + res, + json!({ + "name": "peanut", + "age": 8, + }) + ); + + let res: Value = select_values(value, vec!["race"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + + let res: Value = select_values(value, vec!["name", "age", "race"]).into(); + assert_eq!( + res, + json!({ + "name": "peanut", + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + } + + #[test] + fn complex_key() { + let value: Value = json!({ + "name": "peanut", + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["race"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + 
} + }) + ); + + println!("RIGHT BEFORE"); + + let res: Value = select_values(value, vec!["race.name"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + } + }) + ); + + let res: Value = select_values(value, vec!["race.name", "race.size"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "size": "80cm", + } + }) + ); + + let res: Value = select_values( + value, + vec!["race.name", "race.size", "race.avg_age", "race.size", "age"], + ) + .into(); + assert_eq!( + res, + json!({ + "age": 8, + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + + let res: Value = select_values(value, vec!["race.name", "race"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + + let res: Value = select_values(value, vec!["race", "race.name"]).into(); + assert_eq!( + res, + json!({ + "race": { + "name": "bernese mountain", + "avg_age": 12, + "size": "80cm", + } + }) + ); + } + + #[test] + fn multi_level_nested() { + let value: Value = json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["jean"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }) + ); + + let res: Value = select_values(value, vec!["jean.age"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "age": 8, + } + }) + ); + + let res: Value = select_values(value, vec!["jean.race.size"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "race": { + "size": "80cm", + } + } + }) + ); + + let res: Value = select_values(value, vec!["jean.race.name", "jean.age"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + } + } + }) + ); + + let res: Value = 
select_values(value, vec!["jean.race"]).into(); + assert_eq!( + res, + json!({ + "jean": { + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }) + ); + } + + #[test] + fn array_and_deep_nested() { + let value: Value = json!({ + "doggos": [ + { + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }, + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + "size": "60cm", + } + } + }, + ] + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["doggos.jean"]).into(); + assert_eq!( + res, + json!({ + "doggos": [ + { + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + } + ] + }) + ); + + let res: Value = select_values(value, vec!["doggos.marc"]).into(); + assert_eq!( + res, + json!({ + "doggos": [ + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + "size": "60cm", + } + } + } + ] + }) + ); + + let res: Value = select_values(value, vec!["doggos.marc.race"]).into(); + assert_eq!( + res, + json!({ + "doggos": [ + { + "marc": { + "race": { + "name": "golden retriever", + "size": "60cm", + } + } + } + ] + }) + ); + + let res: Value = + select_values(value, vec!["doggos.marc.race.name", "doggos.marc.age"]).into(); + + assert_eq!( + res, + json!({ + "doggos": [ + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + } + } + } + ] + }) + ); + + let res: Value = select_values( + value, + vec![ + "doggos.marc.race.name", + "doggos.marc.age", + "doggos.jean.race.name", + "other.field", + ], + ) + .into(); + + assert_eq!( + res, + json!({ + "doggos": [ + { + "jean": { + "race": { + "name": "bernese mountain", + } + } + }, + { + "marc": { + "age": 4, + "race": { + "name": "golden retriever", + } + } + } + ] + }) + ); + } + + #[test] + fn all_conflict_variation() { + let value: Value = json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob" + }, + "pet": { + "dog.name": 
"michel" + }, + "pet": { + "dog": { + "name": "milan" + } + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["pet.dog.name"]).into(); + assert_eq!( + res, + json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob" + }, + "pet": { + "dog.name": "michel" + }, + "pet": { + "dog": { + "name": "milan" + } + } + }) + ); + + let value: Value = json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob", + }, + "pet": { + "dog.name": "michel", + "dog": { + "name": "milan", + } + } + }); + let value: &Document = value.as_object().unwrap(); + + let res: Value = select_values(value, vec!["pet.dog.name", "pet.dog", "pet"]).into(); + + assert_eq!( + res, + json!({ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob", + }, + "pet": { + "dog.name": "michel", + "dog": { + "name": "milan", + } + } + }) + ); + } + + #[test] + fn map_object() { + let mut value: Value = json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + } + }); + + map_leaf_values( + value.as_object_mut().unwrap(), + ["jean.race.name"], + |key, value| match (value, dbg!(key)) { + (Value::String(name), "jean.race.name") => *name = S("patou"), + _ => unreachable!(), + }, + ); + + assert_eq!( + value, + json!({ + "jean": { + "age": 8, + "race": { + "name": "patou", + "size": "80cm", + } + } + }) + ); + + let mut value: Value = json!({ + "jean": { + "age": 8, + "race": { + "name": "bernese mountain", + "size": "80cm", + } + }, + "bob": "lolpied", + }); + + let mut calls = 0; + map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, value| { + calls += 1; + match (value, key) { + (Value::String(name), "jean.race.name") => *name = S("patou"), + _ => println!("Called with {key}"), + } + }); + + assert_eq!(calls, 3); + assert_eq!( + value, + json!({ + "jean": { + "age": 8, + "race": { + "name": "patou", + "size": "80cm", + } + }, + "bob": "lolpied", + }) + ); + } +} From 
3d10af033346a0be3c78585e213c78ccc64bf438 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 26 Apr 2022 14:59:48 +0200 Subject: [PATCH 14/20] feat(http): add analytics on typo tolerance setting --- .../src/routes/indexes/settings.rs | 85 ++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index a234b226e..9d3cc9e83 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -165,7 +165,49 @@ make_setting_route!( "/typo-tolerance", meilisearch_lib::index::updates::TypoSettings, typo_tolerance, - "typoTolerance" + "typoTolerance", + analytics, + |setting: &Option, req: &HttpRequest| { + use serde_json::json; + + analytics.publish( + "TypoToleranceUpdated Updated".to_string(), + json!({ + "typo_tolerance": { + "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))).unwrap_or(true), + "disable_on_attributes": setting + .as_ref() + .map(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())) + .flatten() + .unwrap_or(false), + "disable_on_words": setting + .as_ref() + .map(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())) + .flatten() + .unwrap_or(false), + "min_word_size_for_one_typo": setting + .as_ref() + .map(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.one_typo.set())) + .flatten() + .flatten() + .unwrap_or(5), + "min_word_size_for_two_typos": setting + .as_ref() + .map(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.two_typos.set())) + .flatten() + .flatten() + .unwrap_or(9), + }, + }), + Some(req), + ); + } ); make_setting_route!( @@ -283,6 +325,47 @@ pub async fn update_all( "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()).unwrap_or(0), "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")).unwrap_or(false), }, + 
"typo_tolerance": { + "enabled": settings.typo_tolerance + .as_ref() + .set() + .map(|s| s.enabled.as_ref().set()) + .flatten() + .copied() + .unwrap_or(true), + "disable_on_attributes": settings.typo_tolerance + .as_ref() + .set() + .map(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())) + .flatten() + .unwrap_or(false), + "disable_on_words": settings.typo_tolerance + .as_ref() + .set() + .map(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())) + .flatten() + .unwrap_or(false), + "min_word_size_for_one_typo": settings.typo_tolerance + .as_ref() + .set() + .map(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.one_typo.set())) + .flatten() + .flatten() + .unwrap_or(5), + "min_word_size_for_two_typos": settings.typo_tolerance + .as_ref() + .set() + .map(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.two_typos.set())) + .flatten() + .flatten() + .unwrap_or(9), + }, }), Some(&req), ); From 60253725650f60123d138fa198ebaa54dcb7157e Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 27 Apr 2022 10:41:09 +0200 Subject: [PATCH 15/20] fix(lib): Check db presence after dumps --- meilisearch-lib/src/index_controller/mod.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index ae15e8abb..4cbba1e42 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -178,15 +178,6 @@ impl IndexControllerBuilder { .max_task_store_size .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; - let db_exists = db_path.as_ref().exists(); - if db_exists { - // Directory could be pre-created without any database in. 
- let db_is_empty = db_path.as_ref().read_dir()?.next().is_none(); - if !db_is_empty { - versioning::check_version_file(db_path.as_ref())?; - } - } - if let Some(ref path) = self.import_snapshot { log::info!("Loading from snapshot {:?}", path); load_snapshot( @@ -207,6 +198,15 @@ impl IndexControllerBuilder { )?; } + let db_exists = db_path.as_ref().exists(); + if db_exists { + // Directory could be pre-created without any database in. + let db_is_empty = db_path.as_ref().read_dir()?.next().is_none(); + if !db_is_empty { + versioning::check_version_file(db_path.as_ref())?; + } + } + std::fs::create_dir_all(db_path.as_ref())?; let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?); From 3d0a4a3d18cc3f61e8369bef1e22cf1d5182ad9c Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Wed, 27 Apr 2022 14:49:21 +0200 Subject: [PATCH 16/20] fix(http): fix event name for typo tolerance settings update --- meilisearch-http/src/routes/indexes/settings.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 9d3cc9e83..d5df067d4 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -171,7 +171,7 @@ make_setting_route!( use serde_json::json; analytics.publish( - "TypoToleranceUpdated Updated".to_string(), + "TypoTolerance Updated".to_string(), json!({ "typo_tolerance": { "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))).unwrap_or(true), From 34f75d9792a5c9c5881dab3b65d1220ca295186f Mon Sep 17 00:00:00 2001 From: ad hoc Date: Fri, 29 Apr 2022 16:38:21 +0200 Subject: [PATCH 17/20] settings analytics return null when no set --- .../src/routes/indexes/settings.rs | 45 ++++++++----------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 
d5df067d4..7817d49d6 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -145,8 +145,8 @@ make_setting_route!( "SortableAttributes Updated".to_string(), json!({ "sortable_attributes": { - "total": setting.as_ref().map(|sort| sort.len()).unwrap_or(0), - "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false), + "total": setting.as_ref().map(|sort| sort.len()), + "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")), }, }), Some(req), @@ -174,17 +174,15 @@ make_setting_route!( "TypoTolerance Updated".to_string(), json!({ "typo_tolerance": { - "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))).unwrap_or(true), + "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), "disable_on_attributes": setting .as_ref() .map(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())) - .flatten() - .unwrap_or(false), + .flatten(), "disable_on_words": setting .as_ref() .map(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())) - .flatten() - .unwrap_or(false), + .flatten(), "min_word_size_for_one_typo": setting .as_ref() .map(|s| s.min_word_size_for_typos @@ -192,8 +190,7 @@ make_setting_route!( .set() .map(|s| s.one_typo.set())) .flatten() - .flatten() - .unwrap_or(5), + .flatten(), "min_word_size_for_two_typos": setting .as_ref() .map(|s| s.min_word_size_for_typos @@ -201,8 +198,7 @@ make_setting_route!( .set() .map(|s| s.two_typos.set())) .flatten() - .flatten() - .unwrap_or(9), + .flatten(), }, }), Some(req), @@ -223,7 +219,7 @@ make_setting_route!( "SearchableAttributes Updated".to_string(), json!({ "searchable_attributes": { - "total": setting.as_ref().map(|searchable| searchable.len()).unwrap_or(0), + "total": setting.as_ref().map(|searchable| searchable.len()), }, }), Some(req), @@ -315,15 +311,15 @@ pub async fn update_all( "sort_position": settings.ranking_rules.as_ref().set().map(|sort| 
sort.iter().position(|s| s == "sort")), }, "searchable_attributes": { - "total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()).unwrap_or(0), + "total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()), }, "sortable_attributes": { - "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()).unwrap_or(0), - "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")).unwrap_or(false), + "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), + "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")), }, "filterable_attributes": { - "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()).unwrap_or(0), - "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")).unwrap_or(false), + "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), + "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")), }, "typo_tolerance": { "enabled": settings.typo_tolerance @@ -331,20 +327,17 @@ pub async fn update_all( .set() .map(|s| s.enabled.as_ref().set()) .flatten() - .copied() - .unwrap_or(true), + .copied(), "disable_on_attributes": settings.typo_tolerance .as_ref() .set() .map(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())) - .flatten() - .unwrap_or(false), + .flatten(), "disable_on_words": settings.typo_tolerance .as_ref() .set() .map(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())) - .flatten() - .unwrap_or(false), + .flatten(), "min_word_size_for_one_typo": settings.typo_tolerance .as_ref() .set() @@ -353,8 +346,7 @@ pub async fn update_all( .set() .map(|s| s.one_typo.set())) .flatten() - .flatten() - .unwrap_or(5), + .flatten(), "min_word_size_for_two_typos": settings.typo_tolerance .as_ref() .set() @@ -363,8 +355,7 
@@ pub async fn update_all( .set() .map(|s| s.two_typos.set())) .flatten() - .flatten() - .unwrap_or(9), + .flatten(), }, }), Some(&req), From c46f3587def0383e24e7293d9e4aee8058c8a371 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 4 May 2022 11:25:36 +0200 Subject: [PATCH 18/20] Bump milli to v0.26.4 --- Cargo.lock | 16 ++++++++-------- meilisearch-auth/Cargo.toml | 2 +- meilisearch-lib/Cargo.toml | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff5c0b5ad..173162bc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1092,8 +1092,8 @@ dependencies = [ [[package]] name = "filter-parser" -version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.2#b0c4789301acb70e740f971ec0996c550cac8540" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" dependencies = [ "nom", "nom_locate", @@ -1119,8 +1119,8 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.2#b0c4789301acb70e740f971ec0996c550cac8540" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" dependencies = [ "serde_json", ] @@ -1622,8 +1622,8 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.2#b0c4789301acb70e740f971ec0996c550cac8540" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" dependencies = [ "serde_json", ] @@ -2151,8 +2151,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.26.2" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.2#b0c4789301acb70e740f971ec0996c550cac8540" +version = "0.26.4" +source = 
"git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" dependencies = [ "bimap", "bincode", diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index c1e047da9..193959b05 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] enum-iterator = "0.7.0" meilisearch-error = { path = "../meilisearch-error" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.2" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" } rand = "0.8.4" serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.79", features = ["preserve_order"] } diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index aa18eb97d..41ef7169c 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -30,7 +30,7 @@ lazy_static = "1.4.0" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-error = { path = "../meilisearch-error" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.2" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" From b94eabe48c0e02bd67871e592f3d5fc0e92bb061 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 4 May 2022 11:33:43 +0200 Subject: [PATCH 19/20] apply clippy fixes --- .../src/routes/indexes/settings.rs | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 7817d49d6..222aca580 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -177,27 +177,23 @@ make_setting_route!( "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), "disable_on_attributes": setting .as_ref() - .map(|s| 
s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())) - .flatten(), + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), "disable_on_words": setting .as_ref() - .map(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())) - .flatten(), + .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), "min_word_size_for_one_typo": setting .as_ref() - .map(|s| s.min_word_size_for_typos + .and_then(|s| s.min_word_size_for_typos .as_ref() .set() .map(|s| s.one_typo.set())) - .flatten() .flatten(), "min_word_size_for_two_typos": setting .as_ref() - .map(|s| s.min_word_size_for_typos + .and_then(|s| s.min_word_size_for_typos .as_ref() .set() .map(|s| s.two_typos.set())) - .flatten() .flatten(), }, }), @@ -325,36 +321,31 @@ pub async fn update_all( "enabled": settings.typo_tolerance .as_ref() .set() - .map(|s| s.enabled.as_ref().set()) - .flatten() + .and_then(|s| s.enabled.as_ref().set()) .copied(), "disable_on_attributes": settings.typo_tolerance .as_ref() .set() - .map(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())) - .flatten(), + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), "disable_on_words": settings.typo_tolerance .as_ref() .set() - .map(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())) - .flatten(), + .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), "min_word_size_for_one_typo": settings.typo_tolerance .as_ref() .set() - .map(|s| s.min_word_size_for_typos + .and_then(|s| s.min_word_size_for_typos .as_ref() .set() .map(|s| s.one_typo.set())) - .flatten() .flatten(), "min_word_size_for_two_typos": settings.typo_tolerance .as_ref() .set() - .map(|s| s.min_word_size_for_typos + .and_then(|s| s.min_word_size_for_typos .as_ref() .set() .map(|s| s.two_typos.set())) - .flatten() .flatten(), }, }), From 5c4c38c79c0fdf960932ecbb4eb53b0a41057796 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 4 May 2022 12:10:52 +0200 Subject: 
[PATCH 20/20] Fix the tests about the nested fields --- meilisearch-http/tests/search/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/tests/search/mod.rs b/meilisearch-http/tests/search/mod.rs index 3353f16c7..d9b36e85d 100644 --- a/meilisearch-http/tests/search/mod.rs +++ b/meilisearch-http/tests/search/mod.rs @@ -497,7 +497,7 @@ async fn search_facet_distribution() { assert_eq!(code, 200, "{}", response); let dist = response["facetsDistribution"].as_object().unwrap(); dbg!(&dist); - assert_eq!(dist.len(), 2); + assert_eq!(dist.len(), 3); assert_eq!( dist["doggos.name"], json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})