From 02fd06ea0bab560ada6491ed898c2b6cbe4cf065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 11 Jan 2023 12:14:17 +0100 Subject: [PATCH] Integrate deserr --- benchmarks/benches/utils.rs | 5 ++-- cli/src/main.rs | 2 +- milli/Cargo.toml | 1 + milli/src/search/criteria/asc_desc.rs | 6 ++-- milli/src/search/criteria/exactness.rs | 4 +-- milli/src/search/criteria/proximity.rs | 8 ++--- milli/src/update/settings.rs | 41 ++++++++++++++++++-------- milli/tests/search/mod.rs | 3 +- milli/tests/search/query_criteria.rs | 4 +-- 9 files changed, 45 insertions(+), 29 deletions(-) diff --git a/benchmarks/benches/utils.rs b/benchmarks/benches/utils.rs index 511b3b8d5..470d2030d 100644 --- a/benchmarks/benches/utils.rs +++ b/benchmarks/benches/utils.rs @@ -4,6 +4,7 @@ use std::fs::{create_dir_all, remove_dir_all, File}; use std::io::{self, BufRead, BufReader, Cursor, Read, Seek}; use std::num::ParseFloatError; use std::path::Path; +use std::str::FromStr; use criterion::BenchmarkId; use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; @@ -11,7 +12,7 @@ use milli::heed::EnvOpenOptions; use milli::update::{ IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings, }; -use milli::{Filter, Index, Object, TermsMatchingStrategy}; +use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy}; use serde_json::Value; pub struct Conf<'a> { @@ -80,7 +81,7 @@ pub fn base_setup(conf: &Conf) -> Index { builder.reset_criteria(); builder.reset_stop_words(); - let criterion = criterion.iter().map(|s| s.to_string()).collect(); + let criterion = criterion.iter().map(|s| Criterion::from_str(s).unwrap()).collect(); builder.set_criteria(criterion); } diff --git a/cli/src/main.rs b/cli/src/main.rs index c28d3de59..09ee7f984 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -521,7 +521,7 @@ impl Performer for SettingsUpdate { if let Some(criteria) = self.criteria { if !criteria.is_empty() { - update.set_criteria(criteria); + update.set_criteria(criteria.iter().map(|c| c.parse()).collect::>()?); } else { update.reset_criteria(); } diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 4e4fdc483..5bbd7a8ff 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -12,6 +12,7 @@ byteorder = "1.4.3" charabia = { version = "0.7.0", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.6" +deserr = "0.1.4" either = "1.8.0" flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index c0fdbada3..b5afe6778 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -343,7 +343,7 @@ mod tests { use maplit::hashset; use crate::index::tests::TempIndex; - use crate::{AscDesc, Filter, Search, SearchResult}; + use crate::{AscDesc, Criterion, Filter, Search, SearchResult}; // Note that in this test, only the iterative sort algorithms are used. Set the CANDIDATES_THESHOLD // constant to 0 to ensure that the other sort algorithms are also correct. @@ -356,7 +356,7 @@ mod tests { settings.set_primary_key("id".to_owned()); settings .set_sortable_fields(maplit::hashset! { S("id"), S("mod_10"), S("mod_20") }); - settings.set_criteria(vec!["sort".to_owned()]); + settings.set_criteria(vec![Criterion::Sort]); }) .unwrap(); @@ -443,7 +443,7 @@ mod tests { settings.set_primary_key("id".to_owned()); settings.set_filterable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") }); settings.set_sortable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") }); - settings.set_criteria(vec!["sort".to_owned()]); + settings.set_criteria(vec![Criterion::Sort]); }) .unwrap(); diff --git a/milli/src/search/criteria/exactness.rs b/milli/src/search/criteria/exactness.rs index d4a90576c..078a9cd6c 100644 --- a/milli/src/search/criteria/exactness.rs +++ b/milli/src/search/criteria/exactness.rs @@ -497,7 +497,7 @@ mod tests { create_disjoint_combinations, create_non_disjoint_combinations, }; use crate::snapshot_tests::display_bitmap; - use crate::SearchResult; + use crate::{Criterion, SearchResult}; #[test] fn test_exact_words_subcriterion() { @@ -506,7 +506,7 @@ mod tests { index .update_settings(|settings| { settings.set_primary_key(S("id")); - settings.set_criteria(vec!["exactness".to_owned()]); + settings.set_criteria(vec![Criterion::Exactness]); }) .unwrap(); diff --git a/milli/src/search/criteria/proximity.rs b/milli/src/search/criteria/proximity.rs index 1d86f4da1..66e5c95bf 100644 --- a/milli/src/search/criteria/proximity.rs +++ b/milli/src/search/criteria/proximity.rs @@ -599,7 +599,7 @@ mod tests { use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use crate::index::tests::TempIndex; - use crate::{CriterionImplementationStrategy, SearchResult}; + use crate::{Criterion, CriterionImplementationStrategy, SearchResult}; fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec { let mut documents = Vec::new(); @@ -627,9 +627,9 @@ mod tests { .update_settings(|settings| { settings.set_primary_key(S("id")); settings.set_criteria(vec![ - "words".to_owned(), - "typo".to_owned(), - "proximity".to_owned(), + Criterion::Words, + Criterion::Typo, + Criterion::Proximity, ]); }) .unwrap(); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 5f75910dc..f10bfe4e9 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -2,6 +2,7 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use std::result::Result as StdResult; use charabia::{Tokenizer, TokenizerBuilder}; +use deserr::{DeserializeError, DeserializeFromValue}; use itertools::Itertools; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use time::OffsetDateTime; @@ -22,6 +23,25 @@ pub enum Setting { NotSet, } +impl DeserializeFromValue for Setting +where + T: DeserializeFromValue, + E: DeserializeError, +{ + fn deserialize_from_value( + value: deserr::Value, + location: deserr::ValuePointerRef, + ) -> std::result::Result { + match value { + deserr::Value::Null => Ok(Setting::Reset), + _ => T::deserialize_from_value(value, location).map(Setting::Set), + } + } + fn default() -> Option { + Some(Self::NotSet) + } +} + impl Default for Setting { fn default() -> Self { Self::NotSet @@ -93,7 +113,7 @@ pub struct Settings<'a, 't, 'u, 'i> { displayed_fields: Setting>, filterable_fields: Setting>, sortable_fields: Setting>, - criteria: Setting>, + criteria: Setting>, stop_words: Setting>, distinct_field: Setting, synonyms: Setting>>, @@ -173,7 +193,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.criteria = Setting::Reset; } - pub fn set_criteria(&mut self, criteria: Vec) { + pub fn set_criteria(&mut self, criteria: Vec) { self.criteria = Setting::Set(criteria); } @@ -526,14 +546,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } fn update_criteria(&mut self) -> Result<()> { - match self.criteria { - Setting::Set(ref fields) => { - let mut new_criteria = Vec::new(); - for name in fields { - let criterion: Criterion = name.parse()?; - new_criteria.push(criterion); - } - self.index.put_criteria(self.wtxn, &new_criteria)?; + match &self.criteria { + Setting::Set(criteria) => { + self.index.put_criteria(self.wtxn, criteria)?; } Setting::Reset => { self.index.delete_criteria(self.wtxn)?; @@ -977,7 +992,7 @@ mod tests { index .update_settings(|settings| { settings.set_displayed_fields(vec![S("name")]); - settings.set_criteria(vec![S("age:asc")]); + settings.set_criteria(vec![Criterion::Asc("age".to_owned())]); }) .unwrap(); @@ -1246,7 +1261,7 @@ mod tests { .update_settings(|settings| { settings.set_displayed_fields(vec!["hello".to_string()]); settings.set_filterable_fields(hashset! { S("age"), S("toto") }); - settings.set_criteria(vec!["toto:asc".to_string()]); + settings.set_criteria(vec![Criterion::Asc(S("toto"))]); }) .unwrap(); @@ -1280,7 +1295,7 @@ mod tests { .update_settings(|settings| { settings.set_displayed_fields(vec!["hello".to_string()]); // It is only Asc(toto), there is a facet database but it is denied to filter with toto. - settings.set_criteria(vec!["toto:asc".to_string()]); + settings.set_criteria(vec![Criterion::Asc(S("toto"))]); }) .unwrap(); diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index d63df96ec..c2f8acd4d 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -38,8 +38,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut builder = Settings::new(&mut wtxn, &index, &config); - let criteria = criteria.iter().map(|c| c.to_string()).collect(); - builder.set_criteria(criteria); + builder.set_criteria(criteria.to_vec()); builder.set_filterable_fields(hashset! { S("tag"), S("asc_desc_rank"), diff --git a/milli/tests/search/query_criteria.rs b/milli/tests/search/query_criteria.rs index d4aa859a4..16058e941 100644 --- a/milli/tests/search/query_criteria.rs +++ b/milli/tests/search/query_criteria.rs @@ -344,7 +344,7 @@ fn criteria_mixup() { //update criteria let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, &config); - builder.set_criteria(criteria.iter().map(ToString::to_string).collect()); + builder.set_criteria(criteria.clone()); builder.execute(|_| (), || false).unwrap(); wtxn.commit().unwrap(); @@ -436,7 +436,7 @@ fn criteria_ascdesc() { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, &config); - builder.set_criteria(vec![criterion.to_string()]); + builder.set_criteria(vec![criterion.clone()]); builder.execute(|_| (), || false).unwrap(); wtxn.commit().unwrap();