Integrate deserr

This commit is contained in:
Loïc Lecrenier 2023-01-11 12:14:17 +01:00
parent 11ee7daa0f
commit 02fd06ea0b
9 changed files with 45 additions and 29 deletions

View File

@ -4,6 +4,7 @@ use std::fs::{create_dir_all, remove_dir_all, File};
use std::io::{self, BufRead, BufReader, Cursor, Read, Seek}; use std::io::{self, BufRead, BufReader, Cursor, Read, Seek};
use std::num::ParseFloatError; use std::num::ParseFloatError;
use std::path::Path; use std::path::Path;
use std::str::FromStr;
use criterion::BenchmarkId; use criterion::BenchmarkId;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
@ -11,7 +12,7 @@ use milli::heed::EnvOpenOptions;
use milli::update::{ use milli::update::{
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
}; };
use milli::{Filter, Index, Object, TermsMatchingStrategy}; use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
use serde_json::Value; use serde_json::Value;
pub struct Conf<'a> { pub struct Conf<'a> {
@ -80,7 +81,7 @@ pub fn base_setup(conf: &Conf) -> Index {
builder.reset_criteria(); builder.reset_criteria();
builder.reset_stop_words(); builder.reset_stop_words();
let criterion = criterion.iter().map(|s| s.to_string()).collect(); let criterion = criterion.iter().map(|s| Criterion::from_str(s).unwrap()).collect();
builder.set_criteria(criterion); builder.set_criteria(criterion);
} }

View File

@ -521,7 +521,7 @@ impl Performer for SettingsUpdate {
if let Some(criteria) = self.criteria { if let Some(criteria) = self.criteria {
if !criteria.is_empty() { if !criteria.is_empty() {
update.set_criteria(criteria); update.set_criteria(criteria.iter().map(|c| c.parse()).collect::<Result<_, _>>()?);
} else { } else {
update.reset_criteria(); update.reset_criteria();
} }

View File

@ -12,6 +12,7 @@ byteorder = "1.4.3"
charabia = { version = "0.7.0", default-features = false } charabia = { version = "0.7.0", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
crossbeam-channel = "0.5.6" crossbeam-channel = "0.5.6"
deserr = "0.1.4"
either = "1.8.0" either = "1.8.0"
flatten-serde-json = { path = "../flatten-serde-json" } flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7" fst = "0.4.7"

View File

@ -343,7 +343,7 @@ mod tests {
use maplit::hashset; use maplit::hashset;
use crate::index::tests::TempIndex; use crate::index::tests::TempIndex;
use crate::{AscDesc, Filter, Search, SearchResult}; use crate::{AscDesc, Criterion, Filter, Search, SearchResult};
// Note that in this test, only the iterative sort algorithms are used. Set the CANDIDATES_THRESHOLD // Note that in this test, only the iterative sort algorithms are used. Set the CANDIDATES_THRESHOLD
// constant to 0 to ensure that the other sort algorithms are also correct. // constant to 0 to ensure that the other sort algorithms are also correct.
@ -356,7 +356,7 @@ mod tests {
settings.set_primary_key("id".to_owned()); settings.set_primary_key("id".to_owned());
settings settings
.set_sortable_fields(maplit::hashset! { S("id"), S("mod_10"), S("mod_20") }); .set_sortable_fields(maplit::hashset! { S("id"), S("mod_10"), S("mod_20") });
settings.set_criteria(vec!["sort".to_owned()]); settings.set_criteria(vec![Criterion::Sort]);
}) })
.unwrap(); .unwrap();
@ -443,7 +443,7 @@ mod tests {
settings.set_primary_key("id".to_owned()); settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") }); settings.set_filterable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") });
settings.set_sortable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") }); settings.set_sortable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") });
settings.set_criteria(vec!["sort".to_owned()]); settings.set_criteria(vec![Criterion::Sort]);
}) })
.unwrap(); .unwrap();

View File

@ -497,7 +497,7 @@ mod tests {
create_disjoint_combinations, create_non_disjoint_combinations, create_disjoint_combinations, create_non_disjoint_combinations,
}; };
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
use crate::SearchResult; use crate::{Criterion, SearchResult};
#[test] #[test]
fn test_exact_words_subcriterion() { fn test_exact_words_subcriterion() {
@ -506,7 +506,7 @@ mod tests {
index index
.update_settings(|settings| { .update_settings(|settings| {
settings.set_primary_key(S("id")); settings.set_primary_key(S("id"));
settings.set_criteria(vec!["exactness".to_owned()]); settings.set_criteria(vec![Criterion::Exactness]);
}) })
.unwrap(); .unwrap();

View File

@ -599,7 +599,7 @@ mod tests {
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::index::tests::TempIndex; use crate::index::tests::TempIndex;
use crate::{CriterionImplementationStrategy, SearchResult}; use crate::{Criterion, CriterionImplementationStrategy, SearchResult};
fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> { fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
let mut documents = Vec::new(); let mut documents = Vec::new();
@ -627,9 +627,9 @@ mod tests {
.update_settings(|settings| { .update_settings(|settings| {
settings.set_primary_key(S("id")); settings.set_primary_key(S("id"));
settings.set_criteria(vec![ settings.set_criteria(vec![
"words".to_owned(), Criterion::Words,
"typo".to_owned(), Criterion::Typo,
"proximity".to_owned(), Criterion::Proximity,
]); ]);
}) })
.unwrap(); .unwrap();

View File

@ -2,6 +2,7 @@ use std::collections::{BTreeSet, HashMap, HashSet};
use std::result::Result as StdResult; use std::result::Result as StdResult;
use charabia::{Tokenizer, TokenizerBuilder}; use charabia::{Tokenizer, TokenizerBuilder};
use deserr::{DeserializeError, DeserializeFromValue};
use itertools::Itertools; use itertools::Itertools;
use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde::{Deserialize, Deserializer, Serialize, Serializer};
use time::OffsetDateTime; use time::OffsetDateTime;
@ -22,6 +23,25 @@ pub enum Setting<T> {
NotSet, NotSet,
} }
/// Lets a `Setting<T>` be built from a deserr value whenever `T` itself can be.
///
/// Mapping implemented here:
/// - a `null` value becomes `Setting::Reset`;
/// - any other value is handed to `T`'s deserializer and wrapped in `Setting::Set`;
/// - `default()` reports `Setting::NotSet`.
impl<T, E> DeserializeFromValue<E> for Setting<T>
where
    T: DeserializeFromValue<E>,
    E: DeserializeError,
{
    /// Converts `value` into a `Setting<T>`.
    ///
    /// `location` is forwarded untouched to `T::deserialize_from_value`; any error
    /// produced by that call is returned as-is.
    fn deserialize_from_value<V: deserr::IntoValue>(
        value: deserr::Value<V>,
        location: deserr::ValuePointerRef,
    ) -> std::result::Result<Self, E> {
        // An explicit `null` is a request to reset the setting.
        if let deserr::Value::Null = value {
            return Ok(Setting::Reset);
        }

        // Anything else must be a valid `T`.
        let inner = T::deserialize_from_value(value, location)?;
        Ok(Setting::Set(inner))
    }

    /// Always yields `Some(Setting::NotSet)`.
    ///
    /// NOTE(review): presumably deserr consults this when the field is absent from
    /// the input — confirm against the deserr documentation.
    fn default() -> Option<Self> {
        Some(Self::NotSet)
    }
}
impl<T> Default for Setting<T> { impl<T> Default for Setting<T> {
fn default() -> Self { fn default() -> Self {
Self::NotSet Self::NotSet
@ -93,7 +113,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
displayed_fields: Setting<Vec<String>>, displayed_fields: Setting<Vec<String>>,
filterable_fields: Setting<HashSet<String>>, filterable_fields: Setting<HashSet<String>>,
sortable_fields: Setting<HashSet<String>>, sortable_fields: Setting<HashSet<String>>,
criteria: Setting<Vec<String>>, criteria: Setting<Vec<Criterion>>,
stop_words: Setting<BTreeSet<String>>, stop_words: Setting<BTreeSet<String>>,
distinct_field: Setting<String>, distinct_field: Setting<String>,
synonyms: Setting<HashMap<String, Vec<String>>>, synonyms: Setting<HashMap<String, Vec<String>>>,
@ -173,7 +193,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.criteria = Setting::Reset; self.criteria = Setting::Reset;
} }
pub fn set_criteria(&mut self, criteria: Vec<String>) { pub fn set_criteria(&mut self, criteria: Vec<Criterion>) {
self.criteria = Setting::Set(criteria); self.criteria = Setting::Set(criteria);
} }
@ -526,14 +546,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
} }
fn update_criteria(&mut self) -> Result<()> { fn update_criteria(&mut self) -> Result<()> {
match self.criteria { match &self.criteria {
Setting::Set(ref fields) => { Setting::Set(criteria) => {
let mut new_criteria = Vec::new(); self.index.put_criteria(self.wtxn, criteria)?;
for name in fields {
let criterion: Criterion = name.parse()?;
new_criteria.push(criterion);
}
self.index.put_criteria(self.wtxn, &new_criteria)?;
} }
Setting::Reset => { Setting::Reset => {
self.index.delete_criteria(self.wtxn)?; self.index.delete_criteria(self.wtxn)?;
@ -977,7 +992,7 @@ mod tests {
index index
.update_settings(|settings| { .update_settings(|settings| {
settings.set_displayed_fields(vec![S("name")]); settings.set_displayed_fields(vec![S("name")]);
settings.set_criteria(vec![S("age:asc")]); settings.set_criteria(vec![Criterion::Asc("age".to_owned())]);
}) })
.unwrap(); .unwrap();
@ -1246,7 +1261,7 @@ mod tests {
.update_settings(|settings| { .update_settings(|settings| {
settings.set_displayed_fields(vec!["hello".to_string()]); settings.set_displayed_fields(vec!["hello".to_string()]);
settings.set_filterable_fields(hashset! { S("age"), S("toto") }); settings.set_filterable_fields(hashset! { S("age"), S("toto") });
settings.set_criteria(vec!["toto:asc".to_string()]); settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
}) })
.unwrap(); .unwrap();
@ -1280,7 +1295,7 @@ mod tests {
.update_settings(|settings| { .update_settings(|settings| {
settings.set_displayed_fields(vec!["hello".to_string()]); settings.set_displayed_fields(vec!["hello".to_string()]);
// It is only Asc(toto), there is a facet database but it is denied to filter with toto. // It is only Asc(toto), there is a facet database but it is denied to filter with toto.
settings.set_criteria(vec!["toto:asc".to_string()]); settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
}) })
.unwrap(); .unwrap();

View File

@ -38,8 +38,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut builder = Settings::new(&mut wtxn, &index, &config); let mut builder = Settings::new(&mut wtxn, &index, &config);
let criteria = criteria.iter().map(|c| c.to_string()).collect(); builder.set_criteria(criteria.to_vec());
builder.set_criteria(criteria);
builder.set_filterable_fields(hashset! { builder.set_filterable_fields(hashset! {
S("tag"), S("tag"),
S("asc_desc_rank"), S("asc_desc_rank"),

View File

@ -344,7 +344,7 @@ fn criteria_mixup() {
//update criteria //update criteria
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, &config); let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(criteria.iter().map(ToString::to_string).collect()); builder.set_criteria(criteria.clone());
builder.execute(|_| (), || false).unwrap(); builder.execute(|_| (), || false).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -436,7 +436,7 @@ fn criteria_ascdesc() {
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, &config); let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(vec![criterion.to_string()]); builder.set_criteria(vec![criterion.clone()]);
builder.execute(|_| (), || false).unwrap(); builder.execute(|_| (), || false).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();