From d1df0d20f9ef29f169ec070fc5ca302f7aed1fd5 Mon Sep 17 00:00:00 2001 From: many Date: Wed, 18 Aug 2021 16:08:40 +0200 Subject: [PATCH] Add integration test of SortBy criterion --- milli/src/lib.rs | 2 +- milli/tests/assets/test_set.ndjson | 34 +++---- milli/tests/search/distinct.rs | 2 +- milli/tests/search/filters.rs | 2 +- milli/tests/search/mod.rs | 24 ++++- milli/tests/search/query_criteria.rs | 138 ++++++++++++++++++++++----- 6 files changed, 152 insertions(+), 50 deletions(-) diff --git a/milli/src/lib.rs b/milli/src/lib.rs index f3bababf6..2b0bd2ed4 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -22,7 +22,7 @@ use std::result::Result as StdResult; use fxhash::{FxHasher32, FxHasher64}; use serde_json::{Map, Value}; -pub use self::criterion::{default_criteria, Criterion}; +pub use self::criterion::{default_criteria, AscDesc, Criterion}; pub use self::error::{ Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError, }; diff --git a/milli/tests/assets/test_set.ndjson b/milli/tests/assets/test_set.ndjson index 599d479ed..89d9f1109 100644 --- a/milli/tests/assets/test_set.ndjson +++ b/milli/tests/assets/test_set.ndjson @@ -1,17 +1,17 @@ -{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","":""} -{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","":""} -{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","":""} -{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","":""} -{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""} -{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","":""} -{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","":""} -{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","":""} -{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","":""} -{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","":""} -{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"title":"ello creation system","description":"in few word ello was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","":""} -{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","":""} -{"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","":""} -{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","":""} -{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","":""} -{"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","":""} -{"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","":""} +{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","":""} +{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","":""} +{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","":""} +{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","":""} +{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""} +{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","":""} +{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","":""} +{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","":""} +{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","":""} +{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","":""} +{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"title":"ello creation system","description":"in few word ello was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","":""} +{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","":""} +{"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","":""} +{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","":""} +{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","":""} +{"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","":""} +{"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","":""} diff --git a/milli/tests/search/distinct.rs b/milli/tests/search/distinct.rs index ef5af3272..f044756eb 100644 --- a/milli/tests/search/distinct.rs +++ b/milli/tests/search/distinct.rs @@ -32,7 +32,7 @@ macro_rules! test_distinct { let SearchResult { documents_ids, .. } = search.execute().unwrap(); let mut distinct_values = HashSet::new(); - let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true) + let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true, &[]) .into_iter() .filter_map(|d| { if distinct_values.contains(&d.$distinct) { diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs index 318197ea3..c810b47af 100644 --- a/milli/tests/search/filters.rs +++ b/milli/tests/search/filters.rs @@ -29,7 +29,7 @@ macro_rules! test_filter { let SearchResult { documents_ids, .. } = search.execute().unwrap(); let filtered_ids = search::expected_filtered_ids($filter); - let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true) + let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true, &[]) .into_iter() .filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None }) .collect(); diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index b84d3fada..7d4043ff1 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -1,3 +1,4 @@ +use std::cmp::Reverse; use std::collections::HashSet; use big_s::S; @@ -5,7 +6,7 @@ use either::{Either, Left, Right}; use heed::EnvOpenOptions; use maplit::{hashmap, hashset}; use milli::update::{IndexDocuments, Settings, UpdateFormat}; -use milli::{Criterion, DocumentId, Index}; +use milli::{AscDesc, Criterion, DocumentId, Index}; use serde::Deserialize; use slice_group_by::GroupBy; @@ -36,6 +37,10 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("tag"), S("asc_desc_rank"), }); + builder.set_sortable_fields(hashset! { + S("tag"), + S("asc_desc_rank"), + }); builder.set_synonyms(hashmap! { S("hello") => vec![S("good morning")], S("world") => vec![S("earth")], @@ -67,6 +72,7 @@ pub fn expected_order( criteria: &[Criterion], authorize_typo: bool, optional_words: bool, + sort_by: &[AscDesc], ) -> Vec { let dataset = serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect(); @@ -90,7 +96,14 @@ pub fn expected_order( new_groups .extend(group.linear_group_by_key(|d| d.proximity_rank).map(Vec::from)); } - Criterion::Sort => todo!("sort not supported right now"), + Criterion::Sort if sort_by == [AscDesc::Asc(S("tag"))] => { + group.sort_by_key(|d| d.sort_by_rank); + new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from)); + } + Criterion::Sort if sort_by == [AscDesc::Desc(S("tag"))] => { + group.sort_by_key(|d| Reverse(d.sort_by_rank)); + new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from)); + } Criterion::Typo => { group.sort_by_key(|d| d.typo_rank); new_groups.extend(group.linear_group_by_key(|d| d.typo_rank).map(Vec::from)); @@ -105,11 +118,13 @@ pub fn expected_order( .extend(group.linear_group_by_key(|d| d.asc_desc_rank).map(Vec::from)); } Criterion::Desc(field_name) if field_name == "asc_desc_rank" => { - group.sort_by_key(|d| std::cmp::Reverse(d.asc_desc_rank)); + group.sort_by_key(|d| Reverse(d.asc_desc_rank)); new_groups .extend(group.linear_group_by_key(|d| d.asc_desc_rank).map(Vec::from)); } - Criterion::Asc(_) | Criterion::Desc(_) => new_groups.push(group.clone()), + Criterion::Asc(_) | Criterion::Desc(_) | Criterion::Sort => { + new_groups.push(group.clone()) + } } } groups = std::mem::take(&mut new_groups); @@ -186,6 +201,7 @@ pub struct TestDocument { pub attribute_rank: u32, pub exact_rank: u32, pub asc_desc_rank: u32, + pub sort_by_rank: u32, pub title: String, pub description: String, pub tag: String, diff --git a/milli/tests/search/query_criteria.rs b/milli/tests/search/query_criteria.rs index f814508f5..1723c1d6f 100644 --- a/milli/tests/search/query_criteria.rs +++ b/milli/tests/search/query_criteria.rs @@ -1,6 +1,6 @@ use big_s::S; use milli::update::Settings; -use milli::{Criterion, Search, SearchResult}; +use milli::{AscDesc, Criterion, Search, SearchResult}; use Criterion::*; use crate::search::{self, EXTERNAL_DOCUMENTS_IDS}; @@ -11,7 +11,7 @@ const ALLOW_OPTIONAL_WORDS: bool = true; const DISALLOW_OPTIONAL_WORDS: bool = false; macro_rules! test_criterion { - ($func:ident, $optional_word:ident, $authorize_typos:ident, $criteria:expr) => { + ($func:ident, $optional_word:ident, $authorize_typos:ident, $criteria:expr, $sort_criteria:expr) => { #[test] fn $func() { let criteria = $criteria; @@ -23,82 +23,168 @@ macro_rules! test_criterion { search.limit(EXTERNAL_DOCUMENTS_IDS.len()); search.authorize_typos($authorize_typos); search.optional_words($optional_word); + search.sort_criteria($sort_criteria); let SearchResult { documents_ids, .. } = search.execute().unwrap(); - let expected_external_ids: Vec<_> = - search::expected_order(&criteria, $authorize_typos, $optional_word) - .into_iter() - .map(|d| d.id) - .collect(); + let expected_external_ids: Vec<_> = search::expected_order( + &criteria, + $authorize_typos, + $optional_word, + &$sort_criteria[..], + ) + .into_iter() + .map(|d| d.id) + .collect(); let documents_ids = search::internal_to_external_ids(&index, &documents_ids); assert_eq!(documents_ids, expected_external_ids); } }; } -test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![]); -test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![]); -test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words]); -test_criterion!(attribute_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Attribute]); -test_criterion!(attribute_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Attribute]); -test_criterion!(exactness_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Exactness]); -test_criterion!(exactness_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Exactness]); -test_criterion!(proximity_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Proximity]); -test_criterion!(proximity_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Proximity]); +test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![], vec![]); +test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![], vec![]); +test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words], vec![]); +test_criterion!( + attribute_allow_typo, + DISALLOW_OPTIONAL_WORDS, + ALLOW_TYPOS, + vec![Attribute], + vec![] +); +test_criterion!( + attribute_disallow_typo, + DISALLOW_OPTIONAL_WORDS, + DISALLOW_TYPOS, + vec![Attribute], + vec![] +); +test_criterion!( + exactness_allow_typo, + DISALLOW_OPTIONAL_WORDS, + ALLOW_TYPOS, + vec![Exactness], + vec![] +); +test_criterion!( + exactness_disallow_typo, + DISALLOW_OPTIONAL_WORDS, + DISALLOW_TYPOS, + vec![Exactness], + vec![] +); +test_criterion!( + proximity_allow_typo, + DISALLOW_OPTIONAL_WORDS, + ALLOW_TYPOS, + vec![Proximity], + vec![] +); +test_criterion!( + proximity_disallow_typo, + DISALLOW_OPTIONAL_WORDS, + DISALLOW_TYPOS, + vec![Proximity], + vec![] +); test_criterion!( asc_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, - vec![Asc(S("asc_desc_rank"))] + vec![Asc(S("asc_desc_rank"))], + vec![] ); test_criterion!( asc_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, - vec![Asc(S("asc_desc_rank"))] + vec![Asc(S("asc_desc_rank"))], + vec![] ); test_criterion!( desc_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, - vec![Desc(S("asc_desc_rank"))] + vec![Desc(S("asc_desc_rank"))], + vec![] ); test_criterion!( desc_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, - vec![Desc(S("asc_desc_rank"))] + vec![Desc(S("asc_desc_rank"))], + vec![] ); test_criterion!( asc_unexisting_field_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, - vec![Asc(S("unexisting_field"))] + vec![Asc(S("unexisting_field"))], + vec![] ); test_criterion!( asc_unexisting_field_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, - vec![Asc(S("unexisting_field"))] + vec![Asc(S("unexisting_field"))], + vec![] ); test_criterion!( desc_unexisting_field_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, - vec![Desc(S("unexisting_field"))] + vec![Desc(S("unexisting_field"))], + vec![] ); test_criterion!( desc_unexisting_field_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, - vec![Desc(S("unexisting_field"))] + vec![Desc(S("unexisting_field"))], + vec![] +); +test_criterion!(empty_sort_by_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Sort], vec![]); +test_criterion!( + empty_sort_by_disallow_typo, + DISALLOW_OPTIONAL_WORDS, + DISALLOW_TYPOS, + vec![Sort], + vec![] +); +test_criterion!( + sort_by_asc_allow_typo, + DISALLOW_OPTIONAL_WORDS, + ALLOW_TYPOS, + vec![Sort], + vec![AscDesc::Asc(S("tag"))] +); +test_criterion!( + sort_by_asc_disallow_typo, + DISALLOW_OPTIONAL_WORDS, + DISALLOW_TYPOS, + vec![Sort], + vec![AscDesc::Asc(S("tag"))] +); +test_criterion!( + sort_by_desc_allow_typo, + DISALLOW_OPTIONAL_WORDS, + ALLOW_TYPOS, + vec![Sort], + vec![AscDesc::Desc(S("tag"))] +); +test_criterion!( + sort_by_desc_disallow_typo, + DISALLOW_OPTIONAL_WORDS, + DISALLOW_TYPOS, + vec![Sort], + vec![AscDesc::Desc(S("tag"))] ); test_criterion!( default_criteria_order, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, - vec![Words, Typo, Proximity, Attribute, Exactness] + vec![Words, Typo, Proximity, Attribute, Exactness], + vec![] ); #[test] @@ -262,7 +348,7 @@ fn criteria_mixup() { let SearchResult { documents_ids, .. } = search.execute().unwrap(); let expected_external_ids: Vec<_> = - search::expected_order(&criteria, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS) + search::expected_order(&criteria, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, &[]) .into_iter() .map(|d| d.id) .collect();