From 1eb1e73bb3d909d2be54ff835126f008b812aa9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 4 Jul 2022 09:28:23 +0200 Subject: [PATCH] Add integration tests for the EXISTS filter --- milli/src/heed_codec/facet/mod.rs | 2 +- milli/src/update/index_documents/mod.rs | 11 ++++-- milli/tests/assets/test_set.ndjson | 30 +++++++------- milli/tests/search/filters.rs | 6 +++ milli/tests/search/mod.rs | 52 +++++++++++++++++++++++-- 5 files changed, 78 insertions(+), 23 deletions(-) diff --git a/milli/src/heed_codec/facet/mod.rs b/milli/src/heed_codec/facet/mod.rs index 51812d97a..0b2d9186f 100644 --- a/milli/src/heed_codec/facet/mod.rs +++ b/milli/src/heed_codec/facet/mod.rs @@ -6,7 +6,6 @@ mod facet_string_zero_bounds_value_codec; mod field_doc_id_facet_f64_codec; mod field_doc_id_facet_string_codec; -use crate::BEU16; use heed::types::OwnedType; pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec; @@ -18,6 +17,7 @@ pub use self::facet_string_level_zero_value_codec::{ pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec; pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec; pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec; +use crate::BEU16; pub type FieldIdCodec = OwnedType; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 99f474eb6..950b3a417 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -592,7 +592,7 @@ mod tests { use super::*; use crate::documents::DocumentBatchBuilder; use crate::update::DeleteDocuments; - use crate::HashMap; + use crate::{HashMap, BEU16}; #[test] fn simple_document_replacement() { @@ -2012,11 +2012,14 @@ mod tests { let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); let bitmap_colour = - index.facet_id_exists_docids.get(&rtxn, &colour_id).unwrap().unwrap(); + index.facet_id_exists_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap(); assert_eq!(bitmap_colour.into_iter().collect::>(), vec![0, 1, 2, 3, 4, 6, 7]); - let bitmap_colour_green = - index.facet_id_exists_docids.get(&rtxn, &colour_green_id).unwrap().unwrap(); + let bitmap_colour_green = index + .facet_id_exists_docids + .get(&rtxn, &BEU16::new(colour_green_id)) + .unwrap() + .unwrap(); assert_eq!(bitmap_colour_green.into_iter().collect::>(), vec![6, 7]); }; diff --git a/milli/tests/assets/test_set.ndjson b/milli/tests/assets/test_set.ndjson index 6383d274e..427daca8c 100644 --- a/milli/tests/assets/test_set.ndjson +++ b/milli/tests/assets/test_set.ndjson @@ -1,17 +1,17 @@ -{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":43,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","_geo": { "lat": 50.62984446145472, "lng": 3.085712705162039 },"":""} -{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":191,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","_geo": { "lat": 50.63047567664291, "lng": 3.088852230809636 },"":""} -{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"geo_rank":283,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","_geo": { "lat": 50.6321800003937, "lng": 3.088331882262139 },"":""} -{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":1381,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","_geo": { "lat": 50.63728851135729, "lng": 3.0703951595971626 },"":""} -{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":1979,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","_geo": { "lat": 50.64264610511925, "lng": 3.0665099941857634 },"":""} -{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"geo_rank":65022,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","_geo": { "lat": 51.05028653642387, "lng": 3.7301072771642096 },"":""} -{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"geo_rank":34692,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","_geo": { "lat": 50.78776041427129, "lng": 2.661201766290338 },"":""} -{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":202182,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","_geo": { "lat": 48.875617484531965, "lng": 2.346747821504194 },"":""} +{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":43,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","_geo": { "lat": 50.62984446145472, "lng": 3.085712705162039 },"":"", "opt1": [null]} +{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":191,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","_geo": { "lat": 50.63047567664291, "lng": 3.088852230809636 },"":"", "opt1": []} +{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"geo_rank":283,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","_geo": { "lat": 50.6321800003937, "lng": 3.088331882262139 },"":"", "opt1": null} +{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":1381,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","_geo": { "lat": 50.63728851135729, "lng": 3.0703951595971626 },"":"", "opt1": 4} +{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":1979,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","_geo": { "lat": 50.64264610511925, "lng": 3.0665099941857634 },"":"", "opt1": "E"} +{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"geo_rank":65022,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","_geo": { "lat": 51.05028653642387, "lng": 3.7301072771642096 },"":"", "opt1": ["F"]} +{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"geo_rank":34692,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","_geo": { "lat": 50.78776041427129, "lng": 2.661201766290338 },"":"", "opt1": [7]} +{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":202182,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","_geo": { "lat": 48.875617484531965, "lng": 2.346747821504194 },"":"", "opt1": ["H", 8]} {"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"geo_rank":740667,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","_geo": { "lat": 43.973998070351065, "lng": 3.4661837318345032 },"":""} -{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":739020,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","_geo": { "lat": 43.98920130353838, "lng": 3.480519311627928 },"":""} -{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":738830,"title":"hallo creation system","description":"in few word hallo was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","_geo": { "lat": 43.99155030238669, "lng": 3.503453528249425 },"":""} -{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":737861,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","_geo": { "lat": 44.000507750283695, "lng": 3.5116812040621572 },"":""} -{"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"geo_rank":739203,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","_geo": { "lat": 43.99150729038736, "lng": 3.606143957295055 },"":""} -{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":9499586,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","_geo": { "lat": 35.511540843367115, "lng": 138.764368875787 },"":""} -{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"geo_rank":9425163,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","_geo": { "lat": 35.00536702277189, "lng": 135.76118763940391 },"":""} -{"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":9422437,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","_geo": { "lat": 35.06462306367058, "lng": 135.8338440354251 },"":""} +{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":739020,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","_geo": { "lat": 43.98920130353838, "lng": 3.480519311627928 },"":"", "opt1": {}} +{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":738830,"title":"hallo creation system","description":"in few word hallo was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","_geo": { "lat": 43.99155030238669, "lng": 3.503453528249425 },"":"", "opt1": [{"opt2": 11}] } +{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":737861,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","_geo": { "lat": 44.000507750283695, "lng": 3.5116812040621572 },"":"", "opt1": {"opt2": [12]}} +{"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"geo_rank":739203,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","_geo": { "lat": 43.99150729038736, "lng": 3.606143957295055 },"":"", "opt1": [13, [{"opt2": null}]]} +{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":9499586,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","_geo": { "lat": 35.511540843367115, "lng": 138.764368875787 },"":"", "opt1": {"a": 1, "opt2": {"opt3": 14}} } +{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"geo_rank":9425163,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","_geo": { "lat": 35.00536702277189, "lng": 135.76118763940391 },"":"", "opt1": [[[[]]]] } +{"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":9422437,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","_geo": { "lat": 35.06462306367058, "lng": 135.8338440354251 },"":"", "opt1.opt2": 16} {"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":9339230,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","_geo": { "lat": 34.39548365683149, "lng": 132.4535960928883 },"":""} diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs index fe926d17a..1700a1478 100644 --- a/milli/tests/search/filters.rs +++ b/milli/tests/search/filters.rs @@ -80,3 +80,9 @@ test_filter!( lower_complex_filter_2, vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])] ); +test_filter!(exists_filter_1, vec![Right("opt1 EXISTS")]); +test_filter!(exists_filter_1_not, vec![Right("opt1 NOT EXISTS")]); +test_filter!(exists_filter_1_not_alt, vec![Right("NOT opt1 EXISTS")]); +test_filter!(exists_filter_1_double_not, vec![Right("NOT opt1 NOT EXISTS")]); + +test_filter!(exists_filter_2, vec![Right("opt1.opt2 EXISTS")]); diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index 472fbafe0..ec784bfc0 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -9,7 +9,7 @@ use maplit::{hashmap, hashset}; use milli::documents::{DocumentBatchBuilder, DocumentBatchReader}; use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; use milli::{AscDesc, Criterion, DocumentId, Index, Member}; -use serde::Deserialize; +use serde::{Deserialize, Deserializer}; use slice_group_by::GroupBy; mod distinct; @@ -43,6 +43,8 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("tag"), S("asc_desc_rank"), S("_geo"), + S("opt1"), + S("opt1.opt2") }); builder.set_sortable_fields(hashset! { S("tag"), @@ -196,12 +198,44 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option { id = (document.geo_rank < 100000).then(|| document.id.clone()); } else if filter.starts_with("NOT _geoRadius") { id = (document.geo_rank > 1000000).then(|| document.id.clone()); + } else if matches!(filter, "opt1 EXISTS" | "NOT opt1 NOT EXISTS") { + id = document.opt1.is_some().then(|| document.id.clone()); + } else if matches!(filter, "NOT opt1 EXISTS" | "opt1 NOT EXISTS") { + id = document.opt1.is_none().then(|| document.id.clone()); + } else if matches!(filter, "opt1.opt2 EXISTS") { + if document.opt1opt2.is_some() { + id = Some(document.id.clone()); + } else if let Some(opt1) = &document.opt1 { + id = contains_key_rec(opt1, "opt2").then(|| document.id.clone()); + } } id } +pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool { + match v { + serde_json::Value::Array(v) => { + for v in v.iter() { + if contains_key_rec(v, key) { + return true; + } + } + false + } + serde_json::Value::Object(v) => { + for (k, v) in v.iter() { + if k == key || contains_key_rec(v, key) { + return true; + } + } + false + } + _ => false, + } +} + pub fn expected_filtered_ids(filters: Vec, &str>>) -> HashSet { - let dataset: HashSet = + let dataset: Vec = serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect(); let mut filtered_ids: HashSet<_> = dataset.iter().map(|d| d.id.clone()).collect(); @@ -229,7 +263,7 @@ pub fn expected_filtered_ids(filters: Vec, &str>>) -> HashSet, + #[serde(default, deserialize_with = "some_option", rename = "opt1.opt2")] + pub opt1opt2: Option, +} + +fn some_option<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let result = serde_json::Value::deserialize(deserializer)?; + Ok(Some(result)) }