From 64571c8288b358096fff5bd992d2ee168516e4dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 15 Mar 2023 14:57:17 +0100 Subject: [PATCH] Improve the testing of the filters --- milli/src/snapshot_tests.rs | 8 +++ milli/src/update/index_documents/mod.rs | 77 ++++++++++++++++++++++++- milli/tests/search/filters.rs | 6 ++ milli/tests/search/mod.rs | 51 ++++++++-------- 4 files changed, 114 insertions(+), 28 deletions(-) diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index c6ea8f3dd..85d1bc626 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -276,6 +276,11 @@ pub fn snap_facet_id_is_null_docids(index: &Index) -> String { &format!("{facet_id:<3} {}", display_bitmap(&docids)) }) } +pub fn snap_facet_id_is_empty_docids(index: &Index) -> String { + make_db_snap_from_iter!(index, facet_id_is_empty_docids, |(facet_id, docids)| { + &format!("{facet_id:<3} {}", display_bitmap(&docids)) + }) +} pub fn snap_facet_id_string_docids(index: &Index) -> String { make_db_snap_from_iter!(index, facet_id_string_docids, |( FacetGroupKey { field_id, level, left_bound }, @@ -503,6 +508,9 @@ macro_rules! full_snap_of_db { ($index:ident, facet_id_is_null_docids) => {{ $crate::snapshot_tests::snap_facet_id_is_null_docids(&$index) }}; + ($index:ident, facet_id_is_empty_docids) => {{ + $crate::snapshot_tests::snap_facet_id_is_empty_docids(&$index) + }}; ($index:ident, documents_ids) => {{ $crate::snapshot_tests::snap_documents_ids(&$index) }}; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 7b9bd7834..a0bf1400d 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -1766,6 +1766,10 @@ mod tests { "id": 0, "colour": null, }, + { + "id": 1, + "colour": [null], // must not be returned + }, { "id": 6, "colour": { @@ -1835,14 +1839,14 @@ mod tests { .get(&rtxn, &BEU16::new(colour_green_id)) .unwrap() .unwrap(); - assert_eq!(bitmap_colour_green.into_iter().collect::>(), vec![1]); + assert_eq!(bitmap_colour_green.into_iter().collect::>(), vec![2]); let bitmap_colour_blue = index .facet_id_is_null_docids .get(&rtxn, &BEU16::new(colour_blue_id)) .unwrap() .unwrap(); - assert_eq!(bitmap_colour_blue.into_iter().collect::>(), vec![2]); + assert_eq!(bitmap_colour_blue.into_iter().collect::>(), vec![3]); }; let faceted_fields = hashset!(S("colour")); @@ -1866,6 +1870,75 @@ mod tests { check_ok(&index); } + #[test] + fn index_documents_check_is_empty_database() { + let content = || { + documents!([ + {"id": 0, "tags": null }, + {"id": 1, "tags": [null] }, + {"id": 2, "tags": [] }, + {"id": 3, "tags": ["hello","world"] }, + {"id": 4, "tags": [""] }, + {"id": 5 }, + {"id": 6, "tags": {} }, + {"id": 7, "tags": {"green": "cool"} }, + {"id": 8, "tags": {"green": ""} }, + {"id": 9, "tags": "" }, + {"id": 10, "tags": { "green": null } }, + {"id": 11, "tags": { "green": { "blue": null } } }, + {"id": 12, "tags": { "green": { "blue": [] } } } + ]) + }; + + let check_ok = |index: &Index| { + let rtxn = index.read_txn().unwrap(); + let facets = index.faceted_fields(&rtxn).unwrap(); + assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue"))); + + let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap(); + let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap(); + let tags_blue_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green.blue").unwrap(); + + let bitmap_empty_tags = + index.facet_id_is_empty_docids.get(&rtxn, &BEU16::new(tags_id)).unwrap().unwrap(); + assert_eq!(bitmap_empty_tags.into_iter().collect::>(), vec![2, 6, 9]); + + let bitmap_tags_green = index + .facet_id_is_empty_docids + .get(&rtxn, &BEU16::new(tags_green_id)) + .unwrap() + .unwrap(); + assert_eq!(bitmap_tags_green.into_iter().collect::>(), vec![8]); + + let bitmap_tags_blue = index + .facet_id_is_empty_docids + .get(&rtxn, &BEU16::new(tags_blue_id)) + .unwrap() + .unwrap(); + assert_eq!(bitmap_tags_blue.into_iter().collect::>(), vec![12]); + }; + + let faceted_fields = hashset!(S("tags")); + + let index = TempIndex::new(); + index.add_documents(content()).unwrap(); + index + .update_settings(|settings| { + settings.set_filterable_fields(faceted_fields.clone()); + }) + .unwrap(); + check_ok(&index); + + let index = TempIndex::new(); + index + .update_settings(|settings| { + settings.set_filterable_fields(faceted_fields.clone()); + }) + .unwrap(); + index.add_documents(content()).unwrap(); + check_ok(&index); + } + #[test] fn primary_key_must_not_contain_floats() { let index = TempIndex::new_with_map_size(4096 * 100); diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs index 57ad6a40b..db5a004e0 100644 --- a/milli/tests/search/filters.rs +++ b/milli/tests/search/filters.rs @@ -93,6 +93,12 @@ test_filter!(null_filter_1_not, vec![Right("opt1 IS NOT NULL")]); test_filter!(null_filter_1_not_alt, vec![Right("NOT opt1 IS NULL")]); test_filter!(null_filter_1_double_not, vec![Right("NOT opt1 IS NOT NULL")]); +test_filter!(empty_filter_1, vec![Right("opt1 IS EMPTY")]); +test_filter!(empty_filter_2, vec![Right("opt1.opt2 IS EMPTY")]); +test_filter!(empty_filter_1_not, vec![Right("opt1 IS NOT EMPTY")]); +test_filter!(empty_filter_1_not_alt, vec![Right("NOT opt1 IS EMPTY")]); +test_filter!(empty_filter_1_double_not, vec![Right("NOT opt1 IS NOT EMPTY")]); + test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]); test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]); test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]); diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index 51852cced..23744c005 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -212,10 +212,22 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option { } else if matches!(filter, "opt1.opt2 IS NULL") { if document.opt1opt2.as_ref().map_or(false, |v| v.is_null()) { id = Some(document.id.clone()); - } else if let Some(opt1) = &document.opt1 { - if !opt1.is_null() { - id = contains_null_rec(opt1, "opt2").then(|| document.id.clone()); - } + } + } else if matches!(filter, "opt1 IS EMPTY" | "NOT opt1 IS NOT EMPTY") { + id = document + .opt1 + .as_ref() + .map_or(false, |v| is_empty_value(v)) + .then(|| document.id.clone()); + } else if matches!(filter, "NOT opt1 IS EMPTY" | "opt1 IS NOT EMPTY") { + id = document + .opt1 + .as_ref() + .map_or(true, |v| !is_empty_value(v)) + .then(|| document.id.clone()); + } else if matches!(filter, "opt1.opt2 IS EMPTY") { + if document.opt1opt2.as_ref().map_or(false, |v| is_empty_value(v)) { + id = Some(document.id.clone()); } } else if matches!( filter, @@ -230,6 +242,15 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option { id } +pub fn is_empty_value(v: &serde_json::Value) -> bool { + match v { + serde_json::Value::String(s) => s.is_empty(), + serde_json::Value::Array(a) => a.is_empty(), + serde_json::Value::Object(o) => o.is_empty(), + _ => false, + } +} + pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool { match v { serde_json::Value::Array(v) => { @@ -252,28 +273,6 @@ pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool { } } -pub fn contains_null_rec(v: &serde_json::Value, key: &str) -> bool { - match v { - serde_json::Value::Object(v) => { - for (k, v) in v.iter() { - if k == key && v.is_null() || contains_null_rec(v, key) { - return true; - } - } - false - } - serde_json::Value::Array(v) => { - for v in v.iter() { - if contains_null_rec(v, key) { - return true; - } - } - false - } - _ => false, - } -} - pub fn expected_filtered_ids(filters: Vec, &str>>) -> HashSet { let dataset: Vec = serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect();