Improve the testing of the filters

This commit is contained in:
Clément Renault 2023-03-15 14:57:17 +01:00
parent 72123c458b
commit 64571c8288
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 114 additions and 28 deletions

View File

@ -276,6 +276,11 @@ pub fn snap_facet_id_is_null_docids(index: &Index) -> String {
&format!("{facet_id:<3} {}", display_bitmap(&docids)) &format!("{facet_id:<3} {}", display_bitmap(&docids))
}) })
} }
/// Builds a textual snapshot of the index's `facet_id_is_empty_docids` database.
///
/// Each `(facet_id, docids)` entry is rendered as the facet id left-aligned in a
/// three-character column, a space, and then the bitmap as formatted by
/// `display_bitmap`.
///
/// NOTE(review): iteration over the database and the way entries are joined into
/// the final `String` are handled by `make_db_snap_from_iter!`, which is defined
/// elsewhere — confirm there before relying on ordering or line separators.
pub fn snap_facet_id_is_empty_docids(index: &Index) -> String {
make_db_snap_from_iter!(index, facet_id_is_empty_docids, |(facet_id, docids)| {
&format!("{facet_id:<3} {}", display_bitmap(&docids))
})
}
pub fn snap_facet_id_string_docids(index: &Index) -> String { pub fn snap_facet_id_string_docids(index: &Index) -> String {
make_db_snap_from_iter!(index, facet_id_string_docids, |( make_db_snap_from_iter!(index, facet_id_string_docids, |(
FacetGroupKey { field_id, level, left_bound }, FacetGroupKey { field_id, level, left_bound },
@ -503,6 +508,9 @@ macro_rules! full_snap_of_db {
($index:ident, facet_id_is_null_docids) => {{ ($index:ident, facet_id_is_null_docids) => {{
$crate::snapshot_tests::snap_facet_id_is_null_docids(&$index) $crate::snapshot_tests::snap_facet_id_is_null_docids(&$index)
}}; }};
($index:ident, facet_id_is_empty_docids) => {{
$crate::snapshot_tests::snap_facet_id_is_empty_docids(&$index)
}};
($index:ident, documents_ids) => {{ ($index:ident, documents_ids) => {{
$crate::snapshot_tests::snap_documents_ids(&$index) $crate::snapshot_tests::snap_documents_ids(&$index)
}}; }};

View File

@ -1766,6 +1766,10 @@ mod tests {
"id": 0, "id": 0,
"colour": null, "colour": null,
}, },
{
"id": 1,
"colour": [null], // must not be returned
},
{ {
"id": 6, "id": 6,
"colour": { "colour": {
@ -1835,14 +1839,14 @@ mod tests {
.get(&rtxn, &BEU16::new(colour_green_id)) .get(&rtxn, &BEU16::new(colour_green_id))
.unwrap() .unwrap()
.unwrap(); .unwrap();
assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![1]); assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![2]);
let bitmap_colour_blue = index let bitmap_colour_blue = index
.facet_id_is_null_docids .facet_id_is_null_docids
.get(&rtxn, &BEU16::new(colour_blue_id)) .get(&rtxn, &BEU16::new(colour_blue_id))
.unwrap() .unwrap()
.unwrap(); .unwrap();
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![2]); assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
}; };
let faceted_fields = hashset!(S("colour")); let faceted_fields = hashset!(S("colour"));
@ -1866,6 +1870,75 @@ mod tests {
check_ok(&index); check_ok(&index);
} }
/// Checks that the `facet_id_is_empty_docids` database lists exactly the documents
/// whose filterable field holds an empty string, an empty array or an empty object,
/// whether the filterable settings are applied before or after the documents are added.
#[test]
fn index_documents_check_is_empty_database() {
    // One document per shape the `tags` field (and its nested keys) can take.
    let content = || {
        documents!([
            {"id": 0,  "tags": null },
            {"id": 1,  "tags": [null] },
            {"id": 2,  "tags": [] },
            {"id": 3,  "tags": ["hello","world"] },
            {"id": 4,  "tags": [""] },
            {"id": 5 },
            {"id": 6,  "tags": {} },
            {"id": 7,  "tags": {"green": "cool"} },
            {"id": 8,  "tags": {"green": ""} },
            {"id": 9,  "tags": "" },
            {"id": 10, "tags": { "green": null } },
            {"id": 11, "tags": { "green": { "blue": null } } },
            {"id": 12, "tags": { "green": { "blue": [] } } }
        ])
    };

    let check_ok = |index: &Index| {
        let rtxn = index.read_txn().unwrap();

        // The nested keys of `tags` must have been registered as facets too.
        let facets = index.faceted_fields(&rtxn).unwrap();
        assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue")));

        // Resolve every field id up front, from a single read of the map.
        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
        let [tags_id, tags_green_id, tags_blue_id] =
            ["tags", "tags.green", "tags.green.blue"].map(|name| fields_ids_map.id(name).unwrap());

        // Document ids recorded as "empty" for the given field id.
        let empty_docids = |field_id| {
            index
                .facet_id_is_empty_docids
                .get(&rtxn, &BEU16::new(field_id))
                .unwrap()
                .unwrap()
                .into_iter()
                .collect::<Vec<_>>()
        };

        // `[]`, `{}` and `""` are the empty values of `tags`.
        assert_eq!(empty_docids(tags_id), vec![2, 6, 9]);
        // Only document 8 has `tags.green` set to `""`.
        assert_eq!(empty_docids(tags_green_id), vec![8]);
        // Only document 12 has `tags.green.blue` set to `[]`.
        assert_eq!(empty_docids(tags_blue_id), vec![12]);
    };

    let faceted_fields = hashset!(S("tags"));

    // First scenario: documents are indexed before the field becomes filterable.
    let index = TempIndex::new();
    index.add_documents(content()).unwrap();
    index
        .update_settings(|settings| {
            settings.set_filterable_fields(faceted_fields.clone());
        })
        .unwrap();
    check_ok(&index);

    // Second scenario: the field is already filterable when documents are indexed.
    let index = TempIndex::new();
    index
        .update_settings(|settings| {
            settings.set_filterable_fields(faceted_fields.clone());
        })
        .unwrap();
    index.add_documents(content()).unwrap();
    check_ok(&index);
}
#[test] #[test]
fn primary_key_must_not_contain_floats() { fn primary_key_must_not_contain_floats() {
let index = TempIndex::new_with_map_size(4096 * 100); let index = TempIndex::new_with_map_size(4096 * 100);

View File

@ -93,6 +93,12 @@ test_filter!(null_filter_1_not, vec![Right("opt1 IS NOT NULL")]);
test_filter!(null_filter_1_not_alt, vec![Right("NOT opt1 IS NULL")]); test_filter!(null_filter_1_not_alt, vec![Right("NOT opt1 IS NULL")]);
test_filter!(null_filter_1_double_not, vec![Right("NOT opt1 IS NOT NULL")]); test_filter!(null_filter_1_double_not, vec![Right("NOT opt1 IS NOT NULL")]);
// `IS EMPTY` filter cases: a top-level field, a nested field, and the negated
// forms (`IS NOT EMPTY`, a leading `NOT`, and the double negation).
test_filter!(empty_filter_1, vec![Right("opt1 IS EMPTY")]);
test_filter!(empty_filter_2, vec![Right("opt1.opt2 IS EMPTY")]);
test_filter!(empty_filter_1_not, vec![Right("opt1 IS NOT EMPTY")]);
test_filter!(empty_filter_1_not_alt, vec![Right("NOT opt1 IS EMPTY")]);
test_filter!(empty_filter_1_double_not, vec![Right("NOT opt1 IS NOT EMPTY")]);
test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]); test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]);
test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]); test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]);
test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]); test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]);

View File

@ -212,10 +212,22 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
} else if matches!(filter, "opt1.opt2 IS NULL") { } else if matches!(filter, "opt1.opt2 IS NULL") {
if document.opt1opt2.as_ref().map_or(false, |v| v.is_null()) { if document.opt1opt2.as_ref().map_or(false, |v| v.is_null()) {
id = Some(document.id.clone()); id = Some(document.id.clone());
} else if let Some(opt1) = &document.opt1 { }
if !opt1.is_null() { } else if matches!(filter, "opt1 IS EMPTY" | "NOT opt1 IS NOT EMPTY") {
id = contains_null_rec(opt1, "opt2").then(|| document.id.clone()); id = document
} .opt1
.as_ref()
.map_or(false, |v| is_empty_value(v))
.then(|| document.id.clone());
} else if matches!(filter, "NOT opt1 IS EMPTY" | "opt1 IS NOT EMPTY") {
id = document
.opt1
.as_ref()
.map_or(true, |v| !is_empty_value(v))
.then(|| document.id.clone());
} else if matches!(filter, "opt1.opt2 IS EMPTY") {
if document.opt1opt2.as_ref().map_or(false, |v| is_empty_value(v)) {
id = Some(document.id.clone());
} }
} else if matches!( } else if matches!(
filter, filter,
@ -230,6 +242,15 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
id id
} }
pub fn is_empty_value(v: &serde_json::Value) -> bool {
match v {
serde_json::Value::String(s) => s.is_empty(),
serde_json::Value::Array(a) => a.is_empty(),
serde_json::Value::Object(o) => o.is_empty(),
_ => false,
}
}
pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool { pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {
match v { match v {
serde_json::Value::Array(v) => { serde_json::Value::Array(v) => {
@ -252,28 +273,6 @@ pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {
} }
} }
/// Recursively searches `v` for an object entry named `key` whose value is `null`.
///
/// Objects are searched entry by entry: an entry matches when its name equals `key`
/// and its value is `null`, otherwise the search continues inside the entry's value.
/// Arrays are searched element by element. Any other value never matches.
pub fn contains_null_rec(v: &serde_json::Value, key: &str) -> bool {
    match v {
        serde_json::Value::Object(fields) => fields
            .iter()
            .any(|(name, child)| (name == key && child.is_null()) || contains_null_rec(child, key)),
        serde_json::Value::Array(items) => items.iter().any(|item| contains_null_rec(item, key)),
        _ => false,
    }
}
pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> { pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> {
let dataset: Vec<TestDocument> = let dataset: Vec<TestDocument> =
serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect(); serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect();