3571: Introduce two filters to select documents with `null` and empty fields r=irevoire a=Kerollmops

# Pull Request

## Related issue
This PR implements the `X IS NULL`, `X IS NOT NULL`, `X IS EMPTY`, and `X IS NOT EMPTY` filters that [this comment](https://github.com/meilisearch/product/discussions/539#discussioncomment-5115884) describes in detail.

## What does this PR do?

### `IS NULL` and `IS NOT NULL`

This PR will be exposed as a prototype for now. Below is the copy/pasted version of a spec that defines this filter.

- `IS NULL` matches fields that `EXISTS` AND whose value is `null`
- `IS NOT NULL` matches fields that `NOT EXISTS` OR whose value is not `null`

1. `{"name": "A", "price": null}`
2. `{"name": "A", "price": 10}`
3. `{"name": "A"}`

`price IS NULL` would match 1
`price IS NOT NULL` or `NOT price IS NULL` would match 2,3
`price EXISTS` would match 1, 2
`price NOT EXISTS` or `NOT price EXISTS` would match 3

Common query: `(price EXISTS) AND (price IS NOT NULL)` would match 2

### `IS EMPTY` and `IS NOT EMPTY`

- `IS EMPTY` matches Array `[]`, Object `{}`, or String `""` fields that `EXISTS` and are empty
- `IS NOT EMPTY` matches fields that `NOT EXISTS` OR are not empty.

1. `{"name": "A", "tags": null}`
2. `{"name": "A", "tags": [null]}`
3. `{"name": "A", "tags": []}`
4. `{"name": "A", "tags": ["hello","world"]}`
5. `{"name": "A", "tags": [""]}`
6. `{"name": "A"}`
7. `{"name": "A", "tags": {}}`
8. `{"name": "A", "tags": {"t1":"v1"}}`
9. `{"name": "A", "tags": {"t1":""}}`
10. `{"name": "A", "tags": ""}`

`tags IS EMPTY` would match 3,7,10
`tags IS NOT EMPTY` or `NOT tags IS EMPTY` would match 1,2,4,5,6,8,9
`tags IS NULL` would match 1
`tags IS NOT NULL` or `NOT tags IS NULL` would match 2,3,4,5,6,7,8,9,10
`tags EXISTS` would match 1,2,3,4,5,7,8,9,10
`tags NOT EXISTS` or `NOT tags EXISTS` would match 6

Common query: `(tags EXISTS) AND (tags IS NOT NULL) AND (tags IS NOT EMPTY)` would match 2,4,5,8,9

## What should the reviewer do?

- Check that I tested the filters
- Check that I deleted the ids of the documents when deleting documents


Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2023-04-27 13:14:00 +00:00 committed by GitHub
commit 414b3fae89
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 730 additions and 118 deletions

View file

@ -1757,6 +1757,187 @@ mod tests {
check_ok(&index);
}
/// Checks the content of the `facet_id_is_null_docids` database.
///
/// Only documents whose field is exactly `null` are expected in the bitmap of
/// that field, both for the top-level `colour` field and for the nested
/// `colour.green` and `colour.green.blue` paths. The same expectations must
/// hold whether the filterable fields are declared before or after the
/// documents are added.
#[test]
fn index_documents_check_is_null_database() {
    let build_documents = || {
        documents!([
            { "id": 0, "colour": null },
            { "id": 1, "colour": [null] }, // must not be returned
            { "id": 6, "colour": { "green": null } },
            { "id": 7, "colour": { "green": { "blue": null } } },
            { "id": 8, "colour": 0 },
            { "id": 9, "colour": [] },
            { "id": 10, "colour": {} },
            { "id": 12, "colour": [1] },
            { "id": 13 },
            { "id": 14, "colour": { "green": 1 } },
            { "id": 15, "colour": { "green": { "blue": [] } } }
        ])
    };

    let assert_null_databases = |index: &Index| {
        let rtxn = index.read_txn().unwrap();

        // Declaring `colour` as filterable is expected to expose its nested paths too.
        let actual_facets = index.faceted_fields(&rtxn).unwrap();
        assert_eq!(
            actual_facets,
            hashset!(S("colour"), S("colour.green"), S("colour.green.blue"))
        );

        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();

        // Resolves a field name to its id and returns the docids stored for it
        // in the "is null" database.
        let null_docids_of = |field_name: &str| {
            let field_id = fields_ids_map.id(field_name).unwrap();
            index
                .facet_id_is_null_docids
                .get(&rtxn, &BEU16::new(field_id))
                .unwrap()
                .unwrap()
                .into_iter()
                .collect::<Vec<_>>()
        };

        // NOTE(review): the expected values match the position of each document
        // in the list above, not its `id` field; presumably internal document
        // ids are assigned in insertion order. Confirm against the indexer.
        assert_eq!(null_docids_of("colour"), vec![0]);
        assert_eq!(null_docids_of("colour.green"), vec![2]);
        assert_eq!(null_docids_of("colour.green.blue"), vec![3]);
    };

    let filterable = hashset!(S("colour"));

    // First scenario: the documents are indexed, then the settings are updated.
    let docs_then_settings = TempIndex::new();
    docs_then_settings.add_documents(build_documents()).unwrap();
    docs_then_settings
        .update_settings(|settings| {
            settings.set_filterable_fields(filterable.clone());
        })
        .unwrap();
    assert_null_databases(&docs_then_settings);

    // Second scenario: the settings are updated, then the documents are indexed.
    let settings_then_docs = TempIndex::new();
    settings_then_docs
        .update_settings(|settings| {
            settings.set_filterable_fields(filterable.clone());
        })
        .unwrap();
    settings_then_docs.add_documents(build_documents()).unwrap();
    assert_null_databases(&settings_then_docs);
}
/// Checks the content of the `facet_id_is_empty_docids` database.
///
/// Only documents whose field is an empty array, an empty object or an empty
/// string are expected in the bitmap of that field, both for the top-level
/// `tags` field and for the nested `tags.green` and `tags.green.blue` paths.
/// The same expectations must hold whether the filterable fields are declared
/// before or after the documents are added.
#[test]
fn index_documents_check_is_empty_database() {
    let build_documents = || {
        documents!([
            { "id": 0, "tags": null },
            { "id": 1, "tags": [null] },
            { "id": 2, "tags": [] },
            { "id": 3, "tags": ["hello", "world"] },
            { "id": 4, "tags": [""] },
            { "id": 5 },
            { "id": 6, "tags": {} },
            { "id": 7, "tags": { "green": "cool" } },
            { "id": 8, "tags": { "green": "" } },
            { "id": 9, "tags": "" },
            { "id": 10, "tags": { "green": null } },
            { "id": 11, "tags": { "green": { "blue": null } } },
            { "id": 12, "tags": { "green": { "blue": [] } } }
        ])
    };

    let assert_empty_databases = |index: &Index| {
        let rtxn = index.read_txn().unwrap();

        // Declaring `tags` as filterable is expected to expose its nested paths too.
        let actual_facets = index.faceted_fields(&rtxn).unwrap();
        assert_eq!(actual_facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue")));

        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();

        // Resolves a field name to its id and returns the docids stored for it
        // in the "is empty" database.
        let empty_docids_of = |field_name: &str| {
            let field_id = fields_ids_map.id(field_name).unwrap();
            index
                .facet_id_is_empty_docids
                .get(&rtxn, &BEU16::new(field_id))
                .unwrap()
                .unwrap()
                .into_iter()
                .collect::<Vec<_>>()
        };

        // `[]`, `{}` and `""` at the top level; `null`, `[null]` and `[""]` are not empty.
        assert_eq!(empty_docids_of("tags"), vec![2, 6, 9]);
        // Only the empty string stored under `tags.green`.
        assert_eq!(empty_docids_of("tags.green"), vec![8]);
        // Only the empty array stored under `tags.green.blue`.
        assert_eq!(empty_docids_of("tags.green.blue"), vec![12]);
    };

    let filterable = hashset!(S("tags"));

    // First scenario: the documents are indexed, then the settings are updated.
    let docs_then_settings = TempIndex::new();
    docs_then_settings.add_documents(build_documents()).unwrap();
    docs_then_settings
        .update_settings(|settings| {
            settings.set_filterable_fields(filterable.clone());
        })
        .unwrap();
    assert_empty_databases(&docs_then_settings);

    // Second scenario: the settings are updated, then the documents are indexed.
    let settings_then_docs = TempIndex::new();
    settings_then_docs
        .update_settings(|settings| {
            settings.set_filterable_fields(filterable.clone());
        })
        .unwrap();
    settings_then_docs.add_documents(build_documents()).unwrap();
    assert_empty_databases(&settings_then_docs);
}
#[test]
fn primary_key_must_not_contain_floats() {
let index = TempIndex::new_with_map_size(4096 * 100);