3571: Introduce two filters to select documents with `null` and empty fields r=irevoire a=Kerollmops

# Pull Request

## Related issue
This PR implements the `X IS NULL`, `X IS NOT NULL`, `X IS EMPTY`, `X IS NOT EMPTY` filters that [this comment](https://github.com/meilisearch/product/discussions/539#discussioncomment-5115884) is describing in a very detailed manner.

## What does this PR do?

### `IS NULL` and `IS NOT NULL`

This PR will be exposed as a prototype for now. Below is the copy/pasted version of a spec that defines this filter.

- `IS NULL` matches fields that `EXISTS` AND `= IS NULL`
- `IS NOT NULL` matches fields that `NOT EXISTS` OR `!= IS NULL`

1. `{"name": "A", "price": null}`
2. `{"name": "A", "price": 10}`
3. `{"name": "A"}`

`price IS NULL` would match 1
`price IS NOT NULL` or `NOT price IS NULL` would match 2,3
`price EXISTS` would match 1, 2
`price NOT EXISTS` or `NOT price EXISTS` would match 3

common query : `(price EXISTS) AND (price IS NOT NULL)` would match 2

### `IS EMPTY` and `IS NOT EMPTY`

- `IS EMPTY` matches Array `[]`, Object `{}`, or String `""` fields that `EXISTS` and are empty
- `IS NOT EMPTY` matches fields that `NOT EXISTS` OR are not empty.

1. `{"name": "A", "tags": null}`
2. `{"name": "A", "tags": [null]}`
3. `{"name": "A", "tags": []}`
4. `{"name": "A", "tags": ["hello","world"]}`
5. `{"name": "A", "tags": [""]}`
6. `{"name": "A"}`
7. `{"name": "A", "tags": {}}`
8. `{"name": "A", "tags": {"t1":"v1"}}`
9. `{"name": "A", "tags": {"t1":""}}`
10. `{"name": "A", "tags": ""}`

`tags IS EMPTY` would match 3,7,10
`tags IS NOT EMPTY` or `NOT tags IS EMPTY` would match 1,2,4,5,6,8,9
`tags IS NULL` would match 1
`tags IS NOT NULL` or `NOT tags IS NULL` would match 2,3,4,5,6,7,8,9,10
`tags EXISTS` would match 1,2,3,4,5,7,8,9,10
`tags NOT EXISTS` or `NOT tags EXISTS` would match 6

common query : `(tags EXISTS) AND (tags IS NOT NULL) AND (tags IS NOT EMPTY)` would match 2,4,5,8,9

## What should the reviewer do?

- Check that I tested the filters
- Check that I deleted the ids of the documents when deleting documents


Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2023-04-27 13:14:00 +00:00 committed by GitHub
commit 414b3fae89
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 730 additions and 118 deletions

View File

@ -20,6 +20,8 @@ pub enum Condition<'a> {
GreaterThanOrEqual(Token<'a>),
Equal(Token<'a>),
NotEqual(Token<'a>),
Null,
Empty,
Exists,
LowerThan(Token<'a>),
LowerThanOrEqual(Token<'a>),
@ -44,6 +46,38 @@ pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
Ok((input, condition))
}
/// null = value "IS" WS+ "NULL"
pub fn parse_is_null(input: Span) -> IResult<FilterCondition> {
let (input, key) = parse_value(input)?;
let (input, _) = tuple((tag("IS"), multispace1, tag("NULL")))(input)?;
Ok((input, FilterCondition::Condition { fid: key, op: Null }))
}
/// null = value "IS" WS+ "NOT" WS+ "NULL"
pub fn parse_is_not_null(input: Span) -> IResult<FilterCondition> {
let (input, key) = parse_value(input)?;
let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("NULL")))(input)?;
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null }))))
}
/// empty = value "IS" WS+ "EMPTY"
pub fn parse_is_empty(input: Span) -> IResult<FilterCondition> {
let (input, key) = parse_value(input)?;
let (input, _) = tuple((tag("IS"), multispace1, tag("EMPTY")))(input)?;
Ok((input, FilterCondition::Condition { fid: key, op: Empty }))
}
/// empty = value "IS" WS+ "NOT" WS+ "EMPTY"
pub fn parse_is_not_empty(input: Span) -> IResult<FilterCondition> {
let (input, key) = parse_value(input)?;
let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("EMPTY")))(input)?;
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Empty }))))
}
/// exist = value "EXISTS"
pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;

View File

@ -143,11 +143,9 @@ impl<'a> Display for Error<'a> {
ErrorKind::MissingClosingDelimiter(c) => {
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
}
ErrorKind::InvalidPrimary if input.trim().is_empty() => {
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")?
}
ErrorKind::InvalidPrimary => {
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)?
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
}
ErrorKind::ExpectedEof => {
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?

View File

@ -47,7 +47,10 @@ mod value;
use std::fmt::Debug;
pub use condition::{parse_condition, parse_to, Condition};
use condition::{parse_exists, parse_not_exists};
use condition::{
parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null,
parse_not_exists,
};
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
pub use error::{Error, ErrorKind};
use nom::branch::alt;
@ -442,6 +445,10 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
parse_in,
parse_not_in,
parse_condition,
parse_is_null,
parse_is_not_null,
parse_is_empty,
parse_is_not_empty,
parse_exists,
parse_not_exists,
parse_to,
@ -526,14 +533,30 @@ pub mod tests {
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
// Test NOT + EXISTS
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
// Test NOT
insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
// Test NULL + NOT NULL
insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
// Test EMPTY + NOT EMPTY
insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
// Test EXISTS + NOT EXITS
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
// Test nested NOT
insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
@ -606,7 +629,7 @@ pub mod tests {
"###);
insta::assert_display_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
1:5 'OR'
"###);
@ -616,12 +639,12 @@ pub mod tests {
"###);
insta::assert_display_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
1:14 channel Ponce
"###);
insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
19:19 channel = Ponce OR
"###);
@ -706,12 +729,12 @@ pub mod tests {
"###);
insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);
insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);
@ -772,6 +795,39 @@ pub mod tests {
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
"###);
insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
1:11 value NULL
"###);
insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
1:15 value NOT NULL
"###);
insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
1:12 value EMPTY
"###);
insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
1:9 value IS
"###);
insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
1:13 value IS NOT
"###);
insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);
}
#[test]
@ -853,6 +909,8 @@ impl<'a> std::fmt::Display for Condition<'a> {
Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"),
Condition::Equal(token) => write!(f, "= {token}"),
Condition::NotEqual(token) => write!(f, "!= {token}"),
Condition::Null => write!(f, "IS NULL"),
Condition::Empty => write!(f, "IS EMPTY"),
Condition::Exists => write!(f, "EXISTS"),
Condition::LowerThan(token) => write!(f, "< {token}"),
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),

View File

@ -183,7 +183,20 @@ fn is_syntax_component(c: char) -> bool {
}
fn is_keyword(s: &str) -> bool {
matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius" | "_geoBoundingBox")
matches!(
s,
"AND"
| "OR"
| "IN"
| "NOT"
| "TO"
| "EXISTS"
| "IS"
| "NULL"
| "EMPTY"
| "_geoRadius"
| "_geoBoundingBox"
)
}
#[cfg(test)]

View File

@ -4,51 +4,56 @@ use serde_json::{Map, Value};
pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> {
let mut obj = Map::new();
let mut all_keys = vec![];
insert_object(&mut obj, None, json, &mut all_keys);
for key in all_keys {
obj.entry(key).or_insert(Value::Array(vec![]));
let mut all_entries = vec![];
insert_object(&mut obj, None, json, &mut all_entries);
for (key, old_val) in all_entries {
obj.entry(key).or_insert(old_val.clone());
}
obj
}
fn insert_object(
fn insert_object<'a>(
base_json: &mut Map<String, Value>,
base_key: Option<&str>,
object: &Map<String, Value>,
all_keys: &mut Vec<String>,
object: &'a Map<String, Value>,
all_entries: &mut Vec<(String, &'a Value)>,
) {
for (key, value) in object {
let new_key = base_key.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
all_keys.push(new_key.clone());
all_entries.push((new_key.clone(), value));
if let Some(array) = value.as_array() {
insert_array(base_json, &new_key, array, all_keys);
insert_array(base_json, &new_key, array, all_entries);
} else if let Some(object) = value.as_object() {
insert_object(base_json, Some(&new_key), object, all_keys);
insert_object(base_json, Some(&new_key), object, all_entries);
} else {
insert_value(base_json, &new_key, value.clone());
insert_value(base_json, &new_key, value.clone(), false);
}
}
}
fn insert_array(
fn insert_array<'a>(
base_json: &mut Map<String, Value>,
base_key: &str,
array: &Vec<Value>,
all_keys: &mut Vec<String>,
array: &'a Vec<Value>,
all_entries: &mut Vec<(String, &'a Value)>,
) {
for value in array {
if let Some(object) = value.as_object() {
insert_object(base_json, Some(base_key), object, all_keys);
insert_object(base_json, Some(base_key), object, all_entries);
} else if let Some(sub_array) = value.as_array() {
insert_array(base_json, base_key, sub_array, all_keys);
insert_array(base_json, base_key, sub_array, all_entries);
} else {
insert_value(base_json, base_key, value.clone());
insert_value(base_json, base_key, value.clone(), true);
}
}
}
fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value) {
fn insert_value(
base_json: &mut Map<String, Value>,
key: &str,
to_insert: Value,
came_from_array: bool,
) {
debug_assert!(!to_insert.is_object());
debug_assert!(!to_insert.is_array());
@ -63,6 +68,8 @@ fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value)
base_json[key] = Value::Array(vec![value, to_insert]);
}
// if it does not exist we can push the value untouched
} else if came_from_array {
base_json.insert(key.to_string(), Value::Array(vec![to_insert]));
} else {
base_json.insert(key.to_string(), to_insert);
}
@ -113,7 +120,11 @@ mod tests {
assert_eq!(
&flat,
json!({
"a": [],
"a": {
"b": "c",
"d": "e",
"f": "g"
},
"a.b": "c",
"a.d": "e",
"a.f": "g"
@ -164,7 +175,7 @@ mod tests {
assert_eq!(
&flat,
json!({
"a": 42,
"a": [42],
"a.b": ["c", "d", "e"],
})
.as_object()
@ -186,7 +197,7 @@ mod tests {
assert_eq!(
&flat,
json!({
"a": null,
"a": [null],
"a.b": ["c", "d", "e"],
})
.as_object()
@ -208,7 +219,9 @@ mod tests {
assert_eq!(
&flat,
json!({
"a": [],
"a": {
"b": "c"
},
"a.b": ["c", "d"],
})
.as_object()
@ -234,7 +247,7 @@ mod tests {
json!({
"a.b": ["c", "d", "f"],
"a.c": "e",
"a": 35,
"a": [35],
})
.as_object()
.unwrap()
@ -302,4 +315,53 @@ mod tests {
.unwrap()
);
}
#[test]
fn flatten_nested_values_keep_original_values() {
let mut base: Value = json!({
"tags": {
"t1": "v1"
},
"prices": {
"p1": [null],
"p1000": {"tamo": {"le": {}}}
},
"kiki": [[]]
});
let json = std::mem::take(base.as_object_mut().unwrap());
let flat = flatten(&json);
println!("{}", serde_json::to_string_pretty(&flat).unwrap());
assert_eq!(
&flat,
json!({
"prices": {
"p1": [null],
"p1000": {
"tamo": {
"le": {}
}
}
},
"prices.p1": [null],
"prices.p1000": {
"tamo": {
"le": {}
}
},
"prices.p1000.tamo": {
"le": {}
},
"prices.p1000.tamo.le": {},
"tags": {
"t1": "v1"
},
"tags.t1": "v1",
"kiki": [[]]
})
.as_object()
.unwrap()
);
}
}

View File

@ -547,7 +547,7 @@ async fn filter_invalid_syntax_object() {
index.wait_task(1).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -572,7 +572,7 @@ async fn filter_invalid_syntax_array() {
index.wait_task(1).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"

View File

@ -80,6 +80,8 @@ pub mod db_name {
pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";
pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
@ -129,6 +131,10 @@ pub struct Index {
/// Maps the facet field id and the docids for which this field exists
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
/// Maps the facet field id and the docids for which this field is set as null
pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
/// Maps the facet field id and the docids for which this field is considered empty
pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
/// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
@ -153,7 +159,7 @@ impl Index {
) -> Result<Index> {
use db_name::*;
options.max_dbs(19);
options.max_dbs(21);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
@ -175,6 +181,8 @@ impl Index {
let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;
let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
let field_id_docid_facet_strings =
@ -201,6 +209,8 @@ impl Index {
facet_id_f64_docids,
facet_id_string_docids,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
documents,
@ -833,6 +843,30 @@ impl Index {
}
}
/// Retrieve all the documents which contain this field id set as null
pub fn null_faceted_documents_ids(
&self,
rtxn: &RoTxn,
field_id: FieldId,
) -> heed::Result<RoaringBitmap> {
match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? {
Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()),
}
}
/// Retrieve all the documents which contain this field id and that is considered empty
pub fn empty_faceted_documents_ids(
&self,
rtxn: &RoTxn,
field_id: FieldId,
) -> heed::Result<RoaringBitmap> {
match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? {
Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()),
}
}
/// Retrieve all the documents which contain this field id
pub fn exists_faceted_documents_ids(
&self,

View File

@ -211,6 +211,14 @@ impl<'a> Filter<'a> {
Condition::Between { from, to } => {
(Included(from.parse_finite_float()?), Included(to.parse_finite_float()?))
}
Condition::Null => {
let is_null = index.null_faceted_documents_ids(rtxn, field_id)?;
return Ok(is_null);
}
Condition::Empty => {
let is_empty = index.empty_faceted_documents_ids(rtxn, field_id)?;
return Ok(is_empty);
}
Condition::Exists => {
let exist = index.exists_faceted_documents_ids(rtxn, field_id)?;
return Ok(exist);

View File

@ -271,6 +271,16 @@ pub fn snap_facet_id_exists_docids(index: &Index) -> String {
&format!("{facet_id:<3} {}", display_bitmap(&docids))
})
}
pub fn snap_facet_id_is_null_docids(index: &Index) -> String {
make_db_snap_from_iter!(index, facet_id_is_null_docids, |(facet_id, docids)| {
&format!("{facet_id:<3} {}", display_bitmap(&docids))
})
}
pub fn snap_facet_id_is_empty_docids(index: &Index) -> String {
make_db_snap_from_iter!(index, facet_id_is_empty_docids, |(facet_id, docids)| {
&format!("{facet_id:<3} {}", display_bitmap(&docids))
})
}
pub fn snap_facet_id_string_docids(index: &Index) -> String {
make_db_snap_from_iter!(index, facet_id_string_docids, |(
FacetGroupKey { field_id, level, left_bound },
@ -495,6 +505,12 @@ macro_rules! full_snap_of_db {
($index:ident, facet_id_exists_docids) => {{
$crate::snapshot_tests::snap_facet_id_exists_docids(&$index)
}};
($index:ident, facet_id_is_null_docids) => {{
$crate::snapshot_tests::snap_facet_id_is_null_docids(&$index)
}};
($index:ident, facet_id_is_empty_docids) => {{
$crate::snapshot_tests::snap_facet_id_is_empty_docids(&$index)
}};
($index:ident, documents_ids) => {{
$crate::snapshot_tests::snap_documents_ids(&$index)
}};

View File

@ -34,6 +34,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
facet_id_f64_docids,
facet_id_string_docids,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
documents,
@ -86,6 +88,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
script_language_docids.clear(self.wtxn)?;
facet_id_f64_docids.clear(self.wtxn)?;
facet_id_exists_docids.clear(self.wtxn)?;
facet_id_is_null_docids.clear(self.wtxn)?;
facet_id_is_empty_docids.clear(self.wtxn)?;
facet_id_string_docids.clear(self.wtxn)?;
field_id_docid_facet_f64s.clear(self.wtxn)?;
field_id_docid_facet_strings.clear(self.wtxn)?;

View File

@ -245,6 +245,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
field_id_docid_facet_strings: _,
script_language_docids,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
documents,
} = self.index;
@ -517,12 +519,26 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
drop(iter);
// We delete the documents ids that are under the facet field id values.
remove_docids_from_facet_id_exists_docids(
remove_docids_from_facet_id_docids(
self.wtxn,
facet_id_exists_docids,
&self.to_delete_docids,
)?;
// We delete the documents ids that are under the facet field id values.
remove_docids_from_facet_id_docids(
self.wtxn,
facet_id_is_null_docids,
&self.to_delete_docids,
)?;
// We delete the documents ids that are under the facet field id values.
remove_docids_from_facet_id_docids(
self.wtxn,
facet_id_is_empty_docids,
&self.to_delete_docids,
)?;
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
Ok(DetailedDocumentDeletionResult {
@ -625,7 +641,7 @@ fn remove_docids_from_field_id_docid_facet_value(
Ok(all_affected_facet_values)
}
fn remove_docids_from_facet_id_exists_docids<'a, C>(
fn remove_docids_from_facet_id_docids<'a, C>(
wtxn: &'a mut heed::RwTxn,
db: &heed::Database<C, CboRoaringBitmapCodec>,
to_remove: &RoaringBitmap,

View File

@ -181,7 +181,7 @@ fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a st
fn inner(value: &Value, output: &mut String) -> bool {
use std::fmt::Write;
match value {
Value::Null => false,
Value::Null | Value::Object(_) => false,
Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(),
Value::Number(number) => write!(output, "{}", number).is_ok(),
Value::String(string) => write!(output, "{}", string).is_ok(),
@ -196,23 +196,6 @@ fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a st
// check that at least one value was written
count != 0
}
Value::Object(object) => {
let mut buffer = String::new();
let mut count = 0;
for (key, value) in object {
buffer.clear();
let _ = write!(&mut buffer, "{}: ", key);
if inner(value, &mut buffer) {
buffer.push_str(". ");
// We write the "key: value. " pair only when
// we are sure that the value can be written.
output.push_str(&buffer);
count += 1;
}
}
// check that at least one value was written
count != 0
}
}
}

View File

@ -7,7 +7,7 @@ use std::mem::size_of;
use heed::zerocopy::AsBytes;
use heed::BytesEncode;
use roaring::RoaringBitmap;
use serde_json::Value;
use serde_json::{from_slice, Value};
use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters};
use crate::error::InternalError;
@ -15,6 +15,15 @@ use crate::facet::value_encoding::f64_into_bytes;
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH};
/// The extracted facet values stored in grenad files by type.
pub struct ExtractedFacetValues {
pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
}
/// Extracts the facet values of each faceted field of each document.
///
/// Returns the generated grenad reader containing the docid the fid and the orginal value as key
@ -24,7 +33,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
faceted_fields: &HashSet<FieldId>,
) -> Result<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)> {
) -> Result<ExtractedFacetValues> {
let max_memory = indexer.max_memory_by_thread();
let mut fid_docid_facet_numbers_sorter = create_sorter(
@ -46,6 +55,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
);
let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
let mut facet_is_empty_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
let mut key_buffer = Vec::new();
let mut cursor = obkv_documents.into_cursor()?;
@ -69,33 +80,44 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
// For the other extraction tasks, prefix the key with the field_id and the document_id
key_buffer.extend_from_slice(docid_bytes);
let value =
serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
let (numbers, strings) = extract_facet_values(&value);
// insert facet numbers in sorter
for number in numbers {
key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
if let Some(value_bytes) = f64_into_bytes(number) {
key_buffer.extend_from_slice(&value_bytes);
key_buffer.extend_from_slice(&number.to_be_bytes());
fid_docid_facet_numbers_sorter.insert(&key_buffer, ().as_bytes())?;
match extract_facet_values(&value) {
FilterableValues::Null => {
facet_is_null_docids.entry(field_id).or_default().insert(document);
}
}
FilterableValues::Empty => {
facet_is_empty_docids.entry(field_id).or_default().insert(document);
}
FilterableValues::Values { numbers, strings } => {
// insert facet numbers in sorter
for number in numbers {
key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
if let Some(value_bytes) = f64_into_bytes(number) {
key_buffer.extend_from_slice(&value_bytes);
key_buffer.extend_from_slice(&number.to_be_bytes());
// insert normalized and original facet string in sorter
for (normalized, original) in strings.into_iter().filter(|(n, _)| !n.is_empty()) {
let normalised_truncated_value: String = normalized
.char_indices()
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c)
.collect();
fid_docid_facet_numbers_sorter
.insert(&key_buffer, ().as_bytes())?;
}
}
key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
key_buffer.extend_from_slice(normalised_truncated_value.as_bytes());
fid_docid_facet_strings_sorter.insert(&key_buffer, original.as_bytes())?;
// insert normalized and original facet string in sorter
for (normalized, original) in
strings.into_iter().filter(|(n, _)| !n.is_empty())
{
let normalized_truncated_value: String = normalized
.char_indices()
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c)
.collect();
key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
key_buffer.extend_from_slice(normalized_truncated_value.as_bytes());
fid_docid_facet_strings_sorter
.insert(&key_buffer, original.as_bytes())?;
}
}
}
}
}
@ -112,14 +134,48 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
}
let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?;
Ok((
sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
facet_exists_docids_reader,
))
let mut facet_is_null_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
for (fid, bitmap) in facet_is_null_docids.into_iter() {
let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
facet_is_null_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
}
let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?;
let mut facet_is_empty_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
for (fid, bitmap) in facet_is_empty_docids.into_iter() {
let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
}
let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?;
Ok(ExtractedFacetValues {
docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
fid_facet_is_null_docids_chunk: facet_is_null_docids_reader,
fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader,
fid_facet_exists_docids_chunk: facet_exists_docids_reader,
})
}
fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) {
/// Represent what a document field contains.
enum FilterableValues {
/// Corresponds to the JSON `null` value.
Null,
/// Corresponds to either, an empty string `""`, an empty array `[]`, or an empty object `{}`.
Empty,
/// Represents all the numbers and strings values found in this document field.
Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
}
fn extract_facet_values(value: &Value) -> FilterableValues {
fn inner_extract_facet_values(
value: &Value,
can_recurse: bool,
@ -149,9 +205,16 @@ fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) {
}
}
let mut facet_number_values = Vec::new();
let mut facet_string_values = Vec::new();
inner_extract_facet_values(value, true, &mut facet_number_values, &mut facet_string_values);
(facet_number_values, facet_string_values)
match value {
Value::Null => FilterableValues::Null,
Value::String(s) if s.is_empty() => FilterableValues::Empty,
Value::Array(a) if a.is_empty() => FilterableValues::Empty,
Value::Object(o) if o.is_empty() => FilterableValues::Empty,
otherwise => {
let mut numbers = Vec::new();
let mut strings = Vec::new();
inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
FilterableValues::Values { numbers, strings }
}
}
}

View File

@ -18,7 +18,7 @@ use rayon::prelude::*;
use self::extract_docid_word_positions::extract_docid_word_positions;
use self::extract_facet_number_docids::extract_facet_number_docids;
use self::extract_facet_string_docids::extract_facet_string_docids;
use self::extract_fid_docid_facet_values::extract_fid_docid_facet_values;
use self::extract_fid_docid_facet_values::{extract_fid_docid_facet_values, ExtractedFacetValues};
use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
use self::extract_geo_points::extract_geo_points;
use self::extract_word_docids::extract_word_docids;
@ -55,28 +55,35 @@ pub(crate) fn data_from_obkv_documents(
.collect::<Result<()>>()?;
#[allow(clippy::type_complexity)]
let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))> = flattened_obkv_chunks
.par_bridge()
.map(|flattened_obkv_chunks| {
send_and_extract_flattened_documents_data(
flattened_obkv_chunks,
indexer,
lmdb_writer_sx.clone(),
&searchable_fields,
&faceted_fields,
primary_key_id,
geo_fields_ids,
&stop_words,
max_positions_per_attributes,
)
})
.collect();
let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))))> =
flattened_obkv_chunks
.par_bridge()
.map(|flattened_obkv_chunks| {
send_and_extract_flattened_documents_data(
flattened_obkv_chunks,
indexer,
lmdb_writer_sx.clone(),
&searchable_fields,
&faceted_fields,
primary_key_id,
geo_fields_ids,
&stop_words,
max_positions_per_attributes,
)
})
.collect();
let (
docid_word_positions_chunks,
(
docid_fid_facet_numbers_chunks,
(docid_fid_facet_strings_chunks, facet_exists_docids_chunks),
(
docid_fid_facet_strings_chunks,
(
facet_is_null_docids_chunks,
(facet_is_empty_docids_chunks, facet_exists_docids_chunks),
),
),
),
) = result?;
@ -96,6 +103,38 @@ pub(crate) fn data_from_obkv_documents(
});
}
// merge facet_is_null_docids and send them as a typed chunk
{
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-is-null-docids");
match facet_is_null_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader)));
}
Err(e) => {
let _ = lmdb_writer_sx.send(Err(e));
}
}
});
}
// merge facet_is_empty_docids and send them as a typed chunk
{
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-is-empty-docids");
match facet_is_empty_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
}
Err(e) => {
let _ = lmdb_writer_sx.send(Err(e));
}
}
});
}
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
docid_word_positions_chunks.clone(),
indexer,
@ -235,7 +274,10 @@ fn send_and_extract_flattened_documents_data(
grenad::Reader<CursorClonableMmap>,
(
grenad::Reader<CursorClonableMmap>,
(grenad::Reader<CursorClonableMmap>, grenad::Reader<File>),
(
grenad::Reader<CursorClonableMmap>,
(grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
),
),
)> {
let flattened_documents_chunk =
@ -281,11 +323,13 @@ fn send_and_extract_flattened_documents_data(
Ok(docid_word_positions_chunk)
},
|| {
let (
let ExtractedFacetValues {
docid_fid_facet_numbers_chunk,
docid_fid_facet_strings_chunk,
fid_facet_is_null_docids_chunk,
fid_facet_is_empty_docids_chunk,
fid_facet_exists_docids_chunk,
) = extract_fid_docid_facet_values(
} = extract_fid_docid_facet_values(
flattened_documents_chunk.clone(),
indexer,
faceted_fields,
@ -309,7 +353,13 @@ fn send_and_extract_flattened_documents_data(
Ok((
docid_fid_facet_numbers_chunk,
(docid_fid_facet_strings_chunk, fid_facet_exists_docids_chunk),
(
docid_fid_facet_strings_chunk,
(
fid_facet_is_null_docids_chunk,
(fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk),
),
),
))
},
);

View File

@ -1757,6 +1757,187 @@ mod tests {
check_ok(&index);
}
#[test]
fn index_documents_check_is_null_database() {
let content = || {
documents!([
{
"id": 0,
"colour": null,
},
{
"id": 1,
"colour": [null], // must not be returned
},
{
"id": 6,
"colour": {
"green": null
}
},
{
"id": 7,
"colour": {
"green": {
"blue": null
}
}
},
{
"id": 8,
"colour": 0,
},
{
"id": 9,
"colour": []
},
{
"id": 10,
"colour": {}
},
{
"id": 12,
"colour": [1]
},
{
"id": 13
},
{
"id": 14,
"colour": {
"green": 1
}
},
{
"id": 15,
"colour": {
"green": {
"blue": []
}
}
}
])
};
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
let colour_blue_id =
index.fields_ids_map(&rtxn).unwrap().id("colour.green.blue").unwrap();
let bitmap_null_colour =
index.facet_id_is_null_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap();
assert_eq!(bitmap_null_colour.into_iter().collect::<Vec<_>>(), vec![0]);
let bitmap_colour_green = index
.facet_id_is_null_docids
.get(&rtxn, &BEU16::new(colour_green_id))
.unwrap()
.unwrap();
assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![2]);
let bitmap_colour_blue = index
.facet_id_is_null_docids
.get(&rtxn, &BEU16::new(colour_blue_id))
.unwrap()
.unwrap();
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
};
let faceted_fields = hashset!(S("colour"));
let index = TempIndex::new();
index.add_documents(content()).unwrap();
index
.update_settings(|settings| {
settings.set_filterable_fields(faceted_fields.clone());
})
.unwrap();
check_ok(&index);
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_filterable_fields(faceted_fields.clone());
})
.unwrap();
index.add_documents(content()).unwrap();
check_ok(&index);
}
#[test]
fn index_documents_check_is_empty_database() {
let content = || {
documents!([
{"id": 0, "tags": null },
{"id": 1, "tags": [null] },
{"id": 2, "tags": [] },
{"id": 3, "tags": ["hello","world"] },
{"id": 4, "tags": [""] },
{"id": 5 },
{"id": 6, "tags": {} },
{"id": 7, "tags": {"green": "cool"} },
{"id": 8, "tags": {"green": ""} },
{"id": 9, "tags": "" },
{"id": 10, "tags": { "green": null } },
{"id": 11, "tags": { "green": { "blue": null } } },
{"id": 12, "tags": { "green": { "blue": [] } } }
])
};
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue")));
let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap();
let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap();
let tags_blue_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green.blue").unwrap();
let bitmap_empty_tags =
index.facet_id_is_empty_docids.get(&rtxn, &BEU16::new(tags_id)).unwrap().unwrap();
assert_eq!(bitmap_empty_tags.into_iter().collect::<Vec<_>>(), vec![2, 6, 9]);
let bitmap_tags_green = index
.facet_id_is_empty_docids
.get(&rtxn, &BEU16::new(tags_green_id))
.unwrap()
.unwrap();
assert_eq!(bitmap_tags_green.into_iter().collect::<Vec<_>>(), vec![8]);
let bitmap_tags_blue = index
.facet_id_is_empty_docids
.get(&rtxn, &BEU16::new(tags_blue_id))
.unwrap()
.unwrap();
assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
};
let faceted_fields = hashset!(S("tags"));
let index = TempIndex::new();
index.add_documents(content()).unwrap();
index
.update_settings(|settings| {
settings.set_filterable_fields(faceted_fields.clone());
})
.unwrap();
check_ok(&index);
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_filterable_fields(faceted_fields.clone());
})
.unwrap();
index.add_documents(content()).unwrap();
check_ok(&index);
}
#[test]
fn primary_key_must_not_contain_floats() {
let index = TempIndex::new_with_map_size(4096 * 100);

View File

@ -39,6 +39,8 @@ pub(crate) enum TypedChunk {
FieldIdFacetStringDocids(grenad::Reader<File>),
FieldIdFacetNumberDocids(grenad::Reader<File>),
FieldIdFacetExistsDocids(grenad::Reader<File>),
FieldIdFacetIsNullDocids(grenad::Reader<File>),
FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
GeoPoints(grenad::Reader<File>),
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
}
@ -161,6 +163,28 @@ pub(crate) fn write_typed_chunk_into_index(
)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => {
append_entries_into_database(
facet_id_is_null_docids,
&index.facet_id_is_null_docids,
wtxn,
index_is_empty,
|value, _buffer| Ok(value),
merge_cbo_roaring_bitmaps,
)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => {
append_entries_into_database(
facet_id_is_empty_docids,
&index.facet_id_is_empty_docids,
wtxn,
index_is_empty,
|value, _buffer| Ok(value),
merge_cbo_roaring_bitmaps,
)?;
is_merged_database = true;
}
TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
append_entries_into_database(
word_pair_proximity_docids_iter,

View File

@ -82,10 +82,23 @@ test_filter!(
vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])]
);
test_filter!(exists_filter_1, vec![Right("opt1 EXISTS")]);
test_filter!(exists_filter_2, vec![Right("opt1.opt2 EXISTS")]);
test_filter!(exists_filter_1_not, vec![Right("opt1 NOT EXISTS")]);
test_filter!(exists_filter_1_not_alt, vec![Right("NOT opt1 EXISTS")]);
test_filter!(exists_filter_1_double_not, vec![Right("NOT opt1 NOT EXISTS")]);
test_filter!(null_filter_1, vec![Right("opt1 IS NULL")]);
test_filter!(null_filter_2, vec![Right("opt1.opt2 IS NULL")]);
test_filter!(null_filter_1_not, vec![Right("opt1 IS NOT NULL")]);
test_filter!(null_filter_1_not_alt, vec![Right("NOT opt1 IS NULL")]);
test_filter!(null_filter_1_double_not, vec![Right("NOT opt1 IS NOT NULL")]);
test_filter!(empty_filter_1, vec![Right("opt1 IS EMPTY")]);
test_filter!(empty_filter_2, vec![Right("opt1.opt2 IS EMPTY")]);
test_filter!(empty_filter_1_not, vec![Right("opt1 IS NOT EMPTY")]);
test_filter!(empty_filter_1_not_alt, vec![Right("NOT opt1 IS EMPTY")]);
test_filter!(empty_filter_1_double_not, vec![Right("NOT opt1 IS NOT EMPTY")]);
test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]);
test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]);
test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]);

View File

@ -205,6 +205,30 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
} else if let Some(opt1) = &document.opt1 {
id = contains_key_rec(opt1, "opt2").then(|| document.id.clone());
}
} else if matches!(filter, "opt1 IS NULL" | "NOT opt1 IS NOT NULL") {
id = document.opt1.as_ref().map_or(false, |v| v.is_null()).then(|| document.id.clone());
} else if matches!(filter, "NOT opt1 IS NULL" | "opt1 IS NOT NULL") {
id = document.opt1.as_ref().map_or(true, |v| !v.is_null()).then(|| document.id.clone());
} else if matches!(filter, "opt1.opt2 IS NULL") {
if document.opt1opt2.as_ref().map_or(false, |v| v.is_null()) {
id = Some(document.id.clone());
} else if let Some(opt1) = &document.opt1 {
if !opt1.is_null() {
id = contains_null_rec(opt1, "opt2").then(|| document.id.clone());
}
}
} else if matches!(filter, "opt1 IS EMPTY" | "NOT opt1 IS NOT EMPTY") {
id = document.opt1.as_ref().map_or(false, is_empty_value).then(|| document.id.clone());
} else if matches!(filter, "NOT opt1 IS EMPTY" | "opt1 IS NOT EMPTY") {
id = document
.opt1
.as_ref()
.map_or(true, |v| !is_empty_value(v))
.then(|| document.id.clone());
} else if matches!(filter, "opt1.opt2 IS EMPTY") {
if document.opt1opt2.as_ref().map_or(false, is_empty_value) {
id = Some(document.id.clone());
}
} else if matches!(
filter,
"tag_in IN[1, 2, 3, four, five]" | "NOT tag_in NOT IN[1, 2, 3, four, five]"
@ -218,6 +242,15 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
id
}
pub fn is_empty_value(v: &serde_json::Value) -> bool {
match v {
serde_json::Value::String(s) => s.is_empty(),
serde_json::Value::Array(a) => a.is_empty(),
serde_json::Value::Object(o) => o.is_empty(),
_ => false,
}
}
pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {
match v {
serde_json::Value::Array(v) => {
@ -240,6 +273,28 @@ pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {
}
}
pub fn contains_null_rec(v: &serde_json::Value, key: &str) -> bool {
match v {
serde_json::Value::Object(v) => {
for (k, v) in v.iter() {
if k == key && v.is_null() || contains_null_rec(v, key) {
return true;
}
}
false
}
serde_json::Value::Array(v) => {
for v in v.iter() {
if contains_null_rec(v, key) {
return true;
}
}
false
}
_ => false,
}
}
pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> {
let dataset: Vec<TestDocument> =
serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect();