From 9287858997b2a0392dd5637a093f190d381b0c02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 8 Mar 2023 16:14:00 +0100
Subject: [PATCH 01/21] Introduce a new facet_id_is_null_docids database in the
 index

---
 milli/src/index.rs                   | 8 +++++++-
 milli/src/update/clear_documents.rs  | 2 ++
 milli/src/update/delete_documents.rs | 8 ++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/milli/src/index.rs b/milli/src/index.rs
index a4048dfb0..ae7bd211e 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -80,6 +80,7 @@ pub mod db_name {
     pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";
     pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
     pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
+    pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
     pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
@@ -130,6 +131,9 @@ pub struct Index {
     /// Maps the facet field id and the docids for which this field exists
     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
 
+    /// Maps the facet field id and the docids for which this field is set as null
+    pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
+
     /// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
     pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
     /// Maps the facet field id and ranges of strings with the docids that corresponds to them.
@@ -153,7 +157,7 @@ impl Index {
     ) -> Result<Index> {
         use db_name::*;
 
-        options.max_dbs(19);
+        options.max_dbs(20);
         unsafe { options.flag(Flags::MdbAlwaysFreePages) };
 
         let env = options.open(path)?;
@@ -175,6 +179,7 @@ impl Index {
         let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
         let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
         let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
+        let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
 
         let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
         let field_id_docid_facet_strings =
@@ -201,6 +206,7 @@ impl Index {
             facet_id_f64_docids,
             facet_id_string_docids,
             facet_id_exists_docids,
+            facet_id_is_null_docids,
             field_id_docid_facet_f64s,
             field_id_docid_facet_strings,
             documents,
diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs
index 0296bc192..7ac09a785 100644
--- a/milli/src/update/clear_documents.rs
+++ b/milli/src/update/clear_documents.rs
@@ -34,6 +34,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
             facet_id_f64_docids,
             facet_id_string_docids,
             facet_id_exists_docids,
+            facet_id_is_null_docids,
             field_id_docid_facet_f64s,
             field_id_docid_facet_strings,
             documents,
@@ -86,6 +87,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
         script_language_docids.clear(self.wtxn)?;
         facet_id_f64_docids.clear(self.wtxn)?;
         facet_id_exists_docids.clear(self.wtxn)?;
+        facet_id_is_null_docids.clear(self.wtxn)?;
         facet_id_string_docids.clear(self.wtxn)?;
         field_id_docid_facet_f64s.clear(self.wtxn)?;
         field_id_docid_facet_strings.clear(self.wtxn)?;
diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs
index eeb67b829..7180d7d42 100644
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
@@ -245,6 +245,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
             field_id_docid_facet_strings: _,
             script_language_docids,
             facet_id_exists_docids,
+            facet_id_is_null_docids,
             documents,
         } = self.index;
 
@@ -523,6 +524,13 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
             &self.to_delete_docids,
         )?;
 
+        // We delete the documents ids that are under the facet field id values.
+        remove_docids_from_facet_id_exists_docids(
+            self.wtxn,
+            facet_id_is_null_docids,
+            &self.to_delete_docids,
+        )?;
+
         self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
 
         Ok(DetailedDocumentDeletionResult {

From 19ab4d1a159d3be2bda3bde05dcef244bdfd9393 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 8 Mar 2023 16:46:42 +0100
Subject: [PATCH 02/21] Classify the NULL fields values in the facet extractor

---
 .../extract/extract_fid_docid_facet_values.rs | 99 +++++++++++++------
 .../src/update/index_documents/extract/mod.rs | 15 ++-
 2 files changed, 78 insertions(+), 36 deletions(-)

diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
index 71ac330e2..5fe6a7606 100644
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -8,7 +8,7 @@ use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}
 use heed::zerocopy::AsBytes;
 use heed::BytesEncode;
 use roaring::RoaringBitmap;
-use serde_json::Value;
+use serde_json::{from_slice, Value};
 
 use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters};
 use crate::error::InternalError;
@@ -25,7 +25,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
     faceted_fields: &HashSet<FieldId>,
-) -> Result<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)> {
+) -> Result<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)>
+{
     let max_memory = indexer.max_memory_by_thread();
 
     let mut fid_docid_facet_numbers_sorter = create_sorter(
@@ -47,6 +48,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     );
 
     let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
+    let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
 
     let mut key_buffer = Vec::new();
     let mut cursor = obkv_documents.into_cursor()?;
@@ -70,33 +72,40 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
                 // For the other extraction tasks, prefix the key with the field_id and the document_id
                 key_buffer.extend_from_slice(docid_bytes);
 
-                let value =
-                    serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
-
-                let (numbers, strings) = extract_facet_values(&value);
-
-                // insert facet numbers in sorter
-                for number in numbers {
-                    key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
-                    if let Some(value_bytes) = f64_into_bytes(number) {
-                        key_buffer.extend_from_slice(&value_bytes);
-                        key_buffer.extend_from_slice(&number.to_be_bytes());
-
-                        fid_docid_facet_numbers_sorter.insert(&key_buffer, ().as_bytes())?;
+                let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
+                match extract_facet_values(&value) {
+                    FilterableValues::Null => {
+                        facet_is_null_docids.entry(field_id).or_default().insert(document);
                     }
-                }
+                    FilterableValues::Values { numbers, strings } => {
+                        // insert facet numbers in sorter
+                        for number in numbers {
+                            key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
+                            if let Some(value_bytes) = f64_into_bytes(number) {
+                                key_buffer.extend_from_slice(&value_bytes);
+                                key_buffer.extend_from_slice(&number.to_be_bytes());
 
-                // insert normalized and original facet string in sorter
-                for (normalized, original) in strings.into_iter().filter(|(n, _)| !n.is_empty()) {
-                    let normalised_truncated_value: String = normalized
-                        .char_indices()
-                        .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
-                        .map(|(_, c)| c)
-                        .collect();
+                                fid_docid_facet_numbers_sorter
+                                    .insert(&key_buffer, ().as_bytes())?;
+                            }
+                        }
 
-                    key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
-                    key_buffer.extend_from_slice(normalised_truncated_value.as_bytes());
-                    fid_docid_facet_strings_sorter.insert(&key_buffer, original.as_bytes())?;
+                        // insert normalized and original facet string in sorter
+                        for (normalized, original) in
+                            strings.into_iter().filter(|(n, _)| !n.is_empty())
+                        {
+                            let normalised_truncated_value: String = normalized
+                                .char_indices()
+                                .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
+                                .map(|(_, c)| c)
+                                .collect();
+
+                            key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
+                            key_buffer.extend_from_slice(normalised_truncated_value.as_bytes());
+                            fid_docid_facet_strings_sorter
+                                .insert(&key_buffer, original.as_bytes())?;
+                        }
+                    }
                 }
             }
         }
@@ -113,14 +122,36 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     }
     let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?;
 
+    let mut facet_is_null_docids_writer = create_writer(
+        indexer.chunk_compression_type,
+        indexer.chunk_compression_level,
+        tempfile::tempfile()?,
+    );
+    for (fid, bitmap) in facet_is_null_docids.into_iter() {
+        let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
+        facet_is_null_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
+    }
+    let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?;
+
     Ok((
         sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
         sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
+        facet_is_null_docids_reader,
         facet_exists_docids_reader,
     ))
 }
 
-fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) {
+/// Represent what a document field contains.
+enum FilterableValues {
+    Null,
+    /// Represents all the numbers and strings values found in this document field.
+    Values {
+        numbers: Vec<f64>,
+        strings: Vec<(String, String)>,
+    },
+}
+
+fn extract_facet_values(value: &Value) -> FilterableValues {
     fn inner_extract_facet_values(
         value: &Value,
         can_recurse: bool,
@@ -152,9 +183,13 @@ fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) {
         }
     }
 
-    let mut facet_number_values = Vec::new();
-    let mut facet_string_values = Vec::new();
-    inner_extract_facet_values(value, true, &mut facet_number_values, &mut facet_string_values);
-
-    (facet_number_values, facet_string_values)
+    match value {
+        Value::Null => FilterableValues::Null,
+        otherwise => {
+            let mut numbers = Vec::new();
+            let mut strings = Vec::new();
+            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
+            FilterableValues::Values { numbers, strings }
+        }
+    }
 }
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index c0f07cf79..9f9fc8f4f 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -55,7 +55,7 @@ pub(crate) fn data_from_obkv_documents(
         .collect::<Result<()>>()?;
 
     #[allow(clippy::type_complexity)]
-    let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))> = flattened_obkv_chunks
+    let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>))))> = flattened_obkv_chunks
         .par_bridge()
         .map(|flattened_obkv_chunks| {
             send_and_extract_flattened_documents_data(
@@ -76,7 +76,10 @@ pub(crate) fn data_from_obkv_documents(
         docid_word_positions_chunks,
         (
             docid_fid_facet_numbers_chunks,
-            (docid_fid_facet_strings_chunks, facet_exists_docids_chunks),
+            (
+                docid_fid_facet_strings_chunks,
+                (facet_is_null_docids_chunks, facet_exists_docids_chunks),
+            ),
         ),
     ) = result?;
 
@@ -235,7 +238,7 @@ fn send_and_extract_flattened_documents_data(
     grenad::Reader<CursorClonableMmap>,
     (
         grenad::Reader<CursorClonableMmap>,
-        (grenad::Reader<CursorClonableMmap>, grenad::Reader<File>),
+        (grenad::Reader<CursorClonableMmap>, (grenad::Reader<File>, grenad::Reader<File>)),
     ),
 )> {
     let flattened_documents_chunk =
@@ -284,6 +287,7 @@ fn send_and_extract_flattened_documents_data(
                 let (
                     docid_fid_facet_numbers_chunk,
                     docid_fid_facet_strings_chunk,
+                    fid_facet_is_null_docids_chunk,
                     fid_facet_exists_docids_chunk,
                 ) = extract_fid_docid_facet_values(
                     flattened_documents_chunk.clone(),
@@ -309,7 +313,10 @@ fn send_and_extract_flattened_documents_data(
 
                 Ok((
                     docid_fid_facet_numbers_chunk,
-                    (docid_fid_facet_strings_chunk, fid_facet_exists_docids_chunk),
+                    (
+                        docid_fid_facet_strings_chunk,
+                        (fid_facet_is_null_docids_chunk, fid_facet_exists_docids_chunk),
+                    ),
                 ))
             },
         );

From 43ff236df8b7ad693c4cfe26d8a16dc09dc72359 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 8 Mar 2023 16:49:53 +0100
Subject: [PATCH 03/21] Write the NULL facet values in the database

---
 milli/src/update/index_documents/extract/mod.rs | 16 ++++++++++++++++
 milli/src/update/index_documents/typed_chunk.rs | 12 ++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 9f9fc8f4f..c11d08405 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -99,6 +99,22 @@ pub(crate) fn data_from_obkv_documents(
         });
     }
 
+    // merge facet_is_null_docids and send them as a typed chunk
+    {
+        let lmdb_writer_sx = lmdb_writer_sx.clone();
+        rayon::spawn(move || {
+            debug!("merge {} database", "facet-id-is-null-docids");
+            match facet_is_null_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
+                Ok(reader) => {
+                    let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader)));
+                }
+                Err(e) => {
+                    let _ = lmdb_writer_sx.send(Err(e));
+                }
+            }
+        });
+    }
+
     spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_word_positions_chunks.clone(),
         indexer,
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index b9b11cfa8..79f2e2c55 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -39,6 +39,7 @@ pub(crate) enum TypedChunk {
     FieldIdFacetStringDocids(grenad::Reader<File>),
     FieldIdFacetNumberDocids(grenad::Reader<File>),
     FieldIdFacetExistsDocids(grenad::Reader<File>),
+    FieldIdFacetIsNullDocids(grenad::Reader<File>),
     GeoPoints(grenad::Reader<File>),
     ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
 }
@@ -161,6 +162,17 @@ pub(crate) fn write_typed_chunk_into_index(
             )?;
             is_merged_database = true;
         }
+        TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => {
+            append_entries_into_database(
+                facet_id_is_null_docids,
+                &index.facet_id_is_null_docids,
+                wtxn,
+                index_is_empty,
+                |value, _buffer| Ok(value),
+                merge_cbo_roaring_bitmaps,
+            )?;
+            is_merged_database = true;
+        }
         TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
             append_entries_into_database(
                 word_pair_proximity_docids_iter,

From 7c0cd7172d02664af8f5f59e2520741185d5dca1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 8 Mar 2023 16:57:42 +0100
Subject: [PATCH 04/21] Introduce the NULL and NOT value NULL operator

---
 filter-parser/src/condition.rs   | 16 ++++++++++++++++
 filter-parser/src/lib.rs         | 20 ++++++++++++++++----
 milli/src/index.rs               | 12 ++++++++++++
 milli/src/search/facet/filter.rs |  4 ++++
 4 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs
index 735ffec0e..834fac8b8 100644
--- a/filter-parser/src/condition.rs
+++ b/filter-parser/src/condition.rs
@@ -20,6 +20,7 @@ pub enum Condition<'a> {
     GreaterThanOrEqual(Token<'a>),
     Equal(Token<'a>),
     NotEqual(Token<'a>),
+    Null,
     Exists,
     LowerThan(Token<'a>),
     LowerThanOrEqual(Token<'a>),
@@ -44,6 +45,21 @@ pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
     Ok((input, condition))
 }
 
+/// null          = value "NULL"
+pub fn parse_null(input: Span) -> IResult<FilterCondition> {
+    let (input, key) = terminated(parse_value, tag("NULL"))(input)?;
+
+    Ok((input, FilterCondition::Condition { fid: key, op: Null }))
+}
+
+/// null          = value "NOT" WS+ "NULL"
+pub fn parse_not_null(input: Span) -> IResult<FilterCondition> {
+    let (input, key) = parse_value(input)?;
+
+    let (input, _) = tuple((tag("NOT"), multispace1, tag("NULL")))(input)?;
+    Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null }))))
+}
+
 /// exist          = value "EXISTS"
 pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
     let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;
diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs
index 8e21ff6be..36657587f 100644
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -47,7 +47,7 @@ mod value;
 use std::fmt::Debug;
 
 pub use condition::{parse_condition, parse_to, Condition};
-use condition::{parse_exists, parse_not_exists};
+use condition::{parse_exists, parse_not_exists, parse_not_null, parse_null};
 use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
 pub use error::{Error, ErrorKind};
 use nom::branch::alt;
@@ -414,6 +414,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
         parse_in,
         parse_not_in,
         parse_condition,
+        parse_null,
+        parse_not_null,
         parse_exists,
         parse_not_exists,
         parse_to,
@@ -496,14 +498,23 @@ pub mod tests {
         insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
         insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
 
-        // Test NOT + EXISTS
-        insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
+        // Test NOT
         insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
+        insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
+
+        // Test NULL + NOT NULL
+        insta::assert_display_snapshot!(p("subscribers NULL"), @"{subscribers} NULL");
+        insta::assert_display_snapshot!(p("NOT subscribers NULL"), @"NOT ({subscribers} NULL)");
+        insta::assert_display_snapshot!(p("subscribers NOT NULL"), @"NOT ({subscribers} NULL)");
+        insta::assert_display_snapshot!(p("NOT subscribers NOT NULL"), @"{subscribers} NULL");
+        insta::assert_display_snapshot!(p("subscribers NOT   NULL"), @"NOT ({subscribers} NULL)");
+
+        // Test EXISTS + NOT EXITS
+        insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
         insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
         insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
         insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
         insta::assert_display_snapshot!(p("subscribers NOT   EXISTS"), @"NOT ({subscribers} EXISTS)");
-        insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
 
         // Test nested NOT
         insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
@@ -800,6 +811,7 @@ impl<'a> std::fmt::Display for Condition<'a> {
             Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"),
             Condition::Equal(token) => write!(f, "= {token}"),
             Condition::NotEqual(token) => write!(f, "!= {token}"),
+            Condition::Null => write!(f, "NULL"),
             Condition::Exists => write!(f, "EXISTS"),
             Condition::LowerThan(token) => write!(f, "< {token}"),
             Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
diff --git a/milli/src/index.rs b/milli/src/index.rs
index ae7bd211e..3316028df 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -839,6 +839,18 @@ impl Index {
         }
     }
 
+    /// Retrieve all the documents which contain this field id set as null
+    pub fn null_faceted_documents_ids(
+        &self,
+        rtxn: &RoTxn,
+        field_id: FieldId,
+    ) -> heed::Result<RoaringBitmap> {
+        match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? {
+            Some(docids) => Ok(docids),
+            None => Ok(RoaringBitmap::new()),
+        }
+    }
+
     /// Retrieve all the documents which contain this field id
     pub fn exists_faceted_documents_ids(
         &self,
diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs
index a4ac53950..df42725c5 100644
--- a/milli/src/search/facet/filter.rs
+++ b/milli/src/search/facet/filter.rs
@@ -219,6 +219,10 @@ impl<'a> Filter<'a> {
             Condition::Between { from, to } => {
                 (Included(from.parse_finite_float()?), Included(to.parse_finite_float()?))
             }
+            Condition::Null => {
+                let is_null = index.null_faceted_documents_ids(rtxn, field_id)?;
+                return Ok(is_null);
+            }
             Condition::Exists => {
                 let exist = index.exists_faceted_documents_ids(rtxn, field_id)?;
                 return Ok(exist);

From 7dc04747fd94e8b43d15584e351ecb1489ab2fed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 8 Mar 2023 17:37:08 +0100
Subject: [PATCH 05/21] Make clippy happy

---
 .../index_documents/extract/extract_fid_docid_facet_values.rs    | 1 +
 1 file changed, 1 insertion(+)

diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
index 5fe6a7606..0589dc773 100644
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -21,6 +21,7 @@ use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET
 /// Returns the generated grenad reader containing the docid the fid and the orginal value as key
 /// and the normalized value as value extracted from the given chunk of documents.
 #[logging_timer::time]
+#[allow(clippy::type_complexity)]
 pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,

From b1d61f5a02f50cdb9f34d23f192f953e155d30ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Thu, 9 Mar 2023 10:04:27 +0100
Subject: [PATCH 06/21] Add more tests for the NULL filter

---
 milli/tests/search/filters.rs |  7 +++++++
 milli/tests/search/mod.rs     | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs
index 18de24ac3..0b5296b82 100644
--- a/milli/tests/search/filters.rs
+++ b/milli/tests/search/filters.rs
@@ -82,10 +82,17 @@ test_filter!(
     vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])]
 );
 test_filter!(exists_filter_1, vec![Right("opt1 EXISTS")]);
+test_filter!(exists_filter_2, vec![Right("opt1.opt2 EXISTS")]);
 test_filter!(exists_filter_1_not, vec![Right("opt1 NOT EXISTS")]);
 test_filter!(exists_filter_1_not_alt, vec![Right("NOT opt1 EXISTS")]);
 test_filter!(exists_filter_1_double_not, vec![Right("NOT opt1 NOT EXISTS")]);
 
+test_filter!(null_filter_1, vec![Right("opt1 NULL")]);
+test_filter!(null_filter_2, vec![Right("opt1.opt2 NULL")]);
+test_filter!(null_filter_1_not, vec![Right("opt1 NOT NULL")]);
+test_filter!(null_filter_1_not_alt, vec![Right("NOT opt1 NULL")]);
+test_filter!(null_filter_1_double_not, vec![Right("NOT opt1 NOT NULL")]);
+
 test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]);
 test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]);
 test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]);
diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs
index 18c74e344..e67c1bc64 100644
--- a/milli/tests/search/mod.rs
+++ b/milli/tests/search/mod.rs
@@ -205,6 +205,18 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
         } else if let Some(opt1) = &document.opt1 {
             id = contains_key_rec(opt1, "opt2").then(|| document.id.clone());
         }
+    } else if matches!(filter, "opt1 NULL" | "NOT opt1 NOT NULL") {
+        id = document.opt1.as_ref().map_or(false, |v| v.is_null()).then(|| document.id.clone());
+    } else if matches!(filter, "NOT opt1 NULL" | "opt1 NOT NULL") {
+        id = document.opt1.as_ref().map_or(true, |v| !v.is_null()).then(|| document.id.clone());
+    } else if matches!(filter, "opt1.opt2 NULL") {
+        if document.opt1opt2.as_ref().map_or(false, |v| v.is_null()) {
+            id = Some(document.id.clone());
+        } else if let Some(opt1) = &document.opt1 {
+            if !opt1.is_null() {
+                id = contains_null_rec(opt1, "opt2").then(|| document.id.clone());
+            }
+        }
     } else if matches!(
         filter,
         "tag_in IN[1, 2, 3, four, five]" | "NOT tag_in NOT IN[1, 2, 3, four, five]"
@@ -240,6 +252,28 @@ pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {
     }
 }
 
+pub fn contains_null_rec(v: &serde_json::Value, key: &str) -> bool {
+    match v {
+        serde_json::Value::Object(v) => {
+            for (k, v) in v.iter() {
+                if k == key && v.is_null() || contains_null_rec(v, key) {
+                    return true;
+                }
+            }
+            false
+        }
+        serde_json::Value::Array(v) => {
+            for v in v.iter() {
+                if contains_null_rec(v, key) {
+                    return true;
+                }
+            }
+            false
+        }
+        _ => false,
+    }
+}
+
 pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> {
     let dataset: Vec<TestDocument> =
         serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect();

From e106b16148ba7231af3045be5e8384813373b509 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Thu, 9 Mar 2023 13:01:43 +0100
Subject: [PATCH 07/21] Fix a typo in a variable

Co-authored-by: Louis Dureuil <louis@meilisearch.com>

aaa
---
 .../index_documents/extract/extract_fid_docid_facet_values.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
index 0589dc773..be7b44eee 100644
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -95,14 +95,14 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
                         for (normalized, original) in
                             strings.into_iter().filter(|(n, _)| !n.is_empty())
                         {
-                            let normalised_truncated_value: String = normalized
+                            let normalized_truncated_value: String = normalized
                                 .char_indices()
                                 .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
                                 .map(|(_, c)| c)
                                 .collect();
 
                             key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
-                            key_buffer.extend_from_slice(normalised_truncated_value.as_bytes());
+                            key_buffer.extend_from_slice(normalized_truncated_value.as_bytes());
                             fid_docid_facet_strings_sorter
                                 .insert(&key_buffer, original.as_bytes())?;
                         }

From e064c52544e97dbad7bc43732fc5301db6ea1988 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Thu, 9 Mar 2023 13:05:54 +0100
Subject: [PATCH 08/21] Rename an internal facet deletion method

---
 milli/src/update/delete_documents.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs
index 7180d7d42..bb232d7cc 100644
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
@@ -518,14 +518,14 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
 
         drop(iter);
         // We delete the documents ids that are under the facet field id values.
-        remove_docids_from_facet_id_exists_docids(
+        remove_docids_from_facet_id_docids(
             self.wtxn,
             facet_id_exists_docids,
             &self.to_delete_docids,
         )?;
 
         // We delete the documents ids that are under the facet field id values.
-        remove_docids_from_facet_id_exists_docids(
+        remove_docids_from_facet_id_docids(
             self.wtxn,
             facet_id_is_null_docids,
             &self.to_delete_docids,
@@ -633,7 +633,7 @@ fn remove_docids_from_field_id_docid_facet_value(
     Ok(all_affected_facet_values)
 }
 
-fn remove_docids_from_facet_id_exists_docids<'a, C>(
+fn remove_docids_from_facet_id_docids<'a, C>(
     wtxn: &'a mut heed::RwTxn,
     db: &heed::Database<C, CboRoaringBitmapCodec>,
     to_remove: &RoaringBitmap,

From 0ad53784e73b6c2da743d9cafee999002a191f1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Thu, 9 Mar 2023 13:21:21 +0100
Subject: [PATCH 09/21] Create a new struct to reduce the type complexity

---
 .../extract/extract_fid_docid_facet_values.rs | 24 ++++++++++++-------
 .../src/update/index_documents/extract/mod.rs |  6 ++---
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
index be7b44eee..6460af812 100644
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -16,18 +16,24 @@ use crate::facet::value_encoding::f64_into_bytes;
 use crate::update::index_documents::{create_writer, writer_into_reader};
 use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH};
 
+/// The extracted facet values stored in grenad files by type.
+pub struct ExtractedFacetValues {
+    pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
+    pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
+    pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
+    pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
+}
+
 /// Extracts the facet values of each faceted field of each document.
 ///
 /// Returns the generated grenad reader containing the docid the fid and the orginal value as key
 /// and the normalized value as value extracted from the given chunk of documents.
 #[logging_timer::time]
-#[allow(clippy::type_complexity)]
 pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
     faceted_fields: &HashSet<FieldId>,
-) -> Result<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)>
-{
+) -> Result<ExtractedFacetValues> {
     let max_memory = indexer.max_memory_by_thread();
 
     let mut fid_docid_facet_numbers_sorter = create_sorter(
@@ -134,12 +140,12 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     }
     let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?;
 
-    Ok((
-        sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
-        sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
-        facet_is_null_docids_reader,
-        facet_exists_docids_reader,
-    ))
+    Ok(ExtractedFacetValues {
+        docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
+        docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
+        fid_facet_is_null_docids_chunk: facet_is_null_docids_reader,
+        fid_facet_exists_docids_chunk: facet_exists_docids_reader,
+    })
 }
 
 /// Represent what a document field contains.
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index c11d08405..4a5c9b64c 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -18,7 +18,7 @@ use rayon::prelude::*;
 use self::extract_docid_word_positions::extract_docid_word_positions;
 use self::extract_facet_number_docids::extract_facet_number_docids;
 use self::extract_facet_string_docids::extract_facet_string_docids;
-use self::extract_fid_docid_facet_values::extract_fid_docid_facet_values;
+use self::extract_fid_docid_facet_values::{extract_fid_docid_facet_values, ExtractedFacetValues};
 use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
 use self::extract_geo_points::extract_geo_points;
 use self::extract_word_docids::extract_word_docids;
@@ -300,12 +300,12 @@ fn send_and_extract_flattened_documents_data(
                 Ok(docid_word_positions_chunk)
             },
             || {
-                let (
+                let ExtractedFacetValues {
                     docid_fid_facet_numbers_chunk,
                     docid_fid_facet_strings_chunk,
                     fid_facet_is_null_docids_chunk,
                     fid_facet_exists_docids_chunk,
-                ) = extract_fid_docid_facet_values(
+                } = extract_fid_docid_facet_values(
                     flattened_documents_chunk.clone(),
                     indexer,
                     faceted_fields,

From ff86073288ce407e25710af8d0c9cad4ae80bacf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Thu, 9 Mar 2023 13:32:27 +0100
Subject: [PATCH 10/21] Add a snapshot for the NULL facet database

---
 milli/src/snapshot_tests.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs
index f7f1a97e6..c6ea8f3dd 100644
--- a/milli/src/snapshot_tests.rs
+++ b/milli/src/snapshot_tests.rs
@@ -271,6 +271,11 @@ pub fn snap_facet_id_exists_docids(index: &Index) -> String {
         &format!("{facet_id:<3} {}", display_bitmap(&docids))
     })
 }
+pub fn snap_facet_id_is_null_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, facet_id_is_null_docids, |(facet_id, docids)| {
+        &format!("{facet_id:<3} {}", display_bitmap(&docids))
+    })
+}
 pub fn snap_facet_id_string_docids(index: &Index) -> String {
     make_db_snap_from_iter!(index, facet_id_string_docids, |(
         FacetGroupKey { field_id, level, left_bound },
@@ -495,6 +500,9 @@ macro_rules! full_snap_of_db {
     ($index:ident, facet_id_exists_docids) => {{
         $crate::snapshot_tests::snap_facet_id_exists_docids(&$index)
     }};
+    ($index:ident, facet_id_is_null_docids) => {{
+        $crate::snapshot_tests::snap_facet_id_is_null_docids(&$index)
+    }};
     ($index:ident, documents_ids) => {{
         $crate::snapshot_tests::snap_documents_ids(&$index)
     }};

From df48ac8803b908c9f080b5481930983103f63a07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Thu, 9 Mar 2023 13:53:37 +0100
Subject: [PATCH 11/21] Add one more test for the NULL operator

---
 milli/src/update/index_documents/mod.rs | 108 ++++++++++++++++++++++++
 1 file changed, 108 insertions(+)

diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 2a7930f84..7b9bd7834 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -1758,6 +1758,114 @@ mod tests {
         check_ok(&index);
     }
 
+    #[test]
+    fn index_documents_check_is_null_database() {
+        let content = || {
+            documents!([
+                {
+                    "id": 0,
+                    "colour": null,
+                },
+                {
+                    "id": 6,
+                    "colour": {
+                        "green": null
+                    }
+                },
+                {
+                    "id": 7,
+                    "colour": {
+                        "green": {
+                            "blue": null
+                        }
+                    }
+                },
+                {
+                    "id": 8,
+                    "colour": 0,
+                },
+                {
+                    "id": 9,
+                    "colour": []
+                },
+                {
+                    "id": 10,
+                    "colour": {}
+                },
+                {
+                    "id": 12,
+                    "colour": [1]
+                },
+                {
+                    "id": 13
+                },
+                {
+                    "id": 14,
+                    "colour": {
+                        "green": 1
+                    }
+                },
+                {
+                    "id": 15,
+                    "colour": {
+                        "green": {
+                            "blue": []
+                        }
+                    }
+                }
+            ])
+        };
+
+        let check_ok = |index: &Index| {
+            let rtxn = index.read_txn().unwrap();
+            let facets = index.faceted_fields(&rtxn).unwrap();
+            assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
+
+            let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
+            let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
+            let colour_blue_id =
+                index.fields_ids_map(&rtxn).unwrap().id("colour.green.blue").unwrap();
+
+            let bitmap_null_colour =
+                index.facet_id_is_null_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap();
+            assert_eq!(bitmap_null_colour.into_iter().collect::<Vec<_>>(), vec![0]);
+
+            let bitmap_colour_green = index
+                .facet_id_is_null_docids
+                .get(&rtxn, &BEU16::new(colour_green_id))
+                .unwrap()
+                .unwrap();
+            assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![1]);
+
+            let bitmap_colour_blue = index
+                .facet_id_is_null_docids
+                .get(&rtxn, &BEU16::new(colour_blue_id))
+                .unwrap()
+                .unwrap();
+            assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![2]);
+        };
+
+        let faceted_fields = hashset!(S("colour"));
+
+        let index = TempIndex::new();
+        index.add_documents(content()).unwrap();
+        index
+            .update_settings(|settings| {
+                settings.set_filterable_fields(faceted_fields.clone());
+            })
+            .unwrap();
+        check_ok(&index);
+
+        let index = TempIndex::new();
+        index
+            .update_settings(|settings| {
+                settings.set_filterable_fields(faceted_fields.clone());
+            })
+            .unwrap();
+        index.add_documents(content()).unwrap();
+        check_ok(&index);
+    }
+
     #[test]
     fn primary_key_must_not_contain_floats() {
         let index = TempIndex::new_with_map_size(4096 * 100);

From c25779afba1e7fadd9365422db87f2e0b5ec3635 Mon Sep 17 00:00:00 2001
From: Kerollmops <clement@meilisearch.com>
Date: Mon, 13 Mar 2023 17:40:34 +0100
Subject: [PATCH 12/21] Specify that the NULL keyword is a keyword too

---
 filter-parser/src/value.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs
index 2296c0769..9a0a5e710 100644
--- a/filter-parser/src/value.rs
+++ b/filter-parser/src/value.rs
@@ -178,7 +178,10 @@ fn is_syntax_component(c: char) -> bool {
 }
 
 fn is_keyword(s: &str) -> bool {
-    matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius" | "_geoBoundingBox")
+    matches!(
+        s,
+        "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "NULL" | "_geoRadius" | "_geoBoundingBox"
+    )
 }
 
 #[cfg(test)]

From 030263caa36c73e4d9a178621483231718f0e2e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Tue, 14 Mar 2023 10:31:04 +0100
Subject: [PATCH 13/21] Change the IS NULL filter syntax to use the IS keyword

---
 filter-parser/src/condition.rs | 13 +++++++------
 filter-parser/src/lib.rs       |  8 ++++----
 filter-parser/src/value.rs     | 11 ++++++++++-
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs
index 834fac8b8..fe424539f 100644
--- a/filter-parser/src/condition.rs
+++ b/filter-parser/src/condition.rs
@@ -45,18 +45,19 @@ pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
     Ok((input, condition))
 }
 
-/// null          = value "NULL"
-pub fn parse_null(input: Span) -> IResult<FilterCondition> {
-    let (input, key) = terminated(parse_value, tag("NULL"))(input)?;
+/// null          = value "IS" WS+ "NULL"
+pub fn parse_is_null(input: Span) -> IResult<FilterCondition> {
+    let (input, key) = parse_value(input)?;
 
+    let (input, _) = tuple((tag("IS"), multispace1, tag("NULL")))(input)?;
     Ok((input, FilterCondition::Condition { fid: key, op: Null }))
 }
 
-/// null          = value "NOT" WS+ "NULL"
-pub fn parse_not_null(input: Span) -> IResult<FilterCondition> {
+/// null          = value "IS" WS+ "NOT" WS+ "NULL"
+pub fn parse_is_not_null(input: Span) -> IResult<FilterCondition> {
     let (input, key) = parse_value(input)?;
 
-    let (input, _) = tuple((tag("NOT"), multispace1, tag("NULL")))(input)?;
+    let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("NULL")))(input)?;
     Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null }))))
 }
 
diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs
index 36657587f..513da07c5 100644
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -47,7 +47,7 @@ mod value;
 use std::fmt::Debug;
 
 pub use condition::{parse_condition, parse_to, Condition};
-use condition::{parse_exists, parse_not_exists, parse_not_null, parse_null};
+use condition::{parse_exists, parse_is_not_null, parse_is_null, parse_not_exists};
 use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
 pub use error::{Error, ErrorKind};
 use nom::branch::alt;
@@ -414,8 +414,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
         parse_in,
         parse_not_in,
         parse_condition,
-        parse_null,
-        parse_not_null,
+        parse_is_null,
+        parse_is_not_null,
         parse_exists,
         parse_not_exists,
         parse_to,
@@ -811,7 +811,7 @@ impl<'a> std::fmt::Display for Condition<'a> {
             Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"),
             Condition::Equal(token) => write!(f, "= {token}"),
             Condition::NotEqual(token) => write!(f, "!= {token}"),
-            Condition::Null => write!(f, "NULL"),
+            Condition::Null => write!(f, "IS NULL"),
             Condition::Exists => write!(f, "EXISTS"),
             Condition::LowerThan(token) => write!(f, "< {token}"),
             Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs
index 9a0a5e710..f8f1c43bc 100644
--- a/filter-parser/src/value.rs
+++ b/filter-parser/src/value.rs
@@ -180,7 +180,16 @@ fn is_syntax_component(c: char) -> bool {
 fn is_keyword(s: &str) -> bool {
     matches!(
         s,
-        "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "NULL" | "_geoRadius" | "_geoBoundingBox"
+        "AND"
+            | "OR"
+            | "IN"
+            | "NOT"
+            | "TO"
+            | "EXISTS"
+            | "IS"
+            | "NULL"
+            | "_geoRadius"
+            | "_geoBoundingBox"
     )
 }
 

From fa2ea4a3793ce1cd78d4d4f1b38f94f7273fb5c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Tue, 14 Mar 2023 10:31:27 +0100
Subject: [PATCH 14/21] Update the test to accept the new IS syntax

---
 filter-parser/src/error.rs         |  4 ++--
 filter-parser/src/lib.rs           | 20 ++++++++++----------
 meilisearch/tests/search/errors.rs |  4 ++--
 milli/tests/search/filters.rs      | 10 +++++-----
 milli/tests/search/mod.rs          |  6 +++---
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs
index 4d9d89859..fc6ad8f6d 100644
--- a/filter-parser/src/error.rs
+++ b/filter-parser/src/error.rs
@@ -144,10 +144,10 @@ impl<'a> Display for Error<'a> {
                 writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
             }
             ErrorKind::InvalidPrimary if input.trim().is_empty() => {
-                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")?
+                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")?
             }
             ErrorKind::InvalidPrimary => {
-                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)?
+                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)?
             }
             ErrorKind::ExpectedEof => {
                 writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs
index 513da07c5..c75ada205 100644
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -503,11 +503,11 @@ pub mod tests {
         insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
 
         // Test NULL + NOT NULL
-        insta::assert_display_snapshot!(p("subscribers NULL"), @"{subscribers} NULL");
-        insta::assert_display_snapshot!(p("NOT subscribers NULL"), @"NOT ({subscribers} NULL)");
-        insta::assert_display_snapshot!(p("subscribers NOT NULL"), @"NOT ({subscribers} NULL)");
-        insta::assert_display_snapshot!(p("NOT subscribers NOT NULL"), @"{subscribers} NULL");
-        insta::assert_display_snapshot!(p("subscribers NOT   NULL"), @"NOT ({subscribers} NULL)");
+        insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
+        insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
+        insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
+        insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
+        insta::assert_display_snapshot!(p("subscribers  IS   NOT   NULL"), @"NOT ({subscribers} IS NULL)");
 
         // Test EXISTS + NOT EXITS
         insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
@@ -587,7 +587,7 @@ pub mod tests {
         "###);
 
         insta::assert_display_snapshot!(p("'OR'"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
         1:5 'OR'
         "###);
 
@@ -597,12 +597,12 @@ pub mod tests {
         "###);
 
         insta::assert_display_snapshot!(p("channel Ponce"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
         1:14 channel Ponce
         "###);
 
         insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
         19:19 channel = Ponce OR
         "###);
 
@@ -667,12 +667,12 @@ pub mod tests {
         "###);
 
         insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
         1:17 colour NOT EXIST
         "###);
 
         insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
         1:23 subscribers 100 TO1000
         "###);
 
diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs
index 9e4dbdcf5..ab42700f3 100644
--- a/meilisearch/tests/search/errors.rs
+++ b/meilisearch/tests/search/errors.rs
@@ -547,7 +547,7 @@ async fn filter_invalid_syntax_object() {
     index.wait_task(1).await;
 
     let expected_response = json!({
-        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
+        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
         "code": "invalid_search_filter",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -572,7 +572,7 @@ async fn filter_invalid_syntax_array() {
     index.wait_task(1).await;
 
     let expected_response = json!({
-        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
+        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
         "code": "invalid_search_filter",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs
index 0b5296b82..57ad6a40b 100644
--- a/milli/tests/search/filters.rs
+++ b/milli/tests/search/filters.rs
@@ -87,11 +87,11 @@ test_filter!(exists_filter_1_not, vec![Right("opt1 NOT EXISTS")]);
 test_filter!(exists_filter_1_not_alt, vec![Right("NOT opt1 EXISTS")]);
 test_filter!(exists_filter_1_double_not, vec![Right("NOT opt1 NOT EXISTS")]);
 
-test_filter!(null_filter_1, vec![Right("opt1 NULL")]);
-test_filter!(null_filter_2, vec![Right("opt1.opt2 NULL")]);
-test_filter!(null_filter_1_not, vec![Right("opt1 NOT NULL")]);
-test_filter!(null_filter_1_not_alt, vec![Right("NOT opt1 NULL")]);
-test_filter!(null_filter_1_double_not, vec![Right("NOT opt1 NOT NULL")]);
+test_filter!(null_filter_1, vec![Right("opt1 IS NULL")]);
+test_filter!(null_filter_2, vec![Right("opt1.opt2 IS NULL")]);
+test_filter!(null_filter_1_not, vec![Right("opt1 IS NOT NULL")]);
+test_filter!(null_filter_1_not_alt, vec![Right("NOT opt1 IS NULL")]);
+test_filter!(null_filter_1_double_not, vec![Right("NOT opt1 IS NOT NULL")]);
 
 test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]);
 test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]);
diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs
index e67c1bc64..51852cced 100644
--- a/milli/tests/search/mod.rs
+++ b/milli/tests/search/mod.rs
@@ -205,11 +205,11 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
         } else if let Some(opt1) = &document.opt1 {
             id = contains_key_rec(opt1, "opt2").then(|| document.id.clone());
         }
-    } else if matches!(filter, "opt1 NULL" | "NOT opt1 NOT NULL") {
+    } else if matches!(filter, "opt1 IS NULL" | "NOT opt1 IS NOT NULL") {
         id = document.opt1.as_ref().map_or(false, |v| v.is_null()).then(|| document.id.clone());
-    } else if matches!(filter, "NOT opt1 NULL" | "opt1 NOT NULL") {
+    } else if matches!(filter, "NOT opt1 IS NULL" | "opt1 IS NOT NULL") {
         id = document.opt1.as_ref().map_or(true, |v| !v.is_null()).then(|| document.id.clone());
-    } else if matches!(filter, "opt1.opt2 NULL") {
+    } else if matches!(filter, "opt1.opt2 IS NULL") {
         if document.opt1opt2.as_ref().map_or(false, |v| v.is_null()) {
             id = Some(document.id.clone());
         } else if let Some(opt1) = &document.opt1 {

From ea016d97afd2dfdae2fe15a12a7bfd6554d3a097 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Tue, 14 Mar 2023 18:08:12 +0100
Subject: [PATCH 15/21] Implementing an IS EMPTY filter

---
 filter-parser/src/condition.rs                | 17 +++++
 filter-parser/src/error.rs                    |  6 +-
 filter-parser/src/lib.rs                      | 25 +++++--
 filter-parser/src/value.rs                    |  1 +
 meilisearch/tests/search/errors.rs            |  4 +-
 milli/src/index.rs                            | 20 +++++-
 milli/src/search/facet/filter.rs              |  4 ++
 milli/src/update/clear_documents.rs           |  2 +
 milli/src/update/delete_documents.rs          |  8 +++
 .../extract/extract_fid_docid_facet_values.rs | 29 +++++++--
 .../src/update/index_documents/extract/mod.rs | 65 +++++++++++++------
 .../src/update/index_documents/typed_chunk.rs | 12 ++++
 12 files changed, 156 insertions(+), 37 deletions(-)

diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs
index fe424539f..9abe1c6ea 100644
--- a/filter-parser/src/condition.rs
+++ b/filter-parser/src/condition.rs
@@ -21,6 +21,7 @@ pub enum Condition<'a> {
     Equal(Token<'a>),
     NotEqual(Token<'a>),
     Null,
+    Empty,
     Exists,
     LowerThan(Token<'a>),
     LowerThanOrEqual(Token<'a>),
@@ -61,6 +62,22 @@ pub fn parse_is_not_null(input: Span) -> IResult<FilterCondition> {
     Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null }))))
 }
 
+/// empty          = value "IS" WS+ "EMPTY"
+pub fn parse_is_empty(input: Span) -> IResult<FilterCondition> {
+    let (input, key) = parse_value(input)?;
+
+    let (input, _) = tuple((tag("IS"), multispace1, tag("EMPTY")))(input)?;
+    Ok((input, FilterCondition::Condition { fid: key, op: Empty }))
+}
+
+/// empty          = value "IS" WS+ "NOT" WS+ "EMPTY"
+pub fn parse_is_not_empty(input: Span) -> IResult<FilterCondition> {
+    let (input, key) = parse_value(input)?;
+
+    let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("EMPTY")))(input)?;
+    Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Empty }))))
+}
+
 /// exist          = value "EXISTS"
 pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
     let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;
diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs
index fc6ad8f6d..cbb83c841 100644
--- a/filter-parser/src/error.rs
+++ b/filter-parser/src/error.rs
@@ -143,11 +143,9 @@ impl<'a> Display for Error<'a> {
             ErrorKind::MissingClosingDelimiter(c) => {
                 writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
             }
-            ErrorKind::InvalidPrimary if input.trim().is_empty() => {
-                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")?
-            }
             ErrorKind::InvalidPrimary => {
-                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)?
+                let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
+                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
             }
             ErrorKind::ExpectedEof => {
                 writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs
index c75ada205..69eb6700f 100644
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -47,7 +47,10 @@ mod value;
 use std::fmt::Debug;
 
 pub use condition::{parse_condition, parse_to, Condition};
-use condition::{parse_exists, parse_is_not_null, parse_is_null, parse_not_exists};
+use condition::{
+    parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null,
+    parse_not_exists,
+};
 use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
 pub use error::{Error, ErrorKind};
 use nom::branch::alt;
@@ -416,6 +419,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
         parse_condition,
         parse_is_null,
         parse_is_not_null,
+        parse_is_empty,
+        parse_is_not_empty,
         parse_exists,
         parse_not_exists,
         parse_to,
@@ -509,6 +514,13 @@ pub mod tests {
         insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
         insta::assert_display_snapshot!(p("subscribers  IS   NOT   NULL"), @"NOT ({subscribers} IS NULL)");
 
+        // Test EMPTY + NOT EMPTY
+        insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
+        insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
+        insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
+        insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
+        insta::assert_display_snapshot!(p("subscribers  IS   NOT   EMPTY"), @"NOT ({subscribers} IS EMPTY)");
+
         // Test EXISTS + NOT EXITS
         insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
         insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
@@ -587,7 +599,7 @@ pub mod tests {
         "###);
 
         insta::assert_display_snapshot!(p("'OR'"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
         1:5 'OR'
         "###);
 
@@ -597,12 +609,12 @@ pub mod tests {
         "###);
 
         insta::assert_display_snapshot!(p("channel Ponce"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
         1:14 channel Ponce
         "###);
 
         insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
         19:19 channel = Ponce OR
         "###);
 
@@ -667,12 +679,12 @@ pub mod tests {
         "###);
 
         insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
         1:17 colour NOT EXIST
         "###);
 
         insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
-        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
         1:23 subscribers 100 TO1000
         "###);
 
@@ -812,6 +824,7 @@ impl<'a> std::fmt::Display for Condition<'a> {
             Condition::Equal(token) => write!(f, "= {token}"),
             Condition::NotEqual(token) => write!(f, "!= {token}"),
             Condition::Null => write!(f, "IS NULL"),
+            Condition::Empty => write!(f, "IS EMPTY"),
             Condition::Exists => write!(f, "EXISTS"),
             Condition::LowerThan(token) => write!(f, "< {token}"),
             Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs
index f8f1c43bc..518c0a25a 100644
--- a/filter-parser/src/value.rs
+++ b/filter-parser/src/value.rs
@@ -188,6 +188,7 @@ fn is_keyword(s: &str) -> bool {
             | "EXISTS"
             | "IS"
             | "NULL"
+            | "EMPTY"
             | "_geoRadius"
             | "_geoBoundingBox"
     )
diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs
index ab42700f3..2a0e4a39d 100644
--- a/meilisearch/tests/search/errors.rs
+++ b/meilisearch/tests/search/errors.rs
@@ -547,7 +547,7 @@ async fn filter_invalid_syntax_object() {
     index.wait_task(1).await;
 
     let expected_response = json!({
-        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
+        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
         "code": "invalid_search_filter",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -572,7 +572,7 @@ async fn filter_invalid_syntax_array() {
     index.wait_task(1).await;
 
     let expected_response = json!({
-        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
+        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
         "code": "invalid_search_filter",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
diff --git a/milli/src/index.rs b/milli/src/index.rs
index 3316028df..c60857bd0 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -81,6 +81,7 @@ pub mod db_name {
     pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
     pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
     pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
+    pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
     pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
@@ -130,9 +131,10 @@ pub struct Index {
 
     /// Maps the facet field id and the docids for which this field exists
     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
-
     /// Maps the facet field id and the docids for which this field is set as null
     pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
+    /// Maps the facet field id and the docids for which this field is considered empty
+    pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
 
     /// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
     pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
@@ -157,7 +159,7 @@ impl Index {
     ) -> Result<Index> {
         use db_name::*;
 
-        options.max_dbs(20);
+        options.max_dbs(21);
         unsafe { options.flag(Flags::MdbAlwaysFreePages) };
 
         let env = options.open(path)?;
@@ -180,6 +182,7 @@ impl Index {
         let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
         let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
         let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
+        let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;
 
         let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
         let field_id_docid_facet_strings =
@@ -207,6 +210,7 @@ impl Index {
             facet_id_string_docids,
             facet_id_exists_docids,
             facet_id_is_null_docids,
+            facet_id_is_empty_docids,
             field_id_docid_facet_f64s,
             field_id_docid_facet_strings,
             documents,
@@ -851,6 +855,18 @@ impl Index {
         }
     }
 
+    /// Retrieve all the documents which contain this field id and that is considered empty
+    pub fn empty_faceted_documents_ids(
+        &self,
+        rtxn: &RoTxn,
+        field_id: FieldId,
+    ) -> heed::Result<RoaringBitmap> {
+        match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? {
+            Some(docids) => Ok(docids),
+            None => Ok(RoaringBitmap::new()),
+        }
+    }
+
     /// Retrieve all the documents which contain this field id
     pub fn exists_faceted_documents_ids(
         &self,
diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs
index df42725c5..0c11b737e 100644
--- a/milli/src/search/facet/filter.rs
+++ b/milli/src/search/facet/filter.rs
@@ -223,6 +223,10 @@ impl<'a> Filter<'a> {
                 let is_null = index.null_faceted_documents_ids(rtxn, field_id)?;
                 return Ok(is_null);
             }
+            Condition::Empty => {
+                let is_empty = index.empty_faceted_documents_ids(rtxn, field_id)?;
+                return Ok(is_empty);
+            }
             Condition::Exists => {
                 let exist = index.exists_faceted_documents_ids(rtxn, field_id)?;
                 return Ok(exist);
diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs
index 7ac09a785..326e0825d 100644
--- a/milli/src/update/clear_documents.rs
+++ b/milli/src/update/clear_documents.rs
@@ -35,6 +35,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
             facet_id_string_docids,
             facet_id_exists_docids,
             facet_id_is_null_docids,
+            facet_id_is_empty_docids,
             field_id_docid_facet_f64s,
             field_id_docid_facet_strings,
             documents,
@@ -88,6 +89,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
         facet_id_f64_docids.clear(self.wtxn)?;
         facet_id_exists_docids.clear(self.wtxn)?;
         facet_id_is_null_docids.clear(self.wtxn)?;
+        facet_id_is_empty_docids.clear(self.wtxn)?;
         facet_id_string_docids.clear(self.wtxn)?;
         field_id_docid_facet_f64s.clear(self.wtxn)?;
         field_id_docid_facet_strings.clear(self.wtxn)?;
diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs
index bb232d7cc..6f2fa5e5a 100644
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
@@ -246,6 +246,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
             script_language_docids,
             facet_id_exists_docids,
             facet_id_is_null_docids,
+            facet_id_is_empty_docids,
             documents,
         } = self.index;
 
@@ -531,6 +532,13 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
             &self.to_delete_docids,
         )?;
 
+        // We delete the documents ids that are under the facet field id values.
+        remove_docids_from_facet_id_docids(
+            self.wtxn,
+            facet_id_is_empty_docids,
+            &self.to_delete_docids,
+        )?;
+
         self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
 
         Ok(DetailedDocumentDeletionResult {
diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
index 6460af812..8f3d9408d 100644
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -21,6 +21,7 @@ pub struct ExtractedFacetValues {
     pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
     pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
     pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
+    pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
     pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
 }
 
@@ -56,6 +57,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
 
     let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
     let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
+    let mut facet_is_empty_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
 
     let mut key_buffer = Vec::new();
     let mut cursor = obkv_documents.into_cursor()?;
@@ -80,10 +82,14 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
                 key_buffer.extend_from_slice(docid_bytes);
 
                 let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
+
                 match extract_facet_values(&value) {
                     FilterableValues::Null => {
                         facet_is_null_docids.entry(field_id).or_default().insert(document);
                     }
+                    FilterableValues::Empty => {
+                        facet_is_empty_docids.entry(field_id).or_default().insert(document);
+                    }
                     FilterableValues::Values { numbers, strings } => {
                         // insert facet numbers in sorter
                         for number in numbers {
@@ -140,22 +146,34 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     }
     let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?;
 
+    let mut facet_is_empty_docids_writer = create_writer(
+        indexer.chunk_compression_type,
+        indexer.chunk_compression_level,
+        tempfile::tempfile()?,
+    );
+    for (fid, bitmap) in facet_is_empty_docids.into_iter() {
+        let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
+        facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
+    }
+    let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?;
+
     Ok(ExtractedFacetValues {
         docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
         docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
         fid_facet_is_null_docids_chunk: facet_is_null_docids_reader,
+        fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader,
         fid_facet_exists_docids_chunk: facet_exists_docids_reader,
     })
 }
 
 /// Represent what a document field contains.
 enum FilterableValues {
+    /// Corresponds to the JSON `null` value.
     Null,
+    /// Corresponds to either, an empty string `""`, an empty array `[]`, or an empty object `{}`.
+    Empty,
     /// Represents all the numbers and strings values found in this document field.
-    Values {
-        numbers: Vec<f64>,
-        strings: Vec<(String, String)>,
-    },
+    Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
 }
 
 fn extract_facet_values(value: &Value) -> FilterableValues {
@@ -192,6 +210,9 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
 
     match value {
         Value::Null => FilterableValues::Null,
+        Value::String(s) if s.is_empty() => FilterableValues::Empty,
+        Value::Array(a) if a.is_empty() => FilterableValues::Empty,
+        Value::Object(o) if o.is_empty() => FilterableValues::Empty,
         otherwise => {
             let mut numbers = Vec::new();
             let mut strings = Vec::new();
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 4a5c9b64c..641a8a210 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -55,22 +55,23 @@ pub(crate) fn data_from_obkv_documents(
         .collect::<Result<()>>()?;
 
     #[allow(clippy::type_complexity)]
-    let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>))))> = flattened_obkv_chunks
-        .par_bridge()
-        .map(|flattened_obkv_chunks| {
-            send_and_extract_flattened_documents_data(
-                flattened_obkv_chunks,
-                indexer,
-                lmdb_writer_sx.clone(),
-                &searchable_fields,
-                &faceted_fields,
-                primary_key_id,
-                geo_fields_ids,
-                &stop_words,
-                max_positions_per_attributes,
-            )
-        })
-        .collect();
+    let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))))> =
+        flattened_obkv_chunks
+            .par_bridge()
+            .map(|flattened_obkv_chunks| {
+                send_and_extract_flattened_documents_data(
+                    flattened_obkv_chunks,
+                    indexer,
+                    lmdb_writer_sx.clone(),
+                    &searchable_fields,
+                    &faceted_fields,
+                    primary_key_id,
+                    geo_fields_ids,
+                    &stop_words,
+                    max_positions_per_attributes,
+                )
+            })
+            .collect();
 
     let (
         docid_word_positions_chunks,
@@ -78,7 +79,10 @@ pub(crate) fn data_from_obkv_documents(
             docid_fid_facet_numbers_chunks,
             (
                 docid_fid_facet_strings_chunks,
-                (facet_is_null_docids_chunks, facet_exists_docids_chunks),
+                (
+                    facet_is_null_docids_chunks,
+                    (facet_is_empty_docids_chunks, facet_exists_docids_chunks),
+                ),
             ),
         ),
     ) = result?;
@@ -115,6 +119,22 @@ pub(crate) fn data_from_obkv_documents(
         });
     }
 
+    // merge facet_is_empty_docids and send them as a typed chunk
+    {
+        let lmdb_writer_sx = lmdb_writer_sx.clone();
+        rayon::spawn(move || {
+            debug!("merge {} database", "facet-id-is-empty-docids");
+            match facet_is_empty_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
+                Ok(reader) => {
+                    let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
+                }
+                Err(e) => {
+                    let _ = lmdb_writer_sx.send(Err(e));
+                }
+            }
+        });
+    }
+
     spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
         docid_word_positions_chunks.clone(),
         indexer,
@@ -254,7 +274,10 @@ fn send_and_extract_flattened_documents_data(
     grenad::Reader<CursorClonableMmap>,
     (
         grenad::Reader<CursorClonableMmap>,
-        (grenad::Reader<CursorClonableMmap>, (grenad::Reader<File>, grenad::Reader<File>)),
+        (
+            grenad::Reader<CursorClonableMmap>,
+            (grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
+        ),
     ),
 )> {
     let flattened_documents_chunk =
@@ -304,6 +327,7 @@ fn send_and_extract_flattened_documents_data(
                     docid_fid_facet_numbers_chunk,
                     docid_fid_facet_strings_chunk,
                     fid_facet_is_null_docids_chunk,
+                    fid_facet_is_empty_docids_chunk,
                     fid_facet_exists_docids_chunk,
                 } = extract_fid_docid_facet_values(
                     flattened_documents_chunk.clone(),
@@ -331,7 +355,10 @@ fn send_and_extract_flattened_documents_data(
                     docid_fid_facet_numbers_chunk,
                     (
                         docid_fid_facet_strings_chunk,
-                        (fid_facet_is_null_docids_chunk, fid_facet_exists_docids_chunk),
+                        (
+                            fid_facet_is_null_docids_chunk,
+                            (fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk),
+                        ),
                     ),
                 ))
             },
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index 79f2e2c55..e1fc01ca9 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -40,6 +40,7 @@ pub(crate) enum TypedChunk {
     FieldIdFacetNumberDocids(grenad::Reader<File>),
     FieldIdFacetExistsDocids(grenad::Reader<File>),
     FieldIdFacetIsNullDocids(grenad::Reader<File>),
+    FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
     GeoPoints(grenad::Reader<File>),
     ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
 }
@@ -173,6 +174,17 @@ pub(crate) fn write_typed_chunk_into_index(
             )?;
             is_merged_database = true;
         }
+        TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => {
+            append_entries_into_database(
+                facet_id_is_empty_docids,
+                &index.facet_id_is_empty_docids,
+                wtxn,
+                index_is_empty,
+                |value, _buffer| Ok(value),
+                merge_cbo_roaring_bitmaps,
+            )?;
+            is_merged_database = true;
+        }
         TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
             append_entries_into_database(
                 word_pair_proximity_docids_iter,

From d5881519cb5b364b42f2b2780cea48752c1ec79c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 15 Mar 2023 11:01:51 +0100
Subject: [PATCH 16/21] Make the json flattener return the original values

---
 flatten-serde-json/src/lib.rs | 64 ++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 16 deletions(-)

diff --git a/flatten-serde-json/src/lib.rs b/flatten-serde-json/src/lib.rs
index e1b2b20c7..b2e36d5b3 100644
--- a/flatten-serde-json/src/lib.rs
+++ b/flatten-serde-json/src/lib.rs
@@ -3,45 +3,45 @@
 use serde_json::{Map, Value};
 
 pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> {
-    let mut obj = Map::new();
-    let mut all_keys = vec![];
-    insert_object(&mut obj, None, json, &mut all_keys);
-    for key in all_keys {
-        obj.entry(key).or_insert(Value::Array(vec![]));
+    let mut obj = json.clone();
+    let mut all_entries = vec![];
+    insert_object(&mut obj, None, json, &mut all_entries);
+    for (key, old_val) in all_entries {
+        obj.entry(key).or_insert(old_val.clone());
     }
     obj
 }
 
-fn insert_object(
+fn insert_object<'a>(
     base_json: &mut Map<String, Value>,
     base_key: Option<&str>,
-    object: &Map<String, Value>,
-    all_keys: &mut Vec<String>,
+    object: &'a Map<String, Value>,
+    all_entries: &mut Vec<(String, &'a Value)>,
 ) {
     for (key, value) in object {
         let new_key = base_key.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
-        all_keys.push(new_key.clone());
+        all_entries.push((new_key.clone(), value));
         if let Some(array) = value.as_array() {
-            insert_array(base_json, &new_key, array, all_keys);
+            insert_array(base_json, &new_key, array, all_entries);
         } else if let Some(object) = value.as_object() {
-            insert_object(base_json, Some(&new_key), object, all_keys);
+            insert_object(base_json, Some(&new_key), object, all_entries);
         } else {
             insert_value(base_json, &new_key, value.clone());
         }
     }
 }
 
-fn insert_array(
+fn insert_array<'a>(
     base_json: &mut Map<String, Value>,
     base_key: &str,
-    array: &Vec<Value>,
-    all_keys: &mut Vec<String>,
+    array: &'a Vec<Value>,
+    all_entries: &mut Vec<(String, &'a Value)>,
 ) {
     for value in array {
         if let Some(object) = value.as_object() {
-            insert_object(base_json, Some(base_key), object, all_keys);
+            insert_object(base_json, Some(base_key), object, all_entries);
         } else if let Some(sub_array) = value.as_array() {
-            insert_array(base_json, base_key, sub_array, all_keys);
+            insert_array(base_json, base_key, sub_array, all_entries);
         } else {
             insert_value(base_json, base_key, value.clone());
         }
@@ -302,4 +302,36 @@ mod tests {
             .unwrap()
         );
     }
+
+    #[test]
+    fn flatten_nested_values_keep_original_values() {
+        let mut base: Value = json!({
+            "tags": {
+                "t1": "v1"
+            },
+            "prices": {
+                "p1": [null]
+            }
+        });
+        let json = std::mem::take(base.as_object_mut().unwrap());
+        let flat = flatten(&json);
+
+        println!("{}", serde_json::to_string_pretty(&flat).unwrap());
+
+        assert_eq!(
+            &flat,
+            json!({
+                "tags": {
+                    "t1": "v1"
+                },
+                "tags.t1": "v1",
+                "prices": {
+                    "p1": [null]
+                },
+                "prices.p1": [null]
+            })
+            .as_object()
+            .unwrap()
+        );
+    }
 }

From 72123c458b91fcf349d9b186bedfa05fb165934a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 15 Mar 2023 11:20:34 +0100
Subject: [PATCH 17/21] Fix the tests to make flattening work

---
 flatten-serde-json/src/lib.rs | 68 +++++++++++++++++++++++++----------
 1 file changed, 49 insertions(+), 19 deletions(-)

diff --git a/flatten-serde-json/src/lib.rs b/flatten-serde-json/src/lib.rs
index b2e36d5b3..c3346e15e 100644
--- a/flatten-serde-json/src/lib.rs
+++ b/flatten-serde-json/src/lib.rs
@@ -3,7 +3,7 @@
 use serde_json::{Map, Value};
 
 pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> {
-    let mut obj = json.clone();
+    let mut obj = Map::new();
     let mut all_entries = vec![];
     insert_object(&mut obj, None, json, &mut all_entries);
     for (key, old_val) in all_entries {
@@ -26,7 +26,7 @@ fn insert_object<'a>(
         } else if let Some(object) = value.as_object() {
             insert_object(base_json, Some(&new_key), object, all_entries);
         } else {
-            insert_value(base_json, &new_key, value.clone());
+            insert_value(base_json, &new_key, value.clone(), false);
         }
     }
 }
@@ -43,12 +43,17 @@ fn insert_array<'a>(
         } else if let Some(sub_array) = value.as_array() {
             insert_array(base_json, base_key, sub_array, all_entries);
         } else {
-            insert_value(base_json, base_key, value.clone());
+            insert_value(base_json, base_key, value.clone(), true);
         }
     }
 }
 
-fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value) {
+fn insert_value(
+    base_json: &mut Map<String, Value>,
+    key: &str,
+    to_insert: Value,
+    came_from_array: bool,
+) {
     debug_assert!(!to_insert.is_object());
     debug_assert!(!to_insert.is_array());
 
@@ -63,6 +68,8 @@ fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value)
             base_json[key] = Value::Array(vec![value, to_insert]);
         }
         // if it does not exist we can push the value untouched
+    } else if came_from_array {
+        base_json.insert(key.to_string(), Value::Array(vec![to_insert]));
     } else {
         base_json.insert(key.to_string(), to_insert);
     }
@@ -113,7 +120,11 @@ mod tests {
         assert_eq!(
             &flat,
             json!({
-                "a": [],
+                "a": {
+                  "b": "c",
+                  "d": "e",
+                  "f": "g"
+                },
                 "a.b": "c",
                 "a.d": "e",
                 "a.f": "g"
@@ -164,7 +175,7 @@ mod tests {
         assert_eq!(
             &flat,
             json!({
-                "a": 42,
+                "a": [42],
                 "a.b": ["c", "d", "e"],
             })
             .as_object()
@@ -186,7 +197,7 @@ mod tests {
         assert_eq!(
             &flat,
             json!({
-                "a": null,
+                "a": [null],
                 "a.b": ["c", "d", "e"],
             })
             .as_object()
@@ -208,7 +219,9 @@ mod tests {
         assert_eq!(
             &flat,
             json!({
-                "a": [],
+                "a": {
+                    "b": "c"
+                },
                 "a.b": ["c", "d"],
             })
             .as_object()
@@ -234,7 +247,7 @@ mod tests {
             json!({
                 "a.b": ["c", "d", "f"],
                 "a.c": "e",
-                "a": 35,
+                "a": [35],
             })
             .as_object()
             .unwrap()
@@ -310,8 +323,10 @@ mod tests {
                 "t1": "v1"
             },
             "prices": {
-                "p1": [null]
-            }
+                "p1": [null],
+                "p1000": {"tamo": {"le": {}}}
+            },
+            "kiki": [[]]
         });
         let json = std::mem::take(base.as_object_mut().unwrap());
         let flat = flatten(&json);
@@ -321,14 +336,29 @@ mod tests {
         assert_eq!(
             &flat,
             json!({
-                "tags": {
-                    "t1": "v1"
-                },
-                "tags.t1": "v1",
-                "prices": {
-                    "p1": [null]
-                },
-                "prices.p1": [null]
+              "prices": {
+                "p1": [null],
+                "p1000": {
+                  "tamo": {
+                    "le": {}
+                  }
+                }
+              },
+              "prices.p1": [null],
+              "prices.p1000": {
+                "tamo": {
+                  "le": {}
+                }
+              },
+              "prices.p1000.tamo": {
+                "le": {}
+              },
+              "prices.p1000.tamo.le": {},
+              "tags": {
+                "t1": "v1"
+              },
+              "tags.t1": "v1",
+              "kiki": [[]]
             })
             .as_object()
             .unwrap()

From 64571c8288b358096fff5bd992d2ee168516e4dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 15 Mar 2023 14:57:17 +0100
Subject: [PATCH 18/21] Improve the testing of the filters

---
 milli/src/snapshot_tests.rs             |  8 +++
 milli/src/update/index_documents/mod.rs | 77 ++++++++++++++++++++++++-
 milli/tests/search/filters.rs           |  6 ++
 milli/tests/search/mod.rs               | 51 ++++++++--------
 4 files changed, 114 insertions(+), 28 deletions(-)

diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs
index c6ea8f3dd..85d1bc626 100644
--- a/milli/src/snapshot_tests.rs
+++ b/milli/src/snapshot_tests.rs
@@ -276,6 +276,11 @@ pub fn snap_facet_id_is_null_docids(index: &Index) -> String {
         &format!("{facet_id:<3} {}", display_bitmap(&docids))
     })
 }
+pub fn snap_facet_id_is_empty_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, facet_id_is_empty_docids, |(facet_id, docids)| {
+        &format!("{facet_id:<3} {}", display_bitmap(&docids))
+    })
+}
 pub fn snap_facet_id_string_docids(index: &Index) -> String {
     make_db_snap_from_iter!(index, facet_id_string_docids, |(
         FacetGroupKey { field_id, level, left_bound },
@@ -503,6 +508,9 @@ macro_rules! full_snap_of_db {
     ($index:ident, facet_id_is_null_docids) => {{
         $crate::snapshot_tests::snap_facet_id_is_null_docids(&$index)
     }};
+    ($index:ident, facet_id_is_empty_docids) => {{
+        $crate::snapshot_tests::snap_facet_id_is_empty_docids(&$index)
+    }};
     ($index:ident, documents_ids) => {{
         $crate::snapshot_tests::snap_documents_ids(&$index)
     }};
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 7b9bd7834..a0bf1400d 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -1766,6 +1766,10 @@ mod tests {
                     "id": 0,
                     "colour": null,
                 },
+                {
+                    "id": 1,
+                    "colour": [null], // must not be returned
+                },
                 {
                     "id": 6,
                     "colour": {
@@ -1835,14 +1839,14 @@ mod tests {
                 .get(&rtxn, &BEU16::new(colour_green_id))
                 .unwrap()
                 .unwrap();
-            assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![1]);
+            assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![2]);
 
             let bitmap_colour_blue = index
                 .facet_id_is_null_docids
                 .get(&rtxn, &BEU16::new(colour_blue_id))
                 .unwrap()
                 .unwrap();
-            assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![2]);
+            assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
         };
 
         let faceted_fields = hashset!(S("colour"));
@@ -1866,6 +1870,75 @@ mod tests {
         check_ok(&index);
     }
 
+    #[test]
+    fn index_documents_check_is_empty_database() {
+        let content = || {
+            documents!([
+                {"id": 0, "tags": null },
+                {"id": 1, "tags": [null] },
+                {"id": 2, "tags": [] },
+                {"id": 3, "tags": ["hello","world"] },
+                {"id": 4, "tags": [""] },
+                {"id": 5 },
+                {"id": 6, "tags": {} },
+                {"id": 7, "tags": {"green": "cool"} },
+                {"id": 8, "tags": {"green": ""} },
+                {"id": 9, "tags": "" },
+                {"id": 10, "tags": { "green": null } },
+                {"id": 11, "tags": { "green": { "blue": null } } },
+                {"id": 12, "tags": { "green": { "blue": [] } } }
+            ])
+        };
+
+        let check_ok = |index: &Index| {
+            let rtxn = index.read_txn().unwrap();
+            let facets = index.faceted_fields(&rtxn).unwrap();
+            assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue")));
+
+            let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap();
+            let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap();
+            let tags_blue_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green.blue").unwrap();
+
+            let bitmap_empty_tags =
+                index.facet_id_is_empty_docids.get(&rtxn, &BEU16::new(tags_id)).unwrap().unwrap();
+            assert_eq!(bitmap_empty_tags.into_iter().collect::<Vec<_>>(), vec![2, 6, 9]);
+
+            let bitmap_tags_green = index
+                .facet_id_is_empty_docids
+                .get(&rtxn, &BEU16::new(tags_green_id))
+                .unwrap()
+                .unwrap();
+            assert_eq!(bitmap_tags_green.into_iter().collect::<Vec<_>>(), vec![8]);
+
+            let bitmap_tags_blue = index
+                .facet_id_is_empty_docids
+                .get(&rtxn, &BEU16::new(tags_blue_id))
+                .unwrap()
+                .unwrap();
+            assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
+        };
+
+        let faceted_fields = hashset!(S("tags"));
+
+        let index = TempIndex::new();
+        index.add_documents(content()).unwrap();
+        index
+            .update_settings(|settings| {
+                settings.set_filterable_fields(faceted_fields.clone());
+            })
+            .unwrap();
+        check_ok(&index);
+
+        let index = TempIndex::new();
+        index
+            .update_settings(|settings| {
+                settings.set_filterable_fields(faceted_fields.clone());
+            })
+            .unwrap();
+        index.add_documents(content()).unwrap();
+        check_ok(&index);
+    }
+
     #[test]
     fn primary_key_must_not_contain_floats() {
         let index = TempIndex::new_with_map_size(4096 * 100);
diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs
index 57ad6a40b..db5a004e0 100644
--- a/milli/tests/search/filters.rs
+++ b/milli/tests/search/filters.rs
@@ -93,6 +93,12 @@ test_filter!(null_filter_1_not, vec![Right("opt1 IS NOT NULL")]);
 test_filter!(null_filter_1_not_alt, vec![Right("NOT opt1 IS NULL")]);
 test_filter!(null_filter_1_double_not, vec![Right("NOT opt1 IS NOT NULL")]);
 
+test_filter!(empty_filter_1, vec![Right("opt1 IS EMPTY")]);
+test_filter!(empty_filter_2, vec![Right("opt1.opt2 IS EMPTY")]);
+test_filter!(empty_filter_1_not, vec![Right("opt1 IS NOT EMPTY")]);
+test_filter!(empty_filter_1_not_alt, vec![Right("NOT opt1 IS EMPTY")]);
+test_filter!(empty_filter_1_double_not, vec![Right("NOT opt1 IS NOT EMPTY")]);
+
 test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]);
 test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]);
 test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]);
diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs
index 51852cced..23744c005 100644
--- a/milli/tests/search/mod.rs
+++ b/milli/tests/search/mod.rs
@@ -212,10 +212,22 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
     } else if matches!(filter, "opt1.opt2 IS NULL") {
         if document.opt1opt2.as_ref().map_or(false, |v| v.is_null()) {
             id = Some(document.id.clone());
-        } else if let Some(opt1) = &document.opt1 {
-            if !opt1.is_null() {
-                id = contains_null_rec(opt1, "opt2").then(|| document.id.clone());
-            }
+        }
+    } else if matches!(filter, "opt1 IS EMPTY" | "NOT opt1 IS NOT EMPTY") {
+        id = document
+            .opt1
+            .as_ref()
+            .map_or(false, |v| is_empty_value(v))
+            .then(|| document.id.clone());
+    } else if matches!(filter, "NOT opt1 IS EMPTY" | "opt1 IS NOT EMPTY") {
+        id = document
+            .opt1
+            .as_ref()
+            .map_or(true, |v| !is_empty_value(v))
+            .then(|| document.id.clone());
+    } else if matches!(filter, "opt1.opt2 IS EMPTY") {
+        if document.opt1opt2.as_ref().map_or(false, |v| is_empty_value(v)) {
+            id = Some(document.id.clone());
         }
     } else if matches!(
         filter,
@@ -230,6 +242,15 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
     id
 }
 
+pub fn is_empty_value(v: &serde_json::Value) -> bool {
+    match v {
+        serde_json::Value::String(s) => s.is_empty(),
+        serde_json::Value::Array(a) => a.is_empty(),
+        serde_json::Value::Object(o) => o.is_empty(),
+        _ => false,
+    }
+}
+
 pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {
     match v {
         serde_json::Value::Array(v) => {
@@ -252,28 +273,6 @@ pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {
     }
 }
 
-pub fn contains_null_rec(v: &serde_json::Value, key: &str) -> bool {
-    match v {
-        serde_json::Value::Object(v) => {
-            for (k, v) in v.iter() {
-                if k == key && v.is_null() || contains_null_rec(v, key) {
-                    return true;
-                }
-            }
-            false
-        }
-        serde_json::Value::Array(v) => {
-            for v in v.iter() {
-                if contains_null_rec(v, key) {
-                    return true;
-                }
-            }
-            false
-        }
-        _ => false,
-    }
-}
-
 pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> {
     let dataset: Vec<TestDocument> =
         serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect();

From 1a9c58a7abe9820a716b49d5f869e9bd4162b8db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 15 Mar 2023 16:56:44 +0100
Subject: [PATCH 19/21] Fix a bug with the new flattening rules

---
 .../extract/extract_docid_word_positions.rs   | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index 2d51fcc1a..ac5148363 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -107,7 +107,7 @@ fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a st
     fn inner(value: &Value, output: &mut String) -> bool {
         use std::fmt::Write;
         match value {
-            Value::Null => false,
+            Value::Null | Value::Object(_) => false,
             Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(),
             Value::Number(number) => write!(output, "{}", number).is_ok(),
             Value::String(string) => write!(output, "{}", string).is_ok(),
@@ -122,23 +122,6 @@ fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a st
                 // check that at least one value was written
                 count != 0
             }
-            Value::Object(object) => {
-                let mut buffer = String::new();
-                let mut count = 0;
-                for (key, value) in object {
-                    buffer.clear();
-                    let _ = write!(&mut buffer, "{}: ", key);
-                    if inner(value, &mut buffer) {
-                        buffer.push_str(". ");
-                        // We write the "key: value. " pair only when
-                        // we are sure that the value can be written.
-                        output.push_str(&buffer);
-                        count += 1;
-                    }
-                }
-                // check that at least one value was written
-                count != 0
-            }
         }
     }
 

From cf34d1c95f33d7ec6adeb1180901698e4d6ea916 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 15 Mar 2023 16:57:09 +0100
Subject: [PATCH 20/21] Fix a test that forget to match a Null value

---
 milli/tests/search/mod.rs | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs
index 23744c005..7f072ef95 100644
--- a/milli/tests/search/mod.rs
+++ b/milli/tests/search/mod.rs
@@ -212,13 +212,13 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
     } else if matches!(filter, "opt1.opt2 IS NULL") {
         if document.opt1opt2.as_ref().map_or(false, |v| v.is_null()) {
             id = Some(document.id.clone());
+        } else if let Some(opt1) = &document.opt1 {
+            if !opt1.is_null() {
+                id = contains_null_rec(opt1, "opt2").then(|| document.id.clone());
+            }
         }
     } else if matches!(filter, "opt1 IS EMPTY" | "NOT opt1 IS NOT EMPTY") {
-        id = document
-            .opt1
-            .as_ref()
-            .map_or(false, |v| is_empty_value(v))
-            .then(|| document.id.clone());
+        id = document.opt1.as_ref().map_or(false, is_empty_value).then(|| document.id.clone());
     } else if matches!(filter, "NOT opt1 IS EMPTY" | "opt1 IS NOT EMPTY") {
         id = document
             .opt1
@@ -226,7 +226,7 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
             .map_or(true, |v| !is_empty_value(v))
             .then(|| document.id.clone());
     } else if matches!(filter, "opt1.opt2 IS EMPTY") {
-        if document.opt1opt2.as_ref().map_or(false, |v| is_empty_value(v)) {
+        if document.opt1opt2.as_ref().map_or(false, is_empty_value) {
             id = Some(document.id.clone());
         }
     } else if matches!(
@@ -273,6 +273,28 @@ pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {
     }
 }
 
+pub fn contains_null_rec(v: &serde_json::Value, key: &str) -> bool {
+    match v {
+        serde_json::Value::Object(v) => {
+            for (k, v) in v.iter() {
+                if k == key && v.is_null() || contains_null_rec(v, key) {
+                    return true;
+                }
+            }
+            false
+        }
+        serde_json::Value::Array(v) => {
+            for v in v.iter() {
+                if contains_null_rec(v, key) {
+                    return true;
+                }
+            }
+            false
+        }
+        _ => false,
+    }
+}
+
 pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> {
     let dataset: Vec<TestDocument> =
         serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect();

From a8531053a0083ba9673d8470a935a70407fafccb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Thu, 16 Mar 2023 11:09:20 +0100
Subject: [PATCH 21/21] Make sure the parser reject invalid syntax

---
 filter-parser/src/lib.rs | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs
index 69eb6700f..640009983 100644
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -745,6 +745,39 @@ pub mod tests {
         Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
         5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
         "###);
+
+        insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
+        1:11 value NULL
+        "###);
+        insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
+        1:15 value NOT NULL
+        "###);
+        insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
+        1:12 value EMPTY
+        "###);
+        insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
+        1:16 value NOT EMPTY
+        "###);
+        insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
+        1:9 value IS
+        "###);
+        insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
+        1:13 value IS NOT
+        "###);
+        insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
+        1:16 value IS EXISTS
+        "###);
+        insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
+        1:20 value IS NOT EXISTS
+        "###);
     }
 
     #[test]