mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 21:44:34 +01:00
Implementing an IS EMPTY filter
This commit is contained in:
parent
fa2ea4a379
commit
ea016d97af
@ -21,6 +21,7 @@ pub enum Condition<'a> {
|
|||||||
Equal(Token<'a>),
|
Equal(Token<'a>),
|
||||||
NotEqual(Token<'a>),
|
NotEqual(Token<'a>),
|
||||||
Null,
|
Null,
|
||||||
|
Empty,
|
||||||
Exists,
|
Exists,
|
||||||
LowerThan(Token<'a>),
|
LowerThan(Token<'a>),
|
||||||
LowerThanOrEqual(Token<'a>),
|
LowerThanOrEqual(Token<'a>),
|
||||||
@ -61,6 +62,22 @@ pub fn parse_is_not_null(input: Span) -> IResult<FilterCondition> {
|
|||||||
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null }))))
|
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null }))))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// empty = value "IS" WS+ "EMPTY"
|
||||||
|
pub fn parse_is_empty(input: Span) -> IResult<FilterCondition> {
|
||||||
|
let (input, key) = parse_value(input)?;
|
||||||
|
|
||||||
|
let (input, _) = tuple((tag("IS"), multispace1, tag("EMPTY")))(input)?;
|
||||||
|
Ok((input, FilterCondition::Condition { fid: key, op: Empty }))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// not_empty = value "IS" WS+ "NOT" WS+ "EMPTY"
|
||||||
|
pub fn parse_is_not_empty(input: Span) -> IResult<FilterCondition> {
|
||||||
|
let (input, key) = parse_value(input)?;
|
||||||
|
|
||||||
|
let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("EMPTY")))(input)?;
|
||||||
|
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Empty }))))
|
||||||
|
}
|
||||||
|
|
||||||
/// exist = value "EXISTS"
|
/// exist = value "EXISTS"
|
||||||
pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
|
pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
|
||||||
let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;
|
let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;
|
||||||
|
@ -143,11 +143,9 @@ impl<'a> Display for Error<'a> {
|
|||||||
ErrorKind::MissingClosingDelimiter(c) => {
|
ErrorKind::MissingClosingDelimiter(c) => {
|
||||||
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
|
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
|
||||||
}
|
}
|
||||||
ErrorKind::InvalidPrimary if input.trim().is_empty() => {
|
|
||||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")?
|
|
||||||
}
|
|
||||||
ErrorKind::InvalidPrimary => {
|
ErrorKind::InvalidPrimary => {
|
||||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)?
|
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
|
||||||
|
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
|
||||||
}
|
}
|
||||||
ErrorKind::ExpectedEof => {
|
ErrorKind::ExpectedEof => {
|
||||||
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
|
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
|
||||||
|
@ -47,7 +47,10 @@ mod value;
|
|||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
|
|
||||||
pub use condition::{parse_condition, parse_to, Condition};
|
pub use condition::{parse_condition, parse_to, Condition};
|
||||||
use condition::{parse_exists, parse_is_not_null, parse_is_null, parse_not_exists};
|
use condition::{
|
||||||
|
parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null,
|
||||||
|
parse_not_exists,
|
||||||
|
};
|
||||||
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
|
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
|
||||||
pub use error::{Error, ErrorKind};
|
pub use error::{Error, ErrorKind};
|
||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
@ -416,6 +419,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
|
|||||||
parse_condition,
|
parse_condition,
|
||||||
parse_is_null,
|
parse_is_null,
|
||||||
parse_is_not_null,
|
parse_is_not_null,
|
||||||
|
parse_is_empty,
|
||||||
|
parse_is_not_empty,
|
||||||
parse_exists,
|
parse_exists,
|
||||||
parse_not_exists,
|
parse_not_exists,
|
||||||
parse_to,
|
parse_to,
|
||||||
@ -509,6 +514,13 @@ pub mod tests {
|
|||||||
insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
|
insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
|
||||||
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
|
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
|
||||||
|
|
||||||
|
// Test EMPTY + NOT EMPTY
|
||||||
|
insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
|
||||||
|
insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||||
|
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||||
|
insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
|
||||||
|
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||||
|
|
||||||
// Test EXISTS + NOT EXISTS
|
// Test EXISTS + NOT EXISTS
|
||||||
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
|
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
|
||||||
insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
|
insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||||
@ -587,7 +599,7 @@ pub mod tests {
|
|||||||
"###);
|
"###);
|
||||||
|
|
||||||
insta::assert_display_snapshot!(p("'OR'"), @r###"
|
insta::assert_display_snapshot!(p("'OR'"), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
|
||||||
1:5 'OR'
|
1:5 'OR'
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -597,12 +609,12 @@ pub mod tests {
|
|||||||
"###);
|
"###);
|
||||||
|
|
||||||
insta::assert_display_snapshot!(p("channel Ponce"), @r###"
|
insta::assert_display_snapshot!(p("channel Ponce"), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
|
||||||
1:14 channel Ponce
|
1:14 channel Ponce
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
|
insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
|
||||||
19:19 channel = Ponce OR
|
19:19 channel = Ponce OR
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -667,12 +679,12 @@ pub mod tests {
|
|||||||
"###);
|
"###);
|
||||||
|
|
||||||
insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
|
insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
|
||||||
1:17 colour NOT EXIST
|
1:17 colour NOT EXIST
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
|
insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
|
||||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
|
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
|
||||||
1:23 subscribers 100 TO1000
|
1:23 subscribers 100 TO1000
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -812,6 +824,7 @@ impl<'a> std::fmt::Display for Condition<'a> {
|
|||||||
Condition::Equal(token) => write!(f, "= {token}"),
|
Condition::Equal(token) => write!(f, "= {token}"),
|
||||||
Condition::NotEqual(token) => write!(f, "!= {token}"),
|
Condition::NotEqual(token) => write!(f, "!= {token}"),
|
||||||
Condition::Null => write!(f, "IS NULL"),
|
Condition::Null => write!(f, "IS NULL"),
|
||||||
|
Condition::Empty => write!(f, "IS EMPTY"),
|
||||||
Condition::Exists => write!(f, "EXISTS"),
|
Condition::Exists => write!(f, "EXISTS"),
|
||||||
Condition::LowerThan(token) => write!(f, "< {token}"),
|
Condition::LowerThan(token) => write!(f, "< {token}"),
|
||||||
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
|
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
|
||||||
|
@ -188,6 +188,7 @@ fn is_keyword(s: &str) -> bool {
|
|||||||
| "EXISTS"
|
| "EXISTS"
|
||||||
| "IS"
|
| "IS"
|
||||||
| "NULL"
|
| "NULL"
|
||||||
|
| "EMPTY"
|
||||||
| "_geoRadius"
|
| "_geoRadius"
|
||||||
| "_geoBoundingBox"
|
| "_geoBoundingBox"
|
||||||
)
|
)
|
||||||
|
@ -547,7 +547,7 @@ async fn filter_invalid_syntax_object() {
|
|||||||
index.wait_task(1).await;
|
index.wait_task(1).await;
|
||||||
|
|
||||||
let expected_response = json!({
|
let expected_response = json!({
|
||||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||||
"code": "invalid_search_filter",
|
"code": "invalid_search_filter",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
@ -572,7 +572,7 @@ async fn filter_invalid_syntax_array() {
|
|||||||
index.wait_task(1).await;
|
index.wait_task(1).await;
|
||||||
|
|
||||||
let expected_response = json!({
|
let expected_response = json!({
|
||||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||||
"code": "invalid_search_filter",
|
"code": "invalid_search_filter",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
|
@ -81,6 +81,7 @@ pub mod db_name {
|
|||||||
pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
|
pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
|
||||||
pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
|
pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
|
||||||
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
|
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
|
||||||
|
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
|
||||||
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
|
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
|
||||||
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
||||||
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||||
@ -130,9 +131,10 @@ pub struct Index {
|
|||||||
|
|
||||||
/// Maps the facet field id and the docids for which this field exists
|
/// Maps the facet field id and the docids for which this field exists
|
||||||
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||||
|
|
||||||
/// Maps the facet field id and the docids for which this field is set as null
|
/// Maps the facet field id and the docids for which this field is set as null
|
||||||
pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||||
|
/// Maps the facet field id and the docids for which this field is considered empty
|
||||||
|
pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||||
|
|
||||||
/// Maps the facet field id and ranges of numbers with the docids that correspond to them.
|
/// Maps the facet field id and ranges of numbers with the docids that correspond to them.
|
||||||
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
@ -157,7 +159,7 @@ impl Index {
|
|||||||
) -> Result<Index> {
|
) -> Result<Index> {
|
||||||
use db_name::*;
|
use db_name::*;
|
||||||
|
|
||||||
options.max_dbs(20);
|
options.max_dbs(21);
|
||||||
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
||||||
|
|
||||||
let env = options.open(path)?;
|
let env = options.open(path)?;
|
||||||
@ -180,6 +182,7 @@ impl Index {
|
|||||||
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
|
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
|
||||||
let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
|
let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
|
||||||
let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
|
let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
|
||||||
|
let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;
|
||||||
|
|
||||||
let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
|
let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
|
||||||
let field_id_docid_facet_strings =
|
let field_id_docid_facet_strings =
|
||||||
@ -207,6 +210,7 @@ impl Index {
|
|||||||
facet_id_string_docids,
|
facet_id_string_docids,
|
||||||
facet_id_exists_docids,
|
facet_id_exists_docids,
|
||||||
facet_id_is_null_docids,
|
facet_id_is_null_docids,
|
||||||
|
facet_id_is_empty_docids,
|
||||||
field_id_docid_facet_f64s,
|
field_id_docid_facet_f64s,
|
||||||
field_id_docid_facet_strings,
|
field_id_docid_facet_strings,
|
||||||
documents,
|
documents,
|
||||||
@ -851,6 +855,18 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Retrieve all the documents which contain this field id and for which the field is considered empty
|
||||||
|
pub fn empty_faceted_documents_ids(
|
||||||
|
&self,
|
||||||
|
rtxn: &RoTxn,
|
||||||
|
field_id: FieldId,
|
||||||
|
) -> heed::Result<RoaringBitmap> {
|
||||||
|
match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? {
|
||||||
|
Some(docids) => Ok(docids),
|
||||||
|
None => Ok(RoaringBitmap::new()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Retrieve all the documents which contain this field id
|
/// Retrieve all the documents which contain this field id
|
||||||
pub fn exists_faceted_documents_ids(
|
pub fn exists_faceted_documents_ids(
|
||||||
&self,
|
&self,
|
||||||
|
@ -223,6 +223,10 @@ impl<'a> Filter<'a> {
|
|||||||
let is_null = index.null_faceted_documents_ids(rtxn, field_id)?;
|
let is_null = index.null_faceted_documents_ids(rtxn, field_id)?;
|
||||||
return Ok(is_null);
|
return Ok(is_null);
|
||||||
}
|
}
|
||||||
|
Condition::Empty => {
|
||||||
|
let is_empty = index.empty_faceted_documents_ids(rtxn, field_id)?;
|
||||||
|
return Ok(is_empty);
|
||||||
|
}
|
||||||
Condition::Exists => {
|
Condition::Exists => {
|
||||||
let exist = index.exists_faceted_documents_ids(rtxn, field_id)?;
|
let exist = index.exists_faceted_documents_ids(rtxn, field_id)?;
|
||||||
return Ok(exist);
|
return Ok(exist);
|
||||||
|
@ -35,6 +35,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
facet_id_string_docids,
|
facet_id_string_docids,
|
||||||
facet_id_exists_docids,
|
facet_id_exists_docids,
|
||||||
facet_id_is_null_docids,
|
facet_id_is_null_docids,
|
||||||
|
facet_id_is_empty_docids,
|
||||||
field_id_docid_facet_f64s,
|
field_id_docid_facet_f64s,
|
||||||
field_id_docid_facet_strings,
|
field_id_docid_facet_strings,
|
||||||
documents,
|
documents,
|
||||||
@ -88,6 +89,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
facet_id_f64_docids.clear(self.wtxn)?;
|
facet_id_f64_docids.clear(self.wtxn)?;
|
||||||
facet_id_exists_docids.clear(self.wtxn)?;
|
facet_id_exists_docids.clear(self.wtxn)?;
|
||||||
facet_id_is_null_docids.clear(self.wtxn)?;
|
facet_id_is_null_docids.clear(self.wtxn)?;
|
||||||
|
facet_id_is_empty_docids.clear(self.wtxn)?;
|
||||||
facet_id_string_docids.clear(self.wtxn)?;
|
facet_id_string_docids.clear(self.wtxn)?;
|
||||||
field_id_docid_facet_f64s.clear(self.wtxn)?;
|
field_id_docid_facet_f64s.clear(self.wtxn)?;
|
||||||
field_id_docid_facet_strings.clear(self.wtxn)?;
|
field_id_docid_facet_strings.clear(self.wtxn)?;
|
||||||
|
@ -246,6 +246,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
script_language_docids,
|
script_language_docids,
|
||||||
facet_id_exists_docids,
|
facet_id_exists_docids,
|
||||||
facet_id_is_null_docids,
|
facet_id_is_null_docids,
|
||||||
|
facet_id_is_empty_docids,
|
||||||
documents,
|
documents,
|
||||||
} = self.index;
|
} = self.index;
|
||||||
|
|
||||||
@ -531,6 +532,13 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
&self.to_delete_docids,
|
&self.to_delete_docids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
// We delete the documents ids that are under the facet field id values.
|
||||||
|
remove_docids_from_facet_id_docids(
|
||||||
|
self.wtxn,
|
||||||
|
facet_id_is_empty_docids,
|
||||||
|
&self.to_delete_docids,
|
||||||
|
)?;
|
||||||
|
|
||||||
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
|
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
|
||||||
|
|
||||||
Ok(DetailedDocumentDeletionResult {
|
Ok(DetailedDocumentDeletionResult {
|
||||||
|
@ -21,6 +21,7 @@ pub struct ExtractedFacetValues {
|
|||||||
pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
|
pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
|
||||||
pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
|
pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
|
||||||
pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
|
pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
|
||||||
|
pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
|
||||||
pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
|
pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -56,6 +57,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
|
let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
|
||||||
let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
|
let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
|
||||||
|
let mut facet_is_empty_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
|
||||||
|
|
||||||
let mut key_buffer = Vec::new();
|
let mut key_buffer = Vec::new();
|
||||||
let mut cursor = obkv_documents.into_cursor()?;
|
let mut cursor = obkv_documents.into_cursor()?;
|
||||||
@ -80,10 +82,14 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
key_buffer.extend_from_slice(docid_bytes);
|
key_buffer.extend_from_slice(docid_bytes);
|
||||||
|
|
||||||
let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
||||||
|
|
||||||
match extract_facet_values(&value) {
|
match extract_facet_values(&value) {
|
||||||
FilterableValues::Null => {
|
FilterableValues::Null => {
|
||||||
facet_is_null_docids.entry(field_id).or_default().insert(document);
|
facet_is_null_docids.entry(field_id).or_default().insert(document);
|
||||||
}
|
}
|
||||||
|
FilterableValues::Empty => {
|
||||||
|
facet_is_empty_docids.entry(field_id).or_default().insert(document);
|
||||||
|
}
|
||||||
FilterableValues::Values { numbers, strings } => {
|
FilterableValues::Values { numbers, strings } => {
|
||||||
// insert facet numbers in sorter
|
// insert facet numbers in sorter
|
||||||
for number in numbers {
|
for number in numbers {
|
||||||
@ -140,22 +146,34 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
}
|
}
|
||||||
let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?;
|
let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?;
|
||||||
|
|
||||||
|
let mut facet_is_empty_docids_writer = create_writer(
|
||||||
|
indexer.chunk_compression_type,
|
||||||
|
indexer.chunk_compression_level,
|
||||||
|
tempfile::tempfile()?,
|
||||||
|
);
|
||||||
|
for (fid, bitmap) in facet_is_empty_docids.into_iter() {
|
||||||
|
let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
|
||||||
|
facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
|
||||||
|
}
|
||||||
|
let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?;
|
||||||
|
|
||||||
Ok(ExtractedFacetValues {
|
Ok(ExtractedFacetValues {
|
||||||
docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
|
docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
|
||||||
docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
|
docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
|
||||||
fid_facet_is_null_docids_chunk: facet_is_null_docids_reader,
|
fid_facet_is_null_docids_chunk: facet_is_null_docids_reader,
|
||||||
|
fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader,
|
||||||
fid_facet_exists_docids_chunk: facet_exists_docids_reader,
|
fid_facet_exists_docids_chunk: facet_exists_docids_reader,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Represent what a document field contains.
|
/// Represent what a document field contains.
|
||||||
enum FilterableValues {
|
enum FilterableValues {
|
||||||
|
/// Corresponds to the JSON `null` value.
|
||||||
Null,
|
Null,
|
||||||
|
/// Corresponds to either an empty string `""`, an empty array `[]`, or an empty object `{}`.
|
||||||
|
Empty,
|
||||||
/// Represents all the numbers and strings values found in this document field.
|
/// Represents all the numbers and strings values found in this document field.
|
||||||
Values {
|
Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
|
||||||
numbers: Vec<f64>,
|
|
||||||
strings: Vec<(String, String)>,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_facet_values(value: &Value) -> FilterableValues {
|
fn extract_facet_values(value: &Value) -> FilterableValues {
|
||||||
@ -192,6 +210,9 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
|
|||||||
|
|
||||||
match value {
|
match value {
|
||||||
Value::Null => FilterableValues::Null,
|
Value::Null => FilterableValues::Null,
|
||||||
|
Value::String(s) if s.is_empty() => FilterableValues::Empty,
|
||||||
|
Value::Array(a) if a.is_empty() => FilterableValues::Empty,
|
||||||
|
Value::Object(o) if o.is_empty() => FilterableValues::Empty,
|
||||||
otherwise => {
|
otherwise => {
|
||||||
let mut numbers = Vec::new();
|
let mut numbers = Vec::new();
|
||||||
let mut strings = Vec::new();
|
let mut strings = Vec::new();
|
||||||
|
@ -55,7 +55,8 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
.collect::<Result<()>>()?;
|
.collect::<Result<()>>()?;
|
||||||
|
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>))))> = flattened_obkv_chunks
|
let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))))> =
|
||||||
|
flattened_obkv_chunks
|
||||||
.par_bridge()
|
.par_bridge()
|
||||||
.map(|flattened_obkv_chunks| {
|
.map(|flattened_obkv_chunks| {
|
||||||
send_and_extract_flattened_documents_data(
|
send_and_extract_flattened_documents_data(
|
||||||
@ -78,7 +79,10 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
docid_fid_facet_numbers_chunks,
|
docid_fid_facet_numbers_chunks,
|
||||||
(
|
(
|
||||||
docid_fid_facet_strings_chunks,
|
docid_fid_facet_strings_chunks,
|
||||||
(facet_is_null_docids_chunks, facet_exists_docids_chunks),
|
(
|
||||||
|
facet_is_null_docids_chunks,
|
||||||
|
(facet_is_empty_docids_chunks, facet_exists_docids_chunks),
|
||||||
|
),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
) = result?;
|
) = result?;
|
||||||
@ -115,6 +119,22 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// merge facet_is_empty_docids and send them as a typed chunk
|
||||||
|
{
|
||||||
|
let lmdb_writer_sx = lmdb_writer_sx.clone();
|
||||||
|
rayon::spawn(move || {
|
||||||
|
debug!("merge {} database", "facet-id-is-empty-docids");
|
||||||
|
match facet_is_empty_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
|
||||||
|
Ok(reader) => {
|
||||||
|
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
let _ = lmdb_writer_sx.send(Err(e));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||||
docid_word_positions_chunks.clone(),
|
docid_word_positions_chunks.clone(),
|
||||||
indexer,
|
indexer,
|
||||||
@ -254,7 +274,10 @@ fn send_and_extract_flattened_documents_data(
|
|||||||
grenad::Reader<CursorClonableMmap>,
|
grenad::Reader<CursorClonableMmap>,
|
||||||
(
|
(
|
||||||
grenad::Reader<CursorClonableMmap>,
|
grenad::Reader<CursorClonableMmap>,
|
||||||
(grenad::Reader<CursorClonableMmap>, (grenad::Reader<File>, grenad::Reader<File>)),
|
(
|
||||||
|
grenad::Reader<CursorClonableMmap>,
|
||||||
|
(grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
|
||||||
|
),
|
||||||
),
|
),
|
||||||
)> {
|
)> {
|
||||||
let flattened_documents_chunk =
|
let flattened_documents_chunk =
|
||||||
@ -304,6 +327,7 @@ fn send_and_extract_flattened_documents_data(
|
|||||||
docid_fid_facet_numbers_chunk,
|
docid_fid_facet_numbers_chunk,
|
||||||
docid_fid_facet_strings_chunk,
|
docid_fid_facet_strings_chunk,
|
||||||
fid_facet_is_null_docids_chunk,
|
fid_facet_is_null_docids_chunk,
|
||||||
|
fid_facet_is_empty_docids_chunk,
|
||||||
fid_facet_exists_docids_chunk,
|
fid_facet_exists_docids_chunk,
|
||||||
} = extract_fid_docid_facet_values(
|
} = extract_fid_docid_facet_values(
|
||||||
flattened_documents_chunk.clone(),
|
flattened_documents_chunk.clone(),
|
||||||
@ -331,7 +355,10 @@ fn send_and_extract_flattened_documents_data(
|
|||||||
docid_fid_facet_numbers_chunk,
|
docid_fid_facet_numbers_chunk,
|
||||||
(
|
(
|
||||||
docid_fid_facet_strings_chunk,
|
docid_fid_facet_strings_chunk,
|
||||||
(fid_facet_is_null_docids_chunk, fid_facet_exists_docids_chunk),
|
(
|
||||||
|
fid_facet_is_null_docids_chunk,
|
||||||
|
(fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk),
|
||||||
|
),
|
||||||
),
|
),
|
||||||
))
|
))
|
||||||
},
|
},
|
||||||
|
@ -40,6 +40,7 @@ pub(crate) enum TypedChunk {
|
|||||||
FieldIdFacetNumberDocids(grenad::Reader<File>),
|
FieldIdFacetNumberDocids(grenad::Reader<File>),
|
||||||
FieldIdFacetExistsDocids(grenad::Reader<File>),
|
FieldIdFacetExistsDocids(grenad::Reader<File>),
|
||||||
FieldIdFacetIsNullDocids(grenad::Reader<File>),
|
FieldIdFacetIsNullDocids(grenad::Reader<File>),
|
||||||
|
FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
|
||||||
GeoPoints(grenad::Reader<File>),
|
GeoPoints(grenad::Reader<File>),
|
||||||
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
|
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
|
||||||
}
|
}
|
||||||
@ -173,6 +174,17 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
)?;
|
)?;
|
||||||
is_merged_database = true;
|
is_merged_database = true;
|
||||||
}
|
}
|
||||||
|
TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => {
|
||||||
|
append_entries_into_database(
|
||||||
|
facet_id_is_empty_docids,
|
||||||
|
&index.facet_id_is_empty_docids,
|
||||||
|
wtxn,
|
||||||
|
index_is_empty,
|
||||||
|
|value, _buffer| Ok(value),
|
||||||
|
merge_cbo_roaring_bitmaps,
|
||||||
|
)?;
|
||||||
|
is_merged_database = true;
|
||||||
|
}
|
||||||
TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
|
TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
|
||||||
append_entries_into_database(
|
append_entries_into_database(
|
||||||
word_pair_proximity_docids_iter,
|
word_pair_proximity_docids_iter,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user