4804: Implements the experimental contains filter operator r=irevoire a=irevoire

# Pull Request
Related PRD: (private link) https://www.notion.so/meilisearch/Contains-Like-Filter-Operator-0d8ad53c6761466f913432eb1d843f1e
Public usage page: https://meilisearch.notion.site/Contains-filter-operator-usage-3e7421b0aacf45f48ab09abe259a1de6

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3613

## What does this PR do?
- Extract the contains operator from this PR: https://github.com/meilisearch/meilisearch/pull/3751
- Gate it behind a feature flag
- Add tests


Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-07-17 15:47:11 +00:00 committed by GitHub
commit ea73615abf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 487 additions and 121 deletions

View file

@ -2140,6 +2140,47 @@ pub(crate) mod tests {
);
}
/// Exercises the `CONTAINS` and `NOT CONTAINS` filter operators on a filterable string field.
#[test]
fn test_contains() {
    let index = TempIndex::new();

    // Declare `doggo` as filterable, since every filter below targets it.
    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset! { S("doggo") });
        })
        .unwrap();

    // Document 5 carries no `doggo` field at all.
    index
        .add_documents(documents!([
            { "id": 0, "doggo": "kefir" },
            { "id": 1, "doggo": "kefirounet" },
            { "id": 2, "doggo": "kefkef" },
            { "id": 3, "doggo": "fifir" },
            { "id": 4, "doggo": "boubou" },
            { "id": 5 },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // Substring match: only the values that contain "kefir" are expected.
    let contains_kefir = Filter::from_str("doggo CONTAINS kefir").unwrap().unwrap();
    let mut search = index.search(&rtxn);
    let found = search.filter(contains_kefir).execute().unwrap();
    insta::assert_debug_snapshot!(found.candidates, @"RoaringBitmap<[0, 1]>");

    // An uppercase needle is expected to match the lowercase stored values.
    let contains_upper_kef = Filter::from_str("doggo CONTAINS KEF").unwrap().unwrap();
    let mut search = index.search(&rtxn);
    let found = search.filter(contains_upper_kef).execute().unwrap();
    insta::assert_debug_snapshot!(found.candidates, @"RoaringBitmap<[0, 1, 2]>");

    // The negated form is expected to also return the document without a `doggo` field.
    let not_contains_fir = Filter::from_str("doggo NOT CONTAINS fir").unwrap().unwrap();
    let mut search = index.search(&rtxn);
    let found = search.filter(not_contains_fir).execute().unwrap();
    insta::assert_debug_snapshot!(found.candidates, @"RoaringBitmap<[2, 4, 5]>");
}
#[test]
fn replace_documents_external_ids_and_soft_deletion_check() {
use big_s::S;

View file

@ -4,6 +4,8 @@ use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
use heed::types::LazyDecode;
use memchr::memmem::Finder;
use roaring::{MultiOps, RoaringBitmap};
use serde_json::Value;
@ -12,7 +14,11 @@ use crate::error::{Error, UserError};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{
distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result,
SerializationError,
};
/// The maximum number of filters the filter AST can process.
const MAX_FILTER_DEPTH: usize = 2000;
@ -218,6 +224,10 @@ impl<'a> Filter<'a> {
Ok(Some(Self { condition }))
}
/// Forwards to `use_contains_operator` on this filter's root condition.
///
/// NOTE(review): presumably returns the token of a `CONTAINS` operator found in the
/// filter (and `None` when there is none) so callers can gate the experimental
/// feature; confirm against the `filter_parser` implementation.
pub fn use_contains_operator(&self) -> Option<&Token> {
    let root = &self.condition;
    root.use_contains_operator()
}
}
impl<'a> Filter<'a> {
@ -295,6 +305,41 @@ impl<'a> Filter<'a> {
let all_ids = index.documents_ids(rtxn)?;
return Ok(all_ids - docids);
}
// `CONTAINS`: collect the documents whose facet string for this field contains
// the searched word as a substring.
Condition::Contains { keyword: _, word } => {
// Normalize the searched word with `normalize_facet` before matching it against
// the stored keys (presumably the keys are stored in the same normalized form;
// verify against the indexing side).
let value = crate::normalize_facet(word.value());
// Build the substring searcher once and reuse it for every key visited below.
let finder = Finder::new(&value);
// Prefix key made of the field id, level 0 and an empty left bound.
// NOTE(review): presumably this makes the iteration visit every level-0 facet
// string of the field, i.e. a full scan of that field's values; confirm.
let base = FacetGroupKey { field_id, level: 0, left_bound: "" };
let docids = strings_db
.prefix_iter(rtxn, &base)?
// Keep the values lazily decoded: a bitmap is only decoded when its key matches.
.remap_data_type::<LazyDecode<FacetGroupValueCodec>>()
.filter_map(|result| -> Option<Result<RoaringBitmap>> {
match result {
Ok((FacetGroupKey { left_bound, .. }, lazy_group_value)) => {
// Byte-wise substring search of the normalized word inside the facet string.
if finder.find(left_bound.as_bytes()).is_some() {
// Decode the matching entry and keep its bitmap; a decoding failure is
// reported as a `Decoding` error on `FACET_ID_STRING_DOCIDS` and the
// underlying error is discarded.
Some(lazy_group_value.decode().map(|gv| gv.bitmap).map_err(
|_| {
InternalError::from(SerializationError::Decoding {
db_name: Some(FACET_ID_STRING_DOCIDS),
})
.into()
},
))
} else {
// Non-matching keys are skipped without decoding their value.
None
}
}
// Any iteration error is also surfaced as a `Decoding` error on the same
// database; the original error (`_e`) is dropped.
Err(_e) => {
Some(Err(InternalError::from(SerializationError::Decoding {
db_name: Some(FACET_ID_STRING_DOCIDS),
})
.into()))
}
}
})
// Merge the matching bitmaps into one; `?` propagates an error yielded above.
.union()?;
return Ok(docids);
}
};
let mut output = RoaringBitmap::new();