mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Merge #4804
4804: Implements the experimental contains filter operator r=irevoire a=irevoire

# Pull Request

Related PRD: (private link) https://www.notion.so/meilisearch/Contains-Like-Filter-Operator-0d8ad53c6761466f913432eb1d843f1e
Public usage page: https://meilisearch.notion.site/Contains-filter-operator-usage-3e7421b0aacf45f48ab09abe259a1de6

## Related issue

Fixes https://github.com/meilisearch/meilisearch/issues/3613

## What does this PR do?

- Extract the contains operator from this PR: https://github.com/meilisearch/meilisearch/pull/3751
- Gate it behind a feature flag
- Add tests

Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
ea73615abf
34 changed files with 487 additions and 121 deletions
|
@ -2140,6 +2140,47 @@ pub(crate) mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_contains() {
    // Exercises the `CONTAINS` / `NOT CONTAINS` filter operators on a single
    // filterable string field, including one document that lacks the field.
    let index = TempIndex::new();

    // Only `doggo` is declared filterable.
    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset! { S("doggo") });
        })
        .unwrap();

    // Document 5 deliberately has no `doggo` value.
    index
        .add_documents(documents!([
            { "id": 0, "doggo": "kefir" },
            { "id": 1, "doggo": "kefirounet" },
            { "id": 2, "doggo": "kefkef" },
            { "id": 3, "doggo": "fifir" },
            { "id": 4, "doggo": "boubou" },
            { "id": 5 },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // Runs a fresh search restricted by `expression` and hands back the
    // candidate document ids.
    let candidates_for = |expression: &'static str| {
        let parsed = Filter::from_str(expression).unwrap().unwrap();
        let mut search = index.search(&rtxn);
        search.filter(parsed).execute().unwrap().candidates
    };

    // Plain substring match.
    let kefir_hits = candidates_for("doggo CONTAINS kefir");
    insta::assert_debug_snapshot!(kefir_hits, @"RoaringBitmap<[0, 1]>");

    // An upper-case needle still matches the lower-case stored values.
    let kef_hits = candidates_for("doggo CONTAINS KEF");
    insta::assert_debug_snapshot!(kef_hits, @"RoaringBitmap<[0, 1, 2]>");

    // The negated form also returns the document without the field (id 5).
    let not_fir_hits = candidates_for("doggo NOT CONTAINS fir");
    insta::assert_debug_snapshot!(not_fir_hits, @"RoaringBitmap<[2, 4, 5]>");
}
|
||||
|
||||
#[test]
|
||||
fn replace_documents_external_ids_and_soft_deletion_check() {
|
||||
use big_s::S;
|
||||
|
|
|
@ -4,6 +4,8 @@ use std::ops::Bound::{self, Excluded, Included};
|
|||
|
||||
use either::Either;
|
||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
|
||||
use heed::types::LazyDecode;
|
||||
use memchr::memmem::Finder;
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use serde_json::Value;
|
||||
|
||||
|
@ -12,7 +14,11 @@ use crate::error::{Error, UserError};
|
|||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
|
||||
};
|
||||
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
|
||||
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
|
||||
use crate::{
|
||||
distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result,
|
||||
SerializationError,
|
||||
};
|
||||
|
||||
/// The maximum number of filters the filter AST can process.
|
||||
const MAX_FILTER_DEPTH: usize = 2000;
|
||||
|
@ -218,6 +224,10 @@ impl<'a> Filter<'a> {
|
|||
|
||||
Ok(Some(Self { condition }))
|
||||
}
|
||||
|
||||
pub fn use_contains_operator(&self) -> Option<&Token> {
|
||||
self.condition.use_contains_operator()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Filter<'a> {
|
||||
|
@ -295,6 +305,41 @@ impl<'a> Filter<'a> {
|
|||
let all_ids = index.documents_ids(rtxn)?;
|
||||
return Ok(all_ids - docids);
|
||||
}
|
||||
Condition::Contains { keyword: _, word } => {
|
||||
let value = crate::normalize_facet(word.value());
|
||||
let finder = Finder::new(&value);
|
||||
let base = FacetGroupKey { field_id, level: 0, left_bound: "" };
|
||||
let docids = strings_db
|
||||
.prefix_iter(rtxn, &base)?
|
||||
.remap_data_type::<LazyDecode<FacetGroupValueCodec>>()
|
||||
.filter_map(|result| -> Option<Result<RoaringBitmap>> {
|
||||
match result {
|
||||
Ok((FacetGroupKey { left_bound, .. }, lazy_group_value)) => {
|
||||
if finder.find(left_bound.as_bytes()).is_some() {
|
||||
Some(lazy_group_value.decode().map(|gv| gv.bitmap).map_err(
|
||||
|_| {
|
||||
InternalError::from(SerializationError::Decoding {
|
||||
db_name: Some(FACET_ID_STRING_DOCIDS),
|
||||
})
|
||||
.into()
|
||||
},
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Err(_e) => {
|
||||
Some(Err(InternalError::from(SerializationError::Decoding {
|
||||
db_name: Some(FACET_ID_STRING_DOCIDS),
|
||||
})
|
||||
.into()))
|
||||
}
|
||||
}
|
||||
})
|
||||
.union()?;
|
||||
|
||||
return Ok(docids);
|
||||
}
|
||||
};
|
||||
|
||||
let mut output = RoaringBitmap::new();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue