mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-22 12:40:04 +01:00
Parse every attribute and filter before tokenization
This commit is contained in:
parent
ff9c92c409
commit
4d616f8794
@ -1623,15 +1623,14 @@ async fn change_attributes_settings() {
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"q": "bobby"
|
||||
"q": "bobby",
|
||||
"attributesToRetrieve": ["id", "doggos"]
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"father": "jean",
|
||||
"mother": "michelle",
|
||||
"id": 852,
|
||||
"doggos": [
|
||||
{
|
||||
@ -1642,15 +1641,7 @@ async fn change_attributes_settings() {
|
||||
"name": "buddy",
|
||||
"age": 4
|
||||
}
|
||||
],
|
||||
"cattos": "pésti",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1.0,
|
||||
2.0,
|
||||
3.0
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
"###);
|
||||
@ -1663,15 +1654,14 @@ async fn change_attributes_settings() {
|
||||
.search(
|
||||
json!({
|
||||
"q": "",
|
||||
"filter": "doggos.age < 5"
|
||||
"filter": "doggos.age < 5",
|
||||
"attributesToRetrieve": ["id", "doggos"]
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"father": "jean",
|
||||
"mother": "michelle",
|
||||
"id": 852,
|
||||
"doggos": [
|
||||
{
|
||||
@ -1682,15 +1672,7 @@ async fn change_attributes_settings() {
|
||||
"name": "buddy",
|
||||
"age": 4
|
||||
}
|
||||
],
|
||||
"cattos": "pésti",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
1.0,
|
||||
2.0,
|
||||
3.0
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
"###);
|
||||
|
@ -75,12 +75,12 @@ pub trait SearchableExtractor: Sized + Sync {
|
||||
let dictionary = indexing_context.index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let builder = tokenizer_builder(
|
||||
let mut builder = tokenizer_builder(
|
||||
stop_words.as_ref(),
|
||||
allowed_separators.as_deref(),
|
||||
dictionary.as_deref(),
|
||||
);
|
||||
let tokenizer = builder.into_tokenizer();
|
||||
let tokenizer = builder.build();
|
||||
|
||||
let attributes_to_extract = Self::attributes_to_extract(&rtxn, indexing_context.index)?;
|
||||
let attributes_to_skip = Self::attributes_to_skip(&rtxn, indexing_context.index)?;
|
||||
|
@ -40,6 +40,12 @@ impl<'a> DocumentTokenizer<'a> {
|
||||
return Err(UserError::AttributeLimitReached.into());
|
||||
};
|
||||
|
||||
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
|
||||
!= Selection::Select
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let position = field_position
|
||||
.entry(field_id)
|
||||
.and_modify(|counter| *counter += MAX_DISTANCE)
|
||||
@ -87,30 +93,25 @@ impl<'a> DocumentTokenizer<'a> {
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// if the current field is searchable or contains a searchable attribute
|
||||
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
|
||||
!= Selection::Skip
|
||||
{
|
||||
// parse json.
|
||||
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||
Value::Object(object) => seek_leaf_values_in_object(
|
||||
&object,
|
||||
self.attribute_to_extract,
|
||||
self.attribute_to_skip,
|
||||
field_name,
|
||||
Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?,
|
||||
Value::Array(array) => seek_leaf_values_in_array(
|
||||
&array,
|
||||
self.attribute_to_extract,
|
||||
self.attribute_to_skip,
|
||||
field_name,
|
||||
Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?,
|
||||
value => tokenize_field(field_name, Depth::OnBaseKey, &value)?,
|
||||
}
|
||||
// parse json.
|
||||
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||
Value::Object(object) => seek_leaf_values_in_object(
|
||||
&object,
|
||||
None,
|
||||
&[],
|
||||
field_name,
|
||||
Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?,
|
||||
Value::Array(array) => seek_leaf_values_in_array(
|
||||
&array,
|
||||
None,
|
||||
&[],
|
||||
field_name,
|
||||
Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?,
|
||||
value => tokenize_field(field_name, Depth::OnBaseKey, &value)?,
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user