mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 05:00:06 +01:00
Parse every attributes and filter before tokenization
This commit is contained in:
parent
ff9c92c409
commit
4d616f8794
@ -1623,15 +1623,14 @@ async fn change_attributes_settings() {
|
|||||||
index
|
index
|
||||||
.search(
|
.search(
|
||||||
json!({
|
json!({
|
||||||
"q": "bobby"
|
"q": "bobby",
|
||||||
|
"attributesToRetrieve": ["id", "doggos"]
|
||||||
}),
|
}),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"father": "jean",
|
|
||||||
"mother": "michelle",
|
|
||||||
"id": 852,
|
"id": 852,
|
||||||
"doggos": [
|
"doggos": [
|
||||||
{
|
{
|
||||||
@ -1642,16 +1641,8 @@ async fn change_attributes_settings() {
|
|||||||
"name": "buddy",
|
"name": "buddy",
|
||||||
"age": 4
|
"age": 4
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"cattos": "pésti",
|
|
||||||
"_vectors": {
|
|
||||||
"manual": [
|
|
||||||
1.0,
|
|
||||||
2.0,
|
|
||||||
3.0
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
},
|
},
|
||||||
@ -1663,15 +1654,14 @@ async fn change_attributes_settings() {
|
|||||||
.search(
|
.search(
|
||||||
json!({
|
json!({
|
||||||
"q": "",
|
"q": "",
|
||||||
"filter": "doggos.age < 5"
|
"filter": "doggos.age < 5",
|
||||||
|
"attributesToRetrieve": ["id", "doggos"]
|
||||||
}),
|
}),
|
||||||
|response, code| {
|
|response, code| {
|
||||||
assert_eq!(code, 200, "{}", response);
|
assert_eq!(code, 200, "{}", response);
|
||||||
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"father": "jean",
|
|
||||||
"mother": "michelle",
|
|
||||||
"id": 852,
|
"id": 852,
|
||||||
"doggos": [
|
"doggos": [
|
||||||
{
|
{
|
||||||
@ -1682,16 +1672,8 @@ async fn change_attributes_settings() {
|
|||||||
"name": "buddy",
|
"name": "buddy",
|
||||||
"age": 4
|
"age": 4
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"cattos": "pésti",
|
|
||||||
"_vectors": {
|
|
||||||
"manual": [
|
|
||||||
1.0,
|
|
||||||
2.0,
|
|
||||||
3.0
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
},
|
},
|
||||||
|
@ -75,12 +75,12 @@ pub trait SearchableExtractor: Sized + Sync {
|
|||||||
let dictionary = indexing_context.index.dictionary(&rtxn)?;
|
let dictionary = indexing_context.index.dictionary(&rtxn)?;
|
||||||
let dictionary: Option<Vec<_>> =
|
let dictionary: Option<Vec<_>> =
|
||||||
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||||
let builder = tokenizer_builder(
|
let mut builder = tokenizer_builder(
|
||||||
stop_words.as_ref(),
|
stop_words.as_ref(),
|
||||||
allowed_separators.as_deref(),
|
allowed_separators.as_deref(),
|
||||||
dictionary.as_deref(),
|
dictionary.as_deref(),
|
||||||
);
|
);
|
||||||
let tokenizer = builder.into_tokenizer();
|
let tokenizer = builder.build();
|
||||||
|
|
||||||
let attributes_to_extract = Self::attributes_to_extract(&rtxn, indexing_context.index)?;
|
let attributes_to_extract = Self::attributes_to_extract(&rtxn, indexing_context.index)?;
|
||||||
let attributes_to_skip = Self::attributes_to_skip(&rtxn, indexing_context.index)?;
|
let attributes_to_skip = Self::attributes_to_skip(&rtxn, indexing_context.index)?;
|
||||||
|
@ -40,6 +40,12 @@ impl<'a> DocumentTokenizer<'a> {
|
|||||||
return Err(UserError::AttributeLimitReached.into());
|
return Err(UserError::AttributeLimitReached.into());
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
|
||||||
|
!= Selection::Select
|
||||||
|
{
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
let position = field_position
|
let position = field_position
|
||||||
.entry(field_id)
|
.entry(field_id)
|
||||||
.and_modify(|counter| *counter += MAX_DISTANCE)
|
.and_modify(|counter| *counter += MAX_DISTANCE)
|
||||||
@ -87,24 +93,20 @@ impl<'a> DocumentTokenizer<'a> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
};
|
};
|
||||||
|
|
||||||
// if the current field is searchable or contains a searchable attribute
|
|
||||||
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
|
|
||||||
!= Selection::Skip
|
|
||||||
{
|
|
||||||
// parse json.
|
// parse json.
|
||||||
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
|
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||||
Value::Object(object) => seek_leaf_values_in_object(
|
Value::Object(object) => seek_leaf_values_in_object(
|
||||||
&object,
|
&object,
|
||||||
self.attribute_to_extract,
|
None,
|
||||||
self.attribute_to_skip,
|
&[],
|
||||||
field_name,
|
field_name,
|
||||||
Depth::OnBaseKey,
|
Depth::OnBaseKey,
|
||||||
&mut tokenize_field,
|
&mut tokenize_field,
|
||||||
)?,
|
)?,
|
||||||
Value::Array(array) => seek_leaf_values_in_array(
|
Value::Array(array) => seek_leaf_values_in_array(
|
||||||
&array,
|
&array,
|
||||||
self.attribute_to_extract,
|
None,
|
||||||
self.attribute_to_skip,
|
&[],
|
||||||
field_name,
|
field_name,
|
||||||
Depth::OnBaseKey,
|
Depth::OnBaseKey,
|
||||||
&mut tokenize_field,
|
&mut tokenize_field,
|
||||||
@ -112,7 +114,6 @@ impl<'a> DocumentTokenizer<'a> {
|
|||||||
value => tokenize_field(field_name, Depth::OnBaseKey, &value)?,
|
value => tokenize_field(field_name, Depth::OnBaseKey, &value)?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user