mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Merge #3986
3986: Fix geo bounding box with strings r=ManyTheFish a=irevoire # Pull Request When sending a document with one geofield of type string (i.e.: `{ "_geo": { "lat": 12, "lng": "13" }}`), the geobounding box would exclude this document. This PR fixes this issue by automatically parsing the string value in case we're working on a geofield. ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/3973 ## What does this PR do? - Automatically parse the facet value iif we're working on a geofield. - Make insta works with snapshots in loops or closure executed multiple times. (you may need to update your cli if it panics after this PR: `cargo install cargo-insta`). - Add one integration test in milli and in meilisearch to ensure it works forever. - Add three snapshots for the dump that mysteriously disappeared I don't know how Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
44c1900f36
@ -0,0 +1,24 @@
|
|||||||
|
---
|
||||||
|
source: dump/src/reader/mod.rs
|
||||||
|
expression: spells.settings().unwrap()
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"displayedAttributes": [
|
||||||
|
"*"
|
||||||
|
],
|
||||||
|
"searchableAttributes": [
|
||||||
|
"*"
|
||||||
|
],
|
||||||
|
"filterableAttributes": [],
|
||||||
|
"sortableAttributes": [],
|
||||||
|
"rankingRules": [
|
||||||
|
"typo",
|
||||||
|
"words",
|
||||||
|
"proximity",
|
||||||
|
"attribute",
|
||||||
|
"exactness"
|
||||||
|
],
|
||||||
|
"stopWords": [],
|
||||||
|
"synonyms": {},
|
||||||
|
"distinctAttribute": null
|
||||||
|
}
|
@ -0,0 +1,38 @@
|
|||||||
|
---
|
||||||
|
source: dump/src/reader/mod.rs
|
||||||
|
expression: products.settings().unwrap()
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"displayedAttributes": [
|
||||||
|
"*"
|
||||||
|
],
|
||||||
|
"searchableAttributes": [
|
||||||
|
"*"
|
||||||
|
],
|
||||||
|
"filterableAttributes": [],
|
||||||
|
"sortableAttributes": [],
|
||||||
|
"rankingRules": [
|
||||||
|
"typo",
|
||||||
|
"words",
|
||||||
|
"proximity",
|
||||||
|
"attribute",
|
||||||
|
"exactness"
|
||||||
|
],
|
||||||
|
"stopWords": [],
|
||||||
|
"synonyms": {
|
||||||
|
"android": [
|
||||||
|
"phone",
|
||||||
|
"smartphone"
|
||||||
|
],
|
||||||
|
"iphone": [
|
||||||
|
"phone",
|
||||||
|
"smartphone"
|
||||||
|
],
|
||||||
|
"phone": [
|
||||||
|
"android",
|
||||||
|
"iphone",
|
||||||
|
"smartphone"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"distinctAttribute": null
|
||||||
|
}
|
@ -0,0 +1,31 @@
|
|||||||
|
---
|
||||||
|
source: dump/src/reader/mod.rs
|
||||||
|
expression: movies.settings().unwrap()
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"displayedAttributes": [
|
||||||
|
"*"
|
||||||
|
],
|
||||||
|
"searchableAttributes": [
|
||||||
|
"*"
|
||||||
|
],
|
||||||
|
"filterableAttributes": [
|
||||||
|
"genres",
|
||||||
|
"id"
|
||||||
|
],
|
||||||
|
"sortableAttributes": [
|
||||||
|
"genres",
|
||||||
|
"id"
|
||||||
|
],
|
||||||
|
"rankingRules": [
|
||||||
|
"typo",
|
||||||
|
"words",
|
||||||
|
"proximity",
|
||||||
|
"attribute",
|
||||||
|
"exactness",
|
||||||
|
"release_date:asc"
|
||||||
|
],
|
||||||
|
"stopWords": [],
|
||||||
|
"synonyms": {},
|
||||||
|
"distinctAttribute": null
|
||||||
|
}
|
@ -167,7 +167,9 @@ macro_rules! snapshot {
|
|||||||
let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name));
|
let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name));
|
||||||
settings.bind(|| {
|
settings.bind(|| {
|
||||||
let snap = format!("{}", $value);
|
let snap = format!("{}", $value);
|
||||||
|
insta::allow_duplicates! {
|
||||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
($value:expr, @$inline:literal) => {
|
($value:expr, @$inline:literal) => {
|
||||||
@ -176,7 +178,9 @@ macro_rules! snapshot {
|
|||||||
let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument"));
|
let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument"));
|
||||||
settings.bind(|| {
|
settings.bind(|| {
|
||||||
let snap = format!("{}", $value);
|
let snap = format!("{}", $value);
|
||||||
|
insta::allow_duplicates! {
|
||||||
meili_snap::insta::assert_snapshot!(snap, @$inline);
|
meili_snap::insta::assert_snapshot!(snap, @$inline);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
($value:expr) => {
|
($value:expr) => {
|
||||||
@ -194,7 +198,9 @@ macro_rules! snapshot {
|
|||||||
let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None);
|
let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None);
|
||||||
settings.bind(|| {
|
settings.bind(|| {
|
||||||
let snap = format!("{}", $value);
|
let snap = format!("{}", $value);
|
||||||
|
insta::allow_duplicates! {
|
||||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use meili_snap::{json_string, snapshot};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use serde_json::{json, Value};
|
use serde_json::{json, Value};
|
||||||
|
|
||||||
@ -60,3 +61,59 @@ async fn geo_sort_with_geo_strings() {
|
|||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn geo_bounding_box_with_string_and_number() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.update_settings_filterable_attributes(json!(["_geo"])).await;
|
||||||
|
index.update_settings_sortable_attributes(json!(["_geo"])).await;
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(2).await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.search(
|
||||||
|
json!({
|
||||||
|
"filter": "_geoBoundingBox([89, 179], [-89, -179])",
|
||||||
|
}),
|
||||||
|
|response, code| {
|
||||||
|
assert_eq!(code, 200, "{}", response);
|
||||||
|
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"name": "Taco Truck",
|
||||||
|
"address": "444 Salsa Street, Burritoville",
|
||||||
|
"type": "Mexican",
|
||||||
|
"rating": 9,
|
||||||
|
"_geo": {
|
||||||
|
"lat": 34.0522,
|
||||||
|
"lng": -118.2437
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"name": "La Bella Italia",
|
||||||
|
"address": "456 Elm Street, Townsville",
|
||||||
|
"type": "Italian",
|
||||||
|
"rating": 9,
|
||||||
|
"_geo": {
|
||||||
|
"lat": "45.4777599",
|
||||||
|
"lng": "9.1967508"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[time]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 2
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
@ -1718,11 +1718,11 @@ pub(crate) mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
index
|
index
|
||||||
.add_documents(documents!([
|
.add_documents(documents!([
|
||||||
{ "id": 0, "_geo": { "lat": 0, "lng": 0 } },
|
{ "id": 0, "_geo": { "lat": "0", "lng": "0" } },
|
||||||
{ "id": 1, "_geo": { "lat": 0, "lng": -175 } },
|
{ "id": 1, "_geo": { "lat": 0, "lng": "-175" } },
|
||||||
{ "id": 2, "_geo": { "lat": 0, "lng": 175 } },
|
{ "id": 2, "_geo": { "lat": "0", "lng": 175 } },
|
||||||
{ "id": 3, "_geo": { "lat": 85, "lng": 0 } },
|
{ "id": 3, "_geo": { "lat": 85, "lng": 0 } },
|
||||||
{ "id": 4, "_geo": { "lat": -85, "lng": 0 } },
|
{ "id": 4, "_geo": { "lat": "-85", "lng": "0" } },
|
||||||
]))
|
]))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -28,11 +28,13 @@ pub struct ExtractedFacetValues {
|
|||||||
///
|
///
|
||||||
/// Returns the generated grenad reader containing the docid the fid and the orginal value as key
|
/// Returns the generated grenad reader containing the docid the fid and the orginal value as key
|
||||||
/// and the normalized value as value extracted from the given chunk of documents.
|
/// and the normalized value as value extracted from the given chunk of documents.
|
||||||
|
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
|
||||||
#[logging_timer::time]
|
#[logging_timer::time]
|
||||||
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||||
obkv_documents: grenad::Reader<R>,
|
obkv_documents: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
faceted_fields: &HashSet<FieldId>,
|
faceted_fields: &HashSet<FieldId>,
|
||||||
|
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||||
) -> Result<ExtractedFacetValues> {
|
) -> Result<ExtractedFacetValues> {
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
@ -82,7 +84,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
||||||
|
|
||||||
match extract_facet_values(&value) {
|
match extract_facet_values(
|
||||||
|
&value,
|
||||||
|
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
|
||||||
|
) {
|
||||||
FilterableValues::Null => {
|
FilterableValues::Null => {
|
||||||
facet_is_null_docids.entry(field_id).or_default().insert(document);
|
facet_is_null_docids.entry(field_id).or_default().insert(document);
|
||||||
}
|
}
|
||||||
@ -175,12 +180,13 @@ enum FilterableValues {
|
|||||||
Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
|
Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_facet_values(value: &Value) -> FilterableValues {
|
fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
|
||||||
fn inner_extract_facet_values(
|
fn inner_extract_facet_values(
|
||||||
value: &Value,
|
value: &Value,
|
||||||
can_recurse: bool,
|
can_recurse: bool,
|
||||||
output_numbers: &mut Vec<f64>,
|
output_numbers: &mut Vec<f64>,
|
||||||
output_strings: &mut Vec<(String, String)>,
|
output_strings: &mut Vec<(String, String)>,
|
||||||
|
geo_field: bool,
|
||||||
) {
|
) {
|
||||||
match value {
|
match value {
|
||||||
Value::Null => (),
|
Value::Null => (),
|
||||||
@ -191,13 +197,30 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Value::String(original) => {
|
Value::String(original) => {
|
||||||
|
// if we're working on a geofield it MUST be something we can parse or else there was an internal error
|
||||||
|
// in the enrich pipeline. But since the enrich pipeline worked, we want to avoid crashing at all costs.
|
||||||
|
if geo_field {
|
||||||
|
if let Ok(float) = original.parse() {
|
||||||
|
output_numbers.push(float);
|
||||||
|
} else {
|
||||||
|
log::warn!(
|
||||||
|
"Internal error, could not parse a geofield that has been validated. Please open an issue."
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
let normalized = crate::normalize_facet(original);
|
let normalized = crate::normalize_facet(original);
|
||||||
output_strings.push((normalized, original.clone()));
|
output_strings.push((normalized, original.clone()));
|
||||||
}
|
}
|
||||||
Value::Array(values) => {
|
Value::Array(values) => {
|
||||||
if can_recurse {
|
if can_recurse {
|
||||||
for value in values {
|
for value in values {
|
||||||
inner_extract_facet_values(value, false, output_numbers, output_strings);
|
inner_extract_facet_values(
|
||||||
|
value,
|
||||||
|
false,
|
||||||
|
output_numbers,
|
||||||
|
output_strings,
|
||||||
|
geo_field,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -213,7 +236,7 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
|
|||||||
otherwise => {
|
otherwise => {
|
||||||
let mut numbers = Vec::new();
|
let mut numbers = Vec::new();
|
||||||
let mut strings = Vec::new();
|
let mut strings = Vec::new();
|
||||||
inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
|
inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings, geo_field);
|
||||||
FilterableValues::Values { numbers, strings }
|
FilterableValues::Values { numbers, strings }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -366,6 +366,7 @@ fn send_and_extract_flattened_documents_data(
|
|||||||
flattened_documents_chunk.clone(),
|
flattened_documents_chunk.clone(),
|
||||||
indexer,
|
indexer,
|
||||||
faceted_fields,
|
faceted_fields,
|
||||||
|
geo_fields_ids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// send docid_fid_facet_numbers_chunk to DB writer
|
// send docid_fid_facet_numbers_chunk to DB writer
|
||||||
|
Loading…
Reference in New Issue
Block a user