From 9d061cec26da1ab08e66d67bf448c940d1804fd4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 8 Aug 2023 16:28:07 +0200 Subject: [PATCH 1/4] automatically parse the filterable attribute to float if it's a geo field --- .../extract/extract_fid_docid_facet_values.rs | 31 ++++++++++++++++--- .../src/update/index_documents/extract/mod.rs | 1 + 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 77a5561fe..9502c68eb 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -28,11 +28,13 @@ pub struct ExtractedFacetValues { /// /// Returns the generated grenad reader containing the docid the fid and the orginal value as key /// and the normalized value as value extracted from the given chunk of documents. +/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially. #[logging_timer::time] pub fn extract_fid_docid_facet_values( obkv_documents: grenad::Reader, indexer: GrenadParameters, faceted_fields: &HashSet, + geo_fields_ids: Option<(FieldId, FieldId)>, ) -> Result { let max_memory = indexer.max_memory_by_thread(); @@ -82,7 +84,10 @@ pub fn extract_fid_docid_facet_values( let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?; - match extract_facet_values(&value) { + match extract_facet_values( + &value, + geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng), + ) { FilterableValues::Null => { facet_is_null_docids.entry(field_id).or_default().insert(document); } @@ -175,12 +180,13 @@ enum FilterableValues { Values { numbers: Vec, strings: Vec<(String, String)> }, } -fn extract_facet_values(value: &Value) -> FilterableValues { +fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues { fn inner_extract_facet_values( value: &Value, can_recurse: bool, output_numbers: &mut Vec, output_strings: &mut Vec<(String, String)>, + geo_field: bool, ) { match value { Value::Null => (), @@ -191,13 +197,30 @@ fn extract_facet_values(value: &Value) -> FilterableValues { } } Value::String(original) => { + // if we're working on a geofield it MUST be something we can parse or else there was an internal error + // in the enrich pipeline. But since the enrich pipeline worked, we want to avoid crashing at all costs. + if geo_field { + if let Ok(float) = original.parse() { + output_numbers.push(float); + } else { + log::warn!( + "Internal error, could not parse a geofield that has been validated. Please open an issue." + ) + } + } let normalized = crate::normalize_facet(original); output_strings.push((normalized, original.clone())); } Value::Array(values) => { if can_recurse { for value in values { - inner_extract_facet_values(value, false, output_numbers, output_strings); + inner_extract_facet_values( + value, + false, + output_numbers, + output_strings, + geo_field, + ); } } } @@ -213,7 +236,7 @@ fn extract_facet_values(value: &Value) -> FilterableValues { otherwise => { let mut numbers = Vec::new(); let mut strings = Vec::new(); - inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings); + inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings, geo_field); FilterableValues::Values { numbers, strings } } } diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 6259c7272..0bb48fa99 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -366,6 +366,7 @@ fn send_and_extract_flattened_documents_data( flattened_documents_chunk.clone(), indexer, faceted_fields, + geo_fields_ids, )?; // send docid_fid_facet_numbers_chunk to DB writer From 83991ee7704a6626a63728b111e3f603a7e7de6c Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 8 Aug 2023 16:28:38 +0200 Subject: [PATCH 2/4] enable the multi-snapshot attribute in insta. This will let us use insta in loops --- meili-snap/src/lib.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/meili-snap/src/lib.rs b/meili-snap/src/lib.rs index 32d3385d9..c467aef49 100644 --- a/meili-snap/src/lib.rs +++ b/meili-snap/src/lib.rs @@ -167,7 +167,9 @@ macro_rules! snapshot { let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name)); settings.bind(|| { let snap = format!("{}", $value); - meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap); + insta::allow_duplicates! { + meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap); + } }); }; ($value:expr, @$inline:literal) => { @@ -176,7 +178,9 @@ macro_rules! snapshot { let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument")); settings.bind(|| { let snap = format!("{}", $value); - meili_snap::insta::assert_snapshot!(snap, @$inline); + insta::allow_duplicates! { + meili_snap::insta::assert_snapshot!(snap, @$inline); + } }); }; ($value:expr) => { @@ -194,7 +198,9 @@ macro_rules! snapshot { let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None); settings.bind(|| { let snap = format!("{}", $value); - meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap); + insta::allow_duplicates! { + meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap); + } }); }; } From 4988199bb915f8e2757c7a197c2769afe25be1ed Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 8 Aug 2023 16:29:25 +0200 Subject: [PATCH 3/4] ensure the geoboundingbox works with strings and int geofields in milli and meilisearch --- meilisearch/tests/search/geo.rs | 57 +++++++++++++++++++++++++++++++++ milli/src/index.rs | 8 ++--- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/meilisearch/tests/search/geo.rs b/meilisearch/tests/search/geo.rs index 4253bc210..5eec5857a 100644 --- a/meilisearch/tests/search/geo.rs +++ b/meilisearch/tests/search/geo.rs @@ -1,3 +1,4 @@ +use meili_snap::{json_string, snapshot}; use once_cell::sync::Lazy; use serde_json::{json, Value}; @@ -60,3 +61,59 @@ async fn geo_sort_with_geo_strings() { ) .await; } + +#[actix_rt::test] +async fn geo_bounding_box_with_string_and_number() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.update_settings_filterable_attributes(json!(["_geo"])).await; + index.update_settings_sortable_attributes(json!(["_geo"])).await; + index.add_documents(documents, None).await; + index.wait_task(2).await; + + index + .search( + json!({ + "filter": "_geoBoundingBox([89, 179], [-89, -179])", + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "id": 1, + "name": "Taco Truck", + "address": "444 Salsa Street, Burritoville", + "type": "Mexican", + "rating": 9, + "_geo": { + "lat": 34.0522, + "lng": -118.2437 + } + }, + { + "id": 2, + "name": "La Bella Italia", + "address": "456 Elm Street, Townsville", + "type": "Italian", + "rating": 9, + "_geo": { + "lat": "45.4777599", + "lng": "9.1967508" + } + } + ], + "query": "", + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + }, + ) + .await; +} diff --git a/milli/src/index.rs b/milli/src/index.rs index 0ddfcda94..23c0f5fae 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1718,11 +1718,11 @@ pub(crate) mod tests { .unwrap(); index .add_documents(documents!([ - { "id": 0, "_geo": { "lat": 0, "lng": 0 } }, - { "id": 1, "_geo": { "lat": 0, "lng": -175 } }, - { "id": 2, "_geo": { "lat": 0, "lng": 175 } }, + { "id": 0, "_geo": { "lat": "0", "lng": "0" } }, + { "id": 1, "_geo": { "lat": 0, "lng": "-175" } }, + { "id": 2, "_geo": { "lat": "0", "lng": 175 } }, { "id": 3, "_geo": { "lat": 85, "lng": 0 } }, - { "id": 4, "_geo": { "lat": -85, "lng": 0 } }, + { "id": 4, "_geo": { "lat": "-85", "lng": "0" } }, ])) .unwrap(); From 4f4c669d50ccc62fe7476603def117c698d5df3a Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 8 Aug 2023 16:58:14 +0200 Subject: [PATCH 4/4] add back some dump snapshots that disappeared. it's completely unrelated to this PR --- ...dump__reader__test__import_dump_v1-10.snap | 24 ++++++++++++ .../dump__reader__test__import_dump_v1-4.snap | 38 +++++++++++++++++++ .../dump__reader__test__import_dump_v1-7.snap | 31 +++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap new file mode 100644 index 000000000..92fc61d72 --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap @@ -0,0 +1,24 @@ +--- +source: dump/src/reader/mod.rs +expression: spells.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "synonyms": {}, + "distinctAttribute": null +} diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap new file mode 100644 index 000000000..b0b54c136 --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap @@ -0,0 +1,38 @@ +--- +source: dump/src/reader/mod.rs +expression: products.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "synonyms": { + "android": [ + "phone", + "smartphone" + ], + "iphone": [ + "phone", + "smartphone" + ], + "phone": [ + "android", + "iphone", + "smartphone" + ] + }, + "distinctAttribute": null +} diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap new file mode 100644 index 000000000..5c12a0438 --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap @@ -0,0 +1,31 @@ +--- +source: dump/src/reader/mod.rs +expression: movies.settings().unwrap() +--- +{ + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [ + "genres", + "id" + ], + "sortableAttributes": [ + "genres", + "id" + ], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness", + "release_date:asc" + ], + "stopWords": [], + "synonyms": {}, + "distinctAttribute": null +}