From 9d061cec26da1ab08e66d67bf448c940d1804fd4 Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 8 Aug 2023 16:28:07 +0200
Subject: [PATCH 1/4] automatically parse the filterable attribute to float if
 it's a geo field

---
 .../extract/extract_fid_docid_facet_values.rs | 31 ++++++++++++++++---
 .../src/update/index_documents/extract/mod.rs |  1 +
 2 files changed, 28 insertions(+), 4 deletions(-)
diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
index 77a5561fe..9502c68eb 100644
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -28,11 +28,13 @@ pub struct ExtractedFacetValues {
 ///
 /// Returns the generated grenad reader containing the docid the fid and the orginal value as key
 /// and the normalized value as value extracted from the given chunk of documents.
+/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
 #[logging_timer::time]
 pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
     obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
     faceted_fields: &HashSet<FieldId>,
+    geo_fields_ids: Option<(FieldId, FieldId)>,
 ) -> Result<ExtractedFacetValues> {
     let max_memory = indexer.max_memory_by_thread();
 
@@ -82,7 +84,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
 
                 let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
 
-                match extract_facet_values(&value) {
+                match extract_facet_values(
+                    &value,
+                    geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
+                ) {
                     FilterableValues::Null => {
                         facet_is_null_docids.entry(field_id).or_default().insert(document);
                     }
@@ -175,12 +180,13 @@ enum FilterableValues {
     Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
 }
 
-fn extract_facet_values(value: &Value) -> FilterableValues {
+fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
     fn inner_extract_facet_values(
         value: &Value,
         can_recurse: bool,
         output_numbers: &mut Vec<f64>,
         output_strings: &mut Vec<(String, String)>,
+        geo_field: bool,
     ) {
         match value {
             Value::Null => (),
@@ -191,13 +197,30 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
                 }
             }
             Value::String(original) => {
+                // if we're working on a geofield it MUST be something we can parse or else there was an internal error
+                // in the enrich pipeline. But since the enrich pipeline worked, we want to avoid crashing at all costs.
+                if geo_field {
+                    if let Ok(float) = original.parse() {
+                        output_numbers.push(float);
+                    } else {
+                        log::warn!(
+                            "Internal error, could not parse a geofield that has been validated. Please open an issue."
+                        )
+                    }
+                }
                 let normalized = crate::normalize_facet(original);
                 output_strings.push((normalized, original.clone()));
             }
             Value::Array(values) => {
                 if can_recurse {
                     for value in values {
-                        inner_extract_facet_values(value, false, output_numbers, output_strings);
+                        inner_extract_facet_values(
+                            value,
+                            false,
+                            output_numbers,
+                            output_strings,
+                            geo_field,
+                        );
                     }
                 }
             }
@@ -213,7 +236,7 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
         otherwise => {
             let mut numbers = Vec::new();
             let mut strings = Vec::new();
-            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
+            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings, geo_field);
             FilterableValues::Values { numbers, strings }
         }
     }
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 6259c7272..0bb48fa99 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -366,6 +366,7 @@ fn send_and_extract_flattened_documents_data(
                     flattened_documents_chunk.clone(),
                     indexer,
                     faceted_fields,
+                    geo_fields_ids,
                 )?;
 
                 // send docid_fid_facet_numbers_chunk to DB writer

From 83991ee7704a6626a63728b111e3f603a7e7de6c Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 8 Aug 2023 16:28:38 +0200
Subject: [PATCH 2/4] enable the multi-snapshot attribute in insta. This will
 let us use insta in loops

---
 meili-snap/src/lib.rs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/meili-snap/src/lib.rs b/meili-snap/src/lib.rs
index 32d3385d9..c467aef49 100644
--- a/meili-snap/src/lib.rs
+++ b/meili-snap/src/lib.rs
@@ -167,7 +167,9 @@ macro_rules! snapshot {
         let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name));
         settings.bind(|| {
             let snap = format!("{}", $value);
-            meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
+            insta::allow_duplicates! {
+                meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
+            }
         });
     };
     ($value:expr, @$inline:literal) => {
@@ -176,7 +178,9 @@ macro_rules! snapshot {
         let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument"));
         settings.bind(|| {
             let snap = format!("{}", $value);
-            meili_snap::insta::assert_snapshot!(snap, @$inline);
+            insta::allow_duplicates! {
+                meili_snap::insta::assert_snapshot!(snap, @$inline);
+            }
         });
     };
     ($value:expr) => {
@@ -194,7 +198,9 @@ macro_rules! snapshot {
         let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None);
         settings.bind(|| {
             let snap = format!("{}", $value);
-            meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
+            insta::allow_duplicates! {
+                meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
+            }
         });
     };
 }

From 4988199bb915f8e2757c7a197c2769afe25be1ed Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 8 Aug 2023 16:29:25 +0200
Subject: [PATCH 3/4] ensure the geoboundingbox works with strings and int
 geofields in milli and meilisearch

---
 meilisearch/tests/search/geo.rs | 57 +++++++++++++++++++++++++++++++++
 milli/src/index.rs              |  8 ++---
 2 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/meilisearch/tests/search/geo.rs b/meilisearch/tests/search/geo.rs
index 4253bc210..5eec5857a 100644
--- a/meilisearch/tests/search/geo.rs
+++ b/meilisearch/tests/search/geo.rs
@@ -1,3 +1,4 @@
+use meili_snap::{json_string, snapshot};
 use once_cell::sync::Lazy;
 use serde_json::{json, Value};
 
@@ -60,3 +61,59 @@ async fn geo_sort_with_geo_strings() {
         )
         .await;
 }
+
+#[actix_rt::test]
+async fn geo_bounding_box_with_string_and_number() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.update_settings_filterable_attributes(json!(["_geo"])).await;
+    index.update_settings_sortable_attributes(json!(["_geo"])).await;
+    index.add_documents(documents, None).await;
+    index.wait_task(2).await;
+
+    index
+        .search(
+            json!({
+                "filter": "_geoBoundingBox([89, 179], [-89, -179])",
+            }),
+            |response, code| {
+                assert_eq!(code, 200, "{}", response);
+                snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
+                {
+                  "hits": [
+                    {
+                      "id": 1,
+                      "name": "Taco Truck",
+                      "address": "444 Salsa Street, Burritoville",
+                      "type": "Mexican",
+                      "rating": 9,
+                      "_geo": {
+                        "lat": 34.0522,
+                        "lng": -118.2437
+                      }
+                    },
+                    {
+                      "id": 2,
+                      "name": "La Bella Italia",
+                      "address": "456 Elm Street, Townsville",
+                      "type": "Italian",
+                      "rating": 9,
+                      "_geo": {
+                        "lat": "45.4777599",
+                        "lng": "9.1967508"
+                      }
+                    }
+                  ],
+                  "query": "",
+                  "processingTimeMs": "[time]",
+                  "limit": 20,
+                  "offset": 0,
+                  "estimatedTotalHits": 2
+                }
+                "###);
+            },
+        )
+        .await;
+}
diff --git a/milli/src/index.rs b/milli/src/index.rs
index 0ddfcda94..23c0f5fae 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -1718,11 +1718,11 @@ pub(crate) mod tests {
             .unwrap();
         index
             .add_documents(documents!([
-                { "id": 0, "_geo": { "lat": 0, "lng": 0 } },
-                { "id": 1, "_geo": { "lat": 0, "lng": -175 } },
-                { "id": 2, "_geo": { "lat": 0, "lng": 175 } },
+                { "id": 0, "_geo": { "lat": "0", "lng": "0" } },
+                { "id": 1, "_geo": { "lat": 0, "lng": "-175" } },
+                { "id": 2, "_geo": { "lat": "0", "lng": 175 } },
                 { "id": 3, "_geo": { "lat": 85, "lng": 0 } },
-                { "id": 4, "_geo": { "lat": -85, "lng": 0 } },
+                { "id": 4, "_geo": { "lat": "-85", "lng": "0" } },
             ]))
             .unwrap();
 

From 4f4c669d50ccc62fe7476603def117c698d5df3a Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 8 Aug 2023 16:58:14 +0200
Subject: [PATCH 4/4] add back some dump snapshots that disappeared. it's
 completely unrelated to this PR

---
 ...dump__reader__test__import_dump_v1-10.snap | 24 ++++++++++++
 .../dump__reader__test__import_dump_v1-4.snap | 38 +++++++++++++++++++
 .../dump__reader__test__import_dump_v1-7.snap | 31 +++++++++++++++
 3 files changed, 93 insertions(+)
 create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap
 create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap
 create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap

diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap
new file mode 100644
index 000000000..92fc61d72
--- /dev/null
+++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap
@@ -0,0 +1,24 @@
+---
+source: dump/src/reader/mod.rs
+expression: spells.settings().unwrap()
+---
+{
+  "displayedAttributes": [
+    "*"
+  ],
+  "searchableAttributes": [
+    "*"
+  ],
+  "filterableAttributes": [],
+  "sortableAttributes": [],
+  "rankingRules": [
+    "typo",
+    "words",
+    "proximity",
+    "attribute",
+    "exactness"
+  ],
+  "stopWords": [],
+  "synonyms": {},
+  "distinctAttribute": null
+}
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap
new file mode 100644
index 000000000..b0b54c136
--- /dev/null
+++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap
@@ -0,0 +1,38 @@
+---
+source: dump/src/reader/mod.rs
+expression: products.settings().unwrap()
+---
+{
+  "displayedAttributes": [
+    "*"
+  ],
+  "searchableAttributes": [
+    "*"
+  ],
+  "filterableAttributes": [],
+  "sortableAttributes": [],
+  "rankingRules": [
+    "typo",
+    "words",
+    "proximity",
+    "attribute",
+    "exactness"
+  ],
+  "stopWords": [],
+  "synonyms": {
+    "android": [
+      "phone",
+      "smartphone"
+    ],
+    "iphone": [
+      "phone",
+      "smartphone"
+    ],
+    "phone": [
+      "android",
+      "iphone",
+      "smartphone"
+    ]
+  },
+  "distinctAttribute": null
+}
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap
new file mode 100644
index 000000000..5c12a0438
--- /dev/null
+++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap
@@ -0,0 +1,31 @@
+---
+source: dump/src/reader/mod.rs
+expression: movies.settings().unwrap()
+---
+{
+  "displayedAttributes": [
+    "*"
+  ],
+  "searchableAttributes": [
+    "*"
+  ],
+  "filterableAttributes": [
+    "genres",
+    "id"
+  ],
+  "sortableAttributes": [
+    "genres",
+    "id"
+  ],
+  "rankingRules": [
+    "typo",
+    "words",
+    "proximity",
+    "attribute",
+    "exactness",
+    "release_date:asc"
+  ],
+  "stopWords": [],
+  "synonyms": {},
+  "distinctAttribute": null
+}