Merge #3986

3986: Fix geo bounding box with strings r=ManyTheFish a=irevoire # Pull Request When sending a document with one geofield of type string (i.e.: `{ "_geo": { "lat": 12, "lng": "13" }}`), the geobounding box would exclude this document. This PR fixes this issue by automatically parsing the string value in case we're working on a geofield. ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/3973 ## What does this PR do? - Automatically parse the facet value iif we're working on a geofield. - Make insta works with snapshots in loops or closure executed multiple times. (you may need to update your cli if it panics after this PR: `cargo install cargo-insta`). - Add one integration test in milli and in meilisearch to ensure it works forever. - Add three snapshots for the dump that mysteriously disappeared I don't know how Co-authored-by: Tamo <tamo@meilisearch.com>
2025-05-25 09:03:59 +02:00 · 2023-08-09 07:58:15 +00:00 · 2023-08-09 07:58:15 +00:00 · 44c1900f36
commit 44c1900f36
parent 04671d0751 4f4c669d50
8 changed files with 191 additions and 11 deletions
--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-10.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-10.snap
@ -0,0 +1,24 @@
 ---
 source: dump/src/reader/mod.rs
 expression: spells.settings().unwrap()
 ---
 {
  "displayedAttributes": [
    "*"
  ],
  "searchableAttributes": [
    "*"
  ],
  "filterableAttributes": [],
  "sortableAttributes": [],
  "rankingRules": [
    "typo",
    "words",
    "proximity",
    "attribute",
    "exactness"
  ],
  "stopWords": [],
  "synonyms": {},
  "distinctAttribute": null
 }
--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-4.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-4.snap
@ -0,0 +1,38 @@
 ---
 source: dump/src/reader/mod.rs
 expression: products.settings().unwrap()
 ---
 {
  "displayedAttributes": [
    "*"
  ],
  "searchableAttributes": [
    "*"
  ],
  "filterableAttributes": [],
  "sortableAttributes": [],
  "rankingRules": [
    "typo",
    "words",
    "proximity",
    "attribute",
    "exactness"
  ],
  "stopWords": [],
  "synonyms": {
    "android": [
      "phone",
      "smartphone"
    ],
    "iphone": [
      "phone",
      "smartphone"
    ],
    "phone": [
      "android",
      "iphone",
      "smartphone"
    ]
  },
  "distinctAttribute": null
 }
--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-7.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-7.snap
@ -0,0 +1,31 @@
 ---
 source: dump/src/reader/mod.rs
 expression: movies.settings().unwrap()
 ---
 {
  "displayedAttributes": [
    "*"
  ],
  "searchableAttributes": [
    "*"
  ],
  "filterableAttributes": [
    "genres",
    "id"
  ],
  "sortableAttributes": [
    "genres",
    "id"
  ],
  "rankingRules": [
    "typo",
    "words",
    "proximity",
    "attribute",
    "exactness",
    "release_date:asc"
  ],
  "stopWords": [],
  "synonyms": {},
  "distinctAttribute": null
 }
--- a/meili-snap/src/lib.rs
+++ b/meili-snap/src/lib.rs
@ -167,7 +167,9 @@ macro_rules! snapshot {
        let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name));
        settings.bind(|| {
            let snap = format!("{}", $value);
-            meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
+            insta::allow_duplicates! {
                meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
            }
        });
    };
    ($value:expr, @$inline:literal) => {
@ -176,7 +178,9 @@ macro_rules! snapshot {
        let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument"));
        settings.bind(|| {
            let snap = format!("{}", $value);
-            meili_snap::insta::assert_snapshot!(snap, @$inline);
+            insta::allow_duplicates! {
                meili_snap::insta::assert_snapshot!(snap, @$inline);
            }
        });
    };
    ($value:expr) => {
@ -194,7 +198,9 @@ macro_rules! snapshot {
        let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None);
        settings.bind(|| {
            let snap = format!("{}", $value);
-            meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
+            insta::allow_duplicates! {
                meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
            }
        });
    };
 }
--- a/meilisearch/tests/search/geo.rs
+++ b/meilisearch/tests/search/geo.rs
@ -1,3 +1,4 @@
 use meili_snap::{json_string, snapshot};
 use once_cell::sync::Lazy;
 use serde_json::{json, Value};
@ -60,3 +61,59 @@ async fn geo_sort_with_geo_strings() {
        )
        .await;
 }
 #[actix_rt::test]
 async fn geo_bounding_box_with_string_and_number() {
    let server = Server::new().await;
    let index = server.index("test");
    let documents = DOCUMENTS.clone();
    index.update_settings_filterable_attributes(json!(["_geo"])).await;
    index.update_settings_sortable_attributes(json!(["_geo"])).await;
    index.add_documents(documents, None).await;
    index.wait_task(2).await;
    index
        .search(
            json!({
                "filter": "_geoBoundingBox([89, 179], [-89, -179])",
            }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
                {
                  "hits": [
                    {
                      "id": 1,
                      "name": "Taco Truck",
                      "address": "444 Salsa Street, Burritoville",
                      "type": "Mexican",
                      "rating": 9,
                      "_geo": {
                        "lat": 34.0522,
                        "lng": -118.2437
                      }
                    },
                    {
                      "id": 2,
                      "name": "La Bella Italia",
                      "address": "456 Elm Street, Townsville",
                      "type": "Italian",
                      "rating": 9,
                      "_geo": {
                        "lat": "45.4777599",
                        "lng": "9.1967508"
                      }
                    }
                  ],
                  "query": "",
                  "processingTimeMs": "[time]",
                  "limit": 20,
                  "offset": 0,
                  "estimatedTotalHits": 2
                }
                "###);
            },
        )
        .await;
 }
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@ -1718,11 +1718,11 @@ pub(crate) mod tests {
            .unwrap();
        index
            .add_documents(documents!([
-                { "id": 0, "_geo": { "lat": 0, "lng": 0 } },
+                { "id": 0, "_geo": { "lat": "0", "lng": "0" } },
-                { "id": 1, "_geo": { "lat": 0, "lng": -175 } },
+                { "id": 1, "_geo": { "lat": 0, "lng": "-175" } },
-                { "id": 2, "_geo": { "lat": 0, "lng": 175 } },
+                { "id": 2, "_geo": { "lat": "0", "lng": 175 } },
                { "id": 3, "_geo": { "lat": 85, "lng": 0 } },
-                { "id": 4, "_geo": { "lat": -85, "lng": 0 } },
+                { "id": 4, "_geo": { "lat": "-85", "lng": "0" } },
            ]))
            .unwrap();
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@ -28,11 +28,13 @@ pub struct ExtractedFacetValues {
 ///
 /// Returns the generated grenad reader containing the docid the fid and the orginal value as key
 /// and the normalized value as value extracted from the given chunk of documents.
 /// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
 #[logging_timer::time]
 pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
    obkv_documents: grenad::Reader<R>,
    indexer: GrenadParameters,
    faceted_fields: &HashSet<FieldId>,
    geo_fields_ids: Option<(FieldId, FieldId)>,
 ) -> Result<ExtractedFacetValues> {
    let max_memory = indexer.max_memory_by_thread();
@ -82,7 +84,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
                let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
-                match extract_facet_values(&value) {
+                match extract_facet_values(
                    &value,
                    geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
                ) {
                    FilterableValues::Null => {
                        facet_is_null_docids.entry(field_id).or_default().insert(document);
                    }
@ -175,12 +180,13 @@ enum FilterableValues {
    Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
 }
-fn extract_facet_values(value: &Value) -> FilterableValues {
+fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
    fn inner_extract_facet_values(
        value: &Value,
        can_recurse: bool,
        output_numbers: &mut Vec<f64>,
        output_strings: &mut Vec<(String, String)>,
        geo_field: bool,
    ) {
        match value {
            Value::Null => (),
@ -191,13 +197,30 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
                }
            }
            Value::String(original) => {
                // if we're working on a geofield it MUST be something we can parse or else there was an internal error
                // in the enrich pipeline. But since the enrich pipeline worked, we want to avoid crashing at all costs.
                if geo_field {
                    if let Ok(float) = original.parse() {
                        output_numbers.push(float);
                    } else {
                        log::warn!(
                            "Internal error, could not parse a geofield that has been validated. Please open an issue."
                        )
                    }
                }
                let normalized = crate::normalize_facet(original);
                output_strings.push((normalized, original.clone()));
            }
            Value::Array(values) => {
                if can_recurse {
                    for value in values {
-                        inner_extract_facet_values(value, false, output_numbers, output_strings);
+                        inner_extract_facet_values(
                            value,
                            false,
                            output_numbers,
                            output_strings,
                            geo_field,
                        );
                    }
                }
            }
@ -213,7 +236,7 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
        otherwise => {
            let mut numbers = Vec::new();
            let mut strings = Vec::new();
-            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
+            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings, geo_field);
            FilterableValues::Values { numbers, strings }
        }
    }
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@ -366,6 +366,7 @@ fn send_and_extract_flattened_documents_data(
                    flattened_documents_chunk.clone(),
                    indexer,
                    faceted_fields,
                    geo_fields_ids,
                )?;
                // send docid_fid_facet_numbers_chunk to DB writer