5407: Geo update bug r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #5380
Fixes #5399



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-03-17 10:24:33 +00:00 committed by GitHub
commit 13a88d6131
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 62 additions and 36 deletions

View File

@ -1897,11 +1897,11 @@ async fn update_documents_with_geo_field() {
}, },
{ {
"id": "3", "id": "3",
"_geo": { "lat": 1, "lng": 1 }, "_geo": { "lat": 3, "lng": 0 },
}, },
{ {
"id": "4", "id": "4",
"_geo": { "lat": "1", "lng": "1" }, "_geo": { "lat": "4", "lng": "0" },
}, },
]); ]);
@ -1928,9 +1928,7 @@ async fn update_documents_with_geo_field() {
} }
"###); "###);
let (response, code) = index let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await;
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
// we are expecting docs 4 and 3 first as they have geo // we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@ -1940,18 +1938,18 @@ async fn update_documents_with_geo_field() {
{ {
"id": "4", "id": "4",
"_geo": { "_geo": {
"lat": "1", "lat": "4",
"lng": "1" "lng": "0"
}, },
"_geoDistance": 5522018 "_geoDistance": 667170
}, },
{ {
"id": "3", "id": "3",
"_geo": { "_geo": {
"lat": 1, "lat": 3,
"lng": 1 "lng": 0
}, },
"_geoDistance": 5522018 "_geoDistance": 778364
}, },
{ {
"id": "1" "id": "1"
@ -1969,10 +1967,13 @@ async fn update_documents_with_geo_field() {
} }
"###); "###);
let updated_documents = json!([{ let updated_documents = json!([
{
"id": "3", "id": "3",
"doggo": "kefir", "doggo": "kefir",
}]); "_geo": { "lat": 5, "lng": 0 },
}
]);
let (task, _status_code) = index.update_documents(updated_documents, None).await; let (task, _status_code) = index.update_documents(updated_documents, None).await;
let response = index.wait_task(task.uid()).await; let response = index.wait_task(task.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@ -2012,16 +2013,16 @@ async fn update_documents_with_geo_field() {
{ {
"id": "3", "id": "3",
"_geo": { "_geo": {
"lat": 1, "lat": 5,
"lng": 1 "lng": 0
}, },
"doggo": "kefir" "doggo": "kefir"
}, },
{ {
"id": "4", "id": "4",
"_geo": { "_geo": {
"lat": "1", "lat": "4",
"lng": "1" "lng": "0"
} }
} }
], ],
@ -2031,31 +2032,29 @@ async fn update_documents_with_geo_field() {
} }
"###); "###);
let (response, code) = index let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await;
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
// the search response should not have changed: we are expecting docs 4 and 3 first as they have geo // the search response should not have changed: we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###" @r###"
{ {
"hits": [ "hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{ {
"id": "3", "id": "3",
"_geo": { "_geo": {
"lat": 1, "lat": 5,
"lng": 1 "lng": 0
}, },
"doggo": "kefir", "doggo": "kefir",
"_geoDistance": 5522018 "_geoDistance": 555975
},
{
"id": "4",
"_geo": {
"lat": "4",
"lng": "0"
},
"_geoDistance": 667170
}, },
{ {
"id": "1" "id": "1"

View File

@ -1,5 +1,6 @@
use bumpalo::Bump; use bumpalo::Bump;
use heed::RoTxn; use heed::RoTxn;
use serde_json::Value;
use super::document::{ use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions, Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
@ -10,7 +11,7 @@ use super::vector_document::{
use crate::attribute_patterns::PatternMatch; use crate::attribute_patterns::PatternMatch;
use crate::documents::FieldIdMapper; use crate::documents::FieldIdMapper;
use crate::vector::EmbeddingConfigs; use crate::vector::EmbeddingConfigs;
use crate::{DocumentId, Index, Result}; use crate::{DocumentId, Index, InternalError, Result};
pub enum DocumentChange<'doc> { pub enum DocumentChange<'doc> {
Deletion(Deletion<'doc>), Deletion(Deletion<'doc>),
@ -243,6 +244,29 @@ impl<'doc> Update<'doc> {
Ok(has_deleted_fields) Ok(has_deleted_fields)
} }
/// Returns `true` if the geo fields have changed.
pub fn has_changed_for_geo_fields<'t, Mapper: FieldIdMapper>(
&self,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
) -> Result<bool> {
let current = self.current(rtxn, index, mapper)?;
let current_geo = current.geo_field()?;
let updated_geo = self.only_changed_fields().geo_field()?;
match (current_geo, updated_geo) {
(Some(current_geo), Some(updated_geo)) => {
let current: Value =
serde_json::from_str(current_geo.get()).map_err(InternalError::SerdeJson)?;
let updated: Value =
serde_json::from_str(updated_geo.get()).map_err(InternalError::SerdeJson)?;
Ok(current != updated)
}
(None, None) => Ok(false),
_ => Ok(true),
}
}
pub fn only_changed_vectors( pub fn only_changed_vectors(
&self, &self,
doc_alloc: &'doc Bump, doc_alloc: &'doc Bump,

View File

@ -117,7 +117,7 @@ impl FacetedDocidsExtractor {
}, },
), ),
DocumentChange::Update(inner) => { DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields( let has_changed = inner.has_changed_for_fields(
&mut |field_name| { &mut |field_name| {
match_faceted_field( match_faceted_field(
field_name, field_name,
@ -130,7 +130,10 @@ impl FacetedDocidsExtractor {
rtxn, rtxn,
index, index,
context.db_fields_ids_map, context.db_fields_ids_map,
)? { )?;
let has_changed_for_geo_fields =
inner.has_changed_for_geo_fields(rtxn, index, context.db_fields_ids_map)?;
if !has_changed && !has_changed_for_geo_fields {
return Ok(()); return Ok(());
} }