diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs index 285a4bdba..36be9b5b6 100644 --- a/milli/src/update/index_documents/extract/extract_geo_points.rs +++ b/milli/src/update/index_documents/extract/extract_geo_points.rs @@ -6,6 +6,7 @@ use serde_json::Value; use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; use crate::error::GeoError; +use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::index_documents::extract_finite_float_from_value; use crate::{FieldId, InternalError, Result}; @@ -14,6 +15,7 @@ use crate::{FieldId, InternalError, Result}; /// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude) #[logging_timer::time] pub fn extract_geo_points( + // TODO grenad::Reader>> obkv_documents: grenad::Reader, indexer: GrenadParameters, primary_key_id: FieldId, @@ -30,39 +32,72 @@ pub fn extract_geo_points( let mut cursor = obkv_documents.into_cursor()?; while let Some((docid_bytes, value)) = cursor.move_on_next()? { let obkv = obkv::KvReader::new(value); - // since we only needs the primary key when we throw an error we create this getter to - // lazily get it when needed + // since we only need the primary key when we throw an error + // we create this getter to lazily get it when needed let document_id = || -> Value { let document_id = obkv.get(primary_key_id).unwrap(); serde_json::from_slice(document_id).unwrap() }; + // HELP we will receive two DelAdds here, one for the lat and one for the lng + // what happens if there is a missing Del or Add for one of them? 
+ // first we get the two fields - let lat = obkv.get(lat_fid); - let lng = obkv.get(lng_fid); + match (obkv.get(lat_fid), obkv.get(lng_fid)) { + (Some(lat), Some(lng)) => { + let deladd_lat_obkv = KvReaderDelAdd::new(lat); + let deladd_lng_obkv = KvReaderDelAdd::new(lng); - if let Some((lat, lng)) = lat.zip(lng) { - // then we extract the values - let lat = extract_finite_float_from_value( - serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?, - ) - .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?; + // then we extract the values + let del_lat_lng = deladd_lat_obkv + .get(DelAdd::Deletion) + .zip(deladd_lng_obkv.get(DelAdd::Deletion)) + .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id)) + .transpose()?; + let add_lat_lng = deladd_lat_obkv + .get(DelAdd::Addition) + .zip(deladd_lng_obkv.get(DelAdd::Addition)) + .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id)) + .transpose()?; - let lng = extract_finite_float_from_value( - serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?, - ) - .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?; - - #[allow(clippy::drop_non_drop)] - let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]; - writer.insert(docid_bytes, bytes)?; - } else if lat.is_none() && lng.is_some() { - return Err(GeoError::MissingLatitude { document_id: document_id() })?; - } else if lat.is_some() && lng.is_none() { - return Err(GeoError::MissingLongitude { document_id: document_id() })?; + let mut obkv = KvWriterDelAdd::memory(); + if let Some([lat, lng]) = del_lat_lng { + #[allow(clippy::drop_non_drop)] + let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]; + obkv.insert(DelAdd::Deletion, bytes)?; + } + if let Some([lat, lng]) = add_lat_lng { + #[allow(clippy::drop_non_drop)] + let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]; + obkv.insert(DelAdd::Addition, bytes)?; + } + let bytes = 
obkv.into_inner()?; + writer.insert(docid_bytes, bytes)?; + } + (None, Some(_)) => { + return Err(GeoError::MissingLatitude { document_id: document_id() }.into()) + } + (Some(_), None) => { + return Err(GeoError::MissingLongitude { document_id: document_id() }.into()) + } + (None, None) => (), } - // else => the _geo object was `null`, there is nothing to do } writer_into_reader(writer) } + +/// Extract the finite floats lat and lng from two byte slices. +fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> { + let lat = extract_finite_float_from_value( + serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?, + ) + .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?; + + let lng = extract_finite_float_from_value( + serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?, + ) + .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?; + + Ok([lat, lng]) +}