diff --git a/crates/meilisearch/tests/documents/add_documents.rs b/crates/meilisearch/tests/documents/add_documents.rs index 8c9601e0f..17b1d6697 100644 --- a/crates/meilisearch/tests/documents/add_documents.rs +++ b/crates/meilisearch/tests/documents/add_documents.rs @@ -1664,7 +1664,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "The `_geo` field in the document with the id: `11` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.", + "message": "The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1701,7 +1701,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find latitude nor longitude in the document with the id: `11`. Was expecting `_geo.lat` and `_geo.lng` fields.", + "message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1738,7 +1738,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find latitude nor longitude in the document with the id: `11`. Was expecting `_geo.lat` and `_geo.lng` fields.", + "message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1775,7 +1775,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.", + "message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1812,7 +1812,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.", + "message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1849,7 +1849,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.", + "message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1886,7 +1886,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.", + "message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1923,7 +1923,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not parse latitude nor longitude in the document with the id: `11`. Was expecting finite numbers but instead got `false` and `true`.", + "message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1960,7 +1960,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.", + "message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1997,7 +1997,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.", + "message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2034,7 +2034,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not parse latitude nor longitude in the document with the id: `11`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.", + "message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2071,7 +2071,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "The `_geo` field in the document with the id: `11` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.", + "message": "The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2109,7 +2109,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not parse longitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.", + "message": "Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2145,7 +2145,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not parse latitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.", + "message": "Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2181,7 +2181,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not parse latitude nor longitude in the document with the id: `13`. Was expecting finite numbers but instead got `null` and `null`.", + "message": "Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" diff --git a/crates/meilisearch/tests/search/geo.rs b/crates/meilisearch/tests/search/geo.rs index 7804f1ad0..e92056191 100644 --- a/crates/meilisearch/tests/search/geo.rs +++ b/crates/meilisearch/tests/search/geo.rs @@ -70,8 +70,8 @@ async fn geo_bounding_box_with_string_and_number() { let documents = DOCUMENTS.clone(); index.update_settings_filterable_attributes(json!(["_geo"])).await; index.update_settings_sortable_attributes(json!(["_geo"])).await; - index.add_documents(documents, None).await; - index.wait_task(2).await; + let (ret, _code) = index.add_documents(documents, None).await; + index.wait_task(ret.uid()).await.succeeded(); index .search( diff --git a/crates/milli/src/update/new/document.rs b/crates/milli/src/update/new/document.rs index 8d4e3b0a9..ddf508ad7 100644 --- a/crates/milli/src/update/new/document.rs +++ b/crates/milli/src/update/new/document.rs @@ -354,6 +354,8 @@ where if let Some(geo_value) = document.geo_field()? { let fid = fields_ids_map.id_or_insert("_geo").ok_or(UserError::AttributeLimitReached)?; + fields_ids_map.id_or_insert("_geo.lat").ok_or(UserError::AttributeLimitReached)?; + fields_ids_map.id_or_insert("_geo.lng").ok_or(UserError::AttributeLimitReached)?; unordered_field_buffer.push((fid, geo_value)); } diff --git a/crates/milli/src/update/new/extract/documents.rs b/crates/milli/src/update/new/extract/documents.rs index b76fe207a..42fce3c3d 100644 --- a/crates/milli/src/update/new/extract/documents.rs +++ b/crates/milli/src/update/new/extract/documents.rs @@ -58,7 +58,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> { context.index, &context.db_fields_ids_map, )?; - for res in content.iter_top_level_fields() { + let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv))); + for res in content.iter_top_level_fields().chain(geo_iter) { let (f, _) = res?; let entry = document_extractor_data .field_distribution_delta @@ -73,7 +74,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> { let docid = update.docid(); let content = update.current(&context.rtxn, context.index, &context.db_fields_ids_map)?; - for res in content.iter_top_level_fields() { + let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv))); + for res in content.iter_top_level_fields().chain(geo_iter) { let (f, _) = res?; let entry = document_extractor_data .field_distribution_delta @@ -82,7 +84,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> { *entry -= 1; } let content = update.updated(); - for res in content.iter_top_level_fields() { + let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv))); + for res in content.iter_top_level_fields().chain(geo_iter) { let (f, _) = res?; let entry = document_extractor_data .field_distribution_delta @@ -111,7 +114,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> { DocumentChange::Insertion(insertion) => { let docid = insertion.docid(); let content = insertion.inserted(); - for res in content.iter_top_level_fields() { + let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv))); + for res in content.iter_top_level_fields().chain(geo_iter) { let (f, _) = res?; let entry = document_extractor_data .field_distribution_delta diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 89223bc55..19e908612 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -1,18 +1,16 @@ use std::cell::RefCell; use std::collections::HashSet; -use std::mem::size_of; use std::ops::DerefMut as _; use bumpalo::collections::Vec as BVec; use bumpalo::Bump; use hashbrown::HashMap; -use heed::{BytesDecode, RoTxn}; +use heed::RoTxn; use serde_json::Value; use super::super::cache::BalancedCaches; use super::facet_document::extract_document_facets; use super::FacetKind; -use crate::facet::value_encoding::f64_into_bytes; use crate::heed_codec::facet::OrderedF64Codec; use crate::update::del_add::DelAdd; use crate::update::new::channel::FieldIdDocidFacetSender; @@ -80,6 +78,7 @@ impl FacetedDocidsExtractor { DocumentChange::Deletion(inner) => extract_document_facets( attributes_to_extract, inner.current(rtxn, index, context.db_fields_ids_map)?, + inner.external_document_id(), new_fields_ids_map.deref_mut(), &mut |fid, value| { Self::facet_fn_with_options( @@ -98,6 +97,7 @@ impl FacetedDocidsExtractor { extract_document_facets( attributes_to_extract, inner.current(rtxn, index, context.db_fields_ids_map)?, + inner.external_document_id(), new_fields_ids_map.deref_mut(), &mut |fid, value| { Self::facet_fn_with_options( @@ -116,6 +116,7 @@ impl FacetedDocidsExtractor { extract_document_facets( attributes_to_extract, inner.merged(rtxn, index, context.db_fields_ids_map)?, + inner.external_document_id(), new_fields_ids_map.deref_mut(), &mut |fid, value| { Self::facet_fn_with_options( @@ -134,6 +135,7 @@ impl FacetedDocidsExtractor { DocumentChange::Insertion(inner) => extract_document_facets( attributes_to_extract, inner.inserted(), + inner.external_document_id(), new_fields_ids_map.deref_mut(), &mut |fid, value| { Self::facet_fn_with_options( diff --git a/crates/milli/src/update/new/extract/faceted/facet_document.rs b/crates/milli/src/update/new/extract/faceted/facet_document.rs index 4308d0aa5..141af7fbe 100644 --- a/crates/milli/src/update/new/extract/faceted/facet_document.rs +++ b/crates/milli/src/update/new/extract/faceted/facet_document.rs @@ -1,17 +1,18 @@ use serde_json::Value; use crate::update::new::document::Document; +use crate::update::new::extract::geo::extract_geo_coordinates; use crate::update::new::extract::perm_json_p; use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError}; pub fn extract_document_facets<'doc>( attributes_to_extract: &[&str], document: impl Document<'doc>, + external_document_id: &str, field_id_map: &mut GlobalFieldsIdsMap, facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>, ) -> Result<()> { - let geo = document.geo_field().transpose().map(|res| res.map(|rval| ("_geo", rval))); - for res in document.iter_top_level_fields().chain(geo) { + for res in document.iter_top_level_fields() { let (field_name, value) = res?; let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) { @@ -42,5 +43,19 @@ pub fn extract_document_facets<'doc>( } } + if attributes_to_extract.contains(&"_geo") { + if let Some(geo_value) = document.geo_field()? { + if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? { + let (lat_fid, lng_fid) = field_id_map + .id_or_insert("_geo.lat") + .zip(field_id_map.id_or_insert("_geo.lng")) + .ok_or(UserError::AttributeLimitReached)?; + + facet_fn(lat_fid, &lat.into())?; + facet_fn(lng_fid, &lng.into())?; + } + } + } + Ok(()) } diff --git a/crates/milli/src/update/new/extract/geo/mod.rs b/crates/milli/src/update/new/extract/geo/mod.rs index 180611eee..e26a7dc6c 100644 --- a/crates/milli/src/update/new/extract/geo/mod.rs +++ b/crates/milli/src/update/new/extract/geo/mod.rs @@ -4,7 +4,7 @@ use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Write as _}; use std::{iter, mem, result}; use bumpalo::Bump; -use bytemuck::{bytes_of, from_bytes, pod_read_unaligned, Pod, Zeroable}; +use bytemuck::{bytes_of, pod_read_unaligned, Pod, Zeroable}; use heed::RoTxn; use serde_json::value::RawValue; use serde_json::Value; @@ -15,7 +15,7 @@ use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extra use crate::update::new::ref_cell_ext::RefCellExt as _; use crate::update::new::DocumentChange; use crate::update::GrenadParameters; -use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Object, Result}; +use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Result}; pub struct GeoExtractor { grenad_parameters: GrenadParameters, @@ -244,7 +244,10 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor { /// Extracts and validate the latitude and latitude from a document geo field. /// /// It can be of the form `{ "lat": 0.0, "lng": "1.0" }`. -fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result> { +pub fn extract_geo_coordinates( + external_id: &str, + raw_value: &RawValue, +) -> Result> { let mut geo = match serde_json::from_str(raw_value.get()).map_err(InternalError::SerdeJson)? { Value::Null => return Ok(None), Value::Object(map) => map, @@ -256,12 +259,22 @@ fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result [lat, lng], + (Some(lat), Some(lng)) => { + if geo.is_empty() { + [lat, lng] + } else { + return Err(GeoError::UnexpectedExtraFields { + document_id: Value::from(external_id), + value: Value::from(geo), + } + .into()); + } + } (Some(_), None) => { - return Err(GeoError::MissingLatitude { document_id: Value::from(external_id) }.into()) + return Err(GeoError::MissingLongitude { document_id: Value::from(external_id) }.into()) } (None, Some(_)) => { - return Err(GeoError::MissingLongitude { document_id: Value::from(external_id) }.into()) + return Err(GeoError::MissingLatitude { document_id: Value::from(external_id) }.into()) } (None, None) => { return Err(GeoError::MissingLatitudeAndLongitude { @@ -271,13 +284,21 @@ fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result Ok(Some([lat, lng])), + (Ok(_), Err(value)) => { + Err(GeoError::BadLongitude { document_id: Value::from(external_id), value }.into()) + } + (Err(value), Ok(_)) => { + Err(GeoError::BadLatitude { document_id: Value::from(external_id), value }.into()) + } + (Err(lat), Err(lng)) => Err(GeoError::BadLatitudeAndLongitude { + document_id: Value::from(external_id), + lat, + lng, + } + .into()), + } } /// Extracts and validate that a serde JSON Value is actually a finite f64. diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs index 0223895e6..c67fc347a 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -419,6 +419,6 @@ impl WordDocidsExtractors { } fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result> { - Ok(vec![]) + Ok(vec!["_geo"]) } } diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs index f637cff49..bbc6365df 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs @@ -25,7 +25,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { } fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result> { - Ok(vec![]) + Ok(vec!["_geo"]) } // This method is reimplemented to count the number of words in the document in each field diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index c81f84f43..c0ff93901 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -50,7 +50,7 @@ where let mut file = tempfile::tempfile()?; /// manage error - bincode::serialize_into(&mut file, dbg!(&rtree)).unwrap(); + bincode::serialize_into(&mut file, &rtree).unwrap(); file.sync_all()?; let rtree_mmap = unsafe { Mmap::map(&file)? };