Compute the field distribution and convert _geo into an f64s

This commit is contained in:
Clément Renault 2024-11-13 14:15:42 +01:00
parent e627e182ce
commit 8e5b1a3ec1
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
10 changed files with 86 additions and 42 deletions

View File

@ -1664,7 +1664,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "The `_geo` field in the document with the id: `11` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
"message": "The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1701,7 +1701,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find latitude nor longitude in the document with the id: `11`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1738,7 +1738,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find latitude nor longitude in the document with the id: `11`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1775,7 +1775,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.",
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1812,7 +1812,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.",
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1849,7 +1849,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.",
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1886,7 +1886,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.",
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1923,7 +1923,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude nor longitude in the document with the id: `11`. Was expecting finite numbers but instead got `false` and `true`.",
"message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1960,7 +1960,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.",
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1997,7 +1997,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.",
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2034,7 +2034,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude nor longitude in the document with the id: `11`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
"message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2071,7 +2071,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "The `_geo` field in the document with the id: `11` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
"message": "The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2109,7 +2109,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not parse longitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"message": "Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2145,7 +2145,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"message": "Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2181,7 +2181,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude nor longitude in the document with the id: `13`. Was expecting finite numbers but instead got `null` and `null`.",
"message": "Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"

View File

@ -70,8 +70,8 @@ async fn geo_bounding_box_with_string_and_number() {
let documents = DOCUMENTS.clone();
index.update_settings_filterable_attributes(json!(["_geo"])).await;
index.update_settings_sortable_attributes(json!(["_geo"])).await;
index.add_documents(documents, None).await;
index.wait_task(2).await;
let (ret, _code) = index.add_documents(documents, None).await;
index.wait_task(ret.uid()).await.succeeded();
index
.search(

View File

@ -354,6 +354,8 @@ where
if let Some(geo_value) = document.geo_field()? {
let fid = fields_ids_map.id_or_insert("_geo").ok_or(UserError::AttributeLimitReached)?;
fields_ids_map.id_or_insert("_geo.lat").ok_or(UserError::AttributeLimitReached)?;
fields_ids_map.id_or_insert("_geo.lng").ok_or(UserError::AttributeLimitReached)?;
unordered_field_buffer.push((fid, geo_value));
}

View File

@ -58,7 +58,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
context.index,
&context.db_fields_ids_map,
)?;
for res in content.iter_top_level_fields() {
let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta
@ -73,7 +74,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
let docid = update.docid();
let content =
update.current(&context.rtxn, context.index, &context.db_fields_ids_map)?;
for res in content.iter_top_level_fields() {
let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta
@ -82,7 +84,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
*entry -= 1;
}
let content = update.updated();
for res in content.iter_top_level_fields() {
let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta
@ -111,7 +114,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
DocumentChange::Insertion(insertion) => {
let docid = insertion.docid();
let content = insertion.inserted();
for res in content.iter_top_level_fields() {
let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta

View File

@ -1,18 +1,16 @@
use std::cell::RefCell;
use std::collections::HashSet;
use std::mem::size_of;
use std::ops::DerefMut as _;
use bumpalo::collections::Vec as BVec;
use bumpalo::Bump;
use hashbrown::HashMap;
use heed::{BytesDecode, RoTxn};
use heed::RoTxn;
use serde_json::Value;
use super::super::cache::BalancedCaches;
use super::facet_document::extract_document_facets;
use super::FacetKind;
use crate::facet::value_encoding::f64_into_bytes;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
@ -80,6 +78,7 @@ impl FacetedDocidsExtractor {
DocumentChange::Deletion(inner) => extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
Self::facet_fn_with_options(
@ -98,6 +97,7 @@ impl FacetedDocidsExtractor {
extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
Self::facet_fn_with_options(
@ -116,6 +116,7 @@ impl FacetedDocidsExtractor {
extract_document_facets(
attributes_to_extract,
inner.merged(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
Self::facet_fn_with_options(
@ -134,6 +135,7 @@ impl FacetedDocidsExtractor {
DocumentChange::Insertion(inner) => extract_document_facets(
attributes_to_extract,
inner.inserted(),
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
Self::facet_fn_with_options(

View File

@ -1,17 +1,18 @@
use serde_json::Value;
use crate::update::new::document::Document;
use crate::update::new::extract::geo::extract_geo_coordinates;
use crate::update::new::extract::perm_json_p;
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
pub fn extract_document_facets<'doc>(
attributes_to_extract: &[&str],
document: impl Document<'doc>,
external_document_id: &str,
field_id_map: &mut GlobalFieldsIdsMap,
facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>,
) -> Result<()> {
let geo = document.geo_field().transpose().map(|res| res.map(|rval| ("_geo", rval)));
for res in document.iter_top_level_fields().chain(geo) {
for res in document.iter_top_level_fields() {
let (field_name, value) = res?;
let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) {
@ -42,5 +43,19 @@ pub fn extract_document_facets<'doc>(
}
}
if attributes_to_extract.contains(&"_geo") {
if let Some(geo_value) = document.geo_field()? {
if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? {
let (lat_fid, lng_fid) = field_id_map
.id_or_insert("_geo.lat")
.zip(field_id_map.id_or_insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
facet_fn(lat_fid, &lat.into())?;
facet_fn(lng_fid, &lng.into())?;
}
}
}
Ok(())
}

View File

@ -4,7 +4,7 @@ use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Write as _};
use std::{iter, mem, result};
use bumpalo::Bump;
use bytemuck::{bytes_of, from_bytes, pod_read_unaligned, Pod, Zeroable};
use bytemuck::{bytes_of, pod_read_unaligned, Pod, Zeroable};
use heed::RoTxn;
use serde_json::value::RawValue;
use serde_json::Value;
@ -15,7 +15,7 @@ use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extra
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Object, Result};
use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Result};
pub struct GeoExtractor {
grenad_parameters: GrenadParameters,
@ -244,7 +244,10 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
/// Extracts and validate the latitude and latitude from a document geo field.
///
/// It can be of the form `{ "lat": 0.0, "lng": "1.0" }`.
fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result<Option<[f64; 2]>> {
pub fn extract_geo_coordinates(
external_id: &str,
raw_value: &RawValue,
) -> Result<Option<[f64; 2]>> {
let mut geo = match serde_json::from_str(raw_value.get()).map_err(InternalError::SerdeJson)? {
Value::Null => return Ok(None),
Value::Object(map) => map,
@ -256,12 +259,22 @@ fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result<Op
};
let [lat, lng] = match (geo.remove("lat"), geo.remove("lng")) {
(Some(lat), Some(lng)) => [lat, lng],
(Some(lat), Some(lng)) => {
if geo.is_empty() {
[lat, lng]
} else {
return Err(GeoError::UnexpectedExtraFields {
document_id: Value::from(external_id),
value: Value::from(geo),
}
.into());
}
}
(Some(_), None) => {
return Err(GeoError::MissingLatitude { document_id: Value::from(external_id) }.into())
return Err(GeoError::MissingLongitude { document_id: Value::from(external_id) }.into())
}
(None, Some(_)) => {
return Err(GeoError::MissingLongitude { document_id: Value::from(external_id) }.into())
return Err(GeoError::MissingLatitude { document_id: Value::from(external_id) }.into())
}
(None, None) => {
return Err(GeoError::MissingLatitudeAndLongitude {
@ -271,13 +284,21 @@ fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result<Op
}
};
let lat = extract_finite_float_from_value(lat)
.map_err(|value| GeoError::BadLatitude { document_id: Value::from(external_id), value })?;
let lng = extract_finite_float_from_value(lng)
.map_err(|value| GeoError::BadLongitude { document_id: Value::from(external_id), value })?;
Ok(Some([lat, lng]))
match (extract_finite_float_from_value(lat), extract_finite_float_from_value(lng)) {
(Ok(lat), Ok(lng)) => Ok(Some([lat, lng])),
(Ok(_), Err(value)) => {
Err(GeoError::BadLongitude { document_id: Value::from(external_id), value }.into())
}
(Err(value), Ok(_)) => {
Err(GeoError::BadLatitude { document_id: Value::from(external_id), value }.into())
}
(Err(lat), Err(lng)) => Err(GeoError::BadLatitudeAndLongitude {
document_id: Value::from(external_id),
lat,
lng,
}
.into()),
}
}
/// Extracts and validate that a serde JSON Value is actually a finite f64.

View File

@ -419,6 +419,6 @@ impl WordDocidsExtractors {
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(vec![])
Ok(vec!["_geo"])
}
}

View File

@ -25,7 +25,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(vec![])
Ok(vec!["_geo"])
}
// This method is reimplemented to count the number of words in the document in each field

View File

@ -50,7 +50,7 @@ where
let mut file = tempfile::tempfile()?;
/// manage error
bincode::serialize_into(&mut file, dbg!(&rtree)).unwrap();
bincode::serialize_into(&mut file, &rtree).unwrap();
file.sync_all()?;
let rtree_mmap = unsafe { Mmap::map(&file)? };