mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-15 09:28:55 +01:00
Compute the field distribution and convert _geo into an f64s
This commit is contained in:
parent
e627e182ce
commit
8e5b1a3ec1
@ -1664,7 +1664,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "The `_geo` field in the document with the id: `11` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
|
||||
"message": "The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -1701,7 +1701,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find latitude nor longitude in the document with the id: `11`. Was expecting `_geo.lat` and `_geo.lng` fields.",
|
||||
"message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -1738,7 +1738,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find latitude nor longitude in the document with the id: `11`. Was expecting `_geo.lat` and `_geo.lng` fields.",
|
||||
"message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -1775,7 +1775,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.",
|
||||
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -1812,7 +1812,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.",
|
||||
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -1849,7 +1849,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.",
|
||||
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -1886,7 +1886,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.",
|
||||
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -1923,7 +1923,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude nor longitude in the document with the id: `11`. Was expecting finite numbers but instead got `false` and `true`.",
|
||||
"message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -1960,7 +1960,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.",
|
||||
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -1997,7 +1997,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.",
|
||||
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -2034,7 +2034,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude nor longitude in the document with the id: `11`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
|
||||
"message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -2071,7 +2071,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "The `_geo` field in the document with the id: `11` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
|
||||
"message": "The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -2109,7 +2109,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse longitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
|
||||
"message": "Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -2145,7 +2145,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
|
||||
"message": "Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
@ -2181,7 +2181,7 @@ async fn add_documents_invalid_geo_field() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "Could not parse latitude nor longitude in the document with the id: `13`. Was expecting finite numbers but instead got `null` and `null`.",
|
||||
"message": "Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.",
|
||||
"code": "invalid_document_geo_field",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
|
||||
|
@ -70,8 +70,8 @@ async fn geo_bounding_box_with_string_and_number() {
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.update_settings_filterable_attributes(json!(["_geo"])).await;
|
||||
index.update_settings_sortable_attributes(json!(["_geo"])).await;
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(2).await;
|
||||
let (ret, _code) = index.add_documents(documents, None).await;
|
||||
index.wait_task(ret.uid()).await.succeeded();
|
||||
|
||||
index
|
||||
.search(
|
||||
|
@ -354,6 +354,8 @@ where
|
||||
|
||||
if let Some(geo_value) = document.geo_field()? {
|
||||
let fid = fields_ids_map.id_or_insert("_geo").ok_or(UserError::AttributeLimitReached)?;
|
||||
fields_ids_map.id_or_insert("_geo.lat").ok_or(UserError::AttributeLimitReached)?;
|
||||
fields_ids_map.id_or_insert("_geo.lng").ok_or(UserError::AttributeLimitReached)?;
|
||||
unordered_field_buffer.push((fid, geo_value));
|
||||
}
|
||||
|
||||
|
@ -58,7 +58,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
|
||||
context.index,
|
||||
&context.db_fields_ids_map,
|
||||
)?;
|
||||
for res in content.iter_top_level_fields() {
|
||||
let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
|
||||
for res in content.iter_top_level_fields().chain(geo_iter) {
|
||||
let (f, _) = res?;
|
||||
let entry = document_extractor_data
|
||||
.field_distribution_delta
|
||||
@ -73,7 +74,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
|
||||
let docid = update.docid();
|
||||
let content =
|
||||
update.current(&context.rtxn, context.index, &context.db_fields_ids_map)?;
|
||||
for res in content.iter_top_level_fields() {
|
||||
let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
|
||||
for res in content.iter_top_level_fields().chain(geo_iter) {
|
||||
let (f, _) = res?;
|
||||
let entry = document_extractor_data
|
||||
.field_distribution_delta
|
||||
@ -82,7 +84,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
|
||||
*entry -= 1;
|
||||
}
|
||||
let content = update.updated();
|
||||
for res in content.iter_top_level_fields() {
|
||||
let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
|
||||
for res in content.iter_top_level_fields().chain(geo_iter) {
|
||||
let (f, _) = res?;
|
||||
let entry = document_extractor_data
|
||||
.field_distribution_delta
|
||||
@ -111,7 +114,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
|
||||
DocumentChange::Insertion(insertion) => {
|
||||
let docid = insertion.docid();
|
||||
let content = insertion.inserted();
|
||||
for res in content.iter_top_level_fields() {
|
||||
let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
|
||||
for res in content.iter_top_level_fields().chain(geo_iter) {
|
||||
let (f, _) = res?;
|
||||
let entry = document_extractor_data
|
||||
.field_distribution_delta
|
||||
|
@ -1,18 +1,16 @@
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashSet;
|
||||
use std::mem::size_of;
|
||||
use std::ops::DerefMut as _;
|
||||
|
||||
use bumpalo::collections::Vec as BVec;
|
||||
use bumpalo::Bump;
|
||||
use hashbrown::HashMap;
|
||||
use heed::{BytesDecode, RoTxn};
|
||||
use heed::RoTxn;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::super::cache::BalancedCaches;
|
||||
use super::facet_document::extract_document_facets;
|
||||
use super::FacetKind;
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::update::new::channel::FieldIdDocidFacetSender;
|
||||
@ -80,6 +78,7 @@ impl FacetedDocidsExtractor {
|
||||
DocumentChange::Deletion(inner) => extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||
inner.external_document_id(),
|
||||
new_fields_ids_map.deref_mut(),
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
@ -98,6 +97,7 @@ impl FacetedDocidsExtractor {
|
||||
extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||
inner.external_document_id(),
|
||||
new_fields_ids_map.deref_mut(),
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
@ -116,6 +116,7 @@ impl FacetedDocidsExtractor {
|
||||
extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
||||
inner.external_document_id(),
|
||||
new_fields_ids_map.deref_mut(),
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
@ -134,6 +135,7 @@ impl FacetedDocidsExtractor {
|
||||
DocumentChange::Insertion(inner) => extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.inserted(),
|
||||
inner.external_document_id(),
|
||||
new_fields_ids_map.deref_mut(),
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
|
@ -1,17 +1,18 @@
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::update::new::document::Document;
|
||||
use crate::update::new::extract::geo::extract_geo_coordinates;
|
||||
use crate::update::new::extract::perm_json_p;
|
||||
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
|
||||
|
||||
pub fn extract_document_facets<'doc>(
|
||||
attributes_to_extract: &[&str],
|
||||
document: impl Document<'doc>,
|
||||
external_document_id: &str,
|
||||
field_id_map: &mut GlobalFieldsIdsMap,
|
||||
facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
let geo = document.geo_field().transpose().map(|res| res.map(|rval| ("_geo", rval)));
|
||||
for res in document.iter_top_level_fields().chain(geo) {
|
||||
for res in document.iter_top_level_fields() {
|
||||
let (field_name, value) = res?;
|
||||
|
||||
let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) {
|
||||
@ -42,5 +43,19 @@ pub fn extract_document_facets<'doc>(
|
||||
}
|
||||
}
|
||||
|
||||
if attributes_to_extract.contains(&"_geo") {
|
||||
if let Some(geo_value) = document.geo_field()? {
|
||||
if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? {
|
||||
let (lat_fid, lng_fid) = field_id_map
|
||||
.id_or_insert("_geo.lat")
|
||||
.zip(field_id_map.id_or_insert("_geo.lng"))
|
||||
.ok_or(UserError::AttributeLimitReached)?;
|
||||
|
||||
facet_fn(lat_fid, &lat.into())?;
|
||||
facet_fn(lng_fid, &lng.into())?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Write as _};
|
||||
use std::{iter, mem, result};
|
||||
|
||||
use bumpalo::Bump;
|
||||
use bytemuck::{bytes_of, from_bytes, pod_read_unaligned, Pod, Zeroable};
|
||||
use bytemuck::{bytes_of, pod_read_unaligned, Pod, Zeroable};
|
||||
use heed::RoTxn;
|
||||
use serde_json::value::RawValue;
|
||||
use serde_json::Value;
|
||||
@ -15,7 +15,7 @@ use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extra
|
||||
use crate::update::new::ref_cell_ext::RefCellExt as _;
|
||||
use crate::update::new::DocumentChange;
|
||||
use crate::update::GrenadParameters;
|
||||
use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Object, Result};
|
||||
use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Result};
|
||||
|
||||
pub struct GeoExtractor {
|
||||
grenad_parameters: GrenadParameters,
|
||||
@ -244,7 +244,10 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
|
||||
/// Extracts and validate the latitude and latitude from a document geo field.
|
||||
///
|
||||
/// It can be of the form `{ "lat": 0.0, "lng": "1.0" }`.
|
||||
fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result<Option<[f64; 2]>> {
|
||||
pub fn extract_geo_coordinates(
|
||||
external_id: &str,
|
||||
raw_value: &RawValue,
|
||||
) -> Result<Option<[f64; 2]>> {
|
||||
let mut geo = match serde_json::from_str(raw_value.get()).map_err(InternalError::SerdeJson)? {
|
||||
Value::Null => return Ok(None),
|
||||
Value::Object(map) => map,
|
||||
@ -256,12 +259,22 @@ fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result<Op
|
||||
};
|
||||
|
||||
let [lat, lng] = match (geo.remove("lat"), geo.remove("lng")) {
|
||||
(Some(lat), Some(lng)) => [lat, lng],
|
||||
(Some(lat), Some(lng)) => {
|
||||
if geo.is_empty() {
|
||||
[lat, lng]
|
||||
} else {
|
||||
return Err(GeoError::UnexpectedExtraFields {
|
||||
document_id: Value::from(external_id),
|
||||
value: Value::from(geo),
|
||||
}
|
||||
.into());
|
||||
}
|
||||
}
|
||||
(Some(_), None) => {
|
||||
return Err(GeoError::MissingLatitude { document_id: Value::from(external_id) }.into())
|
||||
return Err(GeoError::MissingLongitude { document_id: Value::from(external_id) }.into())
|
||||
}
|
||||
(None, Some(_)) => {
|
||||
return Err(GeoError::MissingLongitude { document_id: Value::from(external_id) }.into())
|
||||
return Err(GeoError::MissingLatitude { document_id: Value::from(external_id) }.into())
|
||||
}
|
||||
(None, None) => {
|
||||
return Err(GeoError::MissingLatitudeAndLongitude {
|
||||
@ -271,13 +284,21 @@ fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result<Op
|
||||
}
|
||||
};
|
||||
|
||||
let lat = extract_finite_float_from_value(lat)
|
||||
.map_err(|value| GeoError::BadLatitude { document_id: Value::from(external_id), value })?;
|
||||
|
||||
let lng = extract_finite_float_from_value(lng)
|
||||
.map_err(|value| GeoError::BadLongitude { document_id: Value::from(external_id), value })?;
|
||||
|
||||
Ok(Some([lat, lng]))
|
||||
match (extract_finite_float_from_value(lat), extract_finite_float_from_value(lng)) {
|
||||
(Ok(lat), Ok(lng)) => Ok(Some([lat, lng])),
|
||||
(Ok(_), Err(value)) => {
|
||||
Err(GeoError::BadLongitude { document_id: Value::from(external_id), value }.into())
|
||||
}
|
||||
(Err(value), Ok(_)) => {
|
||||
Err(GeoError::BadLatitude { document_id: Value::from(external_id), value }.into())
|
||||
}
|
||||
(Err(lat), Err(lng)) => Err(GeoError::BadLatitudeAndLongitude {
|
||||
document_id: Value::from(external_id),
|
||||
lat,
|
||||
lng,
|
||||
}
|
||||
.into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts and validate that a serde JSON Value is actually a finite f64.
|
||||
|
@ -419,6 +419,6 @@ impl WordDocidsExtractors {
|
||||
}
|
||||
|
||||
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
|
||||
Ok(vec![])
|
||||
Ok(vec!["_geo"])
|
||||
}
|
||||
}
|
||||
|
@ -25,7 +25,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
||||
}
|
||||
|
||||
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
|
||||
Ok(vec![])
|
||||
Ok(vec!["_geo"])
|
||||
}
|
||||
|
||||
// This method is reimplemented to count the number of words in the document in each field
|
||||
|
@ -50,7 +50,7 @@ where
|
||||
|
||||
let mut file = tempfile::tempfile()?;
|
||||
/// manage error
|
||||
bincode::serialize_into(&mut file, dbg!(&rtree)).unwrap();
|
||||
bincode::serialize_into(&mut file, &rtree).unwrap();
|
||||
file.sync_all()?;
|
||||
|
||||
let rtree_mmap = unsafe { Mmap::map(&file)? };
|
||||
|
Loading…
Reference in New Issue
Block a user