diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 89f3dcab2..5dbb0c326 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -695,7 +695,7 @@ async fn main() -> anyhow::Result<()> { struct QueryBody { query: Option, filters: Option, - sorters: Option, + sort: Option, facet_filters: Option, String>>>, facet_distribution: Option, limit: Option, @@ -755,7 +755,7 @@ async fn main() -> anyhow::Result<()> { search.limit(limit); } - if let Some(sort) = query.sorters { + if let Some(sort) = query.sort { search.sort_criteria(vec![sort.parse().unwrap()]); } diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index e05829eb4..b95080b4b 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -84,12 +84,16 @@ impl FromStr for Member { text.strip_prefix("_geoPoint(") .and_then(|point| point.strip_suffix(")")) .ok_or_else(|| UserError::InvalidRankingRuleName { name: text.to_string() })?; - let point = point - .split(',') - .map(|el| el.trim().parse()) - .collect::, _>>() - .map_err(|_| UserError::InvalidRankingRuleName { name: text.to_string() })?; - Ok(Member::Geo([point[0], point[1]])) + let (lat, long) = point + .split_once(',') + .ok_or_else(|| UserError::InvalidRankingRuleName { name: text.to_string() }) + .and_then(|(lat, long)| { + lat.trim() + .parse() + .and_then(|lat| long.trim().parse().map(|long| (lat, long))) + .map_err(|_| UserError::InvalidRankingRuleName { name: text.to_string() }) + })?; + Ok(Member::Geo([lat, long])) } else { Ok(Member::Field(text.to_string())) } @@ -99,7 +103,7 @@ impl FromStr for Member { impl fmt::Display for Member { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Member::Field(name) => write!(f, "{}", name), + Member::Field(name) => f.write_str(name), Member::Geo([lat, lng]) => write!(f, "_geoPoint({}, {})", lat, lng), } } diff --git a/milli/src/error.rs b/milli/src/error.rs index 157fe4be9..21b77b5a7 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -12,10 +12,8 @@ use crate::{DocumentId, FieldId}; pub type Object = Map; -const RESERVED_KEYWORD: &[&'static str] = &["_geo", "_geoDistance"]; - pub fn is_reserved_keyword(keyword: &str) -> bool { - RESERVED_KEYWORD.contains(&keyword) + ["_geo", "_geoDistance"].contains(&keyword) } #[derive(Debug)] diff --git a/milli/src/index.rs b/milli/src/index.rs index f2ddba699..4ab19f175 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -299,6 +299,7 @@ impl Index { /* geo rtree */ + /// Writes the provided `rtree` which associates coordinates to documents ids. pub(crate) fn put_geo_rtree( &self, wtxn: &mut RwTxn, @@ -307,10 +308,12 @@ impl Index { self.main.put::<_, Str, SerdeBincode>>(wtxn, main_key::GEO_RTREE_KEY, rtree) } + /// Delete the `rtree` which associates coordinates to documents ids. pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result { self.main.delete::<_, Str>(wtxn, main_key::GEO_RTREE_KEY) } + /// Returns the `rtree` which associates coordinates to documents ids. pub fn geo_rtree<'t>(&self, rtxn: &'t RoTxn) -> Result>> { match self .main @@ -323,7 +326,7 @@ impl Index { /* geo faceted */ - /// Writes the documents ids that are faceted with a _geo field + /// Writes the documents ids that are faceted with a _geo field. pub(crate) fn put_geo_faceted_documents_ids( &self, wtxn: &mut RwTxn, @@ -336,16 +339,12 @@ impl Index { ) } - /// Delete the documents ids that are faceted with a _geo field - pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<()> { - self.main.put::<_, Str, RoaringBitmapCodec>( - wtxn, - main_key::GEO_FACETED_DOCUMENTS_IDS_KEY, - &RoaringBitmap::new(), - ) + /// Delete the documents ids that are faceted with a _geo field. + pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result { + self.main.delete::<_, Str>(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY) } - /// Retrieve all the documents ids that faceted with a _geo field + /// Retrieve all the documents ids that faceted with a _geo field. pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result { match self .main diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 3c7713308..4f066365c 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -142,7 +142,7 @@ where Some((head, tail)) } -/// Return the distance between two points in meters +/// Return the distance between two points in meters. fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 { let a = haversine::Location { latitude: a[0], longitude: a[1] }; let b = haversine::Location { latitude: b[0], longitude: b[1] }; diff --git a/milli/src/search/criteria/geo.rs b/milli/src/search/criteria/geo.rs index 6f8f1406a..78f741b57 100644 --- a/milli/src/search/criteria/geo.rs +++ b/milli/src/search/criteria/geo.rs @@ -1,3 +1,5 @@ +use std::iter; + use roaring::RoaringBitmap; use rstar::RTree; @@ -23,7 +25,7 @@ impl<'t> Geo<'t> { parent: Box, point: [f64; 2], ) -> Result { - let candidates = Box::new(std::iter::empty()); + let candidates = Box::new(iter::empty()); let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?; let bucket_candidates = RoaringBitmap::new(); let rtree = index.geo_rtree(rtxn)?; @@ -41,7 +43,7 @@ impl<'t> Geo<'t> { } } -impl<'t> Criterion for Geo<'t> { +impl Criterion for Geo<'_> { fn next(&mut self, params: &mut CriterionParameters) -> Result> { // if there is no rtree we have nothing to returns let rtree = match self.rtree.as_ref() { @@ -108,7 +110,7 @@ fn geo_point( let results = rtree .nearest_neighbor_iter(&point) .filter_map(move |point| candidates.contains(point.data).then(|| point.data)) - .map(|id| std::iter::once(id).collect::()) + .map(|id| iter::once(id).collect::()) .collect::>(); Box::new(results.into_iter()) diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index fca159900..105a69194 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -311,7 +311,9 @@ impl<'t> CriteriaBuilder<'t> { point.clone(), )?), AscDescName::Desc(Member::Geo(_point)) => { - return Err(UserError::InvalidSortName { name: "Sorting in descending order is currently not supported for the geosearch".to_string() })? + return Err(UserError::InvalidSortName { + name: "Sorting in descending order is currently not supported for the geosearch".to_string(), + })? } }; } diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 08a84899f..1e5bf9ad0 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -21,7 +21,9 @@ use crate::error::UserError; use crate::heed_codec::facet::{ FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, }; -use crate::{CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result}; +use crate::{ + distance_between_two_points, CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result, +}; #[derive(Debug, Clone, PartialEq)] pub enum Operator { @@ -198,10 +200,10 @@ impl FilterCondition { ErrorVariant::CustomError { message: format!("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"), }, - // we want to point to the last parameters and if there was no parameters we + // we want to point to the last parameters and if there was no parameters we // point to the parenthesis - parameters.last().map(|param| param.1.clone()).unwrap_or(param_span), - )))?; + parameters.last().map(|param| param.1.clone()).unwrap_or(param_span), + )))?; } let (lat, lng, distance) = (¶meters[0], ¶meters[1], parameters[2].0); if let Some(span) = (!(-181.0..181.).contains(&lat.0)) @@ -505,19 +507,18 @@ impl FilterCondition { LowerThanOrEqual(val) => (Included(f64::MIN), Included(*val)), Between(left, right) => (Included(*left), Included(*right)), GeoLowerThan(base_point, distance) => { - let mut result = RoaringBitmap::new(); let rtree = match index.geo_rtree(rtxn)? { Some(rtree) => rtree, - None => return Ok(result), + None => return Ok(RoaringBitmap::new()), }; - rtree + let result = rtree .nearest_neighbor_iter(base_point) .take_while(|point| { - dbg!(crate::distance_between_two_points(base_point, point.geom())) - < *distance + distance_between_two_points(base_point, point.geom()) < *distance }) - .for_each(|point| drop(result.insert(point.data))); + .map(|point| point.data) + .collect(); return Ok(result); } @@ -600,14 +601,15 @@ fn field_id( let key = items.next().unwrap(); if key.as_rule() == Rule::reserved { return Err(PestError::new_from_span( - ErrorVariant::CustomError { - message: format!( - "`{}` is a reserved keyword and thus can't be used as a filter expression. Available filterable attributes are: {}", + ErrorVariant::CustomError { + message: format!( + "`{}` is a reserved keyword and therefore can't be used as a filter expression. \ + Available filterable attributes are: {}", key.as_str(), filterable_fields.iter().join(", "), - ), - }, - key.as_span(), + ), + }, + key.as_span(), )); } @@ -691,7 +693,7 @@ mod tests { assert!(result.is_err()); let error = result.unwrap_err(); assert!(error.to_string().contains( - "`_geo` is a reserved keyword and thus can't be used as a filter expression." + "`_geo` is a reserved keyword and therefore can't be used as a filter expression." )); } diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 639770bd6..b49cdc3cd 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -383,15 +383,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { if let Some(mut rtree) = self.index.geo_rtree(self.wtxn)? { let mut geo_faceted_doc_ids = self.index.geo_faceted_documents_ids(self.wtxn)?; - let points_to_remove: Vec<_> = rtree + let (points_to_remove, docids_to_remove): (Vec<_>, RoaringBitmap) = rtree .iter() .filter(|&point| self.documents_ids.contains(point.data)) .cloned() - .collect(); + .map(|point| (point, point.data)) + .unzip(); points_to_remove.iter().for_each(|point| { rtree.remove(&point); - geo_faceted_doc_ids.remove(point.data); }); + geo_faceted_doc_ids -= docids_to_remove; self.index.put_geo_rtree(self.wtxn, &rtree)?; self.index.put_geo_faceted_documents_ids(self.wtxn, &geo_faceted_doc_ids)?; diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs index 1af22d010..a36b608ee 100644 --- a/milli/src/update/index_documents/extract/extract_geo_points.rs +++ b/milli/src/update/index_documents/extract/extract_geo_points.rs @@ -22,11 +22,10 @@ pub fn extract_geo_points( while let Some((docid_bytes, value)) = obkv_documents.next()? { let obkv = obkv::KvReader::new(value); - let point = match obkv.get(geo_field_id) { - Some(point) => point, + let point: Value = match obkv.get(geo_field_id) { + Some(point) => serde_json::from_slice(point).map_err(InternalError::SerdeJson)?, None => continue, }; - let point: Value = serde_json::from_slice(point).map_err(InternalError::SerdeJson)?; if let Some((lat, lng)) = point["lat"].as_f64().zip(point["lng"].as_f64()) { // this will create an array of 16 bytes (two 8 bytes floats) diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 36e3c870f..47a62be67 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -189,12 +189,9 @@ fn extract_documents_data( let documents_chunk_cloned = documents_chunk.clone(); let lmdb_writer_sx_cloned = lmdb_writer_sx.clone(); rayon::spawn(move || { - let _ = match extract_geo_points( - documents_chunk_cloned, - indexer, - primary_key_id, - geo_field_id, - ) { + let result = + extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_field_id); + let _ = match result { Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))), Err(error) => lmdb_writer_sx_cloned.send(Err(error)), }; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index d3b8e47b0..336165894 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -237,12 +237,17 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { // get filterable fields for facet databases let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; // get the fid of the `_geo` field. - let geo_field_id = if let Some(gfid) = self.index.fields_ids_map(self.wtxn)?.id("_geo") { - (self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid) - || self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid)) - .then(|| gfid) - } else { - None + let geo_field_id = match self.index.fields_ids_map(self.wtxn)?.id("_geo") { + Some(gfid) => { + let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid); + let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid); + if is_sortable || is_filterable { + Some(gfid) + } else { + None + } + } + None => None, }; let stop_words = self.index.stop_words(self.wtxn)?; diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 5c27c195f..b17f28b66 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -1,4 +1,5 @@ use std::borrow::Cow; +use std::convert::TryInto; use std::fs::File; use heed::types::ByteSlice; @@ -11,7 +12,7 @@ use super::helpers::{ }; use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string}; use crate::update::index_documents::helpers::into_clonable_grenad; -use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, GeoPoint, Index, Result}; +use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result}; pub(crate) enum TypedChunk { DocidWordPositions(grenad::Reader), @@ -180,24 +181,22 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::GeoPoints(mut geo_points) => { - // TODO: we should create the rtree with the `RTree::bulk_load` function let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default(); - let mut doc_ids = index.geo_faceted_documents_ids(wtxn)?; + let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?; while let Some((key, value)) = geo_points.next()? { // convert the key back to a u32 (4 bytes) - let (key, _) = helpers::try_split_array_at::(key).unwrap(); - let key = u32::from_be_bytes(key); + let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap(); // convert the latitude and longitude back to a f64 (8 bytes) let (lat, tail) = helpers::try_split_array_at::(value).unwrap(); let (lng, _) = helpers::try_split_array_at::(tail).unwrap(); let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)]; - rtree.insert(GeoPoint::new(point, key)); - doc_ids.insert(key); + rtree.insert(GeoPoint::new(point, docid)); + geo_faceted_docids.insert(docid); } index.put_geo_rtree(wtxn, &rtree)?; - index.put_geo_faceted_documents_ids(wtxn, &doc_ids)?; + index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?; } }