From 98a365aaae53e2d543f65f5261811a23dad65660 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 14 Dec 2021 12:21:24 +0100 Subject: [PATCH] store the geopoint in three dimensions --- milli/src/lib.rs | 17 ++++++++++++++++- milli/src/search/criteria/geo.rs | 8 +++++--- milli/src/search/facet/filter.rs | 12 ++++++++---- milli/src/update/delete_documents.rs | 6 +++--- milli/src/update/index_documents/typed_chunk.rs | 9 +++++++-- 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 9e7bb8966..bb4f47e47 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -54,7 +54,11 @@ pub type FieldId = u16; pub type Position = u32; pub type RelativePosition = u16; pub type FieldDistribution = BTreeMap; -pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 2], DocumentId>; + +/// A GeoPoint is a point in 3D Cartesian space, called xyz_point in the code. Its metadata +/// is a tuple composed of 1. the DocumentId of the associated document and 2. the original point +/// expressed in terms of latitude and longitude. +pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 3], (DocumentId, [f64; 2])>; pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1; @@ -168,6 +172,17 @@ pub fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 { a.haversine_distance_to(&b).meters() } +/// Convert a point expressed in terms of latitude and longitude to a point in the +/// Cartesian coordinate system, expressed in terms of x, y and z. 
+pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] { + let [lat, lng] = coord.map(|f| f.to_radians()); + let x = lat.cos() * lng.cos(); + let y = lat.cos() * lng.sin(); + let z = lat.sin(); + + [x, y, z] +} + #[cfg(test)] mod tests { use serde_json::json; diff --git a/milli/src/search/criteria/geo.rs b/milli/src/search/criteria/geo.rs index de6de8912..e3bda51de 100644 --- a/milli/src/search/criteria/geo.rs +++ b/milli/src/search/criteria/geo.rs @@ -5,7 +5,7 @@ use rstar::RTree; use super::{Criterion, CriterionParameters, CriterionResult}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; -use crate::{GeoPoint, Index, Result}; +use crate::{lat_lng_to_xyz, GeoPoint, Index, Result}; pub struct Geo<'t> { index: &'t Index, @@ -132,10 +132,12 @@ fn geo_point( point: [f64; 2], ascending: bool, ) -> Box> { + let point = lat_lng_to_xyz(&point); + let mut results = Vec::new(); for point in rtree.nearest_neighbor_iter(&point) { - if candidates.remove(point.data) { - results.push(std::iter::once(point.data).collect()); + if candidates.remove(point.data.0) { + results.push(std::iter::once(point.data.0).collect()); if candidates.is_empty() { break; } diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 6ece17eb4..551fa0d4e 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -13,7 +13,9 @@ use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, }; -use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result}; +use crate::{ + distance_between_two_points, lat_lng_to_xyz, CboRoaringBitmapCodec, FieldId, Index, Result, +}; /// The maximum number of filters the filter AST can process. 
const MAX_FILTER_DEPTH: usize = 2000; @@ -402,12 +404,14 @@ impl<'a> Filter<'a> { None => return Ok(RoaringBitmap::new()), }; + let xyz_base_point = lat_lng_to_xyz(&base_point); + let result = rtree - .nearest_neighbor_iter(&base_point) + .nearest_neighbor_iter(&xyz_base_point) .take_while(|point| { - distance_between_two_points(&base_point, point.geom()) < radius + distance_between_two_points(&base_point, &point.data.1) < radius }) - .map(|point| point.data) + .map(|point| point.data.0) .collect(); Ok(result) diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index ed87132bd..4c41cbd53 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -395,9 +395,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let (points_to_remove, docids_to_remove): (Vec<_>, RoaringBitmap) = rtree .iter() - .filter(|&point| self.documents_ids.contains(point.data)) + .filter(|&point| self.documents_ids.contains(point.data.0)) .cloned() - .map(|point| (point, point.data)) + .map(|point| (point, point.data.0)) .unzip(); points_to_remove.iter().for_each(|point| { rtree.remove(&point); @@ -747,7 +747,7 @@ mod tests { let all_geo_ids = rtree.iter().map(|point| point.data).collect::>(); let all_geo_documents = index - .documents(&rtxn, all_geo_ids.iter().copied()) + .documents(&rtxn, all_geo_ids.iter().map(|(id, _)| id).copied()) .unwrap() .iter() .map(|(id, _)| *id) diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index b24a03ff6..7f0cfcab3 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -12,7 +12,10 @@ use super::helpers::{ }; use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string}; use crate::update::index_documents::helpers::into_clonable_grenad; -use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result}; +use crate::{ + 
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, + Result, +}; pub(crate) enum TypedChunk { DocidWordPositions(grenad::Reader), @@ -192,7 +195,9 @@ pub(crate) fn write_typed_chunk_into_index( let (lat, tail) = helpers::try_split_array_at::(value).unwrap(); let (lng, _) = helpers::try_split_array_at::(tail).unwrap(); let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)]; - rtree.insert(GeoPoint::new(point, docid)); + let xyz_point = lat_lng_to_xyz(&point); + + rtree.insert(GeoPoint::new(xyz_point, (docid, point))); geo_faceted_docids.insert(docid); } index.put_geo_rtree(wtxn, &rtree)?;