424: Store the geopoint in three dimensions r=Kerollmops a=irevoire

Related to this issue: https://github.com/meilisearch/MeiliSearch/issues/1872

Fix the whole computation of distance for any “geo” operations (sort or filter). Now when you sort points they are returned to you in the right order.
And when you filter on a specific radius you only get points included in the radius.

This PR changes the way we store the geo points in the RTree.
Instead of considering the latitude and longitude as orthogonal coordinates, we convert them to real orthogonal coordinates projected on a sphere with a radius of 1.
These are the conversion formulae:
![image](https://user-images.githubusercontent.com/7032172/145990456-eefe840a-384f-4486-848b-81d0036814ec.png)
In Rust, this translates to the following function:
```rust
/// Projects a `[latitude, longitude]` pair, given in degrees, onto the unit sphere
/// and returns the resulting cartesian `[x, y, z]` coordinates.
pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] {
    let lat = coord[0].to_radians();
    let lng = coord[1].to_radians();
    // Both x and y are scaled by cos(lat), so it is computed once.
    let cos_lat = lat.cos();

    [cos_lat * lng.cos(), cos_lat * lng.sin(), lat.sin()]
}
```

Storing the points on a sphere is easier and faster to compute than storing them on an approximation of the real shape of the Earth.
But when we need to compute the distance between two points, we still need to use the haversine distance, which works with latitude and longitude.
So, to do as little search-time computation as possible, I'm now associating every point with its `DocId` and its lat/lng.

Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
bors[bot] 2022-01-10 15:23:43 +00:00 committed by GitHub
commit 559e019de1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 39 additions and 13 deletions

View File

@ -54,7 +54,11 @@ pub type FieldId = u16;
pub type Position = u32;
pub type RelativePosition = u16;
pub type FieldDistribution = BTreeMap<String, u64>;
pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 2], DocumentId>;
/// A GeoPoint is a point in cartesian space, called xyz_point in the code. Its metadata
/// is a tuple composed of 1. the DocumentId of the associated document and 2. the original point
/// expressed in terms of latitude and longitude.
pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 3], (DocumentId, [f64; 2])>;
pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;
@ -168,6 +172,17 @@ pub fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 {
a.haversine_distance_to(&b).meters()
}
/// Converts a point expressed as `[latitude, longitude]` in degrees into cartesian
/// `[x, y, z]` coordinates on a sphere of radius 1.
pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] {
    let lat = coord[0].to_radians();
    let lng = coord[1].to_radians();
    // Both x and y are scaled by cos(lat), so it is computed once.
    let cos_lat = lat.cos();

    [cos_lat * lng.cos(), cos_lat * lng.sin(), lat.sin()]
}
#[cfg(test)]
mod tests {
use serde_json::json;

View File

@ -5,7 +5,7 @@ use rstar::RTree;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::{GeoPoint, Index, Result};
use crate::{lat_lng_to_xyz, GeoPoint, Index, Result};
pub struct Geo<'t> {
index: &'t Index,
@ -132,10 +132,12 @@ fn geo_point(
point: [f64; 2],
ascending: bool,
) -> Box<dyn Iterator<Item = RoaringBitmap>> {
let point = lat_lng_to_xyz(&point);
let mut results = Vec::new();
for point in rtree.nearest_neighbor_iter(&point) {
if candidates.remove(point.data) {
results.push(std::iter::once(point.data).collect());
if candidates.remove(point.data.0) {
results.push(std::iter::once(point.data.0).collect());
if candidates.is_empty() {
break;
}

View File

@ -13,7 +13,9 @@ use crate::error::{Error, UserError};
use crate::heed_codec::facet::{
FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
};
use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result};
use crate::{
distance_between_two_points, lat_lng_to_xyz, CboRoaringBitmapCodec, FieldId, Index, Result,
};
/// The maximum number of filters the filter AST can process.
const MAX_FILTER_DEPTH: usize = 2000;
@ -402,12 +404,14 @@ impl<'a> Filter<'a> {
None => return Ok(RoaringBitmap::new()),
};
let xyz_base_point = lat_lng_to_xyz(&base_point);
let result = rtree
.nearest_neighbor_iter(&base_point)
.nearest_neighbor_iter(&xyz_base_point)
.take_while(|point| {
distance_between_two_points(&base_point, point.geom()) < radius
distance_between_two_points(&base_point, &point.data.1) < radius
})
.map(|point| point.data)
.map(|point| point.data.0)
.collect();
Ok(result)

View File

@ -395,9 +395,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let (points_to_remove, docids_to_remove): (Vec<_>, RoaringBitmap) = rtree
.iter()
.filter(|&point| self.documents_ids.contains(point.data))
.filter(|&point| self.documents_ids.contains(point.data.0))
.cloned()
.map(|point| (point, point.data))
.map(|point| (point, point.data.0))
.unzip();
points_to_remove.iter().for_each(|point| {
rtree.remove(&point);
@ -747,7 +747,7 @@ mod tests {
let all_geo_ids = rtree.iter().map(|point| point.data).collect::<Vec<_>>();
let all_geo_documents = index
.documents(&rtxn, all_geo_ids.iter().copied())
.documents(&rtxn, all_geo_ids.iter().map(|(id, _)| id).copied())
.unwrap()
.iter()
.map(|(id, _)| *id)

View File

@ -12,7 +12,10 @@ use super::helpers::{
};
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
use crate::update::index_documents::helpers::into_clonable_grenad;
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
use crate::{
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
Result,
};
pub(crate) enum TypedChunk {
DocidWordPositions(grenad::Reader<CursorClonableMmap>),
@ -192,7 +195,9 @@ pub(crate) fn write_typed_chunk_into_index(
let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
rtree.insert(GeoPoint::new(point, docid));
let xyz_point = lat_lng_to_xyz(&point);
rtree.insert(GeoPoint::new(xyz_point, (docid, point)));
geo_faceted_docids.insert(docid);
}
index.put_geo_rtree(wtxn, &rtree)?;