mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Merge #424
424: Store the geopoint in three dimensions r=Kerollmops a=irevoire Related to this issue: https://github.com/meilisearch/MeiliSearch/issues/1872 Fix the whole computation of distance for any “geo” operations (sort or filter). Now when you sort points they are returned to you in the right order. And when you filter on a specific radius you only get points included in the radius. This PR changes the way we store the geo points in the RTree. Instead of considering the latitude and longitude as orthogonal coordinates, we convert them to real orthogonal coordinates projected on a sphere with a radius of 1. This is the conversion formulae. ![image](https://user-images.githubusercontent.com/7032172/145990456-eefe840a-384f-4486-848b-81d0036814ec.png) Which, in rust, translate to this function: ```rust pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] { let [lat, lng] = coord.map(|f| f.to_radians()); let x = lat.cos() * lng.cos(); let y = lat.cos() * lng.sin(); let z = lat.sin(); [x, y, z] } ``` Storing the points on a sphere is easier / faster to compute than storing the point on an approximation of the real earth shape. But when we need to compute the distance between two points we still need to use the haversine distance which works with latitude and longitude. So, to do the fewest search-time computation possible I'm now associating every point with its `DocId` and its lat/lng. Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
559e019de1
@ -54,7 +54,11 @@ pub type FieldId = u16;
|
||||
pub type Position = u32;
|
||||
pub type RelativePosition = u16;
|
||||
pub type FieldDistribution = BTreeMap<String, u64>;
|
||||
pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 2], DocumentId>;
|
||||
|
||||
/// A GeoPoint is a point in cartesian plan, called xyz_point in the code. Its metadata
|
||||
/// is a tuple composed of 1. the DocumentId of the associated document and 2. the original point
|
||||
/// expressed in term of latitude and longitude.
|
||||
pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 3], (DocumentId, [f64; 2])>;
|
||||
|
||||
pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;
|
||||
|
||||
@ -168,6 +172,17 @@ pub fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 {
|
||||
a.haversine_distance_to(&b).meters()
|
||||
}
|
||||
|
||||
/// Convert a point expressed in terms of latitude and longitude to a point in the
|
||||
/// cartesian coordinate expressed in terms of x, y and z.
|
||||
pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] {
|
||||
let [lat, lng] = coord.map(|f| f.to_radians());
|
||||
let x = lat.cos() * lng.cos();
|
||||
let y = lat.cos() * lng.sin();
|
||||
let z = lat.sin();
|
||||
|
||||
[x, y, z]
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_json::json;
|
||||
|
@ -5,7 +5,7 @@ use rstar::RTree;
|
||||
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||
use crate::{GeoPoint, Index, Result};
|
||||
use crate::{lat_lng_to_xyz, GeoPoint, Index, Result};
|
||||
|
||||
pub struct Geo<'t> {
|
||||
index: &'t Index,
|
||||
@ -132,10 +132,12 @@ fn geo_point(
|
||||
point: [f64; 2],
|
||||
ascending: bool,
|
||||
) -> Box<dyn Iterator<Item = RoaringBitmap>> {
|
||||
let point = lat_lng_to_xyz(&point);
|
||||
|
||||
let mut results = Vec::new();
|
||||
for point in rtree.nearest_neighbor_iter(&point) {
|
||||
if candidates.remove(point.data) {
|
||||
results.push(std::iter::once(point.data).collect());
|
||||
if candidates.remove(point.data.0) {
|
||||
results.push(std::iter::once(point.data.0).collect());
|
||||
if candidates.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
@ -13,7 +13,9 @@ use crate::error::{Error, UserError};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
|
||||
};
|
||||
use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||
use crate::{
|
||||
distance_between_two_points, lat_lng_to_xyz, CboRoaringBitmapCodec, FieldId, Index, Result,
|
||||
};
|
||||
|
||||
/// The maximum number of filters the filter AST can process.
|
||||
const MAX_FILTER_DEPTH: usize = 2000;
|
||||
@ -402,12 +404,14 @@ impl<'a> Filter<'a> {
|
||||
None => return Ok(RoaringBitmap::new()),
|
||||
};
|
||||
|
||||
let xyz_base_point = lat_lng_to_xyz(&base_point);
|
||||
|
||||
let result = rtree
|
||||
.nearest_neighbor_iter(&base_point)
|
||||
.nearest_neighbor_iter(&xyz_base_point)
|
||||
.take_while(|point| {
|
||||
distance_between_two_points(&base_point, point.geom()) < radius
|
||||
distance_between_two_points(&base_point, &point.data.1) < radius
|
||||
})
|
||||
.map(|point| point.data)
|
||||
.map(|point| point.data.0)
|
||||
.collect();
|
||||
|
||||
Ok(result)
|
||||
|
@ -395,9 +395,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
|
||||
let (points_to_remove, docids_to_remove): (Vec<_>, RoaringBitmap) = rtree
|
||||
.iter()
|
||||
.filter(|&point| self.documents_ids.contains(point.data))
|
||||
.filter(|&point| self.documents_ids.contains(point.data.0))
|
||||
.cloned()
|
||||
.map(|point| (point, point.data))
|
||||
.map(|point| (point, point.data.0))
|
||||
.unzip();
|
||||
points_to_remove.iter().for_each(|point| {
|
||||
rtree.remove(&point);
|
||||
@ -747,7 +747,7 @@ mod tests {
|
||||
|
||||
let all_geo_ids = rtree.iter().map(|point| point.data).collect::<Vec<_>>();
|
||||
let all_geo_documents = index
|
||||
.documents(&rtxn, all_geo_ids.iter().copied())
|
||||
.documents(&rtxn, all_geo_ids.iter().map(|(id, _)| id).copied())
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|(id, _)| *id)
|
||||
|
@ -12,7 +12,10 @@ use super::helpers::{
|
||||
};
|
||||
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
|
||||
use crate::update::index_documents::helpers::into_clonable_grenad;
|
||||
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
|
||||
use crate::{
|
||||
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
|
||||
Result,
|
||||
};
|
||||
|
||||
pub(crate) enum TypedChunk {
|
||||
DocidWordPositions(grenad::Reader<CursorClonableMmap>),
|
||||
@ -192,7 +195,9 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
|
||||
let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
|
||||
let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
|
||||
rtree.insert(GeoPoint::new(point, docid));
|
||||
let xyz_point = lat_lng_to_xyz(&point);
|
||||
|
||||
rtree.insert(GeoPoint::new(xyz_point, (docid, point)));
|
||||
geo_faceted_docids.insert(docid);
|
||||
}
|
||||
index.put_geo_rtree(wtxn, &rtree)?;
|
||||
|
Loading…
Reference in New Issue
Block a user