diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs index bcd227300..5545c870e 100644 --- a/crates/meilisearch/src/routes/indexes/documents.rs +++ b/crates/meilisearch/src/routes/indexes/documents.rs @@ -641,16 +641,12 @@ fn documents_by_query( }; let sort_criteria = if let Some(sort) = &sort { - let sorts: Vec<_> = - match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { - Ok(sorts) => sorts, - Err(asc_desc_error) => { - return Err(milli::Error::from(milli::SortError::from( - asc_desc_error, - )) - .into()) - } - }; + let sorts: Vec<_> = match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { + Ok(sorts) => sorts, + Err(asc_desc_error) => { + return Err(milli::Error::from(milli::SortError::from(asc_desc_error)).into()) + } + }; Some(sorts) } else { None diff --git a/crates/milli/src/documents/geo_sort.rs b/crates/milli/src/documents/geo_sort.rs index 5b3968b39..5b899e6d5 100644 --- a/crates/milli/src/documents/geo_sort.rs +++ b/crates/milli/src/documents/geo_sort.rs @@ -1,14 +1,70 @@ -use std::collections::VecDeque; - -use heed::RoTxn; +use crate::{ + distance_between_two_points, + heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec}, + lat_lng_to_xyz, + search::new::{facet_string_values, facet_values_prefix_key}, + GeoPoint, Index, +}; +use heed::{ + types::{Bytes, Unit}, + RoPrefix, RoTxn, +}; use roaring::RoaringBitmap; use rstar::RTree; +use std::collections::VecDeque; -use crate::{ - distance_between_two_points, lat_lng_to_xyz, - search::new::geo_sort::{geo_value, opposite_of}, - GeoPoint, GeoSortStrategy, Index, -}; +#[derive(Debug, Clone, Copy)] +pub struct GeoSortParameter { + // Define the strategy used by the geo sort + pub strategy: GeoSortStrategy, + // Limit the number of docs in a single bucket to avoid unexpectedly large overhead + pub max_bucket_size: u64, + // Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal + pub distance_error_margin: f64, +} + +impl Default for GeoSortParameter { + fn default() -> Self { + Self { + strategy: GeoSortStrategy::default(), + max_bucket_size: 1000, + distance_error_margin: 1.0, + } + } +} +/// Define the strategy used by the geo sort. +/// The parameter represents the cache size, and, in the case of the Dynamic strategy, +/// the point where we move from using the iterative strategy to the rtree. +#[derive(Debug, Clone, Copy)] +pub enum GeoSortStrategy { + AlwaysIterative(usize), + AlwaysRtree(usize), + Dynamic(usize), +} + +impl Default for GeoSortStrategy { + fn default() -> Self { + GeoSortStrategy::Dynamic(1000) + } +} + +impl GeoSortStrategy { + pub fn use_rtree(&self, candidates: usize) -> bool { + match self { + GeoSortStrategy::AlwaysIterative(_) => false, + GeoSortStrategy::AlwaysRtree(_) => true, + GeoSortStrategy::Dynamic(i) => candidates >= *i, + } + } + + pub fn cache_size(&self) -> usize { + match self { + GeoSortStrategy::AlwaysIterative(i) + | GeoSortStrategy::AlwaysRtree(i) + | GeoSortStrategy::Dynamic(i) => *i, + } + } +} // TODO: Make it take a mut reference to cache #[allow(clippy::too_many_arguments)] @@ -74,7 +130,8 @@ pub fn fill_cache( .map(|id| -> crate::Result<_> { Ok((id, geo_value(id, lat, lng, index, txn)?)) }) .collect::>>()?; // computing the distance between two points is expensive thus we cache the result - documents.sort_by_cached_key(|(_, p)| distance_between_two_points(&target_point, p) as usize); + documents + .sort_by_cached_key(|(_, p)| distance_between_two_points(&target_point, p) as usize); cached_sorted_docids.extend(documents); }; @@ -86,19 +143,13 @@ pub fn next_bucket( index: &Index, txn: &RoTxn, universe: &RoaringBitmap, - strategy: GeoSortStrategy, ascending: bool, target_point: [f64; 2], field_ids: &Option<[u16; 2]>, rtree: &mut Option>, - cached_sorted_docids: &mut VecDeque<(u32, [f64; 2])>, geo_candidates: &RoaringBitmap, - - // Limit the number of docs in a single bucket to avoid unexpectedly large overhead - max_bucket_size: u64, - // Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal - distance_error_margin: f64, + parameter: GeoSortParameter, ) -> crate::Result)>> { let mut geo_candidates = geo_candidates & universe; @@ -130,7 +181,7 @@ pub fn next_bucket( if geo_candidates.contains(id) { let distance = distance_between_two_points(&target_point, &point); if let Some((point0, bucket_distance)) = current_distance.as_ref() { - if (bucket_distance - distance).abs() > distance_error_margin { + if (bucket_distance - distance).abs() > parameter.distance_error_margin { // different distance, point belongs to next bucket put_back(cached_sorted_docids, (id, point)); return Ok(Some((current_bucket, Some(point0.to_owned())))); @@ -140,7 +191,7 @@ pub fn next_bucket( // remove from candidates to prevent it from being added to the cache again geo_candidates.remove(id); // current bucket size reaches limit, force return - if current_bucket.len() == max_bucket_size { + if current_bucket.len() == parameter.max_bucket_size { return Ok(Some((current_bucket, Some(point0.to_owned())))); } } @@ -150,7 +201,7 @@ pub fn next_bucket( current_bucket.insert(id); geo_candidates.remove(id); // current bucket size reaches limit, force return - if current_bucket.len() == max_bucket_size { + if current_bucket.len() == parameter.max_bucket_size { return Ok(Some((current_bucket, Some(point.to_owned())))); } } @@ -160,7 +211,7 @@ pub fn next_bucket( fill_cache( index, txn, - strategy, + parameter.strategy, ascending, target_point, field_ids, @@ -180,3 +231,65 @@ pub fn next_bucket( } } } + +/// Return an iterator over each number value in the given field of the given document. +fn facet_number_values<'a>( + docid: u32, + field_id: u16, + index: &Index, + txn: &'a RoTxn<'a>, +) -> crate::Result, Unit>> { + let key = facet_values_prefix_key(field_id, docid); + + let iter = index + .field_id_docid_facet_f64s + .remap_key_type::() + .prefix_iter(txn, &key)? + .remap_key_type(); + + Ok(iter) +} + +/// Extracts the lat and long values from a single document. +/// +/// If it is not able to find it in the facet number index it will extract it +/// from the facet string index and parse it as f64 (as the geo extraction behaves). +pub(crate) fn geo_value( + docid: u32, + field_lat: u16, + field_lng: u16, + index: &Index, + rtxn: &RoTxn<'_>, +) -> crate::Result<[f64; 2]> { + let extract_geo = |geo_field: u16| -> crate::Result { + match facet_number_values(docid, geo_field, index, rtxn)?.next() { + Some(Ok(((_, _, geo), ()))) => Ok(geo), + Some(Err(e)) => Err(e.into()), + None => match facet_string_values(docid, geo_field, index, rtxn)?.next() { + Some(Ok((_, geo))) => { + Ok(geo.parse::().expect("cannot parse geo field as f64")) + } + Some(Err(e)) => Err(e.into()), + None => panic!("A geo faceted document doesn't contain any lat or lng"), + }, + } + }; + + let lat = extract_geo(field_lat)?; + let lng = extract_geo(field_lng)?; + + Ok([lat, lng]) +} + +/// Compute the antipodal coordinate of `coord` +pub(crate) fn opposite_of(mut coord: [f64; 2]) -> [f64; 2] { + coord[0] *= -1.; + // in the case of x,0 we want to return x,180 + if coord[1] > 0. { + coord[1] -= 180.; + } else { + coord[1] += 180.; + } + + coord +} diff --git a/crates/milli/src/documents/mod.rs b/crates/milli/src/documents/mod.rs index 6a05f61a5..b515c4e98 100644 --- a/crates/milli/src/documents/mod.rs +++ b/crates/milli/src/documents/mod.rs @@ -1,9 +1,9 @@ mod builder; mod enriched; +pub mod geo_sort; mod primary_key; mod reader; mod serde_impl; -pub mod geo_sort; use std::fmt::Debug; use std::io; @@ -20,6 +20,7 @@ pub use primary_key::{ pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader}; use serde::{Deserialize, Serialize}; +pub use self::geo_sort::{GeoSortParameter, GeoSortStrategy}; use crate::error::{FieldIdMapMissingEntry, InternalError}; use crate::{FieldId, Object, Result}; diff --git a/crates/milli/src/facet/facet_sort_recursive.rs b/crates/milli/src/facet/facet_sort_recursive.rs index 47c3696f3..7342114ef 100644 --- a/crates/milli/src/facet/facet_sort_recursive.rs +++ b/crates/milli/src/facet/facet_sort_recursive.rs @@ -1,6 +1,16 @@ -use roaring::RoaringBitmap; +use crate::{ + heed_codec::{ + facet::{FacetGroupKeyCodec, FacetGroupValueCodec}, + BytesRefCodec, + }, + search::{ + facet::{ascending_facet_sort, descending_facet_sort}, + new::check_sort_criteria, + }, + AscDesc, DocumentId, Member, +}; use heed::Database; -use crate::{heed_codec::{facet::{FacetGroupKeyCodec, FacetGroupValueCodec}, BytesRefCodec}, search::{facet::{ascending_facet_sort, descending_facet_sort}, new::check_sort_criteria}, AscDesc, DocumentId, Member}; +use roaring::RoaringBitmap; /// Builder for a [`SortedDocumentsIterator`]. /// Most builders won't ever be built, because pagination will skip them. @@ -15,13 +25,8 @@ pub struct SortedDocumentsIteratorBuilder<'ctx> { impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> { /// Performs the sort and builds a [`SortedDocumentsIterator`]. fn build(self) -> heed::Result> { - let SortedDocumentsIteratorBuilder { - rtxn, - number_db, - string_db, - fields, - candidates, - } = self; + let SortedDocumentsIteratorBuilder { rtxn, number_db, string_db, fields, candidates } = + self; let size = candidates.len() as usize; // There is no point sorting a 1-element array @@ -42,33 +47,13 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> { // Perform the sort on the first field let (number_iter, string_iter) = if ascending { - let number_iter = ascending_facet_sort( - rtxn, - number_db, - field_id, - candidates.clone(), - )?; - let string_iter = ascending_facet_sort( - rtxn, - string_db, - field_id, - candidates, - )?; + let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?; + let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?; (itertools::Either::Left(number_iter), itertools::Either::Left(string_iter)) } else { - let number_iter = descending_facet_sort( - rtxn, - number_db, - field_id, - candidates.clone(), - )?; - let string_iter = descending_facet_sort( - rtxn, - string_db, - field_id, - candidates, - )?; + let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?; + let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?; (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter)) }; @@ -76,26 +61,28 @@ impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> { // Create builders for the next level of the tree let number_db2 = number_db; let string_db2 = string_db; - let number_iter = number_iter.map(move |r| -> heed::Result { - let (docids, _bytes) = r?; - Ok(SortedDocumentsIteratorBuilder { - rtxn, - number_db, - string_db, - fields: &fields[1..], - candidates: docids, - }) - }); - let string_iter = string_iter.map(move |r| -> heed::Result { - let (docids, _bytes) = r?; - Ok(SortedDocumentsIteratorBuilder { - rtxn, - number_db: number_db2, - string_db: string_db2, - fields: &fields[1..], - candidates: docids, - }) - }); + let number_iter = + number_iter.map(move |r| -> heed::Result { + let (docids, _bytes) = r?; + Ok(SortedDocumentsIteratorBuilder { + rtxn, + number_db, + string_db, + fields: &fields[1..], + candidates: docids, + }) + }); + let string_iter = + string_iter.map(move |r| -> heed::Result { + let (docids, _bytes) = r?; + Ok(SortedDocumentsIteratorBuilder { + rtxn, + number_db: number_db2, + string_db: string_db2, + fields: &fields[1..], + candidates: docids, + }) + }); Ok(SortedDocumentsIterator::Branch { current_child: None, @@ -112,7 +99,7 @@ pub enum SortedDocumentsIterator<'ctx> { Leaf { /// The exact number of documents remaining size: usize, - values: Box + 'ctx> + values: Box + 'ctx>, }, Branch { /// The current child, got from the children iterator @@ -120,20 +107,27 @@ pub enum SortedDocumentsIterator<'ctx> { /// The exact number of documents remaining, excluding documents in the current child next_children_size: usize, /// Iterators to become the current child once it is exhausted - next_children: Box>> + 'ctx>, - } + next_children: + Box>> + 'ctx>, + }, } impl SortedDocumentsIterator<'_> { /// Takes care of updating the current child if it is `None`, and also updates the size - fn update_current<'ctx>(current_child: &mut Option>>, next_children_size: &mut usize, next_children: &mut Box>> + 'ctx>) -> heed::Result<()> { + fn update_current<'ctx>( + current_child: &mut Option>>, + next_children_size: &mut usize, + next_children: &mut Box< + dyn Iterator>> + 'ctx, + >, + ) -> heed::Result<()> { if current_child.is_none() { *current_child = match next_children.next() { Some(Ok(builder)) => { let next_child = Box::new(builder.build()?); *next_children_size -= next_child.size_hint().0; Some(next_child) - }, + } Some(Err(e)) => return Err(e), None => return Ok(()), }; @@ -150,15 +144,23 @@ impl Iterator for SortedDocumentsIterator<'_> { let (current_child, next_children, next_children_size) = match self { SortedDocumentsIterator::Leaf { values, size } => { *size = size.saturating_sub(n); - return values.nth(n).map(Ok) - }, - SortedDocumentsIterator::Branch { current_child, next_children, next_children_size } => (current_child, next_children, next_children_size), + return values.nth(n).map(Ok); + } + SortedDocumentsIterator::Branch { + current_child, + next_children, + next_children_size, + } => (current_child, next_children, next_children_size), }; // Otherwise don't directly iterate over children, skip them if we know we will go further let mut to_skip = n - 1; while to_skip > 0 { - if let Err(e) = SortedDocumentsIterator::update_current(current_child, next_children_size, next_children) { + if let Err(e) = SortedDocumentsIterator::update_current( + current_child, + next_children_size, + next_children, + ) { return Some(Err(e)); } let Some(inner) = current_child else { @@ -183,8 +185,14 @@ impl Iterator for SortedDocumentsIterator<'_> { fn size_hint(&self) -> (usize, Option) { let size = match self { SortedDocumentsIterator::Leaf { size, .. } => *size, - SortedDocumentsIterator::Branch { next_children_size, current_child: Some(current_child), .. } => current_child.size_hint().0 + next_children_size, - SortedDocumentsIterator::Branch { next_children_size, current_child: None, .. } => *next_children_size, + SortedDocumentsIterator::Branch { + next_children_size, + current_child: Some(current_child), + .. + } => current_child.size_hint().0 + next_children_size, + SortedDocumentsIterator::Branch { next_children_size, current_child: None, .. } => { + *next_children_size + } }; (size, Some(size)) @@ -198,12 +206,20 @@ impl Iterator for SortedDocumentsIterator<'_> { *size -= 1; } result - }, - SortedDocumentsIterator::Branch { current_child, next_children_size, next_children } => { + } + SortedDocumentsIterator::Branch { + current_child, + next_children_size, + next_children, + } => { let mut result = None; while result.is_none() { // Ensure we have selected an iterator to work with - if let Err(e) = SortedDocumentsIterator::update_current(current_child, next_children_size, next_children) { + if let Err(e) = SortedDocumentsIterator::update_current( + current_child, + next_children_size, + next_children, + ) { return Some(Err(e)); } let Some(inner) = current_child else { @@ -232,7 +248,7 @@ pub struct SortedDocuments<'ctx> { candidates: &'ctx RoaringBitmap, } -impl <'ctx> SortedDocuments<'ctx> { +impl<'ctx> SortedDocuments<'ctx> { pub fn iter(&'ctx self) -> heed::Result> { let builder = SortedDocumentsIteratorBuilder { rtxn: self.rtxn, @@ -266,19 +282,10 @@ pub fn recursive_facet_sort<'ctx>( fields.push((field_id, ascending)); // FIXME: Should this return an error if the field is not found? } } - - let number_db = index - .facet_id_f64_docids - .remap_key_type::>(); - let string_db = index - .facet_id_string_docids - .remap_key_type::>(); - Ok(SortedDocuments { - rtxn, - fields, - number_db, - string_db, - candidates, - }) + let number_db = index.facet_id_f64_docids.remap_key_type::>(); + let string_db = + index.facet_id_string_docids.remap_key_type::>(); + + Ok(SortedDocuments { rtxn, fields, number_db, string_db, candidates }) } diff --git a/crates/milli/src/facet/mod.rs b/crates/milli/src/facet/mod.rs index a6351b42c..8b0b9a25e 100644 --- a/crates/milli/src/facet/mod.rs +++ b/crates/milli/src/facet/mod.rs @@ -1,7 +1,7 @@ +pub mod facet_sort_recursive; mod facet_type; mod facet_value; pub mod value_encoding; -pub mod facet_sort_recursive; pub use self::facet_type::FacetType; pub use self::facet_value::FacetValue; diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index 504b4c68d..6fdae86b3 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -43,12 +43,13 @@ use std::fmt; use std::hash::BuildHasherDefault; use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; +pub use documents::GeoSortStrategy; pub use filter_parser::{Condition, FilterCondition, Span, Token}; use fxhash::{FxHasher32, FxHasher64}; pub use grenad::CompressionType; pub use search::new::{ - execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, SearchContext, - SearchLogger, VisualSearchLogger, + execute_search, filtered_universe, DefaultSearchLogger, SearchContext, SearchLogger, + VisualSearchLogger, }; use serde_json::Value; pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder}; diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index 62183afc3..48013b2ee 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -9,6 +9,8 @@ use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats}; +use crate::documents::GeoSortParameter; +use crate::documents::GeoSortStrategy; use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::index::MatchingStrategy; use crate::score_details::{ScoreDetails, ScoringStrategy}; @@ -46,7 +48,7 @@ pub struct Search<'a> { sort_criteria: Option>, distinct: Option, searchable_attributes: Option<&'a [String]>, - geo_param: new::GeoSortParameter, + geo_param: GeoSortParameter, terms_matching_strategy: TermsMatchingStrategy, scoring_strategy: ScoringStrategy, words_limit: usize, @@ -69,7 +71,7 @@ impl<'a> Search<'a> { sort_criteria: None, distinct: None, searchable_attributes: None, - geo_param: new::GeoSortParameter::default(), + geo_param: GeoSortParameter::default(), terms_matching_strategy: TermsMatchingStrategy::default(), scoring_strategy: Default::default(), exhaustive_number_hits: false, @@ -145,7 +147,7 @@ impl<'a> Search<'a> { } #[cfg(test)] - pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> { + pub fn geo_sort_strategy(&mut self, strategy: GeoSortStrategy) -> &mut Search<'a> { self.geo_param.strategy = strategy; self } diff --git a/crates/milli/src/search/new/distinct.rs b/crates/milli/src/search/new/distinct.rs index 48ad152ee..455b495f5 100644 --- a/crates/milli/src/search/new/distinct.rs +++ b/crates/milli/src/search/new/distinct.rs @@ -118,7 +118,7 @@ pub fn facet_string_values<'a>( } #[allow(clippy::drop_non_drop)] -fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] { +pub(crate) fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] { concat_arrays::concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes()) } diff --git a/crates/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs index a52a84575..47001267d 100644 --- a/crates/milli/src/search/new/geo_sort.rs +++ b/crates/milli/src/search/new/geo_sort.rs @@ -8,6 +8,7 @@ use rstar::RTree; use super::facet_string_values; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; use crate::documents::geo_sort::{fill_cache, next_bucket}; +use crate::documents::{GeoSortParameter, GeoSortStrategy}; use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec}; use crate::score_details::{self, ScoreDetails}; use crate::{GeoPoint, Index, Result, SearchContext, SearchLogger}; @@ -20,75 +21,10 @@ fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE concat_arrays::concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes()) } -/// Return an iterator over each number value in the given field of the given document. -fn facet_number_values<'a>( - docid: u32, - field_id: u16, - index: &Index, - txn: &'a RoTxn<'a>, -) -> Result, Unit>> { - let key = facet_values_prefix_key(field_id, docid); - - let iter = index - .field_id_docid_facet_f64s - .remap_key_type::() - .prefix_iter(txn, &key)? - .remap_key_type(); - - Ok(iter) -} - -#[derive(Debug, Clone, Copy)] -pub struct Parameter { - // Define the strategy used by the geo sort - pub strategy: Strategy, - // Limit the number of docs in a single bucket to avoid unexpectedly large overhead - pub max_bucket_size: u64, - // Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal - pub distance_error_margin: f64, -} - -impl Default for Parameter { - fn default() -> Self { - Self { strategy: Strategy::default(), max_bucket_size: 1000, distance_error_margin: 1.0 } - } -} -/// Define the strategy used by the geo sort. -/// The parameter represents the cache size, and, in the case of the Dynamic strategy, -/// the point where we move from using the iterative strategy to the rtree. -#[derive(Debug, Clone, Copy)] -pub enum Strategy { - AlwaysIterative(usize), - AlwaysRtree(usize), - Dynamic(usize), -} - -impl Default for Strategy { - fn default() -> Self { - Strategy::Dynamic(1000) - } -} - -impl Strategy { - pub fn use_rtree(&self, candidates: usize) -> bool { - match self { - Strategy::AlwaysIterative(_) => false, - Strategy::AlwaysRtree(_) => true, - Strategy::Dynamic(i) => candidates >= *i, - } - } - - pub fn cache_size(&self) -> usize { - match self { - Strategy::AlwaysIterative(i) | Strategy::AlwaysRtree(i) | Strategy::Dynamic(i) => *i, - } - } -} - pub struct GeoSort { query: Option, - strategy: Strategy, + strategy: GeoSortStrategy, ascending: bool, point: [f64; 2], field_ids: Option<[u16; 2]>, @@ -105,12 +41,12 @@ pub struct GeoSort { impl GeoSort { pub fn new( - parameter: Parameter, + parameter: GeoSortParameter, geo_faceted_docids: RoaringBitmap, point: [f64; 2], ascending: bool, ) -> Result { - let Parameter { strategy, max_bucket_size, distance_error_margin } = parameter; + let GeoSortParameter { strategy, max_bucket_size, distance_error_margin } = parameter; Ok(Self { query: None, strategy, @@ -148,37 +84,6 @@ impl GeoSort { } } -/// Extracts the lat and long values from a single document. -/// -/// If it is not able to find it in the facet number index it will extract it -/// from the facet string index and parse it as f64 (as the geo extraction behaves). -pub(crate) fn geo_value( - docid: u32, - field_lat: u16, - field_lng: u16, - index: &Index, - rtxn: &RoTxn<'_>, -) -> Result<[f64; 2]> { - let extract_geo = |geo_field: u16| -> Result { - match facet_number_values(docid, geo_field, index, rtxn)?.next() { - Some(Ok(((_, _, geo), ()))) => Ok(geo), - Some(Err(e)) => Err(e.into()), - None => match facet_string_values(docid, geo_field, index, rtxn)?.next() { - Some(Ok((_, geo))) => { - Ok(geo.parse::().expect("cannot parse geo field as f64")) - } - Some(Err(e)) => Err(e.into()), - None => panic!("A geo faceted document doesn't contain any lat or lng"), - }, - } - }; - - let lat = extract_geo(field_lat)?; - let lng = extract_geo(field_lng)?; - - Ok([lat, lng]) -} - impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { fn id(&self) -> String { "geo_sort".to_owned() @@ -224,15 +129,17 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { ctx.index, ctx.txn, universe, - self.strategy, self.ascending, self.point, &self.field_ids, &mut self.rtree, &mut self.cached_sorted_docids, &self.geo_candidates, - self.max_bucket_size, - self.distance_error_margin, + GeoSortParameter { + strategy: self.strategy, + max_bucket_size: self.max_bucket_size, + distance_error_margin: self.distance_error_margin, + }, ) .map(|o| { o.map(|(candidates, point)| RankingRuleOutput { @@ -254,16 +161,3 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { self.cached_sorted_docids.clear(); } } - -/// Compute the antipodal coordinate of `coord` -pub(crate) fn opposite_of(mut coord: [f64; 2]) -> [f64; 2] { - coord[0] *= -1.; - // in the case of x,0 we want to return x,180 - if coord[1] > 0. { - coord[1] -= 180.; - } else { - coord[1] += 180.; - } - - coord -} diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index da5e971af..b5258413e 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -46,14 +46,14 @@ use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache}; use roaring::RoaringBitmap; use sort::Sort; -use self::distinct::facet_string_values; +pub(crate) use self::distinct::{facet_string_values, facet_values_prefix_key}; use self::geo_sort::GeoSort; -pub use self::geo_sort::{Parameter as GeoSortParameter, Strategy as GeoSortStrategy}; use self::graph_based_ranking_rule::Words; use self::interner::Interned; use self::vector_sort::VectorSort; use crate::attribute_patterns::{match_pattern, PatternMatch}; use crate::constants::RESERVED_GEO_FIELD_NAME; +use crate::documents::GeoSortParameter; use crate::index::PrefixSearch; use crate::localized_attributes_rules::LocalizedFieldIds; use crate::score_details::{ScoreDetails, ScoringStrategy}; @@ -319,7 +319,7 @@ fn resolve_negative_phrases( fn get_ranking_rules_for_placeholder_search<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, ) -> Result>> { let mut sort = false; let mut sorted_fields = HashSet::new(); @@ -371,7 +371,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( fn get_ranking_rules_for_vector<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, limit_plus_offset: usize, target: &[f32], embedder_name: &str, @@ -448,7 +448,7 @@ fn get_ranking_rules_for_vector<'ctx>( fn get_ranking_rules_for_query_graph_search<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, terms_matching_strategy: TermsMatchingStrategy, ) -> Result>> { // query graph search @@ -559,7 +559,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( ranking_rules: &mut Vec>, sorted_fields: &mut HashSet, geo_sorted: &mut bool, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, ) -> Result<()> { let sort_criteria = sort_criteria.clone().unwrap_or_default(); ranking_rules.reserve(sort_criteria.len()); @@ -629,7 +629,7 @@ pub fn execute_vector_search( universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, from: usize, length: usize, embedder_name: &str, @@ -692,7 +692,7 @@ pub fn execute_search( mut universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, from: usize, length: usize, words_limit: Option,