diff --git a/.gitignore b/.gitignore index fc24b8306..764447352 100644 --- a/.gitignore +++ b/.gitignore @@ -11,12 +11,18 @@ /bench /_xtask_benchmark.ms /benchmarks +.DS_Store # Snapshots ## ... large *.full.snap -## ... unreviewed +## ... unreviewed *.snap.new +## ... pending +*.pending-snap + +# Tmp files +.tmp* # Database snapshot crates/meilisearch/db.snapshot diff --git a/crates/benchmarks/Cargo.toml b/crates/benchmarks/Cargo.toml index 9dccc444b..68ed5aff4 100644 --- a/crates/benchmarks/Cargo.toml +++ b/crates/benchmarks/Cargo.toml @@ -51,3 +51,8 @@ harness = false [[bench]] name = "indexing" harness = false + +[[bench]] +name = "sort" +harness = false + diff --git a/crates/benchmarks/benches/sort.rs b/crates/benchmarks/benches/sort.rs new file mode 100644 index 000000000..c3e934432 --- /dev/null +++ b/crates/benchmarks/benches/sort.rs @@ -0,0 +1,114 @@ +//! This benchmark module is used to compare the performance of sorting documents in /search VS /documents +//! +//! The tests/benchmarks were designed in the context of a query returning only 20 documents. + +mod datasets_paths; +mod utils; + +use criterion::{criterion_group, criterion_main}; +use milli::update::Settings; +use utils::Conf; + +#[cfg(not(windows))] +#[global_allocator] +static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; + +fn base_conf(builder: &mut Settings) { + let displayed_fields = + ["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"] + .iter() + .map(|s| s.to_string()) + .collect(); + builder.set_displayed_fields(displayed_fields); + + let sortable_fields = + ["_geo", "name", "population", "elevation", "timezone", "modification-date"] + .iter() + .map(|s| s.to_string()) + .collect(); + builder.set_sortable_fields(sortable_fields); +} + +#[rustfmt::skip] +const BASE_CONF: Conf = Conf { + dataset: datasets_paths::SMOL_ALL_COUNTRIES, + dataset_format: "jsonl", + configure: base_conf, + primary_key: Some("geonameid"), + queries: &[""], + offsets: &[ + Some((0, 20)), // The most common query in the real world + Some((0, 500)), // A query that ranges over many documents + Some((980, 20)), // The worst query that could happen in the real world + Some((800_000, 20)) // The worst query + ], + get_documents: true, + ..Conf::BASE +}; + +fn bench_sort(c: &mut criterion::Criterion) { + #[rustfmt::skip] + let confs = &[ + utils::Conf { + group_name: "without sort", + sort: None, + ..BASE_CONF + }, + + utils::Conf { + group_name: "sort on many different values", + sort: Some(vec!["name:asc"]), + ..BASE_CONF + }, + + utils::Conf { + group_name: "sort on many similar values", + sort: Some(vec!["timezone:desc"]), + ..BASE_CONF + }, + + utils::Conf { + group_name: "sort on many similar then different values", + sort: Some(vec!["timezone:desc", "name:asc"]), + ..BASE_CONF + }, + + utils::Conf { + group_name: "sort on many different then similar values", + sort: Some(vec!["timezone:desc", "name:asc"]), + ..BASE_CONF + }, + + utils::Conf { + group_name: "geo sort", + sample_size: Some(10), + sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]), + ..BASE_CONF + }, + + utils::Conf { + group_name: "sort on many similar values then geo sort", + sample_size: Some(50), + sort: Some(vec!["timezone:desc", "_geoPoint(45.4777599, 9.1967508):asc"]), + ..BASE_CONF + }, + + utils::Conf { + group_name: "sort on many different values then geo sort", + sample_size: Some(50), + sort: Some(vec!["name:desc", "_geoPoint(45.4777599, 9.1967508):asc"]), + ..BASE_CONF + }, + + utils::Conf { + group_name: "sort on many fields", + sort: Some(vec!["population:asc", "name:asc", "elevation:asc", "timezone:asc"]), + ..BASE_CONF + }, + ]; + + utils::run_benches(c, confs); +} + +criterion_group!(benches, bench_sort); +criterion_main!(benches); diff --git a/crates/benchmarks/benches/utils.rs b/crates/benchmarks/benches/utils.rs index b12408051..0911d5102 100644 --- a/crates/benchmarks/benches/utils.rs +++ b/crates/benchmarks/benches/utils.rs @@ -9,6 +9,7 @@ use anyhow::Context; use bumpalo::Bump; use criterion::BenchmarkId; use memmap2::Mmap; +use milli::documents::sort::recursive_sort; use milli::heed::EnvOpenOptions; use milli::progress::Progress; use milli::update::new::indexer; @@ -35,6 +36,12 @@ pub struct Conf<'a> { pub configure: fn(&mut Settings), pub filter: Option<&'a str>, pub sort: Option>, + /// set to skip documents (offset, limit) + pub offsets: &'a [Option<(usize, usize)>], + /// enable if you want to bench getting documents without querying + pub get_documents: bool, + /// configure the benchmark sample size + pub sample_size: Option, /// enable or disable the optional words on the query pub optional_words: bool, /// primary key, if there is None we'll auto-generate docids for every documents @@ -52,6 +59,9 @@ impl Conf<'_> { configure: |_| (), filter: None, sort: None, + offsets: &[None], + get_documents: false, + sample_size: None, optional_words: true, primary_key: None, }; @@ -145,25 +155,81 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap(); let name = format!("{}: {}", file_name, conf.group_name); let mut group = c.benchmark_group(&name); + if let Some(sample_size) = conf.sample_size { + group.sample_size(sample_size); + } for &query in conf.queries { - group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| { - b.iter(|| { - let rtxn = index.read_txn().unwrap(); - let mut search = index.search(&rtxn); - search.query(query).terms_matching_strategy(TermsMatchingStrategy::default()); - if let Some(filter) = conf.filter { - let filter = Filter::from_str(filter).unwrap().unwrap(); - search.filter(filter); - } - if let Some(sort) = &conf.sort { - let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect(); - search.sort_criteria(sort); - } - let _ids = search.execute().unwrap(); - }); - }); + for offset in conf.offsets { + let parameter = match (query.is_empty(), offset) { + (true, None) => String::from("placeholder"), + (true, Some((offset, limit))) => format!("placeholder[{offset}:{limit}]"), + (false, None) => query.to_string(), + (false, Some((offset, limit))) => format!("{query}[{offset}:{limit}]"), + }; + group.bench_with_input( + BenchmarkId::from_parameter(parameter), + &query, + |b, &query| { + b.iter(|| { + let rtxn = index.read_txn().unwrap(); + let mut search = index.search(&rtxn); + search + .query(query) + .terms_matching_strategy(TermsMatchingStrategy::default()); + if let Some(filter) = conf.filter { + let filter = Filter::from_str(filter).unwrap().unwrap(); + search.filter(filter); + } + if let Some(sort) = &conf.sort { + let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect(); + search.sort_criteria(sort); + } + if let Some((offset, limit)) = offset { + search.offset(*offset).limit(*limit); + } + + let _ids = search.execute().unwrap(); + }); + }, + ); + } } + + if conf.get_documents { + for offset in conf.offsets { + let parameter = match offset { + None => String::from("get_documents"), + Some((offset, limit)) => format!("get_documents[{offset}:{limit}]"), + }; + group.bench_with_input(BenchmarkId::from_parameter(parameter), &(), |b, &()| { + b.iter(|| { + let rtxn = index.read_txn().unwrap(); + if let Some(sort) = &conf.sort { + let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect(); + let all_docs = index.documents_ids(&rtxn).unwrap(); + let facet_sort = + recursive_sort(&index, &rtxn, sort, &all_docs).unwrap(); + let iter = facet_sort.iter().unwrap(); + if let Some((offset, limit)) = offset { + let _results = iter.skip(*offset).take(*limit).collect::>(); + } else { + let _results = iter.collect::>(); + } + } else { + let all_docs = index.documents_ids(&rtxn).unwrap(); + if let Some((offset, limit)) = offset { + let _results = + all_docs.iter().skip(*offset).take(*limit).collect::>(); + } else { + let _results = all_docs.iter().collect::>(); + } + } + }); + }); + } + } + group.finish(); index.prepare_for_closing().wait(); diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index c57e2d042..bf02ce7ef 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -237,6 +237,7 @@ InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQU MissingDocumentFilter , InvalidRequest , BAD_REQUEST ; MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ; InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ; +InvalidDocumentSort , InvalidRequest , BAD_REQUEST ; InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ; InvalidVectorsType , InvalidRequest , BAD_REQUEST ; @@ -476,7 +477,8 @@ impl ErrorCode for milli::Error { UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct, UserError::SortRankingRuleMissing => Code::InvalidSearchSort, UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets, - UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort, + UserError::InvalidSearchSortableAttribute { .. } => Code::InvalidSearchSort, + UserError::InvalidDocumentSortableAttribute { .. } => Code::InvalidDocumentSort, UserError::InvalidSearchableAttribute { .. } => { Code::InvalidSearchAttributesToSearchOn } @@ -492,7 +494,8 @@ impl ErrorCode for milli::Error { UserError::InvalidVectorsMapType { .. } | UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType, UserError::TooManyVectors(_, _) => Code::TooManyVectors, - UserError::SortError(_) => Code::InvalidSearchSort, + UserError::SortError { search: true, .. } => Code::InvalidSearchSort, + UserError::SortError { search: false, .. } => Code::InvalidDocumentSort, UserError::InvalidMinTypoWordLenSetting(_, _) => { Code::InvalidSettingsTypoTolerance } diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs index a93d736f7..7a914697f 100644 --- a/crates/meilisearch/src/routes/indexes/documents.rs +++ b/crates/meilisearch/src/routes/indexes/documents.rs @@ -1,6 +1,7 @@ use std::collections::HashSet; use std::io::{ErrorKind, Seek as _}; use std::marker::PhantomData; +use std::str::FromStr; use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; @@ -17,9 +18,10 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::milli::documents::sort::recursive_sort; use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; -use meilisearch_types::milli::DocumentId; +use meilisearch_types::milli::{AscDesc, DocumentId}; use meilisearch_types::serde_cs::vec::CS; use meilisearch_types::star_or::OptionStarOrList; use meilisearch_types::tasks::KindWithContent; @@ -42,6 +44,7 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; +use crate::routes::indexes::search::fix_sort_query_parameters; use crate::routes::{ get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, }; @@ -135,6 +138,8 @@ pub struct DocumentsFetchAggregator { per_document_id: bool, // if a filter was used per_filter: bool, + // if documents were sorted + sort: bool, #[serde(rename = "vector.retrieve_vectors")] retrieve_vectors: bool, @@ -153,16 +158,28 @@ pub struct DocumentsFetchAggregator { #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum DocumentFetchKind { - PerDocumentId { retrieve_vectors: bool }, - Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize }, + PerDocumentId { + retrieve_vectors: bool, + sort: bool, + }, + Normal { + with_filter: bool, + limit: usize, + offset: usize, + retrieve_vectors: bool, + sort: bool, + ids: usize, + }, } impl DocumentsFetchAggregator { pub fn from_query(query: &DocumentFetchKind) -> Self { - let (limit, offset, retrieve_vectors) = match query { - DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), - DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => { - (*limit, *offset, *retrieve_vectors) + let (limit, offset, retrieve_vectors, sort) = match query { + DocumentFetchKind::PerDocumentId { retrieve_vectors, sort } => { + (1, 0, *retrieve_vectors, *sort) + } + DocumentFetchKind::Normal { limit, offset, retrieve_vectors, sort, .. } => { + (*limit, *offset, *retrieve_vectors, *sort) } }; @@ -176,6 +193,7 @@ impl DocumentsFetchAggregator { per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), max_limit: limit, max_offset: offset, + sort, retrieve_vectors, max_document_ids: ids, @@ -193,6 +211,7 @@ impl Aggregate for DocumentsFetchAggregator { Box::new(Self { per_document_id: self.per_document_id | new.per_document_id, per_filter: self.per_filter | new.per_filter, + sort: self.sort | new.sort, retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors, max_limit: self.max_limit.max(new.max_limit), max_offset: self.max_offset.max(new.max_offset), @@ -276,6 +295,7 @@ pub async fn get_document( retrieve_vectors: param_retrieve_vectors.0, per_document_id: true, per_filter: false, + sort: false, max_limit: 0, max_offset: 0, max_document_ids: 0, @@ -406,6 +426,8 @@ pub struct BrowseQueryGet { #[param(default, value_type = Option, example = "popularity > 1000")] #[deserr(default, error = DeserrQueryParamError)] filter: Option, + #[deserr(default, error = DeserrQueryParamError)] + sort: Option, } #[derive(Debug, Deserr, ToSchema)] @@ -430,6 +452,9 @@ pub struct BrowseQuery { #[schema(default, value_type = Option, example = "popularity > 1000")] #[deserr(default, error = DeserrJsonError)] filter: Option, + #[schema(default, value_type = Option>, example = json!(["title:asc", "rating:desc"]))] + #[deserr(default, error = DeserrJsonError)] + sort: Option>, } /// Get documents with POST @@ -495,6 +520,7 @@ pub async fn documents_by_query_post( analytics.publish( DocumentsFetchAggregator:: { per_filter: body.filter.is_some(), + sort: body.sort.is_some(), retrieve_vectors: body.retrieve_vectors, max_limit: body.limit, max_offset: body.offset, @@ -571,7 +597,7 @@ pub async fn get_documents( ) -> Result { debug!(parameters = ?params, "Get documents GET"); - let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } = + let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids, sort } = params.into_inner(); let filter = match filter { @@ -582,20 +608,20 @@ pub async fn get_documents( None => None, }; - let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect()); - let query = BrowseQuery { offset: offset.0, limit: limit.0, fields: fields.merge_star_and_none(), retrieve_vectors: retrieve_vectors.0, filter, - ids, + ids: ids.map(|ids| ids.into_iter().map(Into::into).collect()), + sort: sort.map(|attr| fix_sort_query_parameters(&attr)), }; analytics.publish( DocumentsFetchAggregator:: { per_filter: query.filter.is_some(), + sort: query.sort.is_some(), retrieve_vectors: query.retrieve_vectors, max_limit: query.limit, max_offset: query.offset, @@ -615,7 +641,7 @@ fn documents_by_query( query: BrowseQuery, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query; + let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids, sort } = query; let retrieve_vectors = RetrieveVectors::new(retrieve_vectors); @@ -633,6 +659,18 @@ fn documents_by_query( None }; + let sort_criteria = if let Some(sort) = &sort { + let sorts: Vec<_> = match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { + Ok(sorts) => sorts, + Err(asc_desc_error) => { + return Err(milli::SortError::from(asc_desc_error).into_document_error().into()) + } + }; + Some(sorts) + } else { + None + }; + let index = index_scheduler.index(&index_uid)?; let (total, documents) = retrieve_documents( &index, @@ -643,6 +681,7 @@ fn documents_by_query( fields, retrieve_vectors, index_scheduler.features(), + sort_criteria, )?; let ret = PaginationView::new(offset, limit, total as usize, documents); @@ -1494,6 +1533,7 @@ fn retrieve_documents>( attributes_to_retrieve: Option>, retrieve_vectors: RetrieveVectors, features: RoFeatures, + sort_criteria: Option>, ) -> Result<(u64, Vec), ResponseError> { let rtxn = index.read_txn()?; let filter = &filter; @@ -1526,15 +1566,29 @@ fn retrieve_documents>( })? } - let (it, number_of_documents) = { + let facet_sort; + let (it, number_of_documents) = if let Some(sort) = sort_criteria { + let number_of_documents = candidates.len(); + facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?; + let iter = facet_sort.iter()?; + ( + itertools::Either::Left(some_documents( + index, + &rtxn, + iter.map(|d| d.unwrap()).skip(offset).take(limit), + retrieve_vectors, + )?), + number_of_documents, + ) + } else { let number_of_documents = candidates.len(); ( - some_documents( + itertools::Either::Right(some_documents( index, &rtxn, candidates.into_iter().skip(offset).take(limit), retrieve_vectors, - )?, + )?), number_of_documents, ) }; diff --git a/crates/meilisearch/src/search/federated/perform.rs b/crates/meilisearch/src/search/federated/perform.rs index 5ad64d63c..c0fec01e8 100644 --- a/crates/meilisearch/src/search/federated/perform.rs +++ b/crates/meilisearch/src/search/federated/perform.rs @@ -745,10 +745,9 @@ impl SearchByIndex { match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { Ok(sorts) => sorts, Err(asc_desc_error) => { - return Err(milli::Error::from(milli::SortError::from( - asc_desc_error, - )) - .into()) + return Err(milli::SortError::from(asc_desc_error) + .into_search_error() + .into()) } }; Some(sorts) diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 1c987a70c..60e128927 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -1091,7 +1091,7 @@ pub fn prepare_search<'t>( let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() { Ok(sorts) => sorts, Err(asc_desc_error) => { - return Err(milli::Error::from(SortError::from(asc_desc_error)).into()) + return Err(SortError::from(asc_desc_error).into_search_error().into()) } }; diff --git a/crates/meilisearch/tests/common/index.rs b/crates/meilisearch/tests/common/index.rs index e324d2ff5..f1fdeba91 100644 --- a/crates/meilisearch/tests/common/index.rs +++ b/crates/meilisearch/tests/common/index.rs @@ -562,5 +562,7 @@ pub struct GetAllDocumentsOptions { pub offset: Option, #[serde(skip_serializing_if = "Option::is_none")] pub fields: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub sort: Option>, pub retrieve_vectors: bool, } diff --git a/crates/meilisearch/tests/documents/get_documents.rs b/crates/meilisearch/tests/documents/get_documents.rs index 63dc224c2..5ee838232 100644 --- a/crates/meilisearch/tests/documents/get_documents.rs +++ b/crates/meilisearch/tests/documents/get_documents.rs @@ -5,8 +5,8 @@ use urlencoding::encode as urlencode; use crate::common::encoder::Encoder; use crate::common::{ - shared_does_not_exists_index, shared_empty_index, shared_index_with_test_set, - GetAllDocumentsOptions, Server, Value, + shared_does_not_exists_index, shared_empty_index, shared_index_with_geo_documents, + shared_index_with_test_set, GetAllDocumentsOptions, Server, Value, }; use crate::json; @@ -83,6 +83,311 @@ async fn get_document() { ); } +#[actix_rt::test] +async fn get_document_sorted() { + let server = Server::new_shared(); + let index = server.unique_index(); + index.load_test_set().await; + + let (task, _status_code) = + index.update_settings_sortable_attributes(json!(["age", "email", "gender", "name"])).await; + server.wait_task(task.uid()).await.succeeded(); + + let (response, _code) = index + .get_all_documents(GetAllDocumentsOptions { + fields: Some(vec!["id", "age", "email"]), + sort: Some(vec!["age:asc", "email:desc"]), + ..Default::default() + }) + .await; + let results = response["results"].as_array().unwrap(); + snapshot!(json_string!(results), @r#" + [ + { + "id": 5, + "age": 20, + "email": "warrenwatson@chorizon.com" + }, + { + "id": 6, + "age": 20, + "email": "sheliaberry@chorizon.com" + }, + { + "id": 57, + "age": 20, + "email": "kaitlinconner@chorizon.com" + }, + { + "id": 45, + "age": 20, + "email": "irenebennett@chorizon.com" + }, + { + "id": 40, + "age": 21, + "email": "staffordemerson@chorizon.com" + }, + { + "id": 41, + "age": 21, + "email": "salinasgamble@chorizon.com" + }, + { + "id": 63, + "age": 21, + "email": "knowleshebert@chorizon.com" + }, + { + "id": 50, + "age": 21, + "email": "guerramcintyre@chorizon.com" + }, + { + "id": 44, + "age": 22, + "email": "jonispears@chorizon.com" + }, + { + "id": 56, + "age": 23, + "email": "tuckerbarry@chorizon.com" + }, + { + "id": 51, + "age": 23, + "email": "keycervantes@chorizon.com" + }, + { + "id": 60, + "age": 23, + "email": "jodyherrera@chorizon.com" + }, + { + "id": 70, + "age": 23, + "email": "glassperkins@chorizon.com" + }, + { + "id": 75, + "age": 24, + "email": "emmajacobs@chorizon.com" + }, + { + "id": 68, + "age": 24, + "email": "angelinadyer@chorizon.com" + }, + { + "id": 17, + "age": 25, + "email": "ortegabrennan@chorizon.com" + }, + { + "id": 76, + "age": 25, + "email": "claricegardner@chorizon.com" + }, + { + "id": 43, + "age": 25, + "email": "arnoldbender@chorizon.com" + }, + { + "id": 12, + "age": 25, + "email": "aidakirby@chorizon.com" + }, + { + "id": 9, + "age": 26, + "email": "kellimendez@chorizon.com" + } + ] + "#); + + let (response, _code) = index + .get_all_documents(GetAllDocumentsOptions { + fields: Some(vec!["id", "gender", "name"]), + sort: Some(vec!["gender:asc", "name:asc"]), + ..Default::default() + }) + .await; + let results = response["results"].as_array().unwrap(); + snapshot!(json_string!(results), @r#" + [ + { + "id": 3, + "name": "Adeline Flynn", + "gender": "female" + }, + { + "id": 12, + "name": "Aida Kirby", + "gender": "female" + }, + { + "id": 68, + "name": "Angelina Dyer", + "gender": "female" + }, + { + "id": 15, + "name": "Aurelia Contreras", + "gender": "female" + }, + { + "id": 36, + "name": "Barbra Valenzuela", + "gender": "female" + }, + { + "id": 23, + "name": "Blanca Mcclain", + "gender": "female" + }, + { + "id": 53, + "name": "Caitlin Burnett", + "gender": "female" + }, + { + "id": 71, + "name": "Candace Sawyer", + "gender": "female" + }, + { + "id": 65, + "name": "Carole Rowland", + "gender": "female" + }, + { + "id": 33, + "name": "Cecilia Greer", + "gender": "female" + }, + { + "id": 1, + "name": "Cherry Orr", + "gender": "female" + }, + { + "id": 38, + "name": "Christina Short", + "gender": "female" + }, + { + "id": 7, + "name": "Chrystal Boyd", + "gender": "female" + }, + { + "id": 76, + "name": "Clarice Gardner", + "gender": "female" + }, + { + "id": 73, + "name": "Eleanor Shepherd", + "gender": "female" + }, + { + "id": 75, + "name": "Emma Jacobs", + "gender": "female" + }, + { + "id": 16, + "name": "Estella Bass", + "gender": "female" + }, + { + "id": 62, + "name": "Estelle Ramirez", + "gender": "female" + }, + { + "id": 20, + "name": "Florence Long", + "gender": "female" + }, + { + "id": 42, + "name": "Graciela Russell", + "gender": "female" + } + ] + "#); +} + +#[actix_rt::test] +async fn get_document_geosorted() { + let index = shared_index_with_geo_documents().await; + + let (response, _code) = index + .get_all_documents(GetAllDocumentsOptions { + sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]), + ..Default::default() + }) + .await; + let results = response["results"].as_array().unwrap(); + snapshot!(json_string!(results), @r#" + [ + { + "id": 2, + "name": "La Bella Italia", + "address": "456 Elm Street, Townsville", + "type": "Italian", + "rating": 9, + "_geo": { + "lat": "45.4777599", + "lng": "9.1967508" + } + }, + { + "id": 1, + "name": "Taco Truck", + "address": "444 Salsa Street, Burritoville", + "type": "Mexican", + "rating": 9, + "_geo": { + "lat": 34.0522, + "lng": -118.2437 + } + }, + { + "id": 3, + "name": "Crêpe Truck", + "address": "2 Billig Avenue, Rouenville", + "type": "French", + "rating": 10 + } + ] + "#); +} + +#[actix_rt::test] +async fn get_document_sort_the_unsortable() { + let index = shared_index_with_test_set().await; + + let (response, _code) = index + .get_all_documents(GetAllDocumentsOptions { + fields: Some(vec!["id", "name"]), + sort: Some(vec!["name:asc"]), + ..Default::default() + }) + .await; + + snapshot!(json_string!(response), @r#" + { + "message": "Attribute `name` is not sortable. This index does not have configured sortable attributes.", + "code": "invalid_document_sort", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_sort" + } + "#); +} + #[actix_rt::test] async fn error_get_unexisting_index_all_documents() { let index = shared_does_not_exists_index().await; diff --git a/crates/meilisearch/tests/vector/settings.rs b/crates/meilisearch/tests/vector/settings.rs index 50253f930..d26174faf 100644 --- a/crates/meilisearch/tests/vector/settings.rs +++ b/crates/meilisearch/tests/vector/settings.rs @@ -101,14 +101,7 @@ async fn reset_embedder_documents() { server.wait_task(response.uid()).await; // Make sure the documents are still present - let (documents, _code) = index - .get_all_documents(GetAllDocumentsOptions { - limit: None, - offset: None, - retrieve_vectors: false, - fields: None, - }) - .await; + let (documents, _code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; snapshot!(json_string!(documents), @r###" { "results": [ diff --git a/crates/milli/src/asc_desc.rs b/crates/milli/src/asc_desc.rs index e75adf83d..d7288faa3 100644 --- a/crates/milli/src/asc_desc.rs +++ b/crates/milli/src/asc_desc.rs @@ -168,6 +168,16 @@ pub enum SortError { ReservedNameForFilter { name: String }, } +impl SortError { + pub fn into_search_error(self) -> Error { + Error::UserError(UserError::SortError { error: self, search: true }) + } + + pub fn into_document_error(self) -> Error { + Error::UserError(UserError::SortError { error: self, search: false }) + } +} + impl From for SortError { fn from(error: AscDescError) -> Self { match error { @@ -190,12 +200,6 @@ impl From for SortError { } } -impl From for Error { - fn from(error: SortError) -> Self { - Self::UserError(UserError::SortError(error)) - } -} - #[cfg(test)] mod tests { use big_s::S; diff --git a/crates/milli/src/documents/geo_sort.rs b/crates/milli/src/documents/geo_sort.rs new file mode 100644 index 000000000..0750dfe5c --- /dev/null +++ b/crates/milli/src/documents/geo_sort.rs @@ -0,0 +1,294 @@ +use crate::{ + distance_between_two_points, + heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec}, + lat_lng_to_xyz, + search::new::{facet_string_values, facet_values_prefix_key}, + GeoPoint, Index, +}; +use heed::{ + types::{Bytes, Unit}, + RoPrefix, RoTxn, +}; +use roaring::RoaringBitmap; +use rstar::RTree; +use std::collections::VecDeque; + +#[derive(Debug, Clone, Copy)] +pub struct GeoSortParameter { + // Define the strategy used by the geo sort + pub strategy: GeoSortStrategy, + // Limit the number of docs in a single bucket to avoid unexpectedly large overhead + pub max_bucket_size: u64, + // Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal + pub distance_error_margin: f64, +} + +impl Default for GeoSortParameter { + fn default() -> Self { + Self { + strategy: GeoSortStrategy::default(), + max_bucket_size: 1000, + distance_error_margin: 1.0, + } + } +} +/// Define the strategy used by the geo sort. +/// The parameter represents the cache size, and, in the case of the Dynamic strategy, +/// the point where we move from using the iterative strategy to the rtree. +#[derive(Debug, Clone, Copy)] +pub enum GeoSortStrategy { + AlwaysIterative(usize), + AlwaysRtree(usize), + Dynamic(usize), +} + +impl Default for GeoSortStrategy { + fn default() -> Self { + GeoSortStrategy::Dynamic(1000) + } +} + +impl GeoSortStrategy { + pub fn use_rtree(&self, candidates: usize) -> bool { + match self { + GeoSortStrategy::AlwaysIterative(_) => false, + GeoSortStrategy::AlwaysRtree(_) => true, + GeoSortStrategy::Dynamic(i) => candidates >= *i, + } + } + + pub fn cache_size(&self) -> usize { + match self { + GeoSortStrategy::AlwaysIterative(i) + | GeoSortStrategy::AlwaysRtree(i) + | GeoSortStrategy::Dynamic(i) => *i, + } + } +} + +#[allow(clippy::too_many_arguments)] +pub fn fill_cache( + index: &Index, + txn: &RoTxn, + strategy: GeoSortStrategy, + ascending: bool, + target_point: [f64; 2], + field_ids: &Option<[u16; 2]>, + rtree: &mut Option>, + geo_candidates: &RoaringBitmap, + cached_sorted_docids: &mut VecDeque<(u32, [f64; 2])>, +) -> crate::Result<()> { + debug_assert!(cached_sorted_docids.is_empty()); + + // lazily initialize the rtree if needed by the strategy, and cache it in `self.rtree` + let rtree = if strategy.use_rtree(geo_candidates.len() as usize) { + if let Some(rtree) = rtree.as_ref() { + // get rtree from cache + Some(rtree) + } else { + let rtree2 = index.geo_rtree(txn)?.expect("geo candidates but no rtree"); + // insert rtree in cache and returns it. + // Can't use `get_or_insert_with` because getting the rtree from the DB is a fallible operation. + Some(&*rtree.insert(rtree2)) + } + } else { + None + }; + + let cache_size = strategy.cache_size(); + if let Some(rtree) = rtree { + if ascending { + let point = lat_lng_to_xyz(&target_point); + for point in rtree.nearest_neighbor_iter(&point) { + if geo_candidates.contains(point.data.0) { + cached_sorted_docids.push_back(point.data); + if cached_sorted_docids.len() >= cache_size { + break; + } + } + } + } else { + // in the case of the desc geo sort we look for the closest point to the opposite of the queried point + // and we insert the points in reverse order they get reversed when emptying the cache later on + let point = lat_lng_to_xyz(&opposite_of(target_point)); + for point in rtree.nearest_neighbor_iter(&point) { + if geo_candidates.contains(point.data.0) { + cached_sorted_docids.push_front(point.data); + if cached_sorted_docids.len() >= cache_size { + break; + } + } + } + } + } else { + // the iterative version + let [lat, lng] = field_ids.expect("fill_buffer can't be called without the lat&lng"); + + let mut documents = geo_candidates + .iter() + .map(|id| -> crate::Result<_> { Ok((id, geo_value(id, lat, lng, index, txn)?)) }) + .collect::>>()?; + // computing the distance between two points is expensive thus we cache the result + documents + .sort_by_cached_key(|(_, p)| distance_between_two_points(&target_point, p) as usize); + cached_sorted_docids.extend(documents); + }; + + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +pub fn next_bucket( + index: &Index, + txn: &RoTxn, + universe: &RoaringBitmap, + ascending: bool, + target_point: [f64; 2], + field_ids: &Option<[u16; 2]>, + rtree: &mut Option>, + cached_sorted_docids: &mut VecDeque<(u32, [f64; 2])>, + geo_candidates: &RoaringBitmap, + parameter: GeoSortParameter, +) -> crate::Result)>> { + let mut geo_candidates = geo_candidates & universe; + + if geo_candidates.is_empty() { + return Ok(Some((universe.clone(), None))); + } + + let next = |cache: &mut VecDeque<_>| { + if ascending { + cache.pop_front() + } else { + cache.pop_back() + } + }; + let put_back = |cache: &mut VecDeque<_>, x: _| { + if ascending { + cache.push_front(x) + } else { + cache.push_back(x) + } + }; + + let mut current_bucket = RoaringBitmap::new(); + // current_distance stores the first point and distance in current bucket + let mut current_distance: Option<([f64; 2], f64)> = None; + loop { + // The loop will only exit when we have found all points with equal distance or have exhausted the candidates. + if let Some((id, point)) = next(cached_sorted_docids) { + if geo_candidates.contains(id) { + let distance = distance_between_two_points(&target_point, &point); + if let Some((point0, bucket_distance)) = current_distance.as_ref() { + if (bucket_distance - distance).abs() > parameter.distance_error_margin { + // different distance, point belongs to next bucket + put_back(cached_sorted_docids, (id, point)); + return Ok(Some((current_bucket, Some(point0.to_owned())))); + } else { + // same distance, point belongs to current bucket + current_bucket.insert(id); + // remove from candidates to prevent it from being added to the cache again + geo_candidates.remove(id); + // current bucket size reaches limit, force return + if current_bucket.len() == parameter.max_bucket_size { + return Ok(Some((current_bucket, Some(point0.to_owned())))); + } + } + } else { + // first doc in current bucket + current_distance = Some((point, distance)); + current_bucket.insert(id); + geo_candidates.remove(id); + // current bucket size reaches limit, force return + if current_bucket.len() == parameter.max_bucket_size { + return Ok(Some((current_bucket, Some(point.to_owned())))); + } + } + } + } else { + // cache exhausted, we need to refill it + fill_cache( + index, + txn, + parameter.strategy, + ascending, + target_point, + field_ids, + rtree, + &geo_candidates, + cached_sorted_docids, + )?; + + if cached_sorted_docids.is_empty() { + // candidates exhausted, exit + if let Some((point0, _)) = current_distance.as_ref() { + return Ok(Some((current_bucket, Some(point0.to_owned())))); + } else { + return Ok(Some((universe.clone(), None))); + } + } + } + } +} + +/// Return an iterator over each number value in the given field of the given document. +fn facet_number_values<'a>( + docid: u32, + field_id: u16, + index: &Index, + txn: &'a RoTxn<'a>, +) -> crate::Result, Unit>> { + let key = facet_values_prefix_key(field_id, docid); + + let iter = index + .field_id_docid_facet_f64s + .remap_key_type::() + .prefix_iter(txn, &key)? + .remap_key_type(); + + Ok(iter) +} + +/// Extracts the lat and long values from a single document. +/// +/// If it is not able to find it in the facet number index it will extract it +/// from the facet string index and parse it as f64 (as the geo extraction behaves). +pub(crate) fn geo_value( + docid: u32, + field_lat: u16, + field_lng: u16, + index: &Index, + rtxn: &RoTxn<'_>, +) -> crate::Result<[f64; 2]> { + let extract_geo = |geo_field: u16| -> crate::Result { + match facet_number_values(docid, geo_field, index, rtxn)?.next() { + Some(Ok(((_, _, geo), ()))) => Ok(geo), + Some(Err(e)) => Err(e.into()), + None => match facet_string_values(docid, geo_field, index, rtxn)?.next() { + Some(Ok((_, geo))) => { + Ok(geo.parse::().expect("cannot parse geo field as f64")) + } + Some(Err(e)) => Err(e.into()), + None => panic!("A geo faceted document doesn't contain any lat or lng"), + }, + } + }; + + let lat = extract_geo(field_lat)?; + let lng = extract_geo(field_lng)?; + + Ok([lat, lng]) +} + +/// Compute the antipodal coordinate of `coord` +pub(crate) fn opposite_of(mut coord: [f64; 2]) -> [f64; 2] { + coord[0] *= -1.; + // in the case of x,0 we want to return x,180 + if coord[1] > 0. { + coord[1] -= 180.; + } else { + coord[1] += 180.; + } + + coord +} diff --git a/crates/milli/src/documents/mod.rs b/crates/milli/src/documents/mod.rs index f43f7e842..7a4babfa8 100644 --- a/crates/milli/src/documents/mod.rs +++ b/crates/milli/src/documents/mod.rs @@ -1,8 +1,10 @@ mod builder; mod enriched; +pub mod geo_sort; mod primary_key; mod reader; mod serde_impl; +pub mod sort; use std::fmt::Debug; use std::io; @@ -19,6 +21,7 @@ pub use primary_key::{ pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader}; use serde::{Deserialize, Serialize}; +pub use self::geo_sort::{GeoSortParameter, GeoSortStrategy}; use crate::error::{FieldIdMapMissingEntry, InternalError}; use crate::{FieldId, Object, Result}; diff --git a/crates/milli/src/documents/sort.rs b/crates/milli/src/documents/sort.rs new file mode 100644 index 000000000..59858caad --- /dev/null +++ b/crates/milli/src/documents/sort.rs @@ -0,0 +1,430 @@ +use std::collections::{BTreeSet, VecDeque}; + +use crate::{ + constants::RESERVED_GEO_FIELD_NAME, + documents::{geo_sort::next_bucket, GeoSortParameter}, + heed_codec::{ + facet::{FacetGroupKeyCodec, FacetGroupValueCodec}, + BytesRefCodec, + }, + is_faceted, + search::facet::{ascending_facet_sort, descending_facet_sort}, + AscDesc, DocumentId, Member, UserError, +}; +use heed::Database; +use roaring::RoaringBitmap; + +#[derive(Debug, Clone, Copy)] +enum AscDescId { + Facet { field_id: u16, ascending: bool }, + Geo { field_ids: [u16; 2], target_point: [f64; 2], ascending: bool }, +} + +/// A [`SortedDocumentsIterator`] allows efficient access to a continuous range of sorted documents. +/// This is ideal in the context of paginated queries in which only a small number of documents are needed at a time. +/// Search operations will only be performed upon access. +pub enum SortedDocumentsIterator<'ctx> { + Leaf { + /// The exact number of documents remaining + size: usize, + values: Box + 'ctx>, + }, + Branch { + /// The current child, got from the children iterator + current_child: Option>>, + /// The exact number of documents remaining, excluding documents in the current child + next_children_size: usize, + /// Iterators to become the current child once it is exhausted + next_children: + Box>> + 'ctx>, + }, +} + +impl SortedDocumentsIterator<'_> { + /// Takes care of updating the current child if it is `None`, and also updates the size + fn update_current<'ctx>( + current_child: &mut Option>>, + next_children_size: &mut usize, + next_children: &mut Box< + dyn Iterator>> + 'ctx, + >, + ) -> crate::Result<()> { + if current_child.is_none() { + *current_child = match next_children.next() { + Some(Ok(builder)) => { + let next_child = Box::new(builder.build()?); + *next_children_size -= next_child.size_hint().0; + Some(next_child) + } + Some(Err(e)) => return Err(e), + None => return Ok(()), + }; + } + Ok(()) + } +} + +impl Iterator for SortedDocumentsIterator<'_> { + type Item = crate::Result; + + /// Implementing the `nth` method allows for efficient access to the nth document in the sorted order. + /// It's used by `skip` internally. + /// The default implementation of `nth` would iterate over all children, which is inefficient for large datasets. + /// This implementation will jump over whole chunks of children until it gets close. + fn nth(&mut self, n: usize) -> Option { + // If it's at the leaf level, just forward the call to the values iterator + let (current_child, next_children, next_children_size) = match self { + SortedDocumentsIterator::Leaf { values, size } => { + *size = size.saturating_sub(n); + return values.nth(n).map(Ok); + } + SortedDocumentsIterator::Branch { + current_child, + next_children, + next_children_size, + } => (current_child, next_children, next_children_size), + }; + + // Otherwise don't directly iterate over children, skip them if we know we will go further + let mut to_skip = n - 1; + while to_skip > 0 { + if let Err(e) = SortedDocumentsIterator::update_current( + current_child, + next_children_size, + next_children, + ) { + return Some(Err(e)); + } + let Some(inner) = current_child else { + return None; // No more inner iterators, everything has been consumed. + }; + + if to_skip >= inner.size_hint().0 { + // The current child isn't large enough to contain the nth element. + // Skip it and continue with the next one. + to_skip -= inner.size_hint().0; + *current_child = None; + continue; + } else { + // The current iterator is large enough, so we can forward the call to it. + return inner.nth(to_skip + 1); + } + } + + self.next() + } + + /// Iterators need to keep track of their size so that they can be skipped efficiently by the `nth` method. + fn size_hint(&self) -> (usize, Option) { + let size = match self { + SortedDocumentsIterator::Leaf { size, .. } => *size, + SortedDocumentsIterator::Branch { + next_children_size, + current_child: Some(current_child), + .. + } => current_child.size_hint().0 + next_children_size, + SortedDocumentsIterator::Branch { next_children_size, current_child: None, .. } => { + *next_children_size + } + }; + + (size, Some(size)) + } + + fn next(&mut self) -> Option { + match self { + SortedDocumentsIterator::Leaf { values, size } => { + let result = values.next().map(Ok); + if result.is_some() { + *size -= 1; + } + result + } + SortedDocumentsIterator::Branch { + current_child, + next_children_size, + next_children, + } => { + let mut result = None; + while result.is_none() { + // Ensure we have selected an iterator to work with + if let Err(e) = SortedDocumentsIterator::update_current( + current_child, + next_children_size, + next_children, + ) { + return Some(Err(e)); + } + let Some(inner) = current_child else { + return None; + }; + + result = inner.next(); + + // If the current iterator is exhausted, we need to try the next one + if result.is_none() { + *current_child = None; + } + } + result + } + } + } +} + +/// Builder for a [`SortedDocumentsIterator`]. +/// Most builders won't ever be built, because pagination will skip them. +pub struct SortedDocumentsIteratorBuilder<'ctx> { + index: &'ctx crate::Index, + rtxn: &'ctx heed::RoTxn<'ctx>, + number_db: Database, FacetGroupValueCodec>, + string_db: Database, FacetGroupValueCodec>, + fields: &'ctx [AscDescId], + candidates: RoaringBitmap, + geo_candidates: &'ctx RoaringBitmap, +} + +impl<'ctx> SortedDocumentsIteratorBuilder<'ctx> { + /// Performs the sort and builds a [`SortedDocumentsIterator`]. + fn build(self) -> crate::Result> { + let size = self.candidates.len() as usize; + + // There is no point sorting a 1-element array + if size <= 1 { + return Ok(SortedDocumentsIterator::Leaf { + size, + values: Box::new(self.candidates.into_iter()), + }); + } + + match self.fields.first().copied() { + Some(AscDescId::Facet { field_id, ascending }) => self.build_facet(field_id, ascending), + Some(AscDescId::Geo { field_ids, target_point, ascending }) => { + self.build_geo(field_ids, target_point, ascending) + } + None => Ok(SortedDocumentsIterator::Leaf { + size, + values: Box::new(self.candidates.into_iter()), + }), + } + } + + /// Builds a [`SortedDocumentsIterator`] based on the results of a facet sort. + fn build_facet( + self, + field_id: u16, + ascending: bool, + ) -> crate::Result> { + let SortedDocumentsIteratorBuilder { + index, + rtxn, + number_db, + string_db, + fields, + candidates, + geo_candidates, + } = self; + let size = candidates.len() as usize; + + // Perform the sort on the first field + let (number_iter, string_iter) = if ascending { + let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?; + let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?; + + (itertools::Either::Left(number_iter), itertools::Either::Left(string_iter)) + } else { + let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?; + let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?; + + (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter)) + }; + + // Create builders for the next level of the tree + let number_iter = number_iter.map(|r| r.map(|(d, _)| d)); + let string_iter = string_iter.map(|r| r.map(|(d, _)| d)); + let next_children = number_iter.chain(string_iter).map(move |r| { + Ok(SortedDocumentsIteratorBuilder { + index, + rtxn, + number_db, + string_db, + fields: &fields[1..], + candidates: r?, + geo_candidates, + }) + }); + + Ok(SortedDocumentsIterator::Branch { + current_child: None, + next_children_size: size, + next_children: Box::new(next_children), + }) + } + + /// Builds a [`SortedDocumentsIterator`] based on the (lazy) results of a geo sort. + fn build_geo( + self, + field_ids: [u16; 2], + target_point: [f64; 2], + ascending: bool, + ) -> crate::Result> { + let SortedDocumentsIteratorBuilder { + index, + rtxn, + number_db, + string_db, + fields, + candidates, + geo_candidates, + } = self; + + let mut cache = VecDeque::new(); + let mut rtree = None; + let size = candidates.len() as usize; + let not_geo_candidates = candidates.clone() - geo_candidates; + let mut geo_remaining = size - not_geo_candidates.len() as usize; + let mut not_geo_candidates = Some(not_geo_candidates); + + let next_children = std::iter::from_fn(move || { + // Find the next bucket of geo-sorted documents. + // next_bucket loops and will go back to the beginning so we use a variable to track how many are left. + if geo_remaining > 0 { + if let Ok(Some((docids, _point))) = next_bucket( + index, + rtxn, + &candidates, + ascending, + target_point, + &Some(field_ids), + &mut rtree, + &mut cache, + geo_candidates, + GeoSortParameter::default(), + ) { + geo_remaining -= docids.len() as usize; + return Some(Ok(SortedDocumentsIteratorBuilder { + index, + rtxn, + number_db, + string_db, + fields: &fields[1..], + candidates: docids, + geo_candidates, + })); + } + } + + // Once all geo candidates have been processed, we can return the others + if let Some(not_geo_candidates) = not_geo_candidates.take() { + if !not_geo_candidates.is_empty() { + return Some(Ok(SortedDocumentsIteratorBuilder { + index, + rtxn, + number_db, + string_db, + fields: &fields[1..], + candidates: not_geo_candidates, + geo_candidates, + })); + } + } + + None + }); + + Ok(SortedDocumentsIterator::Branch { + current_child: None, + next_children_size: size, + next_children: Box::new(next_children), + }) + } +} + +/// A structure owning the data needed during the lifetime of a [`SortedDocumentsIterator`]. +pub struct SortedDocuments<'ctx> { + index: &'ctx crate::Index, + rtxn: &'ctx heed::RoTxn<'ctx>, + fields: Vec, + number_db: Database, FacetGroupValueCodec>, + string_db: Database, FacetGroupValueCodec>, + candidates: &'ctx RoaringBitmap, + geo_candidates: RoaringBitmap, +} + +impl<'ctx> SortedDocuments<'ctx> { + pub fn iter(&'ctx self) -> crate::Result> { + let builder = SortedDocumentsIteratorBuilder { + index: self.index, + rtxn: self.rtxn, + number_db: self.number_db, + string_db: self.string_db, + fields: &self.fields, + candidates: self.candidates.clone(), + geo_candidates: &self.geo_candidates, + }; + builder.build() + } +} + +pub fn recursive_sort<'ctx>( + index: &'ctx crate::Index, + rtxn: &'ctx heed::RoTxn<'ctx>, + sort: Vec, + candidates: &'ctx RoaringBitmap, +) -> crate::Result> { + let sortable_fields: BTreeSet<_> = index.sortable_fields(rtxn)?.into_iter().collect(); + let fields_ids_map = index.fields_ids_map(rtxn)?; + + // Retrieve the field ids that are used for sorting + let mut fields = Vec::new(); + let mut need_geo_candidates = false; + for asc_desc in sort { + let (field, geofield) = match asc_desc { + AscDesc::Asc(Member::Field(field)) => (Some((field, true)), None), + AscDesc::Desc(Member::Field(field)) => (Some((field, false)), None), + AscDesc::Asc(Member::Geo(target_point)) => (None, Some((target_point, true))), + AscDesc::Desc(Member::Geo(target_point)) => (None, Some((target_point, false))), + }; + if let Some((field, ascending)) = field { + if is_faceted(&field, &sortable_fields) { + if let Some(field_id) = fields_ids_map.id(&field) { + fields.push(AscDescId::Facet { field_id, ascending }); + continue; + } + } + return Err(UserError::InvalidDocumentSortableAttribute { + field: field.to_string(), + sortable_fields: sortable_fields.clone(), + } + .into()); + } + if let Some((target_point, ascending)) = geofield { + if sortable_fields.contains(RESERVED_GEO_FIELD_NAME) { + if let (Some(lat), Some(lng)) = + (fields_ids_map.id("_geo.lat"), fields_ids_map.id("_geo.lng")) + { + need_geo_candidates = true; + fields.push(AscDescId::Geo { field_ids: [lat, lng], target_point, ascending }); + continue; + } + } + return Err(UserError::InvalidDocumentSortableAttribute { + field: RESERVED_GEO_FIELD_NAME.to_string(), + sortable_fields: sortable_fields.clone(), + } + .into()); + } + } + + let geo_candidates = if need_geo_candidates { + index.geo_faceted_documents_ids(rtxn)? + } else { + RoaringBitmap::new() + }; + + let number_db = index.facet_id_f64_docids.remap_key_type::>(); + let string_db = + index.facet_id_string_docids.remap_key_type::>(); + + Ok(SortedDocuments { index, rtxn, fields, number_db, string_db, candidates, geo_candidates }) +} diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index f8886da8e..9ad9d0511 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -191,7 +191,21 @@ and can not be more than 511 bytes.", .document_id.to_string() ), } )] - InvalidSortableAttribute { field: String, valid_fields: BTreeSet, hidden_fields: bool }, + InvalidSearchSortableAttribute { + field: String, + valid_fields: BTreeSet, + hidden_fields: bool, + }, + #[error("Attribute `{}` is not sortable. {}", + .field, + match .sortable_fields.is_empty() { + true => "This index does not have configured sortable attributes.".to_string(), + false => format!("Available sortable attributes are: `{}`.", + sortable_fields.iter().map(AsRef::as_ref).collect::>().join(", ") + ), + } + )] + InvalidDocumentSortableAttribute { field: String, sortable_fields: BTreeSet }, #[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}", .field, match (.valid_patterns.is_empty(), .matching_rule_index) { @@ -272,8 +286,8 @@ and can not be more than 511 bytes.", .document_id.to_string() PrimaryKeyCannotBeChanged(String), #[error(transparent)] SerdeJson(serde_json::Error), - #[error(transparent)] - SortError(#[from] SortError), + #[error("{error}")] + SortError { error: SortError, search: bool }, #[error("An unknown internal document id have been used: `{document_id}`.")] UnknownInternalDocumentId { document_id: DocumentId }, #[error("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {0}` and twoTypos: {1}`.")] @@ -616,7 +630,7 @@ fn conditionally_lookup_for_error_message() { ]; for (list, suffix) in messages { - let err = UserError::InvalidSortableAttribute { + let err = UserError::InvalidSearchSortableAttribute { field: "name".to_string(), valid_fields: list, hidden_fields: false, diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index 504b4c68d..6fdae86b3 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -43,12 +43,13 @@ use std::fmt; use std::hash::BuildHasherDefault; use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; +pub use documents::GeoSortStrategy; pub use filter_parser::{Condition, FilterCondition, Span, Token}; use fxhash::{FxHasher32, FxHasher64}; pub use grenad::CompressionType; pub use search::new::{ - execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, SearchContext, - SearchLogger, VisualSearchLogger, + execute_search, filtered_universe, DefaultSearchLogger, SearchContext, SearchLogger, + VisualSearchLogger, }; use serde_json::Value; pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder}; diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index 97d542524..0165ec7da 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -9,6 +9,7 @@ use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats}; +use crate::documents::GeoSortParameter; use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::index::MatchingStrategy; use crate::score_details::{ScoreDetails, ScoringStrategy}; @@ -47,7 +48,7 @@ pub struct Search<'a> { sort_criteria: Option>, distinct: Option, searchable_attributes: Option<&'a [String]>, - geo_param: new::GeoSortParameter, + geo_param: GeoSortParameter, terms_matching_strategy: TermsMatchingStrategy, scoring_strategy: ScoringStrategy, words_limit: usize, @@ -70,7 +71,7 @@ impl<'a> Search<'a> { sort_criteria: None, distinct: None, searchable_attributes: None, - geo_param: new::GeoSortParameter::default(), + geo_param: GeoSortParameter::default(), terms_matching_strategy: TermsMatchingStrategy::default(), scoring_strategy: Default::default(), exhaustive_number_hits: false, @@ -147,7 +148,7 @@ impl<'a> Search<'a> { } #[cfg(test)] - pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> { + pub fn geo_sort_strategy(&mut self, strategy: crate::GeoSortStrategy) -> &mut Search<'a> { self.geo_param.strategy = strategy; self } diff --git a/crates/milli/src/search/new/distinct.rs b/crates/milli/src/search/new/distinct.rs index 36172302a..455b495f5 100644 --- a/crates/milli/src/search/new/distinct.rs +++ b/crates/milli/src/search/new/distinct.rs @@ -82,7 +82,7 @@ fn facet_value_docids( } /// Return an iterator over each number value in the given field of the given document. -fn facet_number_values<'a>( +pub(crate) fn facet_number_values<'a>( docid: u32, field_id: u16, index: &Index, @@ -118,7 +118,7 @@ pub fn facet_string_values<'a>( } #[allow(clippy::drop_non_drop)] -fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] { +pub(crate) fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] { concat_arrays::concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes()) } diff --git a/crates/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs index 3e7fe3458..6c7d7b03b 100644 --- a/crates/milli/src/search/new/geo_sort.rs +++ b/crates/milli/src/search/new/geo_sort.rs @@ -1,96 +1,18 @@ use std::collections::VecDeque; -use heed::types::{Bytes, Unit}; -use heed::{RoPrefix, RoTxn}; use roaring::RoaringBitmap; use rstar::RTree; -use super::facet_string_values; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; -use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec}; +use crate::documents::geo_sort::{fill_cache, next_bucket}; +use crate::documents::{GeoSortParameter, GeoSortStrategy}; use crate::score_details::{self, ScoreDetails}; -use crate::{ - distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext, - SearchLogger, -}; - -const FID_SIZE: usize = 2; -const DOCID_SIZE: usize = 4; - -#[allow(clippy::drop_non_drop)] -fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] { - concat_arrays::concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes()) -} - -/// Return an iterator over each number value in the given field of the given document. -fn facet_number_values<'a>( - docid: u32, - field_id: u16, - index: &Index, - txn: &'a RoTxn<'a>, -) -> Result, Unit>> { - let key = facet_values_prefix_key(field_id, docid); - - let iter = index - .field_id_docid_facet_f64s - .remap_key_type::() - .prefix_iter(txn, &key)? - .remap_key_type(); - - Ok(iter) -} - -#[derive(Debug, Clone, Copy)] -pub struct Parameter { - // Define the strategy used by the geo sort - pub strategy: Strategy, - // Limit the number of docs in a single bucket to avoid unexpectedly large overhead - pub max_bucket_size: u64, - // Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal - pub distance_error_margin: f64, -} - -impl Default for Parameter { - fn default() -> Self { - Self { strategy: Strategy::default(), max_bucket_size: 1000, distance_error_margin: 1.0 } - } -} -/// Define the strategy used by the geo sort. -/// The parameter represents the cache size, and, in the case of the Dynamic strategy, -/// the point where we move from using the iterative strategy to the rtree. -#[derive(Debug, Clone, Copy)] -pub enum Strategy { - AlwaysIterative(usize), - AlwaysRtree(usize), - Dynamic(usize), -} - -impl Default for Strategy { - fn default() -> Self { - Strategy::Dynamic(1000) - } -} - -impl Strategy { - pub fn use_rtree(&self, candidates: usize) -> bool { - match self { - Strategy::AlwaysIterative(_) => false, - Strategy::AlwaysRtree(_) => true, - Strategy::Dynamic(i) => candidates >= *i, - } - } - - pub fn cache_size(&self) -> usize { - match self { - Strategy::AlwaysIterative(i) | Strategy::AlwaysRtree(i) | Strategy::Dynamic(i) => *i, - } - } -} +use crate::{GeoPoint, Result, SearchContext, SearchLogger}; pub struct GeoSort { query: Option, - strategy: Strategy, + strategy: GeoSortStrategy, ascending: bool, point: [f64; 2], field_ids: Option<[u16; 2]>, @@ -107,12 +29,12 @@ pub struct GeoSort { impl GeoSort { pub fn new( - parameter: Parameter, + parameter: GeoSortParameter, geo_faceted_docids: RoaringBitmap, point: [f64; 2], ascending: bool, ) -> Result { - let Parameter { strategy, max_bucket_size, distance_error_margin } = parameter; + let GeoSortParameter { strategy, max_bucket_size, distance_error_margin } = parameter; Ok(Self { query: None, strategy, @@ -134,98 +56,22 @@ impl GeoSort { ctx: &mut SearchContext<'_>, geo_candidates: &RoaringBitmap, ) -> Result<()> { - debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng"); - debug_assert!(self.cached_sorted_docids.is_empty()); - - // lazily initialize the rtree if needed by the strategy, and cache it in `self.rtree` - let rtree = if self.strategy.use_rtree(geo_candidates.len() as usize) { - if let Some(rtree) = self.rtree.as_ref() { - // get rtree from cache - Some(rtree) - } else { - let rtree = ctx.index.geo_rtree(ctx.txn)?.expect("geo candidates but no rtree"); - // insert rtree in cache and returns it. - // Can't use `get_or_insert_with` because getting the rtree from the DB is a fallible operation. - Some(&*self.rtree.insert(rtree)) - } - } else { - None - }; - - let cache_size = self.strategy.cache_size(); - if let Some(rtree) = rtree { - if self.ascending { - let point = lat_lng_to_xyz(&self.point); - for point in rtree.nearest_neighbor_iter(&point) { - if geo_candidates.contains(point.data.0) { - self.cached_sorted_docids.push_back(point.data); - if self.cached_sorted_docids.len() >= cache_size { - break; - } - } - } - } else { - // in the case of the desc geo sort we look for the closest point to the opposite of the queried point - // and we insert the points in reverse order they get reversed when emptying the cache later on - let point = lat_lng_to_xyz(&opposite_of(self.point)); - for point in rtree.nearest_neighbor_iter(&point) { - if geo_candidates.contains(point.data.0) { - self.cached_sorted_docids.push_front(point.data); - if self.cached_sorted_docids.len() >= cache_size { - break; - } - } - } - } - } else { - // the iterative version - let [lat, lng] = self.field_ids.unwrap(); - - let mut documents = geo_candidates - .iter() - .map(|id| -> Result<_> { Ok((id, geo_value(id, lat, lng, ctx.index, ctx.txn)?)) }) - .collect::>>()?; - // computing the distance between two points is expensive thus we cache the result - documents - .sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize); - self.cached_sorted_docids.extend(documents); - }; + fill_cache( + ctx.index, + ctx.txn, + self.strategy, + self.ascending, + self.point, + &self.field_ids, + &mut self.rtree, + geo_candidates, + &mut self.cached_sorted_docids, + )?; Ok(()) } } -/// Extracts the lat and long values from a single document. -/// -/// If it is not able to find it in the facet number index it will extract it -/// from the facet string index and parse it as f64 (as the geo extraction behaves). -fn geo_value( - docid: u32, - field_lat: u16, - field_lng: u16, - index: &Index, - rtxn: &RoTxn<'_>, -) -> Result<[f64; 2]> { - let extract_geo = |geo_field: u16| -> Result { - match facet_number_values(docid, geo_field, index, rtxn)?.next() { - Some(Ok(((_, _, geo), ()))) => Ok(geo), - Some(Err(e)) => Err(e.into()), - None => match facet_string_values(docid, geo_field, index, rtxn)?.next() { - Some(Ok((_, geo))) => { - Ok(geo.parse::().expect("cannot parse geo field as f64")) - } - Some(Err(e)) => Err(e.into()), - None => panic!("A geo faceted document doesn't contain any lat or lng"), - }, - } - }; - - let lat = extract_geo(field_lat)?; - let lng = extract_geo(field_lng)?; - - Ok([lat, lng]) -} - impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { fn id(&self) -> String { "geo_sort".to_owned() @@ -267,124 +113,33 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { ) -> Result>> { let query = self.query.as_ref().unwrap().clone(); - let mut geo_candidates = &self.geo_candidates & universe; - - if geo_candidates.is_empty() { - return Ok(Some(RankingRuleOutput { + next_bucket( + ctx.index, + ctx.txn, + universe, + self.ascending, + self.point, + &self.field_ids, + &mut self.rtree, + &mut self.cached_sorted_docids, + &self.geo_candidates, + GeoSortParameter { + strategy: self.strategy, + max_bucket_size: self.max_bucket_size, + distance_error_margin: self.distance_error_margin, + }, + ) + .map(|o| { + o.map(|(candidates, point)| RankingRuleOutput { query, - candidates: universe.clone(), + candidates, score: ScoreDetails::GeoSort(score_details::GeoSort { target_point: self.point, ascending: self.ascending, - value: None, + value: point, }), - })); - } - - let ascending = self.ascending; - let next = |cache: &mut VecDeque<_>| { - if ascending { - cache.pop_front() - } else { - cache.pop_back() - } - }; - let put_back = |cache: &mut VecDeque<_>, x: _| { - if ascending { - cache.push_front(x) - } else { - cache.push_back(x) - } - }; - - let mut current_bucket = RoaringBitmap::new(); - // current_distance stores the first point and distance in current bucket - let mut current_distance: Option<([f64; 2], f64)> = None; - loop { - // The loop will only exit when we have found all points with equal distance or have exhausted the candidates. - if let Some((id, point)) = next(&mut self.cached_sorted_docids) { - if geo_candidates.contains(id) { - let distance = distance_between_two_points(&self.point, &point); - if let Some((point0, bucket_distance)) = current_distance.as_ref() { - if (bucket_distance - distance).abs() > self.distance_error_margin { - // different distance, point belongs to next bucket - put_back(&mut self.cached_sorted_docids, (id, point)); - return Ok(Some(RankingRuleOutput { - query, - candidates: current_bucket, - score: ScoreDetails::GeoSort(score_details::GeoSort { - target_point: self.point, - ascending: self.ascending, - value: Some(point0.to_owned()), - }), - })); - } else { - // same distance, point belongs to current bucket - current_bucket.insert(id); - // remove from cadidates to prevent it from being added to the cache again - geo_candidates.remove(id); - // current bucket size reaches limit, force return - if current_bucket.len() == self.max_bucket_size { - return Ok(Some(RankingRuleOutput { - query, - candidates: current_bucket, - score: ScoreDetails::GeoSort(score_details::GeoSort { - target_point: self.point, - ascending: self.ascending, - value: Some(point0.to_owned()), - }), - })); - } - } - } else { - // first doc in current bucket - current_distance = Some((point, distance)); - current_bucket.insert(id); - geo_candidates.remove(id); - // current bucket size reaches limit, force return - if current_bucket.len() == self.max_bucket_size { - return Ok(Some(RankingRuleOutput { - query, - candidates: current_bucket, - score: ScoreDetails::GeoSort(score_details::GeoSort { - target_point: self.point, - ascending: self.ascending, - value: Some(point.to_owned()), - }), - })); - } - } - } - } else { - // cache exhausted, we need to refill it - self.fill_buffer(ctx, &geo_candidates)?; - - if self.cached_sorted_docids.is_empty() { - // candidates exhausted, exit - if let Some((point0, _)) = current_distance.as_ref() { - return Ok(Some(RankingRuleOutput { - query, - candidates: current_bucket, - score: ScoreDetails::GeoSort(score_details::GeoSort { - target_point: self.point, - ascending: self.ascending, - value: Some(point0.to_owned()), - }), - })); - } else { - return Ok(Some(RankingRuleOutput { - query, - candidates: universe.clone(), - score: ScoreDetails::GeoSort(score_details::GeoSort { - target_point: self.point, - ascending: self.ascending, - value: None, - }), - })); - } - } - } - } + }) + }) } #[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")] @@ -394,16 +149,3 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { self.cached_sorted_docids.clear(); } } - -/// Compute the antipodal coordinate of `coord` -fn opposite_of(mut coord: [f64; 2]) -> [f64; 2] { - coord[0] *= -1.; - // in the case of x,0 we want to return x,180 - if coord[1] > 0. { - coord[1] -= 180.; - } else { - coord[1] += 180.; - } - - coord -} diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index a65b4076b..ecc51161d 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -1,7 +1,7 @@ mod bucket_sort; mod db_cache; mod distinct; -mod geo_sort; +pub(crate) mod geo_sort; mod graph_based_ranking_rule; mod interner; mod limits; @@ -46,14 +46,14 @@ use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache}; use roaring::RoaringBitmap; use sort::Sort; -use self::distinct::facet_string_values; +pub(crate) use self::distinct::{facet_string_values, facet_values_prefix_key}; use self::geo_sort::GeoSort; -pub use self::geo_sort::{Parameter as GeoSortParameter, Strategy as GeoSortStrategy}; use self::graph_based_ranking_rule::Words; use self::interner::Interned; use self::vector_sort::VectorSort; use crate::attribute_patterns::{match_pattern, PatternMatch}; use crate::constants::RESERVED_GEO_FIELD_NAME; +use crate::documents::GeoSortParameter; use crate::index::PrefixSearch; use crate::localized_attributes_rules::LocalizedFieldIds; use crate::score_details::{ScoreDetails, ScoringStrategy}; @@ -319,7 +319,7 @@ fn resolve_negative_phrases( fn get_ranking_rules_for_placeholder_search<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, ) -> Result>> { let mut sort = false; let mut sorted_fields = HashSet::new(); @@ -371,7 +371,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( fn get_ranking_rules_for_vector<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, limit_plus_offset: usize, target: &[f32], embedder_name: &str, @@ -448,7 +448,7 @@ fn get_ranking_rules_for_vector<'ctx>( fn get_ranking_rules_for_query_graph_search<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, terms_matching_strategy: TermsMatchingStrategy, ) -> Result>> { // query graph search @@ -559,7 +559,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( ranking_rules: &mut Vec>, sorted_fields: &mut HashSet, geo_sorted: &mut bool, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, ) -> Result<()> { let sort_criteria = sort_criteria.clone().unwrap_or_default(); ranking_rules.reserve(sort_criteria.len()); @@ -629,7 +629,7 @@ pub fn execute_vector_search( universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, from: usize, length: usize, embedder_name: &str, @@ -692,7 +692,7 @@ pub fn execute_search( mut universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, - geo_param: geo_sort::Parameter, + geo_param: GeoSortParameter, from: usize, length: usize, words_limit: Option, @@ -872,7 +872,7 @@ pub fn execute_search( }) } -fn check_sort_criteria( +pub(crate) fn check_sort_criteria( ctx: &SearchContext<'_>, sort_criteria: Option<&Vec>, ) -> Result<()> { @@ -902,7 +902,7 @@ fn check_sort_criteria( let (valid_fields, hidden_fields) = ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?; - return Err(UserError::InvalidSortableAttribute { + return Err(UserError::InvalidSearchSortableAttribute { field: field.to_string(), valid_fields, hidden_fields, @@ -913,7 +913,7 @@ fn check_sort_criteria( let (valid_fields, hidden_fields) = ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?; - return Err(UserError::InvalidSortableAttribute { + return Err(UserError::InvalidSearchSortableAttribute { field: RESERVED_GEO_FIELD_NAME.to_string(), valid_fields, hidden_fields,