This commit is contained in:
Mubelotix 2025-07-04 01:26:43 +01:00 committed by GitHub
commit a719ed1d11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1411 additions and 375 deletions

View file

@ -1,6 +1,7 @@
use std::collections::HashSet;
use std::io::{ErrorKind, Seek as _};
use std::marker::PhantomData;
use std::str::FromStr;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
@ -17,9 +18,10 @@ use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::documents::sort::recursive_sort;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::milli::{AscDesc, DocumentId};
use meilisearch_types::serde_cs::vec::CS;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
@ -42,6 +44,7 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::fix_sort_query_parameters;
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
@ -135,6 +138,8 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
per_document_id: bool,
// if a filter was used
per_filter: bool,
// if documents were sorted
sort: bool,
#[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool,
@ -153,16 +158,28 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
PerDocumentId {
retrieve_vectors: bool,
sort: bool,
},
Normal {
with_filter: bool,
limit: usize,
offset: usize,
retrieve_vectors: bool,
sort: bool,
ids: usize,
},
}
impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
pub fn from_query(query: &DocumentFetchKind) -> Self {
let (limit, offset, retrieve_vectors) = match query {
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
(*limit, *offset, *retrieve_vectors)
let (limit, offset, retrieve_vectors, sort) = match query {
DocumentFetchKind::PerDocumentId { retrieve_vectors, sort } => {
(1, 0, *retrieve_vectors, *sort)
}
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, sort, .. } => {
(*limit, *offset, *retrieve_vectors, *sort)
}
};
@ -176,6 +193,7 @@ impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit,
max_offset: offset,
sort,
retrieve_vectors,
max_document_ids: ids,
@ -193,6 +211,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
Box::new(Self {
per_document_id: self.per_document_id | new.per_document_id,
per_filter: self.per_filter | new.per_filter,
sort: self.sort | new.sort,
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
max_limit: self.max_limit.max(new.max_limit),
max_offset: self.max_offset.max(new.max_offset),
@ -276,6 +295,7 @@ pub async fn get_document(
retrieve_vectors: param_retrieve_vectors.0,
per_document_id: true,
per_filter: false,
sort: false,
max_limit: 0,
max_offset: 0,
max_document_ids: 0,
@ -406,6 +426,8 @@ pub struct BrowseQueryGet {
#[param(default, value_type = Option<String>, example = "popularity > 1000")]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentSort>)]
sort: Option<String>,
}
#[derive(Debug, Deserr, ToSchema)]
@ -430,6 +452,9 @@ pub struct BrowseQuery {
#[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>,
#[schema(default, value_type = Option<Vec<String>>, example = json!(["title:asc", "rating:desc"]))]
#[deserr(default, error = DeserrJsonError<InvalidDocumentSort>)]
sort: Option<Vec<String>>,
}
/// Get documents with POST
@ -495,6 +520,7 @@ pub async fn documents_by_query_post(
analytics.publish(
DocumentsFetchAggregator::<DocumentsPOST> {
per_filter: body.filter.is_some(),
sort: body.sort.is_some(),
retrieve_vectors: body.retrieve_vectors,
max_limit: body.limit,
max_offset: body.offset,
@ -571,7 +597,7 @@ pub async fn get_documents(
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids, sort } =
params.into_inner();
let filter = match filter {
@ -582,20 +608,20 @@ pub async fn get_documents(
None => None,
};
let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
let query = BrowseQuery {
offset: offset.0,
limit: limit.0,
fields: fields.merge_star_and_none(),
retrieve_vectors: retrieve_vectors.0,
filter,
ids,
ids: ids.map(|ids| ids.into_iter().map(Into::into).collect()),
sort: sort.map(|attr| fix_sort_query_parameters(&attr)),
};
analytics.publish(
DocumentsFetchAggregator::<DocumentsGET> {
per_filter: query.filter.is_some(),
sort: query.sort.is_some(),
retrieve_vectors: query.retrieve_vectors,
max_limit: query.limit,
max_offset: query.offset,
@ -615,7 +641,7 @@ fn documents_by_query(
query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids, sort } = query;
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);
@ -633,6 +659,18 @@ fn documents_by_query(
None
};
let sort_criteria = if let Some(sort) = &sort {
let sorts: Vec<_> = match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::SortError::from(asc_desc_error).into_document_error().into())
}
};
Some(sorts)
} else {
None
};
let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents(
&index,
@ -643,6 +681,7 @@ fn documents_by_query(
fields,
retrieve_vectors,
index_scheduler.features(),
sort_criteria,
)?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
@ -1494,6 +1533,7 @@ fn retrieve_documents<S: AsRef<str>>(
attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
sort_criteria: Option<Vec<AscDesc>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let filter = &filter;
@ -1526,15 +1566,29 @@ fn retrieve_documents<S: AsRef<str>>(
})?
}
let (it, number_of_documents) = {
let facet_sort;
let (it, number_of_documents) = if let Some(sort) = sort_criteria {
let number_of_documents = candidates.len();
facet_sort = recursive_sort(index, &rtxn, sort, &candidates)?;
let iter = facet_sort.iter()?;
(
itertools::Either::Left(some_documents(
index,
&rtxn,
iter.map(|d| d.unwrap()).skip(offset).take(limit),
retrieve_vectors,
)?),
number_of_documents,
)
} else {
let number_of_documents = candidates.len();
(
some_documents(
itertools::Either::Right(some_documents(
index,
&rtxn,
candidates.into_iter().skip(offset).take(limit),
retrieve_vectors,
)?,
)?),
number_of_documents,
)
};

View file

@ -745,10 +745,9 @@ impl SearchByIndex {
match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(milli::SortError::from(
asc_desc_error,
))
.into())
return Err(milli::SortError::from(asc_desc_error)
.into_search_error()
.into())
}
};
Some(sorts)

View file

@ -1091,7 +1091,7 @@ pub fn prepare_search<'t>(
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(SortError::from(asc_desc_error)).into())
return Err(SortError::from(asc_desc_error).into_search_error().into())
}
};

View file

@ -562,5 +562,7 @@ pub struct GetAllDocumentsOptions {
pub offset: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub fields: Option<Vec<&'static str>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub sort: Option<Vec<&'static str>>,
pub retrieve_vectors: bool,
}

View file

@ -5,8 +5,8 @@ use urlencoding::encode as urlencode;
use crate::common::encoder::Encoder;
use crate::common::{
shared_does_not_exists_index, shared_empty_index, shared_index_with_test_set,
GetAllDocumentsOptions, Server, Value,
shared_does_not_exists_index, shared_empty_index, shared_index_with_geo_documents,
shared_index_with_test_set, GetAllDocumentsOptions, Server, Value,
};
use crate::json;
@ -83,6 +83,311 @@ async fn get_document() {
);
}
#[actix_rt::test]
async fn get_document_sorted() {
let server = Server::new_shared();
let index = server.unique_index();
index.load_test_set().await;
let (task, _status_code) =
index.update_settings_sortable_attributes(json!(["age", "email", "gender", "name"])).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "age", "email"]),
sort: Some(vec!["age:asc", "email:desc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 5,
"age": 20,
"email": "warrenwatson@chorizon.com"
},
{
"id": 6,
"age": 20,
"email": "sheliaberry@chorizon.com"
},
{
"id": 57,
"age": 20,
"email": "kaitlinconner@chorizon.com"
},
{
"id": 45,
"age": 20,
"email": "irenebennett@chorizon.com"
},
{
"id": 40,
"age": 21,
"email": "staffordemerson@chorizon.com"
},
{
"id": 41,
"age": 21,
"email": "salinasgamble@chorizon.com"
},
{
"id": 63,
"age": 21,
"email": "knowleshebert@chorizon.com"
},
{
"id": 50,
"age": 21,
"email": "guerramcintyre@chorizon.com"
},
{
"id": 44,
"age": 22,
"email": "jonispears@chorizon.com"
},
{
"id": 56,
"age": 23,
"email": "tuckerbarry@chorizon.com"
},
{
"id": 51,
"age": 23,
"email": "keycervantes@chorizon.com"
},
{
"id": 60,
"age": 23,
"email": "jodyherrera@chorizon.com"
},
{
"id": 70,
"age": 23,
"email": "glassperkins@chorizon.com"
},
{
"id": 75,
"age": 24,
"email": "emmajacobs@chorizon.com"
},
{
"id": 68,
"age": 24,
"email": "angelinadyer@chorizon.com"
},
{
"id": 17,
"age": 25,
"email": "ortegabrennan@chorizon.com"
},
{
"id": 76,
"age": 25,
"email": "claricegardner@chorizon.com"
},
{
"id": 43,
"age": 25,
"email": "arnoldbender@chorizon.com"
},
{
"id": 12,
"age": 25,
"email": "aidakirby@chorizon.com"
},
{
"id": 9,
"age": 26,
"email": "kellimendez@chorizon.com"
}
]
"#);
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "gender", "name"]),
sort: Some(vec!["gender:asc", "name:asc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 3,
"name": "Adeline Flynn",
"gender": "female"
},
{
"id": 12,
"name": "Aida Kirby",
"gender": "female"
},
{
"id": 68,
"name": "Angelina Dyer",
"gender": "female"
},
{
"id": 15,
"name": "Aurelia Contreras",
"gender": "female"
},
{
"id": 36,
"name": "Barbra Valenzuela",
"gender": "female"
},
{
"id": 23,
"name": "Blanca Mcclain",
"gender": "female"
},
{
"id": 53,
"name": "Caitlin Burnett",
"gender": "female"
},
{
"id": 71,
"name": "Candace Sawyer",
"gender": "female"
},
{
"id": 65,
"name": "Carole Rowland",
"gender": "female"
},
{
"id": 33,
"name": "Cecilia Greer",
"gender": "female"
},
{
"id": 1,
"name": "Cherry Orr",
"gender": "female"
},
{
"id": 38,
"name": "Christina Short",
"gender": "female"
},
{
"id": 7,
"name": "Chrystal Boyd",
"gender": "female"
},
{
"id": 76,
"name": "Clarice Gardner",
"gender": "female"
},
{
"id": 73,
"name": "Eleanor Shepherd",
"gender": "female"
},
{
"id": 75,
"name": "Emma Jacobs",
"gender": "female"
},
{
"id": 16,
"name": "Estella Bass",
"gender": "female"
},
{
"id": 62,
"name": "Estelle Ramirez",
"gender": "female"
},
{
"id": 20,
"name": "Florence Long",
"gender": "female"
},
{
"id": 42,
"name": "Graciela Russell",
"gender": "female"
}
]
"#);
}
#[actix_rt::test]
async fn get_document_geosorted() {
let index = shared_index_with_geo_documents().await;
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
..Default::default()
})
.await;
let results = response["results"].as_array().unwrap();
snapshot!(json_string!(results), @r#"
[
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
}
},
{
"id": 3,
"name": "Crêpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
]
"#);
}
#[actix_rt::test]
async fn get_document_sort_the_unsortable() {
let index = shared_index_with_test_set().await;
let (response, _code) = index
.get_all_documents(GetAllDocumentsOptions {
fields: Some(vec!["id", "name"]),
sort: Some(vec!["name:asc"]),
..Default::default()
})
.await;
snapshot!(json_string!(response), @r#"
{
"message": "Attribute `name` is not sortable. This index does not have configured sortable attributes.",
"code": "invalid_document_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_sort"
}
"#);
}
#[actix_rt::test]
async fn error_get_unexisting_index_all_documents() {
let index = shared_does_not_exists_index().await;

View file

@ -101,14 +101,7 @@ async fn reset_embedder_documents() {
server.wait_task(response.uid()).await;
// Make sure the documents are still present
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions {
limit: None,
offset: None,
retrieve_vectors: false,
fields: None,
})
.await;
let (documents, _code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(json_string!(documents), @r###"
{
"results": [