diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs
index c7f375eff..335da32cb 100644
--- a/crates/meilisearch-types/src/error.rs
+++ b/crates/meilisearch-types/src/error.rs
@@ -241,6 +241,7 @@ InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
 InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
 InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
+InvalidDocumentIds , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
 InvalidSearchEmbedder , InvalidRequest , BAD_REQUEST ;
diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs
index 54f01b4d6..50eec46fe 100644
--- a/crates/meilisearch/src/routes/indexes/documents.rs
+++ b/crates/meilisearch/src/routes/indexes/documents.rs
@@ -20,11 +20,13 @@ use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::update::IndexDocumentsMethod;
 use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
 use meilisearch_types::milli::DocumentId;
+use meilisearch_types::serde_cs::vec::CS;
 use meilisearch_types::star_or::OptionStarOrList;
 use meilisearch_types::tasks::KindWithContent;
 use meilisearch_types::{milli, Document, Index};
 use mime::Mime;
 use once_cell::sync::Lazy;
+use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use tempfile::tempfile;
@@ -43,7 +45,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
 use crate::routes::{
     get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
 };
-use crate::search::{parse_filter, RetrieveVectors};
+use crate::search::{parse_filter, ExternalDocumentId, RetrieveVectors};
 use crate::{aggregate_methods, Opt};
 
 static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
@@ -137,6 +139,9 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
     #[serde(rename = "vector.retrieve_vectors")]
     retrieve_vectors: bool,
 
+    // maximum size of `ids` array. 0 if always empty or `null`
+    max_document_ids: usize,
+
     // pagination
     #[serde(rename = "pagination.max_limit")]
     max_limit: usize,
@@ -149,7 +154,7 @@
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum DocumentFetchKind {
     PerDocumentId { retrieve_vectors: bool },
-    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
+    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
 }
 
 impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
@@ -161,12 +166,18 @@ impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
             }
         };
+        let ids = match query {
+            DocumentFetchKind::Normal { ids, .. } => *ids,
+            DocumentFetchKind::PerDocumentId { .. } => 0,
+        };
+
         Self {
             per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
             per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
             max_limit: limit,
             max_offset: offset,
             retrieve_vectors,
+            max_document_ids: ids,
 
             marker: PhantomData,
         }
     }
@@ -185,6 +196,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
             retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
             max_limit: self.max_limit.max(new.max_limit),
             max_offset: self.max_offset.max(new.max_offset),
+            max_document_ids: self.max_document_ids.max(new.max_document_ids),
             marker: PhantomData,
         })
     }
@@ -266,6 +278,7 @@ pub async fn get_document(
             per_filter: false,
             max_limit: 0,
             max_offset: 0,
+            max_document_ids: 0,
             marker: PhantomData,
         },
         &req,
@@ -387,6 +400,9 @@ pub struct BrowseQueryGet {
     #[param(default, value_type = Option<bool>)]
     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
     retrieve_vectors: Param<bool>,
+    #[param(default, value_type = Option<Vec<String>>)]
+    #[deserr(default, error = DeserrQueryParamError<InvalidDocumentIds>)]
+    ids: Option<CS<String>>,
     #[param(default, value_type = Option<String>, example = "popularity > 1000")]
     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
     filter: Option<String>,
@@ -408,6 +424,9 @@ pub struct BrowseQuery {
     #[schema(default, example = true)]
     #[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)]
     retrieve_vectors: bool,
+    #[schema(value_type = Option<Vec<String>>, example = json!(["cody", "finn", "brandy", "gambit"]))]
+    #[deserr(default, error = DeserrJsonError<InvalidDocumentIds>)]
+    ids: Option<Vec<serde_json::Value>>,
     #[schema(default, value_type = Option<String>, example = "popularity > 1000")]
     #[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
     filter: Option<Value>,
@@ -479,6 +498,7 @@ pub async fn documents_by_query_post(
             retrieve_vectors: body.retrieve_vectors,
             max_limit: body.limit,
             max_offset: body.offset,
+            max_document_ids: body.ids.as_ref().map(Vec::len).unwrap_or_default(),
             per_document_id: false,
             marker: PhantomData,
         },
@@ -551,7 +571,8 @@ pub async fn get_documents(
 ) -> Result<HttpResponse, ResponseError> {
     debug!(parameters = ?params, "Get documents GET");
 
-    let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner();
+    let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
+        params.into_inner();
 
     let filter = match filter {
         Some(f) => match serde_json::from_str(&f) {
@@ -561,12 +582,15 @@
         None => None,
     };
 
+    let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
+
     let query = BrowseQuery {
         offset: offset.0,
         limit: limit.0,
         fields: fields.merge_star_and_none(),
         retrieve_vectors: retrieve_vectors.0,
         filter,
+        ids,
     };
 
     analytics.publish(
@@ -575,6 +599,7 @@
             retrieve_vectors: query.retrieve_vectors,
             max_limit: query.limit,
             max_offset: query.offset,
+            max_document_ids: query.ids.as_ref().map(Vec::len).unwrap_or_default(),
             per_document_id: false,
             marker: PhantomData,
         },
@@ -590,15 +615,30 @@ fn documents_by_query(
     query: BrowseQuery,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
-    let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query;
+    let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;
 
     let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);
 
+    let ids = if let Some(ids) = ids {
+        let mut parsed_ids = Vec::with_capacity(ids.len());
+        for (index, id) in ids.into_iter().enumerate() {
+            let id = id.try_into().map_err(|error| {
+                let msg = format!("In `.ids[{index}]`: {error}");
+                ResponseError::from_msg(msg, Code::InvalidDocumentIds)
+            })?;
+            parsed_ids.push(id)
+        }
+        Some(parsed_ids)
+    } else {
+        None
+    };
+
     let index = index_scheduler.index(&index_uid)?;
     let (total, documents) = retrieve_documents(
         &index,
         offset,
         limit,
+        ids,
         filter,
         fields,
         retrieve_vectors,
@@ -1451,10 +1491,12 @@ fn some_documents<'a, 't: 'a>(
     }))
 }
 
+#[allow(clippy::too_many_arguments)]
 fn retrieve_documents<S: AsRef<str>>(
     index: &Index,
     offset: usize,
     limit: usize,
+    ids: Option<Vec<ExternalDocumentId>>,
     filter: Option<Value>,
     attributes_to_retrieve: Option<Vec<S>>,
     retrieve_vectors: RetrieveVectors,
@@ -1468,16 +1510,28 @@ fn retrieve_documents<S: AsRef<str>>(
         None
     };
 
-    let candidates = if let Some(filter) = filter {
-        filter.evaluate(&rtxn, index).map_err(|err| match err {
+    let mut candidates = if let Some(ids) = ids {
+        let external_document_ids = index.external_documents_ids();
+        let mut candidates = RoaringBitmap::new();
+        for id in ids.iter() {
+            let Some(docid) = external_document_ids.get(&rtxn, id)? else {
+                continue;
+            };
+            candidates.insert(docid);
+        }
+        candidates
+    } else {
+        index.documents_ids(&rtxn)?
+    };
+
+    if let Some(filter) = filter {
+        candidates &= filter.evaluate(&rtxn, index).map_err(|err| match err {
             milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
                 ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter)
             }
             e => e.into(),
         })?
-    } else {
-        index.documents_ids(&rtxn)?
-    };
+    }
 
     let (it, number_of_documents) = {
         let number_of_documents = candidates.len();
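
Editor's note on the hunk above: `retrieve_documents` now seeds `candidates` from `ids` when a list is given (ids that resolve to no document are silently skipped via `continue`), falls back to all documents otherwise, and then intersects the filter's result on top, instead of treating the filter as the only source of candidates. A minimal sketch of that combination rule using only the `roaring` crate (names are illustrative; this is not the route code):

    use roaring::RoaringBitmap;

    /// `ids_candidates`: docids resolved from `ids` (None when `ids` was absent).
    /// `filter_candidates`: docids matched by `filter` (None when `filter` was absent).
    fn combine(
        ids_candidates: Option<RoaringBitmap>,
        all_documents: RoaringBitmap,
        filter_candidates: Option<RoaringBitmap>,
    ) -> RoaringBitmap {
        // Start from the requested ids when given, otherwise from every document...
        let mut candidates = ids_candidates.unwrap_or(all_documents);
        // ...then keep only what the filter also matches, mirroring
        // `candidates &= filter.evaluate(...)` in the hunk above.
        if let Some(filtered) = filter_candidates {
            candidates &= filtered;
        }
        candidates
    }

Because the intersection happens before pagination, the reported `total` counts only documents matching both the `ids` list and the filter.
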
diff --git a/crates/meilisearch/src/routes/indexes/similar.rs b/crates/meilisearch/src/routes/indexes/similar.rs
index fcd64291e..400be331b 100644
--- a/crates/meilisearch/src/routes/indexes/similar.rs
+++ b/crates/meilisearch/src/routes/indexes/similar.rs
@@ -5,7 +5,7 @@ use index_scheduler::IndexScheduler;
 use meilisearch_types::deserr::query_params::Param;
 use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
 use meilisearch_types::error::deserr_codes::*;
-use meilisearch_types::error::{ErrorCode as _, ResponseError};
+use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::keys::actions;
 use meilisearch_types::serde_cs::vec::CS;
@@ -111,7 +111,7 @@ pub async fn similar_get(
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 
-    let query = params.0.try_into()?;
+    let query = params.0.into();
 
     let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query);
 
@@ -295,10 +295,8 @@ impl std::convert::TryFrom<f64> for RankingScoreThresholdGet {
     }
 }
 
-impl TryFrom<SimilarQueryGet> for SimilarQuery {
-    type Error = ResponseError;
-
-    fn try_from(
+impl From<SimilarQueryGet> for SimilarQuery {
+    fn from(
         SimilarQueryGet {
             id,
             offset,
@@ -311,7 +309,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
             embedder,
             ranking_score_threshold,
         }: SimilarQueryGet,
-    ) -> Result<Self, Self::Error> {
+    ) -> Self {
         let filter = match filter {
             Some(f) => match serde_json::from_str(&f) {
                 Ok(v) => Some(v),
@@ -320,10 +318,8 @@
             None => None,
         };
 
-        Ok(SimilarQuery {
-            id: id.0.try_into().map_err(|code: InvalidSimilarId| {
-                ResponseError::from_msg(code.to_string(), code.error_code())
-            })?,
+        SimilarQuery {
+            id: serde_json::Value::String(id.0),
             offset: offset.0,
             limit: limit.0,
             filter,
@@ -333,6 +329,6 @@
             show_ranking_score: show_ranking_score.0,
             show_ranking_score_details: show_ranking_score_details.0,
             ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
-        })
+        }
     }
 }
diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs
index 58a181d3b..d0cab1672 100644
--- a/crates/meilisearch/src/search/mod.rs
+++ b/crates/meilisearch/src/search/mod.rs
@@ -635,7 +635,7 @@ impl SearchQueryWithIndex {
 pub struct SimilarQuery {
     #[deserr(error = DeserrJsonError<InvalidSimilarId>)]
     #[schema(value_type = String)]
-    pub id: ExternalDocumentId,
+    pub id: serde_json::Value,
     #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSimilarOffset>)]
     pub offset: usize,
     #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSimilarLimit>)]
@@ -657,8 +657,7 @@ pub struct SimilarQuery {
     pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
 }
 
-#[derive(Debug, Clone, PartialEq, Deserr)]
-#[deserr(try_from(Value) = TryFrom::try_from -> InvalidSimilarId)]
+#[derive(Debug, Clone, PartialEq)]
 pub struct ExternalDocumentId(String);
 
 impl AsRef<str> for ExternalDocumentId {
@@ -674,7 +673,7 @@ impl ExternalDocumentId {
 }
 
 impl TryFrom<String> for ExternalDocumentId {
-    type Error = InvalidSimilarId;
+    type Error = milli::UserError;
 
     fn try_from(value: String) -> Result<Self, Self::Error> {
         serde_json::Value::String(value).try_into()
@@ -682,10 +681,10 @@ impl TryFrom<String> for ExternalDocumentId {
     }
 }
 
 impl TryFrom<Value> for ExternalDocumentId {
-    type Error = InvalidSimilarId;
+    type Error = milli::UserError;
 
     fn try_from(value: Value) -> Result<Self, Self::Error> {
-        Ok(Self(milli::documents::validate_document_id_value(value).map_err(|_| InvalidSimilarId)?))
+        Ok(Self(milli::documents::validate_document_id_value(value)?))
     }
 }
@@ -1598,6 +1597,11 @@ pub fn perform_similar(
         ranking_score_threshold,
     } = query;
 
+    let id: ExternalDocumentId = id.try_into().map_err(|error| {
+        let msg = format!("Invalid value at `.id`: {error}");
+        ResponseError::from_msg(msg, Code::InvalidSimilarId)
+    })?;
+
     // using let-else rather than `?` so that the borrow checker identifies we're always returning here,
     // preventing a use-after-move
     let Some(internal_id) = index.external_documents_ids().get(&rtxn, &id)? else {
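
Editor's note: with `SimilarQuery::id` relaxed to a raw `serde_json::Value`, both the GET and POST similar routes funnel through the same `ExternalDocumentId::try_from` conversion inside `perform_similar`, so they emit one identical `milli::UserError`-derived message. The constraint itself lives in `milli::documents::validate_document_id_value`; the standalone sketch below only restates the rule the error message describes (integer or string, alphanumerics plus `-` and `_`, at most 511 bytes) and is an illustration under those assumptions, not milli's implementation:

    use serde_json::Value;

    // Sketch of the document-id rule, reconstructed from the error message.
    fn validate_document_id(value: &Value) -> Result<String, String> {
        // Integers and strings are accepted; every other JSON type is rejected.
        let id = match value {
            Value::Number(n) if n.is_u64() => n.to_string(),
            Value::String(s) => s.clone(),
            other => return Err(format!("Document identifier `{other}` is invalid.")),
        };
        // Only alphanumerics, `-` and `_`, and at most 511 bytes.
        let well_formed = !id.is_empty()
            && id.len() <= 511
            && id.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_');
        if well_formed {
            Ok(id)
        } else {
            Err(format!("Document identifier `{id:?}` is invalid."))
        }
    }

The documents route's new "In `.ids[{index}]`: …" errors are produced by the same conversion, which is why both routes now print the same identifier rules.
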
diff --git a/crates/meilisearch/tests/common/index.rs b/crates/meilisearch/tests/common/index.rs
index b8c1d2824..529fb0793 100644
--- a/crates/meilisearch/tests/common/index.rs
+++ b/crates/meilisearch/tests/common/index.rs
@@ -411,7 +411,7 @@ impl Index<'_, State> {
         self.service.get(url).await
     }
 
-    pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) {
+    pub async fn fetch_documents(&self, payload: Value) -> (Value, StatusCode) {
         let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
         self.service.post(url, payload).await
     }
diff --git a/crates/meilisearch/tests/documents/errors.rs b/crates/meilisearch/tests/documents/errors.rs
index c02c1f000..afca9498b 100644
--- a/crates/meilisearch/tests/documents/errors.rs
+++ b/crates/meilisearch/tests/documents/errors.rs
@@ -667,7 +667,7 @@ async fn fetch_document_by_filter() {
         .await;
     index.wait_task(task.uid()).await.succeeded();
 
-    let (response, code) = index.get_document_by_filter(json!(null)).await;
+    let (response, code) = index.fetch_documents(json!(null)).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
@@ -678,7 +678,7 @@
     }
     "###);
 
-    let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
+    let (response, code) = index.fetch_documents(json!({ "offset": "doggo" })).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
@@ -689,7 +689,7 @@
     }
     "###);
 
-    let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
+    let (response, code) = index.fetch_documents(json!({ "limit": "doggo" })).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
@@ -700,7 +700,7 @@
     }
     "###);
 
-    let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
+    let (response, code) = index.fetch_documents(json!({ "fields": "doggo" })).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
@@ -711,7 +711,7 @@
     }
     "###);
 
-    let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
+    let (response, code) = index.fetch_documents(json!({ "filter": true })).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
@@ -722,7 +722,7 @@
     }
     "###);
 
-    let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
+    let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await;
    snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
@@ -733,8 +733,7 @@
     }
     "###);
 
-    let (response, code) =
-        index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
+    let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
@@ -762,8 +761,7 @@ async fn retrieve_vectors() {
     }
     "###);
 
     // FETCH ALL DOCUMENTS BY POST
-    let (response, _code) =
-        index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
+    let (response, _code) = index.fetch_documents(json!({ "retrieveVectors": "tamo" })).await;
     snapshot!(response, @r###"
     {
       "message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",
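
Editor's note: the tests that follow exercise each request through both routes and assert byte-identical responses. On GET, `ids` is a single comma-separated query parameter deserialized through `CS<String>`, so every id arrives as a string; on POST `/documents/fetch`, it is a JSON array that may mix integers and strings. A small sketch of the two spellings of one request (illustrative values, not a real test):

    use serde_json::{json, Value};

    /// Two spellings of the same documents request.
    fn equivalent_requests() -> (&'static str, Value) {
        // GET /indexes/{uid}/documents?ids=... — one comma-separated string.
        let get_query = "?ids=0,1,2,3&filter=color!=blue&limit=1";
        // POST /indexes/{uid}/documents/fetch — integers and strings both
        // validate to the same external document ids.
        let post_body = json!({
            "ids": [0, "1", 2, 3],
            "filter": "color != blue",
            "limit": 1
        });
        (get_query, post_body)
    }
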
snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -711,7 +711,7 @@ async fn fetch_document_by_filter() { } "###); - let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await; + let (response, code) = index.fetch_documents(json!({ "filter": true })).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -722,7 +722,7 @@ async fn fetch_document_by_filter() { } "###); - let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await; + let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -733,8 +733,7 @@ async fn fetch_document_by_filter() { } "###); - let (response, code) = - index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -762,8 +761,7 @@ async fn retrieve_vectors() { "###); // FETCH ALL DOCUMENTS BY POST - let (response, _code) = - index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await; + let (response, _code) = index.fetch_documents(json!({ "retrieveVectors": "tamo" })).await; snapshot!(response, @r###" { "message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`", diff --git a/crates/meilisearch/tests/documents/get_documents.rs b/crates/meilisearch/tests/documents/get_documents.rs index 966fccefd..f87a18b9f 100644 --- a/crates/meilisearch/tests/documents/get_documents.rs +++ b/crates/meilisearch/tests/documents/get_documents.rs @@ -371,7 +371,7 @@ async fn get_document_by_filter() { .await; index.wait_task(task.uid()).await.succeeded(); - let (response, code) = index.get_document_by_filter(json!({})).await; + let (response, code) = index.fetch_documents(json!({})).await; let (response2, code2) = index.get_all_documents_raw("").await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" @@ -401,7 +401,7 @@ async fn get_document_by_filter() { assert_eq!(code, code2); assert_eq!(response, response2); - let (response, code) = index.get_document_by_filter(json!({ "filter": "color = blue" })).await; + let (response, code) = index.fetch_documents(json!({ "filter": "color = blue" })).await; let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" @@ -424,9 +424,8 @@ async fn get_document_by_filter() { assert_eq!(code, code2); assert_eq!(response, response2); - let (response, code) = index - .get_document_by_filter(json!({ "offset": 1, "limit": 1, "filter": "color != blue" })) - .await; + let (response, code) = + index.fetch_documents(json!({ "offset": 1, "limit": 1, "filter": "color != blue" })).await; let (response2, code2) = index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await; snapshot!(code, @"200 OK"); @@ -446,9 +445,7 @@ async fn get_document_by_filter() { assert_eq!(response, response2); let (response, code) = index - .get_document_by_filter( - json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }), - ) + .fetch_documents(json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] })) .await; let (response2, code2) = index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await; @@ -471,7 +468,7 @@ async fn 
get_document_by_filter() { // Now testing more complex filter that the get route can't represent let (response, code) = - index.get_document_by_filter(json!({ "filter": [["color = blue", "color = red"]] })).await; + index.fetch_documents(json!({ "filter": [["color = blue", "color = red"]] })).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" { @@ -495,9 +492,8 @@ async fn get_document_by_filter() { } "###); - let (response, code) = index - .get_document_by_filter(json!({ "filter": [["color != blue"], "color EXISTS"] })) - .await; + let (response, code) = + index.fetch_documents(json!({ "filter": [["color != blue"], "color EXISTS"] })).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" { @@ -514,6 +510,326 @@ async fn get_document_by_filter() { "###); } +#[actix_rt::test] +async fn get_document_by_ids() { + let server = Server::new_shared(); + let index = server.unique_index(); + let (task, _code) = index + .add_documents( + json!([ + { "id": 0, "color": "red" }, + { "id": 1, "color": "blue" }, + { "id": 2, "color": "blue" }, + { "id": 3 }, + ]), + Some("id"), + ) + .await; + index.wait_task(task.uid()).await.succeeded(); + + let (response, code) = index + .fetch_documents(json!({ + "ids": ["0", 1, 2, 3] + })) + .await; + let (response2, code2) = index.get_all_documents_raw("?ids=0,1,2,3").await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + }, + { + "id": 1, + "color": "blue" + }, + { + "id": 2, + "color": "blue" + }, + { + "id": 3 + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + assert_eq!(code, code2); + assert_eq!(response, response2); + + let (response, code) = index.fetch_documents(json!({ "ids": [2, "1"] })).await; + let (response2, code2) = index.get_all_documents_raw("?ids=2,1").await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [ + { + "id": 1, + "color": "blue" + }, + { + "id": 2, + "color": "blue" + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); + assert_eq!(code, code2); + assert_eq!(response, response2); + + let (response, code) = + index.fetch_documents(json!({ "offset": 1, "limit": 1, "ids": ["0", 0, 3] })).await; + let (response2, code2) = index.get_all_documents_raw("?ids=3,0&offset=1&limit=1").await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [ + { + "id": 3 + } + ], + "offset": 1, + "limit": 1, + "total": 2 + } + "###); + assert_eq!(code, code2); + assert_eq!(response, response2); + + let (response, code) = + index.fetch_documents(json!({ "limit": 1, "ids": [0, 3], "fields": ["color"] })).await; + let (response2, code2) = index.get_all_documents_raw("?limit=1&ids=0,3&fields=color").await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [ + { + "color": "red" + } + ], + "offset": 0, + "limit": 1, + "total": 2 + } + "###); + assert_eq!(code, code2); + assert_eq!(response, response2); + + // Now testing more complex requests that the get route can't represent + + let (response, code) = index.fetch_documents(json!({ "ids": [] })).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [], + "offset": 0, + "limit": 20, + 
"total": 0 + } + "###); +} + +#[actix_rt::test] +async fn get_document_invalid_ids() { + let server = Server::new_shared(); + let index = server.unique_index(); + let (task, _code) = index + .add_documents( + json!([ + { "id": 0, "color": "red" }, + { "id": 1, "color": "blue" }, + { "id": 2, "color": "blue" }, + { "id": 3 }, + ]), + Some("id"), + ) + .await; + index.wait_task(task.uid()).await.succeeded(); + + let (response, code) = index.fetch_documents(json!({"ids": ["0", "illegal/docid"] })).await; + let (response2, code2) = index.get_all_documents_raw("?ids=0,illegal/docid").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "message": "In `.ids[1]`: Document identifier `\"illegal/docid\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", + "code": "invalid_document_ids", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_ids" + } + "###); + assert_eq!(code, code2); + assert_eq!(response, response2); +} + +#[actix_rt::test] +async fn get_document_not_found_ids() { + let server = Server::new_shared(); + let index = server.unique_index(); + let (task, _code) = index + .add_documents( + json!([ + { "id": 0, "color": "red" }, + { "id": 1, "color": "blue" }, + { "id": 2, "color": "blue" }, + { "id": 3 }, + ]), + Some("id"), + ) + .await; + index.wait_task(task.uid()).await.succeeded(); + + let (response, code) = index.fetch_documents(json!({"ids": ["0", 3, 42] })).await; + let (response2, code2) = index.get_all_documents_raw("?ids=0,3,42").await; + // the document with id 42 is not in the results since it doesn't exist + // however, no error is raised + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + }, + { + "id": 3 + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); + assert_eq!(code, code2); + assert_eq!(response, response2); +} + +#[actix_rt::test] +async fn get_document_by_ids_and_filter() { + let server = Server::new_shared(); + let index = server.unique_index(); + index.update_settings_filterable_attributes(json!(["color"])).await; + let (task, _code) = index + .add_documents( + json!([ + { "id": 0, "color": "red" }, + { "id": 1, "color": "blue" }, + { "id": 2, "color": "blue" }, + { "id": 3 }, + ]), + Some("id"), + ) + .await; + index.wait_task(task.uid()).await.succeeded(); + + let (response, code) = + index.fetch_documents(json!({"ids": [2], "filter": "color = blue" })).await; + let (response2, code2) = index.get_all_documents_raw("?ids=2&filter=color=blue").await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [ + { + "id": 2, + "color": "blue" + } + ], + "offset": 0, + "limit": 20, + "total": 1 + } + "###); + assert_eq!(code, code2); + assert_eq!(response, response2); + + let (response, code) = index + .fetch_documents( + json!({ "offset": 1, "limit": 1, "ids": [0, 1, 2, 3], "filter": "color != blue" }), + ) + .await; + let (response2, code2) = + index.get_all_documents_raw("?ids=0,1,2,3&filter=color!=blue&offset=1&limit=1").await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [ + { + "id": 3 + } + ], + "offset": 1, + "limit": 1, + "total": 2 + } + "###); + 
assert_eq!(code, code2); + assert_eq!(response, response2); + + let (response, code) = index + .fetch_documents(json!({ "limit": 1, "ids": [0, 1, 2,3], "filter": "color != blue", "fields": ["color"] })) + .await; + let (response2, code2) = + index.get_all_documents_raw("?ids=0,1,2,3&limit=1&filter=color!=blue&fields=color").await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [ + { + "color": "red" + } + ], + "offset": 0, + "limit": 1, + "total": 2 + } + "###); + assert_eq!(code, code2); + assert_eq!(response, response2); + + // Now testing more complex filter that the get route can't represent + + let (response, code) = index + .fetch_documents(json!({ "ids": [0, "2"], "filter": [["color = blue", "color = red"]] })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + }, + { + "id": 2, + "color": "blue" + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); + + let (response, code) = index + .fetch_documents(json!({ "filter": [["color != blue"], "color EXISTS"], "ids": [1, 2, 3] })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "results": [], + "offset": 0, + "limit": 20, + "total": 0 + } + "###); +} + #[actix_rt::test] async fn get_document_with_vectors() { let server = Server::new().await; diff --git a/crates/meilisearch/tests/similar/errors.rs b/crates/meilisearch/tests/similar/errors.rs index 5c4ac1f38..30ff5b145 100644 --- a/crates/meilisearch/tests/similar/errors.rs +++ b/crates/meilisearch/tests/similar/errors.rs @@ -55,11 +55,11 @@ async fn similar_bad_id() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": ["doggo"]})).await; + let (response, code) = index.similar_post(json!({"id": ["doggo"], "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", + "message": "Invalid value at `.id`: Document identifier `[\"doggo\"]` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "code": "invalid_similar_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_id" @@ -145,11 +145,12 @@ async fn similar_invalid_id() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": "http://invalid-docid/"})).await; + let (response, code) = + index.similar_post(json!({"id": "http://invalid-docid/", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", + "message": "Invalid value at `.id`: Document identifier `\"http://invalid-docid/\"` is invalid. 
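
Editor's note: the two `similar` error tests below now pass `"embedder": "manual"` in their payloads. Since the id is no longer rejected during deserialization, a body without an embedder would presumably now fail with a missing-`embedder` deserialization error before `perform_similar` ever validates the id; pinning the embedder keeps the tests on the id error path. Hypothetical payload shape:

    use serde_json::{json, Value};

    /// Shape of a similar request exercising the id error path (illustrative values).
    fn bad_id_similar_body() -> Value {
        json!({
            "id": ["doggo"],      // rejected in `perform_similar`: arrays are not document ids
            "embedder": "manual"  // present so deserialization succeeds and the id check is reached
        })
    }
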
diff --git a/crates/meilisearch/tests/similar/errors.rs b/crates/meilisearch/tests/similar/errors.rs
index 5c4ac1f38..30ff5b145 100644
--- a/crates/meilisearch/tests/similar/errors.rs
+++ b/crates/meilisearch/tests/similar/errors.rs
@@ -55,11 +55,11 @@ async fn similar_bad_id() {
     snapshot!(code, @"202 Accepted");
     server.wait_task(response.uid()).await;
 
-    let (response, code) = index.similar_post(json!({"id": ["doggo"]})).await;
+    let (response, code) = index.similar_post(json!({"id": ["doggo"], "embedder": "manual"})).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
+      "message": "Invalid value at `.id`: Document identifier `[\"doggo\"]` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
       "code": "invalid_similar_id",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_similar_id"
@@ -145,11 +145,12 @@ async fn similar_invalid_id() {
     snapshot!(code, @"202 Accepted");
     server.wait_task(response.uid()).await;
 
-    let (response, code) = index.similar_post(json!({"id": "http://invalid-docid/"})).await;
+    let (response, code) =
+        index.similar_post(json!({"id": "http://invalid-docid/", "embedder": "manual"})).await;
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
+      "message": "Invalid value at `.id`: Document identifier `\"http://invalid-docid/\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
       "code": "invalid_similar_id",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_similar_id"