5384: Get multiple documents by ids r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5345 

## What does this PR do?
- Implements [public usage](https://www.notion.so/meilisearch/Get-documents-by-ID-1994b06b651f805ba273e1c6b75ce4d8)
- Slightly refactor error messages for the `/similar` route

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-03-12 17:26:49 +00:00 committed by GitHub
commit e2d0ce52ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 424 additions and 54 deletions

View file

@ -667,7 +667,7 @@ async fn fetch_document_by_filter() {
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.get_document_by_filter(json!(null)).await;
let (response, code) = index.fetch_documents(json!(null)).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
@ -678,7 +678,7 @@ async fn fetch_document_by_filter() {
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
let (response, code) = index.fetch_documents(json!({ "offset": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
@ -689,7 +689,7 @@ async fn fetch_document_by_filter() {
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
let (response, code) = index.fetch_documents(json!({ "limit": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
@ -700,7 +700,7 @@ async fn fetch_document_by_filter() {
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
let (response, code) = index.fetch_documents(json!({ "fields": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
@ -711,7 +711,7 @@ async fn fetch_document_by_filter() {
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
let (response, code) = index.fetch_documents(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
@ -722,7 +722,7 @@ async fn fetch_document_by_filter() {
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
@ -733,8 +733,7 @@ async fn fetch_document_by_filter() {
}
"###);
let (response, code) =
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
@ -762,8 +761,7 @@ async fn retrieve_vectors() {
"###);
// FETCHALL DOCUMENTS BY POST
let (response, _code) =
index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
let (response, _code) = index.fetch_documents(json!({ "retrieveVectors": "tamo" })).await;
snapshot!(response, @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",

View file

@ -371,7 +371,7 @@ async fn get_document_by_filter() {
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.get_document_by_filter(json!({})).await;
let (response, code) = index.fetch_documents(json!({})).await;
let (response2, code2) = index.get_all_documents_raw("").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
@ -401,7 +401,7 @@ async fn get_document_by_filter() {
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index.get_document_by_filter(json!({ "filter": "color = blue" })).await;
let (response, code) = index.fetch_documents(json!({ "filter": "color = blue" })).await;
let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
@ -424,9 +424,8 @@ async fn get_document_by_filter() {
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.get_document_by_filter(json!({ "offset": 1, "limit": 1, "filter": "color != blue" }))
.await;
let (response, code) =
index.fetch_documents(json!({ "offset": 1, "limit": 1, "filter": "color != blue" })).await;
let (response2, code2) =
index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await;
snapshot!(code, @"200 OK");
@ -446,9 +445,7 @@ async fn get_document_by_filter() {
assert_eq!(response, response2);
let (response, code) = index
.get_document_by_filter(
json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }),
)
.fetch_documents(json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }))
.await;
let (response2, code2) =
index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await;
@ -471,7 +468,7 @@ async fn get_document_by_filter() {
// Now testing more complex filter that the get route can't represent
let (response, code) =
index.get_document_by_filter(json!({ "filter": [["color = blue", "color = red"]] })).await;
index.fetch_documents(json!({ "filter": [["color = blue", "color = red"]] })).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
@ -495,9 +492,8 @@ async fn get_document_by_filter() {
}
"###);
let (response, code) = index
.get_document_by_filter(json!({ "filter": [["color != blue"], "color EXISTS"] }))
.await;
let (response, code) =
index.fetch_documents(json!({ "filter": [["color != blue"], "color EXISTS"] })).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
@ -514,6 +510,326 @@ async fn get_document_by_filter() {
"###);
}
#[actix_rt::test]
async fn get_document_by_ids() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index
.fetch_documents(json!({
"ids": ["0", 1, 2, 3]
}))
.await;
let (response2, code2) = index.get_all_documents_raw("?ids=0,1,2,3").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
},
{
"id": 3
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index.fetch_documents(json!({ "ids": [2, "1"] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=2,1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) =
index.fetch_documents(json!({ "offset": 1, "limit": 1, "ids": ["0", 0, 3] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=3,0&offset=1&limit=1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 3
}
],
"offset": 1,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) =
index.fetch_documents(json!({ "limit": 1, "ids": [0, 3], "fields": ["color"] })).await;
let (response2, code2) = index.get_all_documents_raw("?limit=1&ids=0,3&fields=color").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"color": "red"
}
],
"offset": 0,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
// Now testing more complex requests that the get route can't represent
let (response, code) = index.fetch_documents(json!({ "ids": [] })).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
}
#[actix_rt::test]
async fn get_document_invalid_ids() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.fetch_documents(json!({"ids": ["0", "illegal/docid"] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=0,illegal/docid").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"message": "In `.ids[1]`: Document identifier `\"illegal/docid\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_document_ids",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_ids"
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
}
#[actix_rt::test]
async fn get_document_not_found_ids() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.fetch_documents(json!({"ids": ["0", 3, 42] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=0,3,42").await;
// the document with id 42 is not in the results since it doesn't exist
// however, no error is raised
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 3
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
}
#[actix_rt::test]
async fn get_document_by_ids_and_filter() {
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings_filterable_attributes(json!(["color"])).await;
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) =
index.fetch_documents(json!({"ids": [2], "filter": "color = blue" })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=2&filter=color=blue").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.fetch_documents(
json!({ "offset": 1, "limit": 1, "ids": [0, 1, 2, 3], "filter": "color != blue" }),
)
.await;
let (response2, code2) =
index.get_all_documents_raw("?ids=0,1,2,3&filter=color!=blue&offset=1&limit=1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 3
}
],
"offset": 1,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.fetch_documents(json!({ "limit": 1, "ids": [0, 1, 2,3], "filter": "color != blue", "fields": ["color"] }))
.await;
let (response2, code2) =
index.get_all_documents_raw("?ids=0,1,2,3&limit=1&filter=color!=blue&fields=color").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"color": "red"
}
],
"offset": 0,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
// Now testing more complex filter that the get route can't represent
let (response, code) = index
.fetch_documents(json!({ "ids": [0, "2"], "filter": [["color = blue", "color = red"]] }))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (response, code) = index
.fetch_documents(json!({ "filter": [["color != blue"], "color EXISTS"], "ids": [1, 2, 3] }))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
}
#[actix_rt::test]
async fn get_document_with_vectors() {
let server = Server::new().await;