5144: Exactly 512 bytes docid fails r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5050 

## What does this PR do?
- Return a user error rather than an internal one for docids of exactly 512 bytes
- Fix up error message to indicate that exactly 512 bytes long docids are not supported.
- Fix up error message to reflect that index uids are actually limited to 400 bytes in length

## Impact

- Impacts docs: 
    - update [this paragraph](https://www.meilisearch.com/docs/learn/resources/known_limitations#length-of-primary-key-values) to say 511 bytes instead of 512 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-12-11 15:41:05 +00:00 committed by GitHub
commit eaabc1af2f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 90 additions and 12 deletions

View File

@ -1,12 +1,13 @@
use std::fmt::Display;
use crate::TaskId;
use meilisearch_types::batches::BatchId;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{heed, milli};
use thiserror::Error;
use crate::TaskId;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DateField {
BeforeEnqueuedAt,
@ -103,7 +104,7 @@ pub enum Error {
)]
InvalidTaskCanceledBy { canceled_by: String },
#[error(
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 400 bytes."
)]
InvalidIndexUid { index_uid: String },
#[error("Task `{0}` not found.")]

View File

@ -550,7 +550,7 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
"the value of `id` is invalid. \
A document identifier can be of type integer or string, \
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
and can not be more than 512 bytes."
and can not be more than 511 bytes."
)
}
}

View File

@ -1264,15 +1264,18 @@ async fn error_add_documents_bad_document_id() {
let server = Server::new().await;
let index = server.index("test");
index.create(Some("docid")).await;
// unsupported characters
let documents = json!([
{
"docid": "foo & bar",
"content": "foobar"
}
]);
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = index.get_task(1).await;
let (value, _code) = index.add_documents(documents, None).await;
index.wait_task(value.uid()).await;
let (response, code) = index.get_task(value.uid()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
@ -1288,7 +1291,81 @@ async fn error_add_documents_bad_document_id() {
"indexedDocuments": 0
},
"error": {
"message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_document_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// More than 512 bytes
let documents = json!([
{
"docid": "a".repeat(600),
"content": "foobar"
}
]);
let (value, _code) = index.add_documents(documents, None).await;
index.wait_task(value.uid()).await;
let (response, code) = index.get_task(value.uid()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 2,
"batchUid": 2,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_document_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Exactly 512 bytes
let documents = json!([
{
"docid": "a".repeat(512),
"content": "foobar"
}
]);
let (value, _code) = index.add_documents(documents, None).await;
index.wait_task(value.uid()).await;
let (response, code) = index.get_task(value.uid()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 3,
"batchUid": 3,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_document_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_id"

View File

@ -172,7 +172,7 @@ async fn error_update_documents_bad_document_id() {
assert_eq!(
response["error"]["message"],
json!(
r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."#
r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes."#
)
);
assert_eq!(response["error"]["code"], json!("invalid_document_id"));

View File

@ -79,7 +79,7 @@ async fn similar_bad_id() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_similar_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
@ -172,7 +172,7 @@ async fn similar_invalid_id() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_similar_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"

View File

@ -280,7 +280,7 @@ fn starts_with(selector: &str, key: &str) -> bool {
pub fn validate_document_id_str(document_id: &str) -> Option<&str> {
if document_id.is_empty()
|| document_id.len() > 512
|| document_id.len() >= 512
|| !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
{
None

View File

@ -114,7 +114,7 @@ pub enum UserError {
"Document identifier `{}` is invalid. \
A document identifier can be of type integer or string, \
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
and can not be more than 512 bytes.", .document_id.to_string()
and can not be more than 511 bytes.", .document_id.to_string()
)]
InvalidDocumentId { document_id: Value },
#[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]