mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-12 22:36:29 +01:00
Merge #5144
5144: Exactly 512 bytes docid fails r=Kerollmops a=dureuill # Pull Request ## Related issue Fixes #5050 ## What does this PR do? - Return a user error rather than an internal one for docids of exactly 512 bytes - Fix up error message to indicate that exactly 512 bytes long docids are not supported. - Fix up error message to reflect that index uids are actually limited to 400 bytes in length ## Impact - Impacts docs: - update [this paragraph](https://www.meilisearch.com/docs/learn/resources/known_limitations#length-of-primary-key-values) to say 511 bytes instead of 512 Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
eaabc1af2f
@ -1,12 +1,13 @@
|
|||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
|
|
||||||
use crate::TaskId;
|
|
||||||
use meilisearch_types::batches::BatchId;
|
use meilisearch_types::batches::BatchId;
|
||||||
use meilisearch_types::error::{Code, ErrorCode};
|
use meilisearch_types::error::{Code, ErrorCode};
|
||||||
use meilisearch_types::tasks::{Kind, Status};
|
use meilisearch_types::tasks::{Kind, Status};
|
||||||
use meilisearch_types::{heed, milli};
|
use meilisearch_types::{heed, milli};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
|
use crate::TaskId;
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||||
pub enum DateField {
|
pub enum DateField {
|
||||||
BeforeEnqueuedAt,
|
BeforeEnqueuedAt,
|
||||||
@ -103,7 +104,7 @@ pub enum Error {
|
|||||||
)]
|
)]
|
||||||
InvalidTaskCanceledBy { canceled_by: String },
|
InvalidTaskCanceledBy { canceled_by: String },
|
||||||
#[error(
|
#[error(
|
||||||
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
|
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 400 bytes."
|
||||||
)]
|
)]
|
||||||
InvalidIndexUid { index_uid: String },
|
InvalidIndexUid { index_uid: String },
|
||||||
#[error("Task `{0}` not found.")]
|
#[error("Task `{0}` not found.")]
|
||||||
|
@ -550,7 +550,7 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
|
|||||||
"the value of `id` is invalid. \
|
"the value of `id` is invalid. \
|
||||||
A document identifier can be of type integer or string, \
|
A document identifier can be of type integer or string, \
|
||||||
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
|
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
|
||||||
and can not be more than 512 bytes."
|
and can not be more than 511 bytes."
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1264,15 +1264,18 @@ async fn error_add_documents_bad_document_id() {
|
|||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
let index = server.index("test");
|
let index = server.index("test");
|
||||||
index.create(Some("docid")).await;
|
index.create(Some("docid")).await;
|
||||||
|
|
||||||
|
// unsupported characters
|
||||||
|
|
||||||
let documents = json!([
|
let documents = json!([
|
||||||
{
|
{
|
||||||
"docid": "foo & bar",
|
"docid": "foo & bar",
|
||||||
"content": "foobar"
|
"content": "foobar"
|
||||||
}
|
}
|
||||||
]);
|
]);
|
||||||
index.add_documents(documents, None).await;
|
let (value, _code) = index.add_documents(documents, None).await;
|
||||||
index.wait_task(1).await;
|
index.wait_task(value.uid()).await;
|
||||||
let (response, code) = index.get_task(1).await;
|
let (response, code) = index.get_task(value.uid()).await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||||
@r###"
|
@r###"
|
||||||
@ -1288,7 +1291,81 @@ async fn error_add_documents_bad_document_id() {
|
|||||||
"indexedDocuments": 0
|
"indexedDocuments": 0
|
||||||
},
|
},
|
||||||
"error": {
|
"error": {
|
||||||
"message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
"message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
|
||||||
|
"code": "invalid_document_id",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
|
||||||
|
},
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// More than 512 bytes
|
||||||
|
let documents = json!([
|
||||||
|
{
|
||||||
|
"docid": "a".repeat(600),
|
||||||
|
"content": "foobar"
|
||||||
|
}
|
||||||
|
]);
|
||||||
|
let (value, _code) = index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
let (response, code) = index.get_task(value.uid()).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||||
|
@r###"
|
||||||
|
{
|
||||||
|
"uid": 2,
|
||||||
|
"batchUid": 2,
|
||||||
|
"indexUid": "test",
|
||||||
|
"status": "failed",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"receivedDocuments": 1,
|
||||||
|
"indexedDocuments": 0
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
|
||||||
|
"code": "invalid_document_id",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
|
||||||
|
},
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Exactly 512 bytes
|
||||||
|
let documents = json!([
|
||||||
|
{
|
||||||
|
"docid": "a".repeat(512),
|
||||||
|
"content": "foobar"
|
||||||
|
}
|
||||||
|
]);
|
||||||
|
let (value, _code) = index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
let (response, code) = index.get_task(value.uid()).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||||
|
@r###"
|
||||||
|
{
|
||||||
|
"uid": 3,
|
||||||
|
"batchUid": 3,
|
||||||
|
"indexUid": "test",
|
||||||
|
"status": "failed",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"receivedDocuments": 1,
|
||||||
|
"indexedDocuments": 0
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
|
||||||
"code": "invalid_document_id",
|
"code": "invalid_document_id",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
|
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
|
||||||
|
@ -172,7 +172,7 @@ async fn error_update_documents_bad_document_id() {
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
response["error"]["message"],
|
response["error"]["message"],
|
||||||
json!(
|
json!(
|
||||||
r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."#
|
r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes."#
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
assert_eq!(response["error"]["code"], json!("invalid_document_id"));
|
assert_eq!(response["error"]["code"], json!("invalid_document_id"));
|
||||||
|
@ -79,7 +79,7 @@ async fn similar_bad_id() {
|
|||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
|
||||||
"code": "invalid_similar_id",
|
"code": "invalid_similar_id",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
|
||||||
@ -172,7 +172,7 @@ async fn similar_invalid_id() {
|
|||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
|
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
|
||||||
"code": "invalid_similar_id",
|
"code": "invalid_similar_id",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
|
||||||
|
@ -280,7 +280,7 @@ fn starts_with(selector: &str, key: &str) -> bool {
|
|||||||
|
|
||||||
pub fn validate_document_id_str(document_id: &str) -> Option<&str> {
|
pub fn validate_document_id_str(document_id: &str) -> Option<&str> {
|
||||||
if document_id.is_empty()
|
if document_id.is_empty()
|
||||||
|| document_id.len() > 512
|
|| document_id.len() >= 512
|
||||||
|| !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
|
|| !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
|
||||||
{
|
{
|
||||||
None
|
None
|
||||||
|
@ -114,7 +114,7 @@ pub enum UserError {
|
|||||||
"Document identifier `{}` is invalid. \
|
"Document identifier `{}` is invalid. \
|
||||||
A document identifier can be of type integer or string, \
|
A document identifier can be of type integer or string, \
|
||||||
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
|
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
|
||||||
and can not be more than 512 bytes.", .document_id.to_string()
|
and can not be more than 511 bytes.", .document_id.to_string()
|
||||||
)]
|
)]
|
||||||
InvalidDocumentId { document_id: Value },
|
InvalidDocumentId { document_id: Value },
|
||||||
#[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]
|
#[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]
|
||||||
|
Loading…
Reference in New Issue
Block a user