From d6f9a60a322998590bceb5d095c74f63e3077414 Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Wed, 3 Aug 2022 11:38:40 +0200
Subject: [PATCH] fix: Remove whitespace trimming during document id validation

fix #592
---
 milli/src/update/index_documents/enrich.rs |  2 -
 milli/src/update/index_documents/mod.rs    | 47 ++++++++++++++++++++++
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs
index 7c9a016d8..15fbe9319 100644
--- a/milli/src/update/index_documents/enrich.rs
+++ b/milli/src/update/index_documents/enrich.rs
@@ -294,9 +294,7 @@ pub fn fetch_matching_values_in_object(
     }
 }
 
-/// Returns a trimmed version of the document id or `None` if it is invalid.
 pub fn validate_document_id(document_id: &str) -> Option<&str> {
-    let document_id = document_id.trim();
     if !document_id.is_empty()
         && document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
     {
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index c9890f93f..0f0eaca5a 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -2086,4 +2086,51 @@ mod tests {
         let (_builder, user_error) = builder.add_documents(doc4).unwrap();
         assert!(user_error.is_err());
     }
+
+    #[test]
+    fn primary_key_must_not_contain_whitespace() {
+        let tmp = tempfile::tempdir().unwrap();
+        let mut options = EnvOpenOptions::new();
+        options.map_size(4096 * 100);
+        let index = Index::new(options, tmp).unwrap();
+        let mut wtxn = index.write_txn().unwrap();
+        let indexer_config = IndexerConfig::default();
+        let builder = IndexDocuments::new(
+            &mut wtxn,
+            &index,
+            &indexer_config,
+            IndexDocumentsConfig::default(),
+            |_| (),
+        )
+        .unwrap();
+
+        let doc1 = documents! {[{
+            "id": " 1",
+            "title": "asdsad",
+        }]};
+
+        let doc2 = documents! {[{
+            "id": "\t2",
+            "title": "something",
+        }]};
+
+        let doc3 = documents! {[{
+            "id": "\r3",
+            "title": "something",
+        }]};
+
+        let doc4 = documents! {[{
+            "id": "\n4",
+            "title": "something",
+        }]};
+
+        let (builder, user_error) = builder.add_documents(doc1).unwrap();
+        assert!(user_error.is_err());
+        let (builder, user_error) = builder.add_documents(doc2).unwrap();
+        assert!(user_error.is_err());
+        let (builder, user_error) = builder.add_documents(doc3).unwrap();
+        assert!(user_error.is_err());
+        let (_builder, user_error) = builder.add_documents(doc4).unwrap();
+        assert!(user_error.is_err());
+    }
 }