From b3776598d88cf5cac1e4922b3788efb05d3f37fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 13 Feb 2021 14:04:23 +0100 Subject: [PATCH 1/3] Add a test to check deletion of documents with number as primary key --- milli/src/update/delete_documents.rs | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 2b67535c9..bd134891d 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -248,3 +248,39 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { Ok(self.documents_ids.len() as usize) } } + +#[cfg(test)] +mod tests { + use heed::EnvOpenOptions; + + use crate::update::{IndexDocuments, UpdateFormat}; + use super::*; + + #[test] + fn delete_documents_with_numbers_as_primary_key() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // First we send 3 documents, each with a numeric id (0, 1 and 2). + let mut wtxn = index.write_txn().unwrap(); + let content = &br#"[ + { "id": 0, "name": "kevin", "object": { "key1": "value1", "key2": "value2" } }, + { "id": 1, "name": "kevina", "array": ["I", "am", "fine"] }, + { "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] } + ]"#[..]; + let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); + builder.update_format(UpdateFormat::Json); + builder.execute(content, |_, _| ()).unwrap(); + + // delete those documents, ids are synchronous therefore 0, 1, and 2. 
+ let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap(); + builder.delete_document(0); + builder.delete_document(1); + builder.delete_document(2); + builder.execute().unwrap(); + + wtxn.commit().unwrap(); + } +} From 69acdd437e3c560620397d1363d699b33370c0b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 13 Feb 2021 13:57:53 +0100 Subject: [PATCH 2/3] Deserialize documents ids into JSON Values on deletion --- milli/src/update/delete_documents.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index bd134891d..932589dd7 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -1,6 +1,8 @@ +use anyhow::anyhow; use fst::IntoStreamer; use heed::types::ByteSlice; use roaring::RoaringBitmap; +use serde_json::Value; use crate::facet::FacetType; use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds}; @@ -95,7 +97,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let mut iter = documents.range_mut(self.wtxn, &(key..=key))?; if let Some((_key, obkv)) = iter.next().transpose()? 
{ if let Some(content) = obkv.get(id_field) { - let external_id: SmallString32 = serde_json::from_slice(content).unwrap(); + let external_id = match serde_json::from_slice(content).unwrap() { + Value::String(string) => SmallString32::from(string.as_str()), + Value::Number(number) => SmallString32::from(number.to_string()), + _ => return Err(anyhow!("documents ids must be either strings or numbers")), + }; external_ids.push(external_id); } iter.del_current()?; From 89ce4e74fefb8894dbc122bb67fe7da69fc2b30f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 13 Feb 2021 14:16:27 +0100 Subject: [PATCH 3/3] Do not change the primary key type when we serialize documents --- milli/src/update/index_documents/transform.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index d53b83361..68888aad9 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -178,16 +178,10 @@ impl Transform<'_, '_> { serde_json::to_writer(&mut json_buffer, value)?; writer.insert(field_id, &json_buffer)?; } - else if field_id == primary_key_id { - // We validate the document id [a-zA-Z0-9\-_]. - let external_id = match validate_document_id(&external_id) { - Some(valid) => valid, - None => return Err(anyhow!("invalid document id: {:?}", external_id)), - }; - // We serialize the document id. - serde_json::to_writer(&mut json_buffer, &external_id)?; - writer.insert(field_id, &json_buffer)?; + // We validate the document id [a-zA-Z0-9\-_]. + if field_id == primary_key_id && validate_document_id(&external_id).is_none() { + return Err(anyhow!("invalid document id: {:?}", external_id)); } }