From 754efe1f42665b0836269eb9c8c63874f5d488f4 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 7 Jul 2020 14:52:49 +0200 Subject: [PATCH 1/2] fix document id uniqueness bug --- .../src/update/documents_addition.rs | 12 ++++++++++-- meilisearch-core/src/update/helpers.rs | 18 +++++++++++------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index a4b41d2a8..5603966b0 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -171,15 +171,23 @@ pub fn apply_addition<'a, 'b>( let mut new_internal_docids = Vec::with_capacity(new_documents.len()); for mut document in new_documents { + let external_docids_get = |docid: &str| { + match (external_docids.get(docid), new_external_docids.get(docid)) { + (_, Some(&id)) + | (Some(id), _) => Some(id as u32), + (None, None) => None, + } + }; + let (internal_docid, external_docid) = extract_document_id( &primary_key, &document, - &external_docids, + &external_docids_get, &mut available_ids, )?; - new_external_docids.insert(external_docid, internal_docid.0); + new_external_docids.insert(external_docid, internal_docid.0 as u64); new_internal_docids.push(internal_docid); if partial { diff --git a/meilisearch-core/src/update/helpers.rs b/meilisearch-core/src/update/helpers.rs index 6e8902182..1942c322c 100644 --- a/meilisearch-core/src/update/helpers.rs +++ b/meilisearch-core/src/update/helpers.rs @@ -98,15 +98,17 @@ pub fn value_to_number(value: &Value) -> Option { /// Validates a string representation to be a correct document id and returns /// the corresponding id or generate a new one, this is the way we produce documents ids. -pub fn discover_document_id( +pub fn discover_document_id( docid: &str, - external_docids: &FstMapCow, + external_docids_get: F, available_docids: &mut DiscoverIds<'_>, ) -> Result +where + F: FnOnce(&str) -> Option { if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { - match external_docids.get(docid) { - Some(id) => Ok(DocumentId(id as u32)), + match external_docids_get(docid) { + Some(id) => Ok(DocumentId(id)), None => { let internal_id = available_docids.next().expect("no more ids available"); Ok(internal_id) @@ -118,12 +120,14 @@ pub fn discover_document_id( } /// Extracts and validates the document id of a document. -pub fn extract_document_id( +pub fn extract_document_id( primary_key: &str, document: &IndexMap, - external_docids: &FstMapCow, + external_docids_get: F, available_docids: &mut DiscoverIds<'_>, ) -> Result<(DocumentId, String), SerializerError> +where + F: FnOnce(&str) -> Option { match document.get(primary_key) { Some(value) => { @@ -132,7 +136,7 @@ pub fn extract_document_id( Value::String(string) => string.clone(), _ => return Err(SerializerError::InvalidDocumentIdFormat), }; - discover_document_id(&docid, external_docids, available_docids).map(|id| (id, docid)) + discover_document_id(&docid, external_docids_get, available_docids).map(|id| (id, docid)) } None => Err(SerializerError::DocumentIdNotFound), } From f54397e0cfba5ce54a11e385ed788c3b751c5704 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 7 Jul 2020 15:12:27 +0200 Subject: [PATCH 2/2] test unique document id bug --- meilisearch-core/src/update/helpers.rs | 2 +- meilisearch-http/tests/documents_add.rs | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/meilisearch-core/src/update/helpers.rs b/meilisearch-core/src/update/helpers.rs index 1942c322c..1aad1f505 100644 --- a/meilisearch-core/src/update/helpers.rs +++ b/meilisearch-core/src/update/helpers.rs @@ -6,7 +6,7 @@ use meilisearch_types::DocumentId; use ordered_float::OrderedFloat; use serde_json::Value; -use crate::{Number, FstMapCow}; +use crate::Number; use crate::raw_indexer::RawIndexer; use crate::serde::SerializerError; use crate::store::DiscoverIds; diff --git a/meilisearch-http/tests/documents_add.rs b/meilisearch-http/tests/documents_add.rs index 994fd55e5..fae29f15e 100644 --- a/meilisearch-http/tests/documents_add.rs +++ b/meilisearch-http/tests/documents_add.rs @@ -195,3 +195,23 @@ async fn add_document_with_long_field() { let (response, _status) = server.search_post(json!({ "q": "request_buffering" })).await; assert!(!response["hits"].as_array().unwrap().is_empty()); } + +#[actix_rt::test] +async fn documents_with_same_id_are_overwritten() { + let mut server = common::Server::with_uid("test"); + server.create_index(json!({ "uid": "test"})).await; + let documents = json!([ + { + "id": 1, + "content": "test1" + }, + { + "id": 1, + "content": "test2" + }, + ]); + server.add_or_replace_multiple_documents(documents).await; + let (response, _status) = server.get_all_documents().await; + assert_eq!(response.as_array().unwrap().len(), 1); + assert_eq!(response.as_array().unwrap()[0].as_object().unwrap()["content"], "test2"); +}