841: Unique docid bugfix r=LegendreM a=MarinPostma

fix #827 

Co-authored-by: mpostma <postma.marin@protonmail.com>
This commit is contained in:
bors[bot] 2020-07-13 13:36:32 +00:00 committed by GitHub
commit 308630c094
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 42 additions and 10 deletions

View File

@ -171,15 +171,23 @@ pub fn apply_addition<'a, 'b>(
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
for mut document in new_documents {
let external_docids_get = |docid: &str| {
match (external_docids.get(docid), new_external_docids.get(docid)) {
(_, Some(&id))
| (Some(id), _) => Some(id as u32),
(None, None) => None,
}
};
let (internal_docid, external_docid) =
extract_document_id(
&primary_key,
&document,
&external_docids,
&external_docids_get,
&mut available_ids,
)?;
new_external_docids.insert(external_docid, internal_docid.0);
new_external_docids.insert(external_docid, internal_docid.0 as u64);
new_internal_docids.push(internal_docid);
if partial {

View File

@ -6,7 +6,7 @@ use meilisearch_types::DocumentId;
use ordered_float::OrderedFloat;
use serde_json::Value;
use crate::{Number, FstMapCow};
use crate::Number;
use crate::raw_indexer::RawIndexer;
use crate::serde::SerializerError;
use crate::store::DiscoverIds;
@ -98,15 +98,17 @@ pub fn value_to_number(value: &Value) -> Option<Number> {
/// Validates a string representation to be a correct document id and returns
/// the corresponding id or generate a new one, this is the way we produce documents ids.
pub fn discover_document_id(
pub fn discover_document_id<F>(
docid: &str,
external_docids: &FstMapCow,
external_docids_get: F,
available_docids: &mut DiscoverIds<'_>,
) -> Result<DocumentId, SerializerError>
where
F: FnOnce(&str) -> Option<u32>
{
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
match external_docids.get(docid) {
Some(id) => Ok(DocumentId(id as u32)),
match external_docids_get(docid) {
Some(id) => Ok(DocumentId(id)),
None => {
let internal_id = available_docids.next().expect("no more ids available");
Ok(internal_id)
@ -118,12 +120,14 @@ pub fn discover_document_id(
}
/// Extracts and validates the document id of a document.
pub fn extract_document_id(
pub fn extract_document_id<F>(
primary_key: &str,
document: &IndexMap<String, Value>,
external_docids: &FstMapCow,
external_docids_get: F,
available_docids: &mut DiscoverIds<'_>,
) -> Result<(DocumentId, String), SerializerError>
where
F: FnOnce(&str) -> Option<u32>
{
match document.get(primary_key) {
Some(value) => {
@ -132,7 +136,7 @@ pub fn extract_document_id(
Value::String(string) => string.clone(),
_ => return Err(SerializerError::InvalidDocumentIdFormat),
};
discover_document_id(&docid, external_docids, available_docids).map(|id| (id, docid))
discover_document_id(&docid, external_docids_get, available_docids).map(|id| (id, docid))
}
None => Err(SerializerError::DocumentIdNotFound),
}

View File

@ -195,3 +195,23 @@ async fn add_document_with_long_field() {
let (response, _status) = server.search_post(json!({ "q": "request_buffering" })).await;
assert!(!response["hits"].as_array().unwrap().is_empty());
}
#[actix_rt::test]
async fn documents_with_same_id_are_overwritten() {
let mut server = common::Server::with_uid("test");
server.create_index(json!({ "uid": "test"})).await;
let documents = json!([
{
"id": 1,
"content": "test1"
},
{
"id": 1,
"content": "test2"
},
]);
server.add_or_replace_multiple_documents(documents).await;
let (response, _status) = server.get_all_documents().await;
assert_eq!(response.as_array().unwrap().len(), 1);
assert_eq!(response.as_array().unwrap()[0].as_object().unwrap()["content"], "test2");
}