fix document id uniqueness bug

This commit is contained in:
mpostma 2020-07-07 14:52:49 +02:00
parent a0637c2c6d
commit 754efe1f42
2 changed files with 21 additions and 9 deletions

View File

@ -171,15 +171,23 @@ pub fn apply_addition<'a, 'b>(
let mut new_internal_docids = Vec::with_capacity(new_documents.len()); let mut new_internal_docids = Vec::with_capacity(new_documents.len());
for mut document in new_documents { for mut document in new_documents {
let external_docids_get = |docid: &str| {
match (external_docids.get(docid), new_external_docids.get(docid)) {
(_, Some(&id))
| (Some(id), _) => Some(id as u32),
(None, None) => None,
}
};
let (internal_docid, external_docid) = let (internal_docid, external_docid) =
extract_document_id( extract_document_id(
&primary_key, &primary_key,
&document, &document,
&external_docids, &external_docids_get,
&mut available_ids, &mut available_ids,
)?; )?;
new_external_docids.insert(external_docid, internal_docid.0); new_external_docids.insert(external_docid, internal_docid.0 as u64);
new_internal_docids.push(internal_docid); new_internal_docids.push(internal_docid);
if partial { if partial {

View File

@ -98,15 +98,17 @@ pub fn value_to_number(value: &Value) -> Option<Number> {
/// Validates a string representation to be a correct document id and returns /// Validates a string representation to be a correct document id and returns
/// the corresponding id or generate a new one, this is the way we produce documents ids. /// the corresponding id or generate a new one, this is the way we produce documents ids.
pub fn discover_document_id( pub fn discover_document_id<F>(
docid: &str, docid: &str,
external_docids: &FstMapCow, external_docids_get: F,
available_docids: &mut DiscoverIds<'_>, available_docids: &mut DiscoverIds<'_>,
) -> Result<DocumentId, SerializerError> ) -> Result<DocumentId, SerializerError>
where
F: FnOnce(&str) -> Option<u32>
{ {
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
match external_docids.get(docid) { match external_docids_get(docid) {
Some(id) => Ok(DocumentId(id as u32)), Some(id) => Ok(DocumentId(id)),
None => { None => {
let internal_id = available_docids.next().expect("no more ids available"); let internal_id = available_docids.next().expect("no more ids available");
Ok(internal_id) Ok(internal_id)
@ -118,12 +120,14 @@ pub fn discover_document_id(
} }
/// Extracts and validates the document id of a document. /// Extracts and validates the document id of a document.
pub fn extract_document_id( pub fn extract_document_id<F>(
primary_key: &str, primary_key: &str,
document: &IndexMap<String, Value>, document: &IndexMap<String, Value>,
external_docids: &FstMapCow, external_docids_get: F,
available_docids: &mut DiscoverIds<'_>, available_docids: &mut DiscoverIds<'_>,
) -> Result<(DocumentId, String), SerializerError> ) -> Result<(DocumentId, String), SerializerError>
where
F: FnOnce(&str) -> Option<u32>
{ {
match document.get(primary_key) { match document.get(primary_key) {
Some(value) => { Some(value) => {
@ -132,7 +136,7 @@ pub fn extract_document_id(
Value::String(string) => string.clone(), Value::String(string) => string.clone(),
_ => return Err(SerializerError::InvalidDocumentIdFormat), _ => return Err(SerializerError::InvalidDocumentIdFormat),
}; };
discover_document_id(&docid, external_docids, available_docids).map(|id| (id, docid)) discover_document_id(&docid, external_docids_get, available_docids).map(|id| (id, docid))
} }
None => Err(SerializerError::DocumentIdNotFound), None => Err(SerializerError::DocumentIdNotFound),
} }