Merge pull request #84 from meilisearch/stringify-documents-ids

Stringify documents ids even when deleting documents
This commit is contained in:
Clément Renault 2021-02-15 21:30:51 +01:00 committed by GitHub
commit 48b470140b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 10 deletions

View File

@ -1,6 +1,8 @@
use anyhow::anyhow;
use fst::IntoStreamer;
use heed::types::ByteSlice;
use roaring::RoaringBitmap;
use serde_json::Value;
use crate::facet::FacetType;
use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds};
@ -95,7 +97,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let mut iter = documents.range_mut(self.wtxn, &(key..=key))?;
if let Some((_key, obkv)) = iter.next().transpose()? {
if let Some(content) = obkv.get(id_field) {
let external_id: SmallString32 = serde_json::from_slice(content).unwrap();
let external_id = match serde_json::from_slice(content).unwrap() {
Value::String(string) => SmallString32::from(string.as_str()),
Value::Number(number) => SmallString32::from(number.to_string()),
_ => return Err(anyhow!("documents ids must be either strings or numbers")),
};
external_ids.push(external_id);
}
iter.del_current()?;
@ -248,3 +254,39 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
Ok(self.documents_ids.len() as usize)
}
}
#[cfg(test)]
mod tests {
use heed::EnvOpenOptions;
use crate::update::{IndexDocuments, UpdateFormat};
use super::*;
#[test]
fn delete_documents_with_numbers_as_primary_key() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
// First we send 3 documents with an id for only one of them.
let mut wtxn = index.write_txn().unwrap();
let content = &br#"[
{ "id": 0, "name": "kevin", "object": { "key1": "value1", "key2": "value2" } },
{ "id": 1, "name": "kevina", "array": ["I", "am", "fine"] },
{ "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
builder.execute(content, |_, _| ()).unwrap();
// delete those documents, ids are synchronous therefore 0, 1, and 2.
let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap();
builder.delete_document(0);
builder.delete_document(1);
builder.delete_document(2);
builder.execute().unwrap();
wtxn.commit().unwrap();
}
}

View File

@ -178,16 +178,10 @@ impl Transform<'_, '_> {
serde_json::to_writer(&mut json_buffer, value)?;
writer.insert(field_id, &json_buffer)?;
}
else if field_id == primary_key_id {
// We validate the document id [a-zA-Z0-9\-_].
let external_id = match validate_document_id(&external_id) {
Some(valid) => valid,
None => return Err(anyhow!("invalid document id: {:?}", external_id)),
};
// We serialize the document id.
serde_json::to_writer(&mut json_buffer, &external_id)?;
writer.insert(field_id, &json_buffer)?;
// We validate the document id [a-zA-Z0-9\-_].
if field_id == primary_key_id && validate_document_id(&external_id).is_none() {
return Err(anyhow!("invalid document id: {:?}", external_id));
}
}