mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Introduce a parameter to prevent the engine from autogenerating docids
This commit is contained in:
parent
21b4d60101
commit
6d52c5b2f0
@ -202,6 +202,7 @@ pub struct IndexDocuments<'t, 'u, 'i> {
|
|||||||
indexing_jobs: Option<usize>,
|
indexing_jobs: Option<usize>,
|
||||||
update_method: IndexDocumentsMethod,
|
update_method: IndexDocumentsMethod,
|
||||||
update_format: UpdateFormat,
|
update_format: UpdateFormat,
|
||||||
|
autogenerate_docids: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
|
impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
|
||||||
@ -219,6 +220,7 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
|
|||||||
indexing_jobs: None,
|
indexing_jobs: None,
|
||||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||||
update_format: UpdateFormat::Json,
|
update_format: UpdateFormat::Json,
|
||||||
|
autogenerate_docids: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -272,6 +274,16 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Allows the engine to generate document ids for documents that do not
/// provide a primary key.
///
/// The `autogenerate_docids` flag is initialized to `true`, so calling this
/// is only needed to revert a previous call to
/// `disable_autogenerate_docids`.
///
/// Returns `&mut Self` so that configuration calls can be chained.
pub fn enable_autogenerate_docids(&mut self) -> &mut Self {
    self.autogenerate_docids = true;
    self
}
|
||||||
|
|
||||||
|
/// Forbids the engine from generating document ids on its own.
///
/// Once disabled, the flag is forwarded to the transform step, which returns
/// a "missing primary key" error when a document (or the whole batch) has no
/// primary key, instead of generating an id for it.
///
/// Returns `&mut Self` so that configuration calls can be chained.
pub fn disable_autogenerate_docids(&mut self) -> &mut Self {
    self.autogenerate_docids = false;
    self
}
|
||||||
|
|
||||||
pub fn execute<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<()>
|
pub fn execute<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<()>
|
||||||
where
|
where
|
||||||
R: io::Read,
|
R: io::Read,
|
||||||
@ -288,6 +300,7 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
|
|||||||
max_nb_chunks: self.max_nb_chunks,
|
max_nb_chunks: self.max_nb_chunks,
|
||||||
max_memory: self.max_memory,
|
max_memory: self.max_memory,
|
||||||
index_documents_method: self.update_method,
|
index_documents_method: self.update_method,
|
||||||
|
autogenerate_docids: self.autogenerate_docids,
|
||||||
};
|
};
|
||||||
|
|
||||||
let output = match self.update_format {
|
let output = match self.update_format {
|
||||||
@ -636,6 +649,56 @@ mod tests {
|
|||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// With docid autogeneration disabled, a CSV batch that has no id column
/// must be rejected and must leave the index empty.
#[test]
fn not_auto_generated_csv_documents_ids() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    // We send 3 documents that only have a `name` column (no id at all).
    // Since autogeneration is disabled, the indexation must fail.
    let mut wtxn = index.write_txn().unwrap();
    let content = &b"name\nkevin\nkevina\nbenoit\n"[..];
    let mut builder = IndexDocuments::new(&mut wtxn, &index);
    builder.disable_autogenerate_docids();
    builder.update_format(UpdateFormat::Csv);
    assert!(builder.execute(content, |_, _| ()).is_err());
    wtxn.commit().unwrap();

    // Check that there is no document.
    let rtxn = index.read_txn().unwrap();
    let count = index.number_of_documents(&rtxn).unwrap();
    assert_eq!(count, 0);
    drop(rtxn);
}
|
||||||
|
|
||||||
|
/// With docid autogeneration disabled, a JSON batch in which some documents
/// lack an id must be rejected and must leave the index empty.
#[test]
fn not_auto_generated_json_documents_ids() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    // We send 3 documents, 2 of which have no id.
    // Since autogeneration is disabled, the indexation must fail.
    let mut wtxn = index.write_txn().unwrap();
    let content = &br#"[
        { "name": "kevina", "id": 21 },
        { "name": "kevin" },
        { "name": "benoit" }
    ]"#[..];
    let mut builder = IndexDocuments::new(&mut wtxn, &index);
    builder.disable_autogenerate_docids();
    builder.update_format(UpdateFormat::Json);
    assert!(builder.execute(content, |_, _| ()).is_err());
    wtxn.commit().unwrap();

    // Check that there is no document.
    let rtxn = index.read_txn().unwrap();
    let count = index.number_of_documents(&rtxn).unwrap();
    assert_eq!(count, 0);
    drop(rtxn);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn simple_auto_generated_documents_ids() {
|
fn simple_auto_generated_documents_ids() {
|
||||||
let path = tempfile::tempdir().unwrap();
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
@ -33,6 +33,7 @@ pub struct Transform<'t, 'i> {
|
|||||||
pub max_nb_chunks: Option<usize>,
|
pub max_nb_chunks: Option<usize>,
|
||||||
pub max_memory: Option<usize>,
|
pub max_memory: Option<usize>,
|
||||||
pub index_documents_method: IndexDocumentsMethod,
|
pub index_documents_method: IndexDocumentsMethod,
|
||||||
|
pub autogenerate_docids: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Transform<'_, '_> {
|
impl Transform<'_, '_> {
|
||||||
@ -57,7 +58,14 @@ impl Transform<'_, '_> {
|
|||||||
None => {
|
None => {
|
||||||
match documents.get(0).and_then(|doc| doc.keys().find(|k| k.contains("id"))) {
|
match documents.get(0).and_then(|doc| doc.keys().find(|k| k.contains("id"))) {
|
||||||
Some(key) => fields_ids_map.insert(&key).context("field id limit reached")?,
|
Some(key) => fields_ids_map.insert(&key).context("field id limit reached")?,
|
||||||
None => fields_ids_map.insert("id").context("field id limit reached")?,
|
None => {
|
||||||
|
if !self.autogenerate_docids {
|
||||||
|
// If there is no primary key in the current document batch, we must
|
||||||
|
// return an error and not automatically generate any document id.
|
||||||
|
return Err(anyhow!("missing primary key"))
|
||||||
|
}
|
||||||
|
fields_ids_map.insert("id").context("field id limit reached")?
|
||||||
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
@ -130,6 +138,9 @@ impl Transform<'_, '_> {
|
|||||||
_ => return Err(anyhow!("documents ids must be either strings or numbers")),
|
_ => return Err(anyhow!("documents ids must be either strings or numbers")),
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
|
if !self.autogenerate_docids {
|
||||||
|
return Err(anyhow!("missing primary key"));
|
||||||
|
}
|
||||||
let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer);
|
let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer);
|
||||||
Cow::Borrowed(uuid)
|
Cow::Borrowed(uuid)
|
||||||
},
|
},
|
||||||
@ -180,11 +191,16 @@ impl Transform<'_, '_> {
|
|||||||
let primary_key_field_id = match user_id_pos {
|
let primary_key_field_id = match user_id_pos {
|
||||||
Some(pos) => fields_ids_map.id(&headers[pos]).expect("found the primary key"),
|
Some(pos) => fields_ids_map.id(&headers[pos]).expect("found the primary key"),
|
||||||
None => {
|
None => {
|
||||||
let id = fields_ids_map.insert("id").context("field id limit reached")?;
|
if !self.autogenerate_docids {
|
||||||
|
// If there is no primary key in the current document batch, we must
|
||||||
|
// return an error and not automatically generate any document id.
|
||||||
|
return Err(anyhow!("missing primary key"))
|
||||||
|
}
|
||||||
|
let field_id = fields_ids_map.insert("id").context("field id limit reached")?;
|
||||||
// We make sure to add the primary key field id to the fields ids,
|
// We make sure to add the primary key field id to the fields ids,
|
||||||
// this way it is added to the obks.
|
// this way it is added to the obks.
|
||||||
fields_ids.push((id, usize::max_value()));
|
fields_ids.push((field_id, usize::max_value()));
|
||||||
id
|
field_id
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user