Add a way to provide a primary_key or autogenerate document ids

This commit is contained in:
tamo 2021-05-25 17:55:45 +02:00 committed by Tamo
parent 06c414a753
commit 4536dfccd0
No known key found for this signature in database
GPG key ID: 20CD8020AFA88D69
3 changed files with 338 additions and 16 deletions

View file

@ -48,6 +48,7 @@ const BASE_CONF: Conf = Conf {
"Notstandskomitee ", // 4
],
configure: base_conf,
primary_key: Some("id"),
..Conf::BASE
};

View file

@ -56,6 +56,10 @@ pub fn base_setup(conf: &Conf) -> Index {
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(10);
let index = Index::new(options, conf.database_name).unwrap();
if let Some(primary_key) = conf.primary_key {
let mut wtxn = index.write_txn().unwrap();
index.put_primary_key(&mut wtxn, primary_key).unwrap();
}
let update_builder = UpdateBuilder::new(0);
let mut wtxn = index.write_txn().unwrap();
@ -78,6 +82,9 @@ pub fn base_setup(conf: &Conf) -> Index {
let update_builder = UpdateBuilder::new(0);
let mut wtxn = index.write_txn().unwrap();
let mut builder = update_builder.index_documents(&mut wtxn, &index);
if let None = conf.primary_key {
builder.enable_autogenerate_docids();
}
builder.update_format(UpdateFormat::Csv);
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
let reader = File::open(conf.dataset)