Merge pull request #99 from Kerollmops/simplify-transactional-update

Remove the lifetime restriction for Database Updates
This commit is contained in:
Clément Renault 2019-02-06 18:19:45 +01:00 committed by GitHub
commit 8576218b51
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 42 additions and 42 deletions

View File

@ -61,7 +61,7 @@ fn index(
while !end_of_file {
let tokenizer_builder = DefaultBuilder::new();
let mut update = database.update()?;
let mut update = database.start_update()?;
loop {
end_of_file = !rdr.read_record(&mut raw_record)?;
@ -88,7 +88,7 @@ fn index(
println!();
println!("committing update...");
update.commit()?;
database.commit_update(update)?;
}
Ok(database)

View File

@ -136,13 +136,24 @@ impl Database {
Ok(Database { db, view })
}
pub fn update(&self) -> Result<Update, Box<Error>> {
/// Creates a new `Update` initialised with the schema stored in the database.
///
/// The schema is read from the `DATA_SCHEMA` key of `self.db` and decoded
/// with `Schema::read_from_bin`.
///
/// # Errors
///
/// Returns an error if the lookup or the decoding fails.
///
/// # Panics
///
/// Panics if no value is stored under `DATA_SCHEMA`.
pub fn start_update(&self) -> Result<Update, Box<Error>> {
let schema = match self.db.get(DATA_SCHEMA)? {
Some(value) => Schema::read_from_bin(&*value)?,
None => panic!("Database does not contain a schema"),
};
Ok(Update::new(self, schema))
Ok(Update::new(schema))
}
/// Applies the given update to the database and publishes a fresh view.
///
/// The update is turned into a write batch with `Update::build`, the batch is
/// written to `self.db`, and a new `DatabaseView` is created from a snapshot
/// taken afterwards. That view is passed to `self.view.set` and also returned.
///
/// # Errors
///
/// Propagates any error from building the batch, writing it, or
/// constructing the view.
pub fn commit_update(&self, update: Update) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
let batch = update.build()?;
self.db.write(batch)?;
// The snapshot is created only once the write above has returned successfully.
let snapshot = Snapshot::new(self.db.clone());
let view = Arc::new(DatabaseView::new(snapshot)?);
// Hand one handle of the new view to `self.view`; the other goes back to the caller.
self.view.set(view.clone());
Ok(view)
}
pub fn view(&self) -> Arc<DatabaseView<Arc<DB>>> {
@ -202,12 +213,12 @@ mod tests {
};
let tokenizer_builder = DefaultBuilder::new();
let mut builder = database.update()?;
let mut builder = database.start_update()?;
let docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
let docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
let view = builder.commit()?;
let view = database.commit_update(builder)?;
let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
@ -271,15 +282,15 @@ mod tests {
let tokenizer_builder = DefaultBuilder::new();
let mut builder = database.update()?;
let mut builder = database.start_update()?;
let docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
let docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
builder.commit()?;
database.commit_update(builder)?;
let mut builder = database.update()?;
let mut builder = database.start_update()?;
let docid2 = builder.update_document(&doc2, &tokenizer_builder, &stop_words)?;
let docid3 = builder.update_document(&doc3, &tokenizer_builder, &stop_words)?;
let view = builder.commit()?;
let view = database.commit_update(builder)?;
let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
@ -358,7 +369,7 @@ mod bench {
}
let tokenizer_builder = DefaultBuilder;
let mut builder = database.update()?;
let mut builder = database.start_update()?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..300 {
@ -370,7 +381,7 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
builder.commit()?;
database.commit_update(builder)?;
drop(database);
@ -403,7 +414,7 @@ mod bench {
}
let tokenizer_builder = DefaultBuilder;
let mut builder = database.update()?;
let mut builder = database.start_update()?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..3000 {
@ -415,7 +426,7 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
builder.commit()?;
database.commit_update(builder)?;
drop(database);
@ -449,7 +460,7 @@ mod bench {
}
let tokenizer_builder = DefaultBuilder;
let mut builder = database.update()?;
let mut builder = database.start_update()?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..30_000 {
@ -461,7 +472,7 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
builder.commit()?;
database.commit_update(builder)?;
drop(database);
@ -494,7 +505,7 @@ mod bench {
}
let tokenizer_builder = DefaultBuilder;
let mut builder = database.update()?;
let mut builder = database.start_update()?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..300 {
@ -506,7 +517,7 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let view = builder.commit()?;
let view = database.commit_update(builder)?;
bench.iter(|| {
for q in &["a", "b", "c", "d", "e"] {
@ -539,7 +550,7 @@ mod bench {
}
let tokenizer_builder = DefaultBuilder;
let mut builder = database.update()?;
let mut builder = database.start_update()?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..3000 {
@ -551,7 +562,7 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let view = builder.commit()?;
let view = database.commit_update(builder)?;
bench.iter(|| {
for q in &["a", "b", "c", "d", "e"] {
@ -585,7 +596,7 @@ mod bench {
}
let tokenizer_builder = DefaultBuilder;
let mut builder = database.update()?;
let mut builder = database.start_update()?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..30_000 {
@ -597,7 +608,7 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let view = builder.commit()?;
let view = database.commit_update(builder)?;
bench.iter(|| {
for q in &["a", "b", "c", "d", "e"] {

View File

@ -1,14 +1,12 @@
use std::collections::{HashSet, BTreeMap};
use std::error::Error;
use std::sync::Arc;
use rocksdb::rocksdb::{DB, Writable, Snapshot, WriteBatch};
use rocksdb::rocksdb::{Writable, WriteBatch};
use hashbrown::hash_map::HashMap;
use serde::Serialize;
use fst::map::Map;
use sdset::Set;
use crate::database::{DATA_INDEX, Database, DatabaseView};
use crate::database::index::{Positive, PositiveBuilder, Negative};
use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
use crate::database::serde::serializer::Serializer;
@ -17,20 +15,20 @@ use crate::database::schema::SchemaAttr;
use crate::tokenizer::TokenizerBuilder;
use crate::data::{DocIds, DocIndexes};
use crate::database::schema::Schema;
use crate::{DocumentId, DocIndex};
use crate::database::index::Index;
use crate::{DocumentId, DocIndex};
use crate::database::DATA_INDEX;
pub type Token = Vec<u8>; // TODO could be replaced by a SmallVec
pub struct Update<'a> {
database: &'a Database,
/// An update under construction: the schema it was started with plus the
/// `RawUpdateBuilder` that it wraps.
pub struct Update {
schema: Schema,
raw_builder: RawUpdateBuilder,
}
impl<'a> Update<'a> {
pub(crate) fn new(database: &'a Database, schema: Schema) -> Update<'a> {
Update { database, schema, raw_builder: RawUpdateBuilder::new() }
impl Update {
/// Creates an `Update` for the given schema with a fresh `RawUpdateBuilder`.
pub(crate) fn new(schema: Schema) -> Update {
Update { schema, raw_builder: RawUpdateBuilder::new() }
}
pub fn update_document<T, B>(
@ -65,18 +63,9 @@ impl<'a> Update<'a> {
Ok(document_id)
}
pub fn commit(self) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
let batch = self.raw_builder.build()?;
self.database.db.write(batch)?;
let snapshot = Snapshot::new(self.database.db.clone());
let view = Arc::new(DatabaseView::new(snapshot)?);
self.database.view.set(view.clone());
Ok(view)
/// Consumes the update and returns the `WriteBatch` produced by the
/// underlying raw builder, forwarding any error it reports.
pub(crate) fn build(self) -> Result<WriteBatch, Box<Error>> {
self.raw_builder.build()
}
/// Consumes the update without doing anything else; the body is empty.
pub fn abort(self) { }
}
#[derive(Copy, Clone, PartialEq, Eq)]