feat: Remove the lifetime restriction for Database Updates

Author: Clément Renault
Date:   2019-02-06 18:03:41 +01:00
Parent: 4398b88a3a
Commit: 1c1f9201b8
GPG Key ID: 0151CDAB43460DAE
3 changed files with 42 additions and 42 deletions
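
Until now, `Database::update` returned an `Update<'a>` that held a borrow of the `Database` for its entire life, and the `Update` committed itself. This commit inverts that relationship: `Database::start_update` hands out an owned `Update` (no lifetime parameter), and the database consumes it again in the new `Database::commit_update`, which writes the underlying `WriteBatch`, takes a fresh snapshot, and publishes the new `DatabaseView`. A minimal before/after sketch of a call site, using the names from the diff below (the `doc` variable is illustrative):

    // before: the update borrows the database until it is committed
    let mut update = database.update()?;
    let docid = update.update_document(&doc, &tokenizer_builder, &stop_words)?;
    let view = update.commit()?;

    // after: the update is an owned value; the database commits it
    let mut update = database.start_update()?;
    let docid = update.update_document(&doc, &tokenizer_builder, &stop_words)?;
    let view = database.commit_update(update)?;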

File 1 of 3:

@@ -61,7 +61,7 @@ fn index(
     while !end_of_file {
         let tokenizer_builder = DefaultBuilder::new();
-        let mut update = database.update()?;
+        let mut update = database.start_update()?;

         loop {
             end_of_file = !rdr.read_record(&mut raw_record)?;
@@ -88,7 +88,7 @@ fn index(
         println!();
         println!("committing update...");
-        update.commit()?;
+        database.commit_update(update)?;
     }

     Ok(database)
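
In the example, only the two call sites change; the chunked indexing loop itself stays as it was. Condensed, with the CSV record deserialization between the two calls elided, the per-batch flow now reads roughly as follows (a sketch, not the full example):

    while !end_of_file {
        let mut update = database.start_update()?;

        loop {
            end_of_file = !rdr.read_record(&mut raw_record)?;
            if end_of_file { break }
            // deserialize the record into a document and
            // stage it in the pending update (elided here)
        }

        println!("committing update...");
        database.commit_update(update)?;
    }

    Ok(database)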

File 2 of 3:

@@ -136,13 +136,24 @@ impl Database {
         Ok(Database { db, view })
     }

-    pub fn update(&self) -> Result<Update, Box<Error>> {
+    pub fn start_update(&self) -> Result<Update, Box<Error>> {
         let schema = match self.db.get(DATA_SCHEMA)? {
             Some(value) => Schema::read_from_bin(&*value)?,
             None => panic!("Database does not contain a schema"),
         };

-        Ok(Update::new(self, schema))
+        Ok(Update::new(schema))
+    }
+
+    pub fn commit_update(&self, update: Update) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
+        let batch = update.build()?;
+        self.db.write(batch)?;
+
+        let snapshot = Snapshot::new(self.db.clone());
+        let view = Arc::new(DatabaseView::new(snapshot)?);
+        self.view.set(view.clone());
+
+        Ok(view)
     }

     pub fn view(&self) -> Arc<DatabaseView<Arc<DB>>> {
@@ -202,12 +213,12 @@ mod tests {
         };

         let tokenizer_builder = DefaultBuilder::new();
-        let mut builder = database.update()?;
+        let mut builder = database.start_update()?;
         let docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
         let docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
-        let view = builder.commit()?;
+        let view = database.commit_update(builder)?;

         let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
         let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
@@ -271,15 +282,15 @@ mod tests {
         let tokenizer_builder = DefaultBuilder::new();
-        let mut builder = database.update()?;
+        let mut builder = database.start_update()?;
         let docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
         let docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
-        builder.commit()?;
+        database.commit_update(builder)?;

-        let mut builder = database.update()?;
+        let mut builder = database.start_update()?;
         let docid2 = builder.update_document(&doc2, &tokenizer_builder, &stop_words)?;
         let docid3 = builder.update_document(&doc3, &tokenizer_builder, &stop_words)?;
-        let view = builder.commit()?;
+        let view = database.commit_update(builder)?;

         let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
         let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
@@ -358,7 +369,7 @@ mod bench {
         }

         let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.update()?;
+        let mut builder = database.start_update()?;
         let mut rng = XorShiftRng::seed_from_u64(42);

         for i in 0..300 {
@@ -370,7 +381,7 @@ mod bench {
             builder.update_document(&document, &tokenizer_builder, &stop_words)?;
         }

-        builder.commit()?;
+        database.commit_update(builder)?;

         drop(database);
@@ -403,7 +414,7 @@ mod bench {
         }

         let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.update()?;
+        let mut builder = database.start_update()?;
         let mut rng = XorShiftRng::seed_from_u64(42);

         for i in 0..3000 {
@@ -415,7 +426,7 @@ mod bench {
             builder.update_document(&document, &tokenizer_builder, &stop_words)?;
         }

-        builder.commit()?;
+        database.commit_update(builder)?;

         drop(database);
@@ -449,7 +460,7 @@ mod bench {
         }

         let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.update()?;
+        let mut builder = database.start_update()?;
         let mut rng = XorShiftRng::seed_from_u64(42);

         for i in 0..30_000 {
@@ -461,7 +472,7 @@ mod bench {
             builder.update_document(&document, &tokenizer_builder, &stop_words)?;
         }

-        builder.commit()?;
+        database.commit_update(builder)?;

         drop(database);
@@ -494,7 +505,7 @@ mod bench {
         }

         let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.update()?;
+        let mut builder = database.start_update()?;
         let mut rng = XorShiftRng::seed_from_u64(42);

         for i in 0..300 {
@@ -506,7 +517,7 @@ mod bench {
             builder.update_document(&document, &tokenizer_builder, &stop_words)?;
         }

-        let view = builder.commit()?;
+        let view = database.commit_update(builder)?;

         bench.iter(|| {
             for q in &["a", "b", "c", "d", "e"] {
@@ -539,7 +550,7 @@ mod bench {
         }

         let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.update()?;
+        let mut builder = database.start_update()?;
         let mut rng = XorShiftRng::seed_from_u64(42);

         for i in 0..3000 {
@@ -551,7 +562,7 @@ mod bench {
             builder.update_document(&document, &tokenizer_builder, &stop_words)?;
         }

-        let view = builder.commit()?;
+        let view = database.commit_update(builder)?;

         bench.iter(|| {
             for q in &["a", "b", "c", "d", "e"] {
@@ -585,7 +596,7 @@ mod bench {
         }

         let tokenizer_builder = DefaultBuilder;
-        let mut builder = database.update()?;
+        let mut builder = database.start_update()?;
         let mut rng = XorShiftRng::seed_from_u64(42);

         for i in 0..30_000 {
@@ -597,7 +608,7 @@ mod bench {
             builder.update_document(&document, &tokenizer_builder, &stop_words)?;
         }

-        let view = builder.commit()?;
+        let view = database.commit_update(builder)?;

         bench.iter(|| {
             for q in &["a", "b", "c", "d", "e"] {
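
Because `Update` no longer borrows the `Database`, an update can be created, passed around, and filled independently of the handle that will eventually commit it. A hypothetical sketch of what this permits (the `fill` helper and the stop-words type are illustrative, not part of the codebase):

    // hypothetical helper: prepares an owned Update away from the Database handle
    fn fill(mut update: Update, docs: &[SimpleDoc], stop_words: &HashSet<String>) -> Result<Update, Box<Error>> {
        let tokenizer_builder = DefaultBuilder::new();
        for doc in docs {
            update.update_document(doc, &tokenizer_builder, stop_words)?;
        }
        Ok(update)
    }

    // the update is created, filled elsewhere, and only then handed back for the commit
    let update = fill(database.start_update()?, &docs, &stop_words)?;
    let view = database.commit_update(update)?;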

File 3 of 3:

@@ -1,14 +1,12 @@
 use std::collections::{HashSet, BTreeMap};
 use std::error::Error;
-use std::sync::Arc;

-use rocksdb::rocksdb::{DB, Writable, Snapshot, WriteBatch};
+use rocksdb::rocksdb::{Writable, WriteBatch};
 use hashbrown::hash_map::HashMap;
 use serde::Serialize;
 use fst::map::Map;
 use sdset::Set;

-use crate::database::{DATA_INDEX, Database, DatabaseView};
 use crate::database::index::{Positive, PositiveBuilder, Negative};
 use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
 use crate::database::serde::serializer::Serializer;
@@ -17,20 +15,20 @@ use crate::database::schema::SchemaAttr;
 use crate::tokenizer::TokenizerBuilder;
 use crate::data::{DocIds, DocIndexes};
 use crate::database::schema::Schema;
-use crate::{DocumentId, DocIndex};
 use crate::database::index::Index;
+use crate::{DocumentId, DocIndex};
+use crate::database::DATA_INDEX;

 pub type Token = Vec<u8>; // TODO could be replaced by a SmallVec

-pub struct Update<'a> {
-    database: &'a Database,
+pub struct Update {
     schema: Schema,
     raw_builder: RawUpdateBuilder,
 }

-impl<'a> Update<'a> {
-    pub(crate) fn new(database: &'a Database, schema: Schema) -> Update<'a> {
-        Update { database, schema, raw_builder: RawUpdateBuilder::new() }
+impl Update {
+    pub(crate) fn new(schema: Schema) -> Update {
+        Update { schema, raw_builder: RawUpdateBuilder::new() }
     }

     pub fn update_document<T, B>(
@@ -65,18 +63,9 @@ impl<'a> Update<'a> {
         Ok(document_id)
     }

-    pub fn commit(self) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
-        let batch = self.raw_builder.build()?;
-        self.database.db.write(batch)?;
-
-        let snapshot = Snapshot::new(self.database.db.clone());
-        let view = Arc::new(DatabaseView::new(snapshot)?);
-        self.database.view.set(view.clone());
-
-        Ok(view)
-    }
-
-    pub fn abort(self) { }
+    pub(crate) fn build(self) -> Result<WriteBatch, Box<Error>> {
+        self.raw_builder.build()
+    }
 }

 #[derive(Copy, Clone, PartialEq, Eq)]
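
With the commit logic moved onto `Database`, the explicit `abort` method (which was already a no-op) disappears as well: abandoning an update is now just dropping it, since nothing is written until `commit_update` is called. A sketch of the implied usage:

    // an update that is never committed is simply dropped;
    // its pending write batch never reaches the database
    let mut update = database.start_update()?;
    update.update_document(&doc, &tokenizer_builder, &stop_words)?;
    drop(update);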