From 0e825e05bbd6812a57ea36b2e9ac58bb8e89f687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 7 Dec 2018 17:59:03 +0100 Subject: [PATCH] feat: Make the DatabaseView become Sync + Send --- src/database/database_view.rs | 58 ++++++++++++-------- src/database/deserializer.rs | 17 ++++-- src/database/mod.rs | 15 +++-- src/rank/criterion/exact.rs | 8 ++- src/rank/criterion/mod.rs | 39 ++++++++----- src/rank/criterion/number_of_words.rs | 8 ++- src/rank/criterion/sum_of_typos.rs | 9 ++- src/rank/criterion/sum_of_words_attribute.rs | 8 ++- src/rank/criterion/sum_of_words_position.rs | 8 ++- src/rank/criterion/words_proximity.rs | 8 ++- src/rank/query_builder.rs | 41 +++++++++----- 11 files changed, 147 insertions(+), 72 deletions(-) diff --git a/src/database/database_view.rs b/src/database/database_view.rs index c6c30d39f..671d18433 100644 --- a/src/database/database_view.rs +++ b/src/database/database_view.rs @@ -1,4 +1,5 @@ use std::error::Error; +use std::ops::Deref; use std::{fmt, marker}; use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey}; @@ -14,14 +15,18 @@ use crate::database::schema::Schema; use crate::rank::QueryBuilder; use crate::DocumentId; -pub struct DatabaseView<'a> { - snapshot: Snapshot<&'a DB>, +pub struct DatabaseView +where D: Deref +{ + snapshot: Snapshot, blob: PositiveBlob, schema: Schema, } -impl<'a> DatabaseView<'a> { - pub fn new(snapshot: Snapshot<&'a DB>) -> Result> { +impl DatabaseView +where D: Deref +{ + pub fn new(snapshot: Snapshot) -> Result, Box> { let schema = retrieve_data_schema(&snapshot)?; let blob = retrieve_data_index(&snapshot)?; Ok(DatabaseView { snapshot, blob, schema }) @@ -35,11 +40,11 @@ impl<'a> DatabaseView<'a> { &self.blob } - pub fn into_snapshot(self) -> Snapshot<&'a DB> { + pub fn into_snapshot(self) -> Snapshot { self.snapshot } - pub fn snapshot(&self) -> &Snapshot<&'a DB> { + pub fn snapshot(&self) -> &Snapshot { &self.snapshot } @@ -47,20 +52,20 @@ impl<'a> DatabaseView<'a> { Ok(self.snapshot.get(key)?) } - pub fn query_builder(&self) -> Result>, Box> { + pub fn query_builder(&self) -> Result>>, Box> { QueryBuilder::new(self) } // TODO create an enum error type - pub fn retrieve_document(&self, id: DocumentId) -> Result> - where D: DeserializeOwned + pub fn retrieve_document(&self, id: DocumentId) -> Result> + where T: DeserializeOwned { let mut deserializer = Deserializer::new(&self.snapshot, &self.schema, id); - Ok(D::deserialize(&mut deserializer)?) + Ok(T::deserialize(&mut deserializer)?) } - pub fn retrieve_documents(&self, ids: I) -> DocumentIter - where D: DeserializeOwned, + pub fn retrieve_documents(&self, ids: I) -> DocumentIter + where T: DeserializeOwned, I: IntoIterator, { DocumentIter { @@ -71,7 +76,9 @@ impl<'a> DatabaseView<'a> { } } -impl<'a> fmt::Debug for DatabaseView<'a> { +impl fmt::Debug for DatabaseView +where D: Deref +{ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut options = ReadOptions::new(); let lower = DocumentKey::new(0); @@ -102,17 +109,20 @@ impl<'a> fmt::Debug for DatabaseView<'a> { } // TODO this is just an iter::Map !!! -pub struct DocumentIter<'a, D, I> { - database_view: &'a DatabaseView<'a>, +pub struct DocumentIter<'a, D, T, I> +where D: Deref +{ + database_view: &'a DatabaseView, document_ids: I, - _phantom: marker::PhantomData, + _phantom: marker::PhantomData, } -impl<'a, D, I> Iterator for DocumentIter<'a, D, I> -where D: DeserializeOwned, +impl<'a, D, T, I> Iterator for DocumentIter<'a, D, T, I> +where D: Deref, + T: DeserializeOwned, I: Iterator, { - type Item = Result>; + type Item = Result>; fn size_hint(&self) -> (usize, Option) { self.document_ids.size_hint() @@ -126,13 +136,15 @@ where D: DeserializeOwned, } } -impl<'a, D, I> ExactSizeIterator for DocumentIter<'a, D, I> -where D: DeserializeOwned, +impl<'a, D, T, I> ExactSizeIterator for DocumentIter<'a, D, T, I> +where D: Deref, + T: DeserializeOwned, I: ExactSizeIterator + Iterator, { } -impl<'a, D, I> DoubleEndedIterator for DocumentIter<'a, D, I> -where D: DeserializeOwned, +impl<'a, D, T, I> DoubleEndedIterator for DocumentIter<'a, D, T, I> +where D: Deref, + T: DeserializeOwned, I: DoubleEndedIterator + Iterator, { fn next_back(&mut self) -> Option { diff --git a/src/database/deserializer.rs b/src/database/deserializer.rs index 11e65896f..26d74984d 100644 --- a/src/database/deserializer.rs +++ b/src/database/deserializer.rs @@ -1,4 +1,5 @@ use std::error::Error; +use std::ops::Deref; use std::fmt; use rocksdb::rocksdb::{DB, Snapshot, SeekKey}; @@ -11,19 +12,25 @@ use crate::database::document_key::{DocumentKey, DocumentKeyAttr}; use crate::database::schema::Schema; use crate::DocumentId; -pub struct Deserializer<'a> { - snapshot: &'a Snapshot<&'a DB>, +pub struct Deserializer<'a, D> +where D: Deref +{ + snapshot: &'a Snapshot, schema: &'a Schema, document_id: DocumentId, } -impl<'a> Deserializer<'a> { - pub fn new(snapshot: &'a Snapshot<&DB>, schema: &'a Schema, doc: DocumentId) -> Self { +impl<'a, D> Deserializer<'a, D> +where D: Deref +{ + pub fn new(snapshot: &'a Snapshot, schema: &'a Schema, doc: DocumentId) -> Self { Deserializer { snapshot, schema, document_id: doc } } } -impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> { +impl<'de, 'a, 'b, D> de::Deserializer<'de> for &'b mut Deserializer<'a, D> +where D: Deref +{ type Error = DeserializerError; fn deserialize_any(self, visitor: V) -> Result diff --git a/src/database/mod.rs b/src/database/mod.rs index c347b11ce..8b2b89544 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -1,6 +1,7 @@ use std::error::Error; use std::path::Path; use std::ops::Deref; +use std::sync::Arc; use std::fmt; use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions}; @@ -42,7 +43,8 @@ where D: Deref } } -pub struct Database(DB); +#[derive(Clone)] +pub struct Database(Arc); impl Database { pub fn create>(path: P, schema: Schema) -> Result> { @@ -66,7 +68,7 @@ impl Database { schema.write_to(&mut schema_bytes)?; db.put(DATA_SCHEMA, &schema_bytes)?; - Ok(Database(db)) + Ok(Database(Arc::new(db))) } pub fn open>(path: P) -> Result> { @@ -86,7 +88,7 @@ impl Database { None => return Err(String::from("Database does not contain a schema").into()), }; - Ok(Database(db)) + Ok(Database(Arc::new(db))) } pub fn ingest_update_file(&self, update: Update) -> Result<(), Box> { @@ -114,10 +116,15 @@ impl Database { Ok(self.0.flush(true)?) } - pub fn view(&self) -> Result> { + pub fn view(&self) -> Result, Box> { let snapshot = self.0.snapshot(); DatabaseView::new(snapshot) } + + pub fn view_arc(&self) -> Result>, Box> { + let snapshot = Snapshot::new(self.0.clone()); + DatabaseView::new(snapshot) + } } impl fmt::Debug for Database { diff --git a/src/rank/criterion/exact.rs b/src/rank/criterion/exact.rs index 9ea59eae3..041a6ae67 100644 --- a/src/rank/criterion/exact.rs +++ b/src/rank/criterion/exact.rs @@ -1,5 +1,7 @@ use std::cmp::Ordering; +use std::ops::Deref; +use rocksdb::DB; use group_by::GroupBy; use crate::rank::{match_query_index, Document}; @@ -20,8 +22,10 @@ fn number_exact_matches(matches: &[Match]) -> usize { #[derive(Debug, Clone, Copy)] pub struct Exact; -impl Criterion for Exact { - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { +impl Criterion for Exact +where D: Deref +{ + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = number_exact_matches(&lhs.matches); let rhs = number_exact_matches(&rhs.matches); diff --git a/src/rank/criterion/mod.rs b/src/rank/criterion/mod.rs index a4590ae90..2993f1ba8 100644 --- a/src/rank/criterion/mod.rs +++ b/src/rank/criterion/mod.rs @@ -5,8 +5,11 @@ mod sum_of_words_attribute; mod sum_of_words_position; mod exact; -use std::vec; use std::cmp::Ordering; +use std::ops::Deref; +use std::vec; + +use rocksdb::DB; use crate::database::DatabaseView; use crate::rank::Document; @@ -20,32 +23,38 @@ pub use self::{ exact::Exact, }; -pub trait Criterion { +pub trait Criterion +where D: Deref +{ #[inline] - fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering; + fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering; #[inline] - fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { + fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { self.evaluate(lhs, rhs, view) == Ordering::Equal } } -impl<'a, T: Criterion + ?Sized> Criterion for &'a T { - fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering { +impl<'a, D, T: Criterion + ?Sized> Criterion for &'a T +where D: Deref +{ + fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering { (**self).evaluate(lhs, rhs, view) } - fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { + fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { (**self).eq(lhs, rhs, view) } } -impl Criterion for Box { - fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering { +impl + ?Sized> Criterion for Box +where D: Deref +{ + fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering { (**self).evaluate(lhs, rhs, view) } - fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { + fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { (**self).eq(lhs, rhs, view) } } @@ -53,15 +62,19 @@ impl Criterion for Box { #[derive(Debug, Clone, Copy)] pub struct DocumentId; -impl Criterion for DocumentId { - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { +impl Criterion for DocumentId +where D: Deref +{ + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { lhs.id.cmp(&rhs.id) } } // TODO there is too much Box here, can we use // static references or static closures -pub fn default() -> Vec> { +pub fn default() -> Vec>> +where D: Deref +{ vec![ Box::new(SumOfTypos), Box::new(NumberOfWords), diff --git a/src/rank/criterion/number_of_words.rs b/src/rank/criterion/number_of_words.rs index b20586f39..855d997ba 100644 --- a/src/rank/criterion/number_of_words.rs +++ b/src/rank/criterion/number_of_words.rs @@ -1,5 +1,7 @@ use std::cmp::Ordering; +use std::ops::Deref; +use rocksdb::DB; use group_by::GroupBy; use crate::rank::{match_query_index, Document}; @@ -15,8 +17,10 @@ fn number_of_query_words(matches: &[Match]) -> usize { #[derive(Debug, Clone, Copy)] pub struct NumberOfWords; -impl Criterion for NumberOfWords { - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { +impl Criterion for NumberOfWords +where D: Deref +{ + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = number_of_query_words(&lhs.matches); let rhs = number_of_query_words(&rhs.matches); diff --git a/src/rank/criterion/sum_of_typos.rs b/src/rank/criterion/sum_of_typos.rs index 5cbd4bac1..a7074bd86 100644 --- a/src/rank/criterion/sum_of_typos.rs +++ b/src/rank/criterion/sum_of_typos.rs @@ -1,4 +1,7 @@ use std::cmp::Ordering; +use std::ops::Deref; + +use rocksdb::DB; use group_by::GroupBy; @@ -25,8 +28,10 @@ fn sum_matches_typos(matches: &[Match]) -> i8 { #[derive(Debug, Clone, Copy)] pub struct SumOfTypos; -impl Criterion for SumOfTypos { - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { +impl Criterion for SumOfTypos +where D: Deref +{ + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = sum_matches_typos(&lhs.matches); let rhs = sum_matches_typos(&rhs.matches); diff --git a/src/rank/criterion/sum_of_words_attribute.rs b/src/rank/criterion/sum_of_words_attribute.rs index d373dedef..800fe7c7f 100644 --- a/src/rank/criterion/sum_of_words_attribute.rs +++ b/src/rank/criterion/sum_of_words_attribute.rs @@ -1,5 +1,7 @@ use std::cmp::Ordering; +use std::ops::Deref; +use rocksdb::DB; use group_by::GroupBy; use crate::database::DatabaseView; @@ -19,8 +21,10 @@ fn sum_matches_attributes(matches: &[Match]) -> u8 { #[derive(Debug, Clone, Copy)] pub struct SumOfWordsAttribute; -impl Criterion for SumOfWordsAttribute { - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { +impl Criterion for SumOfWordsAttribute +where D: Deref +{ + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = sum_matches_attributes(&lhs.matches); let rhs = sum_matches_attributes(&rhs.matches); diff --git a/src/rank/criterion/sum_of_words_position.rs b/src/rank/criterion/sum_of_words_position.rs index cd41d5b72..2a54b1098 100644 --- a/src/rank/criterion/sum_of_words_position.rs +++ b/src/rank/criterion/sum_of_words_position.rs @@ -1,5 +1,7 @@ use std::cmp::Ordering; +use std::ops::Deref; +use rocksdb::DB; use group_by::GroupBy; use crate::database::DatabaseView; @@ -19,8 +21,10 @@ fn sum_matches_attribute_index(matches: &[Match]) -> u32 { #[derive(Debug, Clone, Copy)] pub struct SumOfWordsPosition; -impl Criterion for SumOfWordsPosition { - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { +impl Criterion for SumOfWordsPosition +where D: Deref +{ + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = sum_matches_attribute_index(&lhs.matches); let rhs = sum_matches_attribute_index(&rhs.matches); diff --git a/src/rank/criterion/words_proximity.rs b/src/rank/criterion/words_proximity.rs index abbd0e99f..14eb1ad0e 100644 --- a/src/rank/criterion/words_proximity.rs +++ b/src/rank/criterion/words_proximity.rs @@ -1,5 +1,7 @@ use std::cmp::{self, Ordering}; +use std::ops::Deref; +use rocksdb::DB; use group_by::GroupBy; use crate::rank::{match_query_index, Document}; @@ -49,8 +51,10 @@ fn matches_proximity(matches: &[Match]) -> u32 { #[derive(Debug, Clone, Copy)] pub struct WordsProximity; -impl Criterion for WordsProximity { - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { +impl Criterion for WordsProximity +where D: Deref +{ + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = matches_proximity(&lhs.matches); let rhs = matches_proximity(&rhs.matches); diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index 1a43d30fb..2d9795a55 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -1,11 +1,12 @@ +use std::ops::{Deref, Range}; use std::error::Error; use std::hash::Hash; -use std::ops::Range; use std::{mem, vec, str}; use group_by::GroupByMut; use hashbrown::HashMap; use fst::Streamer; +use rocksdb::DB; use crate::automaton::{self, DfaExt, AutomatonExt}; use crate::rank::criterion::{self, Criterion}; @@ -23,19 +24,25 @@ fn split_whitespace_automatons(query: &str) -> Vec { automatons } -pub struct QueryBuilder<'a, C> { - view: &'a DatabaseView<'a>, +pub struct QueryBuilder<'a, D, C> +where D: Deref +{ + view: &'a DatabaseView, criteria: Vec, } -impl<'a> QueryBuilder<'a, Box> { - pub fn new(view: &'a DatabaseView<'a>) -> Result> { +impl<'a, D> QueryBuilder<'a, D, Box>> +where D: Deref +{ + pub fn new(view: &'a DatabaseView) -> Result> { QueryBuilder::with_criteria(view, criterion::default()) } } -impl<'a, C> QueryBuilder<'a, C> { - pub fn with_criteria(view: &'a DatabaseView<'a>, criteria: Vec) -> Result> { +impl<'a, D, C> QueryBuilder<'a, D, C> +where D: Deref +{ + pub fn with_criteria(view: &'a DatabaseView, criteria: Vec) -> Result> { Ok(QueryBuilder { view, criteria }) } @@ -44,7 +51,7 @@ impl<'a, C> QueryBuilder<'a, C> { self } - pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'a, F, C> { + pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, F, C> { DistinctQueryBuilder { inner: self, function: function, @@ -92,8 +99,9 @@ impl<'a, C> QueryBuilder<'a, C> { } } -impl<'a, C> QueryBuilder<'a, C> -where C: Criterion +impl<'a, D, C> QueryBuilder<'a, D, C> +where D: Deref, + C: Criterion { pub fn query(&self, query: &str, limit: usize) -> Vec { let mut documents = self.query_all(query); @@ -119,16 +127,19 @@ where C: Criterion } } -pub struct DistinctQueryBuilder<'a, F, C> { - inner: QueryBuilder<'a, C>, +pub struct DistinctQueryBuilder<'a, D, F, C> +where D: Deref +{ + inner: QueryBuilder<'a, D, C>, function: F, size: usize, } -impl<'a, F, K, C> DistinctQueryBuilder<'a, F, C> -where F: Fn(DocumentId, &DatabaseView) -> Option, +impl<'a, D, F, K, C> DistinctQueryBuilder<'a, D, F, C> +where D: Deref, + F: Fn(DocumentId, &DatabaseView) -> Option, K: Hash + Eq, - C: Criterion, + C: Criterion, { pub fn query(&self, query: &str, range: Range) -> Vec { let mut documents = self.inner.query_all(query);