feat: Remove the Index dependency of the Serializer

This commit is contained in:
Clément Renault 2019-05-13 15:29:04 +02:00
parent 9bba90c47e
commit aa90f22865
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
3 changed files with 67 additions and 23 deletions

View File

@ -1,5 +1,5 @@
use std::collections::{BTreeSet, HashSet, HashMap};
use std::collections::hash_map::Entry; use std::collections::hash_map::Entry;
use std::collections::{BTreeSet, HashSet, HashMap};
use std::convert::TryInto; use std::convert::TryInto;
use std::path::Path; use std::path::Path;
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
@ -14,10 +14,11 @@ use sled::IVec;
use zerocopy::{AsBytes, LayoutVerified}; use zerocopy::{AsBytes, LayoutVerified};
use fst::{SetBuilder, set::OpBuilder, Streamer}; use fst::{SetBuilder, set::OpBuilder, Streamer};
use crate::{Schema, SchemaAttr, RankedMap};
use crate::serde::{extract_document_id, Serializer, Deserializer, SerializerError};
use crate::indexer::{Indexer, Indexed};
use crate::document_attr_key::DocumentAttrKey; use crate::document_attr_key::DocumentAttrKey;
use crate::indexer::{Indexer, Indexed};
use crate::serde::extract_document_id;
use crate::serde::{Serializer, RamDocumentStore, Deserializer, SerializerError};
use crate::{Schema, SchemaAttr, RankedMap};
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
@ -521,13 +522,21 @@ impl Store for IndexLease {
pub struct DocumentsAddition<'a> { pub struct DocumentsAddition<'a> {
inner: &'a Index, inner: &'a Index,
document_ids: HashSet<DocumentId>,
document_store: RamDocumentStore,
indexer: Indexer, indexer: Indexer,
ranked_map: RankedMap, ranked_map: RankedMap,
} }
impl<'a> DocumentsAddition<'a> { impl<'a> DocumentsAddition<'a> {
fn new(inner: &'a Index, ranked_map: RankedMap) -> DocumentsAddition<'a> { fn new(inner: &'a Index, ranked_map: RankedMap) -> DocumentsAddition<'a> {
DocumentsAddition { inner, indexer: Indexer::new(), ranked_map } DocumentsAddition {
inner,
document_ids: HashSet::new(),
document_store: RamDocumentStore::new(),
indexer: Indexer::new(),
ranked_map,
}
} }
pub fn update_document<D>(&mut self, document: D) -> Result<(), Error> pub fn update_document<D>(&mut self, document: D) -> Result<(), Error>
@ -541,15 +550,13 @@ impl<'a> DocumentsAddition<'a> {
None => return Err(Error::MissingDocumentId), None => return Err(Error::MissingDocumentId),
}; };
// 1. remove the previous document match indexes // 1. store the document id for future deletion
let mut documents_deletion = DocumentsDeletion::new(self.inner); self.document_ids.insert(document_id);
documents_deletion.delete_document(document_id);
documents_deletion.finalize()?;
// 2. index the document fields // 2. index the document fields in ram stores
let serializer = Serializer { let serializer = Serializer {
schema, schema,
index: &self.inner, document_store: &mut self.document_store,
indexer: &mut self.indexer, indexer: &mut self.indexer,
ranked_map: &mut self.ranked_map, ranked_map: &mut self.ranked_map,
document_id, document_id,
@ -565,6 +572,17 @@ impl<'a> DocumentsAddition<'a> {
let main = &lease_inner.raw.main; let main = &lease_inner.raw.main;
let words = &lease_inner.raw.words; let words = &lease_inner.raw.words;
let attrs_words = &lease_inner.raw.attrs_words; let attrs_words = &lease_inner.raw.attrs_words;
let documents = &lease_inner.raw.documents;
// 1. remove the previous documents match indexes
let mut documents_deletion = DocumentsDeletion::new(self.inner);
documents_deletion.extend(self.document_ids);
documents_deletion.finalize()?;
// 2. insert new document attributes in the database
for ((id, attr), value) in self.document_store.into_inner() {
documents.set_document_field(id, attr, value)?;
}
let Indexed { words_doc_indexes, docs_attrs_words } = self.indexer.build(); let Indexed { words_doc_indexes, docs_attrs_words } = self.indexer.build();
let mut delta_words_builder = SetBuilder::memory(); let mut delta_words_builder = SetBuilder::memory();
@ -717,3 +735,9 @@ impl<'a> DocumentsDeletion<'a> {
Ok(()) Ok(())
} }
} }
/// Allows a whole batch of document ids to be handed to a
/// `DocumentsDeletion` in a single call.
impl<'a> Extend<DocumentId> for DocumentsDeletion<'a> {
    /// Forwards every id yielded by `iter` to the `documents` field.
    fn extend<I>(&mut self, iter: I)
    where
        I: IntoIterator<Item = DocumentId>,
    {
        self.documents.extend(iter);
    }
}

View File

@ -22,10 +22,15 @@ pub use self::convert_to_number::ConvertToNumber;
pub use self::indexer::Indexer; pub use self::indexer::Indexer;
pub use self::serializer::Serializer; pub use self::serializer::Serializer;
use std::collections::BTreeMap;
use std::{fmt, error::Error}; use std::{fmt, error::Error};
use meilidb_core::DocumentId;
use rmp_serde::encode::Error as RmpError; use rmp_serde::encode::Error as RmpError;
use serde::ser; use serde::ser;
use crate::number::ParseNumberError; use crate::number::ParseNumberError;
use crate::schema::SchemaAttr;
#[derive(Debug)] #[derive(Debug)]
pub enum SerializerError { pub enum SerializerError {
@ -95,3 +100,19 @@ impl From<ParseNumberError> for SerializerError {
SerializerError::ParseNumberError(error) SerializerError::ParseNumberError(error)
} }
} }
/// In-memory store for serialized document fields.
///
/// Every entry maps a `(DocumentId, SchemaAttr)` pair to the bytes recorded
/// for that field. The store is backed by a `BTreeMap`, so the map returned by
/// [`RamDocumentStore::into_inner`] iterates its entries in key order.
#[derive(Default)]
pub struct RamDocumentStore(BTreeMap<(DocumentId, SchemaAttr), Vec<u8>>);

impl RamDocumentStore {
    /// Creates an empty store; equivalent to `RamDocumentStore::default()`.
    pub fn new() -> RamDocumentStore {
        RamDocumentStore::default()
    }

    /// Records `value` as the content of field `attr` of document `id`.
    ///
    /// A value previously recorded for the same `(id, attr)` pair is replaced.
    pub fn set_document_field(&mut self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) {
        self.0.insert((id, attr), value);
    }

    /// Consumes the store and returns the underlying map.
    pub fn into_inner(self) -> BTreeMap<(DocumentId, SchemaAttr), Vec<u8>> {
        self.0
    }
}

View File

@ -1,15 +1,14 @@
use meilidb_core::DocumentId; use meilidb_core::DocumentId;
use serde::ser; use serde::ser;
use crate::database::Index;
use crate::ranked_map::RankedMap;
use crate::indexer::Indexer as RawIndexer; use crate::indexer::Indexer as RawIndexer;
use crate::ranked_map::RankedMap;
use crate::schema::Schema; use crate::schema::Schema;
use super::{SerializerError, ConvertToString, ConvertToNumber, Indexer}; use super::{RamDocumentStore, SerializerError, ConvertToString, ConvertToNumber, Indexer};
pub struct Serializer<'a> { pub struct Serializer<'a> {
pub schema: &'a Schema, pub schema: &'a Schema,
pub index: &'a Index, pub document_store: &'a mut RamDocumentStore,
pub indexer: &'a mut RawIndexer, pub indexer: &'a mut RawIndexer,
pub ranked_map: &'a mut RankedMap, pub ranked_map: &'a mut RankedMap,
pub document_id: DocumentId, pub document_id: DocumentId,
@ -134,7 +133,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
Ok(MapSerializer { Ok(MapSerializer {
schema: self.schema, schema: self.schema,
document_id: self.document_id, document_id: self.document_id,
index: self.index, document_store: self.document_store,
indexer: self.indexer, indexer: self.indexer,
ranked_map: self.ranked_map, ranked_map: self.ranked_map,
current_key_name: None, current_key_name: None,
@ -150,7 +149,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
Ok(StructSerializer { Ok(StructSerializer {
schema: self.schema, schema: self.schema,
document_id: self.document_id, document_id: self.document_id,
index: self.index, document_store: self.document_store,
indexer: self.indexer, indexer: self.indexer,
ranked_map: self.ranked_map, ranked_map: self.ranked_map,
}) })
@ -171,7 +170,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
pub struct MapSerializer<'a> { pub struct MapSerializer<'a> {
schema: &'a Schema, schema: &'a Schema,
document_id: DocumentId, document_id: DocumentId,
index: &'a Index, document_store: &'a mut RamDocumentStore,
indexer: &'a mut RawIndexer, indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap, ranked_map: &'a mut RankedMap,
current_key_name: Option<String>, current_key_name: Option<String>,
@ -208,7 +207,7 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
serialize_value( serialize_value(
self.schema, self.schema,
self.document_id, self.document_id,
self.index, self.document_store,
self.indexer, self.indexer,
self.ranked_map, self.ranked_map,
&key, &key,
@ -224,7 +223,7 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
pub struct StructSerializer<'a> { pub struct StructSerializer<'a> {
schema: &'a Schema, schema: &'a Schema,
document_id: DocumentId, document_id: DocumentId,
index: &'a Index, document_store: &'a mut RamDocumentStore,
indexer: &'a mut RawIndexer, indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap, ranked_map: &'a mut RankedMap,
} }
@ -243,7 +242,7 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
serialize_value( serialize_value(
self.schema, self.schema,
self.document_id, self.document_id,
self.index, self.document_store,
self.indexer, self.indexer,
self.ranked_map, self.ranked_map,
key, key,
@ -259,7 +258,7 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
fn serialize_value<T: ?Sized>( fn serialize_value<T: ?Sized>(
schema: &Schema, schema: &Schema,
document_id: DocumentId, document_id: DocumentId,
index: &Index, document_store: &mut RamDocumentStore,
indexer: &mut RawIndexer, indexer: &mut RawIndexer,
ranked_map: &mut RankedMap, ranked_map: &mut RankedMap,
key: &str, key: &str,
@ -272,7 +271,7 @@ where T: ser::Serialize,
if props.is_stored() { if props.is_stored() {
let value = rmp_serde::to_vec_named(value)?; let value = rmp_serde::to_vec_named(value)?;
index.lease_inner().raw.documents.set_document_field(document_id, attr, value)?; document_store.set_document_field(document_id, attr, value);
} }
if props.is_indexed() { if props.is_indexed() {