From aa90f22865603224616823c752fbd6ab6cab6f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 13 May 2019 15:29:04 +0200 Subject: [PATCH] feat: Remove the Index dependency of the Serializer --- meilidb-data/src/database.rs | 46 +++++++++++++++++++++------- meilidb-data/src/serde/mod.rs | 21 +++++++++++++ meilidb-data/src/serde/serializer.rs | 23 +++++++------- 3 files changed, 67 insertions(+), 23 deletions(-) diff --git a/meilidb-data/src/database.rs b/meilidb-data/src/database.rs index 745c5a8b5..01273e186 100644 --- a/meilidb-data/src/database.rs +++ b/meilidb-data/src/database.rs @@ -1,5 +1,5 @@ -use std::collections::{BTreeSet, HashSet, HashMap}; use std::collections::hash_map::Entry; +use std::collections::{BTreeSet, HashSet, HashMap}; use std::convert::TryInto; use std::path::Path; use std::sync::{Arc, RwLock}; @@ -14,10 +14,11 @@ use sled::IVec; use zerocopy::{AsBytes, LayoutVerified}; use fst::{SetBuilder, set::OpBuilder, Streamer}; -use crate::{Schema, SchemaAttr, RankedMap}; -use crate::serde::{extract_document_id, Serializer, Deserializer, SerializerError}; -use crate::indexer::{Indexer, Indexed}; use crate::document_attr_key::DocumentAttrKey; +use crate::indexer::{Indexer, Indexed}; +use crate::serde::extract_document_id; +use crate::serde::{Serializer, RamDocumentStore, Deserializer, SerializerError}; +use crate::{Schema, SchemaAttr, RankedMap}; #[derive(Debug)] pub enum Error { @@ -521,13 +522,21 @@ impl Store for IndexLease { pub struct DocumentsAddition<'a> { inner: &'a Index, + document_ids: HashSet, + document_store: RamDocumentStore, indexer: Indexer, ranked_map: RankedMap, } impl<'a> DocumentsAddition<'a> { fn new(inner: &'a Index, ranked_map: RankedMap) -> DocumentsAddition<'a> { - DocumentsAddition { inner, indexer: Indexer::new(), ranked_map } + DocumentsAddition { + inner, + document_ids: HashSet::new(), + document_store: RamDocumentStore::new(), + indexer: Indexer::new(), + ranked_map, + } } pub fn update_document(&mut self, document: D) -> Result<(), Error> @@ -541,15 +550,13 @@ impl<'a> DocumentsAddition<'a> { None => return Err(Error::MissingDocumentId), }; - // 1. remove the previous document match indexes - let mut documents_deletion = DocumentsDeletion::new(self.inner); - documents_deletion.delete_document(document_id); - documents_deletion.finalize()?; + // 1. store the document id for future deletion + self.document_ids.insert(document_id); - // 2. index the document fields + // 2. index the document fields in ram stores let serializer = Serializer { schema, - index: &self.inner, + document_store: &mut self.document_store, indexer: &mut self.indexer, ranked_map: &mut self.ranked_map, document_id, @@ -565,6 +572,17 @@ impl<'a> DocumentsAddition<'a> { let main = &lease_inner.raw.main; let words = &lease_inner.raw.words; let attrs_words = &lease_inner.raw.attrs_words; + let documents = &lease_inner.raw.documents; + + // 1. remove the previous documents match indexes + let mut documents_deletion = DocumentsDeletion::new(self.inner); + documents_deletion.extend(self.document_ids); + documents_deletion.finalize()?; + + // 2. insert new document attributes in the database + for ((id, attr), value) in self.document_store.into_inner() { + documents.set_document_field(id, attr, value)?; + } let Indexed { words_doc_indexes, docs_attrs_words } = self.indexer.build(); let mut delta_words_builder = SetBuilder::memory(); @@ -717,3 +735,9 @@ impl<'a> DocumentsDeletion<'a> { Ok(()) } } + +impl<'a> Extend for DocumentsDeletion<'a> { + fn extend>(&mut self, iter: T) { + self.documents.extend(iter) + } +} diff --git a/meilidb-data/src/serde/mod.rs b/meilidb-data/src/serde/mod.rs index cf222c1bd..1e2854c36 100644 --- a/meilidb-data/src/serde/mod.rs +++ b/meilidb-data/src/serde/mod.rs @@ -22,10 +22,15 @@ pub use self::convert_to_number::ConvertToNumber; pub use self::indexer::Indexer; pub use self::serializer::Serializer; +use std::collections::BTreeMap; use std::{fmt, error::Error}; + +use meilidb_core::DocumentId; use rmp_serde::encode::Error as RmpError; use serde::ser; + use crate::number::ParseNumberError; +use crate::schema::SchemaAttr; #[derive(Debug)] pub enum SerializerError { @@ -95,3 +100,19 @@ impl From for SerializerError { SerializerError::ParseNumberError(error) } } + +pub struct RamDocumentStore(BTreeMap<(DocumentId, SchemaAttr), Vec>); + +impl RamDocumentStore { + pub fn new() -> RamDocumentStore { + RamDocumentStore(BTreeMap::new()) + } + + pub fn set_document_field(&mut self, id: DocumentId, attr: SchemaAttr, value: Vec) { + self.0.insert((id, attr), value); + } + + pub fn into_inner(self) -> BTreeMap<(DocumentId, SchemaAttr), Vec> { + self.0 + } +} diff --git a/meilidb-data/src/serde/serializer.rs b/meilidb-data/src/serde/serializer.rs index d7a7b7d85..0636d86d6 100644 --- a/meilidb-data/src/serde/serializer.rs +++ b/meilidb-data/src/serde/serializer.rs @@ -1,15 +1,14 @@ use meilidb_core::DocumentId; use serde::ser; -use crate::database::Index; -use crate::ranked_map::RankedMap; use crate::indexer::Indexer as RawIndexer; +use crate::ranked_map::RankedMap; use crate::schema::Schema; -use super::{SerializerError, ConvertToString, ConvertToNumber, Indexer}; +use super::{RamDocumentStore, SerializerError, ConvertToString, ConvertToNumber, Indexer}; pub struct Serializer<'a> { pub schema: &'a Schema, - pub index: &'a Index, + pub document_store: &'a mut RamDocumentStore, pub indexer: &'a mut RawIndexer, pub ranked_map: &'a mut RankedMap, pub document_id: DocumentId, @@ -134,7 +133,7 @@ impl<'a> ser::Serializer for Serializer<'a> { Ok(MapSerializer { schema: self.schema, document_id: self.document_id, - index: self.index, + document_store: self.document_store, indexer: self.indexer, ranked_map: self.ranked_map, current_key_name: None, @@ -150,7 +149,7 @@ impl<'a> ser::Serializer for Serializer<'a> { Ok(StructSerializer { schema: self.schema, document_id: self.document_id, - index: self.index, + document_store: self.document_store, indexer: self.indexer, ranked_map: self.ranked_map, }) @@ -171,7 +170,7 @@ impl<'a> ser::Serializer for Serializer<'a> { pub struct MapSerializer<'a> { schema: &'a Schema, document_id: DocumentId, - index: &'a Index, + document_store: &'a mut RamDocumentStore, indexer: &'a mut RawIndexer, ranked_map: &'a mut RankedMap, current_key_name: Option, @@ -208,7 +207,7 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> { serialize_value( self.schema, self.document_id, - self.index, + self.document_store, self.indexer, self.ranked_map, &key, @@ -224,7 +223,7 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> { pub struct StructSerializer<'a> { schema: &'a Schema, document_id: DocumentId, - index: &'a Index, + document_store: &'a mut RamDocumentStore, indexer: &'a mut RawIndexer, ranked_map: &'a mut RankedMap, } @@ -243,7 +242,7 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> { serialize_value( self.schema, self.document_id, - self.index, + self.document_store, self.indexer, self.ranked_map, key, @@ -259,7 +258,7 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> { fn serialize_value( schema: &Schema, document_id: DocumentId, - index: &Index, + document_store: &mut RamDocumentStore, indexer: &mut RawIndexer, ranked_map: &mut RankedMap, key: &str, @@ -272,7 +271,7 @@ where T: ser::Serialize, if props.is_stored() { let value = rmp_serde::to_vec_named(value)?; - index.lease_inner().raw.documents.set_document_field(document_id, attr, value)?; + document_store.set_document_field(document_id, attr, value); } if props.is_indexed() {