From 615825b9fd856149ce4a17834753ba61c1587856 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 18 May 2020 10:56:24 +0200 Subject: [PATCH] Remove the serde Serializer --- meilisearch-core/benches/search_benchmark.rs | 6 +- meilisearch-core/src/serde/mod.rs | 2 - meilisearch-core/src/serde/serializer.rs | 361 ------------------ .../src/update/documents_addition.rs | 82 ++-- 4 files changed, 57 insertions(+), 394 deletions(-) delete mode 100644 meilisearch-core/src/serde/serializer.rs diff --git a/meilisearch-core/benches/search_benchmark.rs b/meilisearch-core/benches/search_benchmark.rs index e2b0c0f5a..dc4c7cdc1 100644 --- a/meilisearch-core/benches/search_benchmark.rs +++ b/meilisearch-core/benches/search_benchmark.rs @@ -8,7 +8,7 @@ use std::fs::File; use std::io::BufReader; use std::iter; -use meilisearch_core::Database; +use meilisearch_core::{Database, DatabaseOptions}; use meilisearch_core::{ProcessedUpdateResult, UpdateStatus}; use meilisearch_core::settings::{Settings, SettingsUpdate}; use meilisearch_schema::Schema; @@ -17,7 +17,7 @@ use serde_json::Value; use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId}; fn prepare_database(path: &Path) -> Database { - let database = Database::open_or_create(path).unwrap(); + let database = Database::open_or_create(path, DatabaseOptions::default()).unwrap(); let db = &database; let (sender, receiver) = mpsc::sync_channel(100); @@ -27,7 +27,7 @@ fn prepare_database(path: &Path) -> Database { let index = database.create_index("bench").unwrap(); database.set_update_callback(Box::new(update_fn)); - + let mut writer = db.main_write_txn().unwrap(); index.main.put_schema(&mut writer, &Schema::with_primary_key("id")).unwrap(); writer.commit().unwrap(); diff --git a/meilisearch-core/src/serde/mod.rs b/meilisearch-core/src/serde/mod.rs index 46352c0f5..9cb8e50bc 100644 --- a/meilisearch-core/src/serde/mod.rs +++ b/meilisearch-core/src/serde/mod.rs @@ -13,14 +13,12 @@ mod convert_to_string; mod deserializer; mod extract_document_id; mod indexer; -mod serializer; pub use self::convert_to_number::ConvertToNumber; pub use self::convert_to_string::ConvertToString; pub use self::deserializer::{Deserializer, DeserializerError}; pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string}; pub use self::indexer::Indexer; -pub use self::serializer::{serialize_value, serialize_value_with_id, Serializer}; use std::{error::Error, fmt}; diff --git a/meilisearch-core/src/serde/serializer.rs b/meilisearch-core/src/serde/serializer.rs deleted file mode 100644 index 6142e96e7..000000000 --- a/meilisearch-core/src/serde/serializer.rs +++ /dev/null @@ -1,361 +0,0 @@ -use meilisearch_schema::{Schema, FieldId}; -use serde::ser; - -use crate::database::MainT; -use crate::raw_indexer::RawIndexer; -use crate::store::{DocumentsFields, DocumentsFieldsCounts}; -use crate::{DocumentId, RankedMap}; - -use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError}; - -pub struct Serializer<'a, 'b> { - pub txn: &'a mut heed::RwTxn<'b, MainT>, - pub schema: &'a mut Schema, - pub document_store: DocumentsFields, - pub document_fields_counts: DocumentsFieldsCounts, - pub indexer: &'a mut RawIndexer, - pub ranked_map: &'a mut RankedMap, - pub document_id: DocumentId, -} - -impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> { - type Ok = (); - type Error = SerializerError; - type SerializeSeq = ser::Impossible; - type SerializeTuple = ser::Impossible; - type SerializeTupleStruct = ser::Impossible; - type SerializeTupleVariant = ser::Impossible; - type SerializeMap = MapSerializer<'a, 'b>; - type SerializeStruct = StructSerializer<'a, 'b>; - type SerializeStructVariant = ser::Impossible; - - forward_to_unserializable_type! { - bool => serialize_bool, - char => serialize_char, - - i8 => serialize_i8, - i16 => serialize_i16, - i32 => serialize_i32, - i64 => serialize_i64, - - u8 => serialize_u8, - u16 => serialize_u16, - u32 => serialize_u32, - u64 => serialize_u64, - - f32 => serialize_f32, - f64 => serialize_f64, - } - - fn serialize_str(self, _v: &str) -> Result { - Err(SerializerError::UnserializableType { type_name: "str" }) - } - - fn serialize_bytes(self, _v: &[u8]) -> Result { - Err(SerializerError::UnserializableType { type_name: "&[u8]" }) - } - - fn serialize_none(self) -> Result { - Err(SerializerError::UnserializableType { - type_name: "Option", - }) - } - - fn serialize_some(self, _value: &T) -> Result - where - T: ser::Serialize, - { - Err(SerializerError::UnserializableType { - type_name: "Option", - }) - } - - fn serialize_unit(self) -> Result { - Err(SerializerError::UnserializableType { type_name: "()" }) - } - - fn serialize_unit_struct(self, _name: &'static str) -> Result { - Err(SerializerError::UnserializableType { - type_name: "unit struct", - }) - } - - fn serialize_unit_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "unit variant", - }) - } - - fn serialize_newtype_struct( - self, - _name: &'static str, - value: &T, - ) -> Result - where - T: ser::Serialize, - { - value.serialize(self) - } - - fn serialize_newtype_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _value: &T, - ) -> Result - where - T: ser::Serialize, - { - Err(SerializerError::UnserializableType { - type_name: "newtype variant", - }) - } - - fn serialize_seq(self, _len: Option) -> Result { - Err(SerializerError::UnserializableType { - type_name: "sequence", - }) - } - - fn serialize_tuple(self, _len: usize) -> Result { - Err(SerializerError::UnserializableType { type_name: "tuple" }) - } - - fn serialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "tuple struct", - }) - } - - fn serialize_tuple_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "tuple variant", - }) - } - - fn serialize_map(self, _len: Option) -> Result { - Ok(MapSerializer { - txn: self.txn, - schema: self.schema, - document_id: self.document_id, - document_store: self.document_store, - document_fields_counts: self.document_fields_counts, - indexer: self.indexer, - ranked_map: self.ranked_map, - current_key_name: None, - }) - } - - fn serialize_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Ok(StructSerializer { - txn: self.txn, - schema: self.schema, - document_id: self.document_id, - document_store: self.document_store, - document_fields_counts: self.document_fields_counts, - indexer: self.indexer, - ranked_map: self.ranked_map, - }) - } - - fn serialize_struct_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "struct variant", - }) - } -} - -pub struct MapSerializer<'a, 'b> { - txn: &'a mut heed::RwTxn<'b, MainT>, - schema: &'a mut Schema, - document_id: DocumentId, - document_store: DocumentsFields, - document_fields_counts: DocumentsFieldsCounts, - indexer: &'a mut RawIndexer, - ranked_map: &'a mut RankedMap, - current_key_name: Option, -} - -impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> { - type Ok = (); - type Error = SerializerError; - - fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> - where - T: ser::Serialize, - { - let key = key.serialize(ConvertToString)?; - self.current_key_name = Some(key); - Ok(()) - } - - fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> - where - T: ser::Serialize, - { - let key = self.current_key_name.take().unwrap(); - self.serialize_entry(&key, value) - } - - fn serialize_entry( - &mut self, - key: &K, - value: &V, - ) -> Result<(), Self::Error> - where - K: ser::Serialize, - V: ser::Serialize, - { - let key = key.serialize(ConvertToString)?; - serialize_value( - self.txn, - key.as_str(), - self.schema, - self.document_id, - self.document_store, - self.document_fields_counts, - self.indexer, - self.ranked_map, - value, - ) - } - - fn end(self) -> Result { - Ok(()) - } -} - -pub struct StructSerializer<'a, 'b> { - txn: &'a mut heed::RwTxn<'b, MainT>, - schema: &'a mut Schema, - document_id: DocumentId, - document_store: DocumentsFields, - document_fields_counts: DocumentsFieldsCounts, - indexer: &'a mut RawIndexer, - ranked_map: &'a mut RankedMap, -} - -impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> { - type Ok = (); - type Error = SerializerError; - - fn serialize_field( - &mut self, - key: &'static str, - value: &T, - ) -> Result<(), Self::Error> - where - T: ser::Serialize, - { - serialize_value( - self.txn, - key, - self.schema, - self.document_id, - self.document_store, - self.document_fields_counts, - self.indexer, - self.ranked_map, - value, - ) - } - - fn end(self) -> Result { - Ok(()) - } -} - -pub fn serialize_value<'a, T: ?Sized>( - txn: &mut heed::RwTxn, - attribute: &str, - schema: &'a mut Schema, - document_id: DocumentId, - document_store: DocumentsFields, - documents_fields_counts: DocumentsFieldsCounts, - indexer: &mut RawIndexer, - ranked_map: &mut RankedMap, - value: &T, -) -> Result<(), SerializerError> -where - T: ser::Serialize, -{ - let field_id = schema.insert_and_index(&attribute)?; - serialize_value_with_id( - txn, - field_id, - schema, - document_id, - document_store, - documents_fields_counts, - indexer, - ranked_map, - value, - ) -} - -pub fn serialize_value_with_id<'a, T: ?Sized>( - txn: &mut heed::RwTxn, - field_id: FieldId, - schema: &'a Schema, - document_id: DocumentId, - document_store: DocumentsFields, - documents_fields_counts: DocumentsFieldsCounts, - indexer: &mut RawIndexer, - ranked_map: &mut RankedMap, - value: &T, -) -> Result<(), SerializerError> -where - T: ser::Serialize, -{ - let serialized = serde_json::to_vec(value)?; - document_store.put_document_field(txn, document_id, field_id, &serialized)?; - - if let Some(indexed_pos) = schema.is_indexed(field_id) { - let indexer = Indexer { - pos: *indexed_pos, - indexer, - document_id, - }; - if let Some(number_of_words) = value.serialize(indexer)? { - documents_fields_counts.put_document_field_count( - txn, - document_id, - *indexed_pos, - number_of_words as u16, - )?; - } - } - - if schema.is_ranked(field_id) { - let number = value.serialize(ConvertToNumber).unwrap_or_default(); - ranked_map.insert(document_id, field_id, number); - } - - Ok(()) -} diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index d8f1f53f1..c65cf6e81 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -4,12 +4,14 @@ use fst::{set::OpBuilder, SetBuilder}; use indexmap::IndexMap; use sdset::{duo::Union, SetOperation}; use serde::{Deserialize, Serialize}; +use serde_json::Value; use crate::database::{MainT, UpdateT}; use crate::database::{UpdateEvent, UpdateEventsEmitter}; use crate::facets; use crate::raw_indexer::RawIndexer; -use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer}; +use crate::serde::{extract_document_id, Deserializer}; +use crate::serde::{ConvertToNumber, Indexer}; use crate::store; use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update}; use crate::{Error, MResult, RankedMap}; @@ -107,7 +109,7 @@ pub fn push_documents_addition( pub fn apply_addition<'a, 'b>( writer: &'a mut heed::RwTxn<'b, MainT>, index: &store::Index, - addition: Vec>, + addition: Vec>, partial: bool ) -> MResult<()> { let mut documents_additions = HashMap::new(); @@ -137,7 +139,7 @@ pub fn apply_addition<'a, 'b>( // retrieve the old document and // update the new one with missing keys found in the old one - let result = Option::>::deserialize(&mut deserializer)?; + let result = Option::>::deserialize(&mut deserializer)?; if let Some(old_document) = result { for (key, value) in old_document { document.entry(key).or_insert(value); @@ -170,18 +172,33 @@ pub fn apply_addition<'a, 'b>( let mut indexer = RawIndexer::new(stop_words); + // For each document in this update for (document_id, document) in documents_additions { - let serializer = Serializer { - txn: writer, - schema: &mut schema, - document_store: index.documents_fields, - document_fields_counts: index.documents_fields_counts, - indexer: &mut indexer, - ranked_map: &mut ranked_map, - document_id, - }; - document.serialize(serializer)?; + // For each key-value pair in the document. + for (attribute, value) in document { + + let field_id = schema.insert_and_index(&attribute)?; + let serialized = serde_json::to_vec(&value)?; + index.documents_fields.put_document_field(writer, document_id, field_id, &serialized)?; + + if let Some(indexed_pos) = schema.is_indexed(field_id) { + let indexer = Indexer { pos: *indexed_pos, indexer: &mut indexer, document_id }; + if let Some(number_of_words) = value.serialize(indexer)? { + index.documents_fields_counts.put_document_field_count( + writer, + document_id, + *indexed_pos, + number_of_words as u16, + )?; + } + } + + if schema.is_ranked(field_id) { + let number = value.serialize(ConvertToNumber).unwrap_or_default(); + ranked_map.insert(document_id, field_id, number); + } + } } write_documents_addition_index( @@ -200,7 +217,7 @@ pub fn apply_addition<'a, 'b>( pub fn apply_documents_partial_addition<'a, 'b>( writer: &'a mut heed::RwTxn<'b, MainT>, index: &store::Index, - addition: Vec>, + addition: Vec>, ) -> MResult<()> { apply_addition(writer, index, addition, true) } @@ -208,7 +225,7 @@ pub fn apply_documents_partial_addition<'a, 'b>( pub fn apply_documents_addition<'a, 'b>( writer: &'a mut heed::RwTxn<'b, MainT>, index: &store::Index, - addition: Vec>, + addition: Vec>, ) -> MResult<()> { apply_addition(writer, index, addition, false) } @@ -253,22 +270,31 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn, index: &store::Ind for document_id in documents_ids_to_reindex { for result in index.documents_fields.document_fields(writer, document_id)? { let (field_id, bytes) = result?; - let value: serde_json::Value = serde_json::from_slice(bytes)?; + let value: Value = serde_json::from_slice(bytes)?; ram_store.insert((document_id, field_id), value); } - for ((docid, field_id), value) in ram_store.drain() { - serialize_value_with_id( - writer, - field_id, - &schema, - docid, - index.documents_fields, - index.documents_fields_counts, - &mut indexer, - &mut ranked_map, - &value - )?; + // For each key-value pair in the document. + for ((document_id, field_id), value) in ram_store.drain() { + let serialized = serde_json::to_vec(&value)?; + index.documents_fields.put_document_field(writer, document_id, field_id, &serialized)?; + + if let Some(indexed_pos) = schema.is_indexed(field_id) { + let indexer = Indexer { pos: *indexed_pos, indexer: &mut indexer, document_id }; + if let Some(number_of_words) = value.serialize(indexer)? { + index.documents_fields_counts.put_document_field_count( + writer, + document_id, + *indexed_pos, + number_of_words as u16, + )?; + } + } + + if schema.is_ranked(field_id) { + let number = value.serialize(ConvertToNumber).unwrap_or_default(); + ranked_map.insert(document_id, field_id, number); + } } }