squash-me

This commit is contained in:
qdequele 2020-01-10 18:20:30 +01:00
parent 2ee90a891c
commit bbe1845f66
No known key found for this signature in database
GPG key ID: B3F0A000EBF11745
20 changed files with 1118 additions and 676 deletions

View file

@ -2,7 +2,7 @@ use std::collections::HashSet;
use std::io::Cursor;
use std::{error::Error, fmt};
use meilisearch_schema::{Schema, SchemaAttr};
use meilisearch_schema::{Schema, FieldId};
use serde::{de, forward_to_deserialize_any};
use serde_json::de::IoRead as SerdeJsonIoRead;
use serde_json::Deserializer as SerdeJsonDeserializer;
@ -54,7 +54,7 @@ pub struct Deserializer<'a> {
pub reader: &'a heed::RoTxn<MainT>,
pub documents_fields: DocumentsFields,
pub schema: &'a Schema,
pub attributes: Option<&'a HashSet<SchemaAttr>>,
pub attributes: Option<&'a HashSet<FieldId>>,
}
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
@ -92,15 +92,17 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
}
};
let is_displayed = self.schema.props(attr).is_displayed();
let is_displayed = self.schema.id_is_displayed(attr);
if is_displayed && self.attributes.map_or(true, |f| f.contains(&attr)) {
let attribute_name = self.schema.attribute_name(attr);
if let Some(attribute_name) = self.schema.get_name(attr) {
let cursor = Cursor::new(value.to_owned());
let ioread = SerdeJsonIoRead::new(cursor);
let value = Value(SerdeJsonDeserializer::new(ioread));
let cursor = Cursor::new(value.to_owned());
let ioread = SerdeJsonIoRead::new(cursor);
let value = Value(SerdeJsonDeserializer::new(ioread));
Some((attribute_name, value))
Some((*attribute_name, value))
} else {
None
}
} else {
None
}

View file

@ -1,4 +1,4 @@
use meilisearch_schema::SchemaAttr;
use meilisearch_schema::{IndexedPos};
use serde::ser;
use serde::Serialize;
@ -7,7 +7,7 @@ use crate::raw_indexer::RawIndexer;
use crate::DocumentId;
pub struct Indexer<'a> {
pub attribute: SchemaAttr,
pub pos: IndexedPos,
pub indexer: &'a mut RawIndexer,
pub document_id: DocumentId,
}
@ -85,7 +85,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
fn serialize_str(self, text: &str) -> Result<Self::Ok, Self::Error> {
let number_of_words = self
.indexer
.index_text(self.document_id, self.attribute, text);
.index_text(self.document_id, self.pos, text);
Ok(Some(number_of_words))
}
@ -104,7 +104,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
let text = value.serialize(ConvertToString)?;
let number_of_words = self
.indexer
.index_text(self.document_id, self.attribute, &text);
.index_text(self.document_id, self.pos, &text);
Ok(Some(number_of_words))
}
@ -153,7 +153,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
let indexer = SeqIndexer {
attribute: self.attribute,
pos: self.pos,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
@ -164,7 +164,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
let indexer = TupleIndexer {
attribute: self.attribute,
pos: self.pos,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
@ -197,7 +197,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
let indexer = MapIndexer {
attribute: self.attribute,
pos: self.pos,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
@ -212,7 +212,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
_len: usize,
) -> Result<Self::SerializeStruct, Self::Error> {
let indexer = StructIndexer {
attribute: self.attribute,
pos: self.pos,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
@ -235,7 +235,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
}
pub struct SeqIndexer<'a> {
attribute: SchemaAttr,
pos: IndexedPos,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
@ -257,13 +257,13 @@ impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
.index_text_seq(self.document_id, self.pos, texts);
Ok(None)
}
}
pub struct MapIndexer<'a> {
attribute: SchemaAttr,
pos: IndexedPos,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
@ -294,13 +294,13 @@ impl<'a> ser::SerializeMap for MapIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
.index_text_seq(self.document_id, self.pos, texts);
Ok(None)
}
}
pub struct StructIndexer<'a> {
attribute: SchemaAttr,
pos: IndexedPos,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
@ -328,13 +328,13 @@ impl<'a> ser::SerializeStruct for StructIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
.index_text_seq(self.document_id, self.pos, texts);
Ok(None)
}
}
pub struct TupleIndexer<'a> {
attribute: SchemaAttr,
pos: IndexedPos,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
@ -356,7 +356,7 @@ impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
.index_text_seq(self.document_id, self.pos, texts);
Ok(None)
}
}

View file

@ -26,6 +26,7 @@ use std::{error::Error, fmt};
use serde::ser;
use serde_json::Error as SerdeJsonError;
use meilisearch_schema::Error as SchemaError;
use crate::ParseNumberError;
@ -36,6 +37,7 @@ pub enum SerializerError {
Zlmdb(heed::Error),
SerdeJson(SerdeJsonError),
ParseNumber(ParseNumberError),
Schema(SchemaError),
UnserializableType { type_name: &'static str },
UnindexableType { type_name: &'static str },
UnrankableType { type_name: &'static str },
@ -62,6 +64,7 @@ impl fmt::Display for SerializerError {
SerializerError::ParseNumber(e) => {
write!(f, "error while trying to parse a number: {}", e)
}
SerializerError::Schema(e) => write!(f, "impossible to update schema: {}", e),
SerializerError::UnserializableType { type_name } => {
write!(f, "{} is not a serializable type", type_name)
}
@ -101,3 +104,9 @@ impl From<ParseNumberError> for SerializerError {
SerializerError::ParseNumber(error)
}
}
impl From<SchemaError> for SerializerError {
fn from(error: SchemaError) -> SerializerError {
SerializerError::Schema(error)
}
}

View file

@ -1,4 +1,4 @@
use meilisearch_schema::{Schema, SchemaAttr, SchemaProps};
use meilisearch_schema::{Schema, FieldsMap};
use serde::ser;
use crate::database::MainT;
@ -15,6 +15,7 @@ pub struct Serializer<'a, 'b> {
pub document_fields_counts: DocumentsFieldsCounts,
pub indexer: &'a mut RawIndexer,
pub ranked_map: &'a mut RankedMap,
pub fields_map: &'a mut FieldsMap,
pub document_id: DocumentId,
}
@ -158,6 +159,7 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
document_fields_counts: self.document_fields_counts,
indexer: self.indexer,
ranked_map: self.ranked_map,
fields_map: self.fields_map,
current_key_name: None,
})
}
@ -175,6 +177,7 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
document_fields_counts: self.document_fields_counts,
indexer: self.indexer,
ranked_map: self.ranked_map,
fields_map: self.fields_map,
})
}
@ -199,6 +202,7 @@ pub struct MapSerializer<'a, 'b> {
document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
fields_map: &'a mut FieldsMap,
current_key_name: Option<String>,
}
@ -243,6 +247,7 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
self.document_fields_counts,
self.indexer,
self.ranked_map,
self.fields_map,
value,
),
None => Ok(()),
@ -262,6 +267,7 @@ pub struct StructSerializer<'a, 'b> {
document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
fields_map: &'a mut FieldsMap,
}
impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
@ -276,20 +282,26 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
where
T: ser::Serialize,
{
match self.schema.attribute(key) {
Some(attribute) => serialize_value(
self.txn,
attribute,
self.schema.props(attribute),
self.document_id,
self.document_store,
self.document_fields_counts,
self.indexer,
self.ranked_map,
value,
),
None => Ok(()),
}
// let id = fields_map.insert(key)?;
// let attribute = match self.schema.attribute(id) {
// Some(attribute) => attribute,
// None => {
// },
// }
serialize_value(
self.txn,
attribute,
self.schema.props(attribute),
self.document_id,
self.document_store,
self.document_fields_counts,
self.indexer,
self.ranked_map,
value,
)
}
fn end(self) -> Result<Self::Ok, Self::Error> {
@ -297,10 +309,10 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
}
}
pub fn serialize_value<T: ?Sized>(
pub fn serialize_value<'a, T: ?Sized>(
txn: &mut heed::RwTxn<MainT>,
attribute: SchemaAttr,
props: SchemaProps,
attribute: &'static str,
schema: &'a Schema,
document_id: DocumentId,
document_store: DocumentsFields,
documents_fields_counts: DocumentsFieldsCounts,
@ -312,11 +324,12 @@ where
T: ser::Serialize,
{
let serialized = serde_json::to_vec(value)?;
document_store.put_document_field(txn, document_id, attribute, &serialized)?;
let field_id = schema.get_or_create(attribute)?;
document_store.put_document_field(txn, document_id, field_id, &serialized)?;
if props.is_indexed() {
if let Some(indexed_pos) = schema.id_is_indexed(field_id) {
let indexer = Indexer {
attribute,
field_id,
indexer,
document_id,
};
@ -324,15 +337,15 @@ where
documents_fields_counts.put_document_field_count(
txn,
document_id,
attribute,
field_id,
number_of_words as u16,
)?;
}
}
if props.is_ranked() {
if let Some(field_id) = schema.id_is_ranked(field_id) {
let number = value.serialize(ConvertToNumber)?;
ranked_map.insert(document_id, attribute, number);
ranked_map.insert(document_id, field_id, number);
}
Ok(())