Compute and store the number of words in document fields

This commit is contained in:
Clément Renault 2019-10-14 14:07:10 +02:00
parent a7e40a78c1
commit b377003192
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
5 changed files with 40 additions and 16 deletions

View file

@ -13,7 +13,7 @@ pub struct Indexer<'a> {
}
impl<'a> ser::Serializer for Indexer<'a> {
type Ok = ();
type Ok = Option<usize>;
type Error = SerializerError;
type SerializeSeq = SeqIndexer<'a>;
type SerializeTuple = TupleIndexer<'a>;
@ -83,8 +83,8 @@ impl<'a> ser::Serializer for Indexer<'a> {
}
fn serialize_str(self, text: &str) -> Result<Self::Ok, Self::Error> {
self.indexer.index_text(self.document_id, self.attribute, text);
Ok(())
let number_of_words = self.indexer.index_text(self.document_id, self.attribute, text);
Ok(Some(number_of_words))
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
@ -99,8 +99,8 @@ impl<'a> ser::Serializer for Indexer<'a> {
where T: ser::Serialize,
{
let text = value.serialize(ConvertToString)?;
self.indexer.index_text(self.document_id, self.attribute, &text);
Ok(())
let number_of_words = self.indexer.index_text(self.document_id, self.attribute, &text);
Ok(Some(number_of_words))
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@ -225,7 +225,7 @@ pub struct SeqIndexer<'a> {
}
impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
type Ok = ();
type Ok = Option<usize>;
type Error = SerializerError;
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
@ -239,7 +239,7 @@ impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
Ok(None)
}
}
@ -251,7 +251,7 @@ pub struct MapIndexer<'a> {
}
impl<'a> ser::SerializeMap for MapIndexer<'a> {
type Ok = ();
type Ok = Option<usize>;
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
@ -273,7 +273,7 @@ impl<'a> ser::SerializeMap for MapIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
Ok(None)
}
}
@ -285,7 +285,7 @@ pub struct StructSerializer<'a> {
}
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
type Ok = ();
type Ok = Option<usize>;
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
@ -305,7 +305,7 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
Ok(None)
}
}
@ -317,7 +317,7 @@ pub struct TupleIndexer<'a> {
}
impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
type Ok = ();
type Ok = Option<usize>;
type Error = SerializerError;
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
@ -331,6 +331,6 @@ impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
Ok(None)
}
}

View file

@ -1,4 +1,5 @@
use meilidb_schema::Schema;
use std::collections::HashMap;
use meilidb_schema::{Schema, SchemaAttr};
use serde::ser;
use crate::{DocumentId, RankedMap};
@ -10,6 +11,7 @@ use super::{SerializerError, ConvertToString, ConvertToNumber, Indexer};
pub struct Serializer<'a> {
pub schema: &'a Schema,
pub document_store: &'a mut RamDocumentStore,
pub document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
pub indexer: &'a mut RawIndexer,
pub ranked_map: &'a mut RankedMap,
pub document_id: DocumentId,
@ -135,6 +137,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
schema: self.schema,
document_id: self.document_id,
document_store: self.document_store,
document_fields_counts: self.document_fields_counts,
indexer: self.indexer,
ranked_map: self.ranked_map,
current_key_name: None,
@ -151,6 +154,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
schema: self.schema,
document_id: self.document_id,
document_store: self.document_store,
document_fields_counts: self.document_fields_counts,
indexer: self.indexer,
ranked_map: self.ranked_map,
})
@ -172,6 +176,7 @@ pub struct MapSerializer<'a> {
schema: &'a Schema,
document_id: DocumentId,
document_store: &'a mut RamDocumentStore,
document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
current_key_name: Option<String>,
@ -209,6 +214,7 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
self.schema,
self.document_id,
self.document_store,
self.document_fields_counts,
self.indexer,
self.ranked_map,
&key,
@ -225,6 +231,7 @@ pub struct StructSerializer<'a> {
schema: &'a Schema,
document_id: DocumentId,
document_store: &'a mut RamDocumentStore,
document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
}
@ -244,6 +251,7 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
self.schema,
self.document_id,
self.document_store,
self.document_fields_counts,
self.indexer,
self.ranked_map,
key,
@ -260,6 +268,7 @@ fn serialize_value<T: ?Sized>(
schema: &Schema,
document_id: DocumentId,
document_store: &mut RamDocumentStore,
documents_fields_counts: &mut HashMap<(DocumentId, SchemaAttr), u64>,
indexer: &mut RawIndexer,
ranked_map: &mut RankedMap,
key: &str,
@ -275,7 +284,9 @@ where T: ser::Serialize,
if props.is_indexed() {
let indexer = Indexer { attribute, indexer, document_id };
value.serialize(indexer)?;
if let Some(number_of_words) = value.serialize(indexer)? {
documents_fields_counts.insert((document_id, attribute), number_of_words as u64);
}
}
if props.is_ranked() {