mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 06:44:27 +01:00
feat: Implement De/Serialize on Blob
This commit is contained in:
parent
a43a772e9a
commit
0e856db4e6
@ -15,6 +15,8 @@ use std::{io, fmt, mem};
|
|||||||
use fst::Map;
|
use fst::Map;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
use rocksdb::rocksdb::{DB, Snapshot};
|
use rocksdb::rocksdb::{DB, Snapshot};
|
||||||
|
use serde::ser::{Serialize, Serializer, SerializeTuple};
|
||||||
|
use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
|
||||||
|
|
||||||
use crate::index::identifier::Identifier;
|
use crate::index::identifier::Identifier;
|
||||||
use crate::data::DocIndexes;
|
use crate::data::DocIndexes;
|
||||||
@ -33,6 +35,65 @@ impl Blob {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Serialize for Blob {
|
||||||
|
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
|
||||||
|
match self {
|
||||||
|
Blob::Positive(blob) => {
|
||||||
|
let mut tuple = serializer.serialize_tuple(2)?;
|
||||||
|
tuple.serialize_element(&Sign::Positive)?;
|
||||||
|
tuple.serialize_element(&blob)?;
|
||||||
|
tuple.end()
|
||||||
|
},
|
||||||
|
Blob::Negative(blob) => {
|
||||||
|
let mut tuple = serializer.serialize_tuple(2)?;
|
||||||
|
tuple.serialize_element(&Sign::Negative)?;
|
||||||
|
tuple.serialize_element(&blob)?;
|
||||||
|
tuple.end()
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'de> Deserialize<'de> for Blob {
|
||||||
|
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Blob, D::Error> {
|
||||||
|
struct TupleVisitor;
|
||||||
|
|
||||||
|
impl<'de> Visitor<'de> for TupleVisitor {
|
||||||
|
type Value = Blob;
|
||||||
|
|
||||||
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
formatter.write_str("a Blob struct")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
|
||||||
|
let sign = match seq.next_element()? {
|
||||||
|
Some(value) => value,
|
||||||
|
None => return Err(de::Error::invalid_length(0, &self)),
|
||||||
|
};
|
||||||
|
match sign {
|
||||||
|
Sign::Positive => {
|
||||||
|
let blob = match seq.next_element()? {
|
||||||
|
Some(value) => value,
|
||||||
|
None => return Err(de::Error::invalid_length(1, &self)),
|
||||||
|
};
|
||||||
|
Ok(Blob::Positive(blob))
|
||||||
|
},
|
||||||
|
Sign::Negative => {
|
||||||
|
let blob = match seq.next_element()? {
|
||||||
|
Some(value) => value,
|
||||||
|
None => return Err(de::Error::invalid_length(1, &self)),
|
||||||
|
};
|
||||||
|
Ok(Blob::Negative(blob))
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
deserializer.deserialize_tuple(2, TupleVisitor)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||||
pub enum Sign {
|
pub enum Sign {
|
||||||
Positive,
|
Positive,
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
|
use std::io::{Read, Write};
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::io::Write;
|
|
||||||
|
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
use crate::data::{DocIds, DocIdsBuilder};
|
use crate::data::{DocIds, DocIdsBuilder};
|
||||||
|
use serde::ser::{Serialize, Serializer};
|
||||||
|
use serde::de::{self, Deserialize, Deserializer};
|
||||||
|
|
||||||
pub struct NegativeBlob {
|
pub struct NegativeBlob {
|
||||||
doc_ids: DocIds,
|
doc_ids: DocIds,
|
||||||
@ -31,6 +33,19 @@ impl NegativeBlob {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Serialize for NegativeBlob {
|
||||||
|
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
|
||||||
|
self.doc_ids.serialize(serializer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'de> Deserialize<'de> for NegativeBlob {
|
||||||
|
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<NegativeBlob, D::Error> {
|
||||||
|
let bytes = Vec::deserialize(deserializer)?;
|
||||||
|
NegativeBlob::from_bytes(bytes).map_err(de::Error::custom)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct NegativeBlobBuilder<W> {
|
pub struct NegativeBlobBuilder<W> {
|
||||||
doc_ids: DocIdsBuilder<W>,
|
doc_ids: DocIdsBuilder<W>,
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
|
use std::io::{Read, Write};
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::io::Write;
|
use std::fmt;
|
||||||
|
|
||||||
use fst::{Map, MapBuilder};
|
use fst::{Map, MapBuilder};
|
||||||
|
|
||||||
use crate::DocIndex;
|
use crate::DocIndex;
|
||||||
use crate::data::{DocIndexes, DocIndexesBuilder};
|
use crate::data::{DocIndexes, DocIndexesBuilder};
|
||||||
|
use serde::ser::{Serialize, Serializer, SerializeTuple};
|
||||||
|
use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
|
||||||
|
|
||||||
pub struct PositiveBlob {
|
pub struct PositiveBlob {
|
||||||
map: Map,
|
map: Map,
|
||||||
@ -45,6 +48,52 @@ impl PositiveBlob {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Serialize for PositiveBlob {
|
||||||
|
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
|
||||||
|
let mut tuple = serializer.serialize_tuple(2)?;
|
||||||
|
tuple.serialize_element(&self.map.as_fst().to_vec())?;
|
||||||
|
tuple.serialize_element(&self.indexes)?;
|
||||||
|
tuple.end()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'de> Deserialize<'de> for PositiveBlob {
|
||||||
|
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<PositiveBlob, D::Error> {
|
||||||
|
struct TupleVisitor;
|
||||||
|
|
||||||
|
impl<'de> Visitor<'de> for TupleVisitor {
|
||||||
|
type Value = PositiveBlob;
|
||||||
|
|
||||||
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
formatter.write_str("a PositiveBlob struct")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
|
||||||
|
let map = match seq.next_element()? {
|
||||||
|
Some(bytes) => match Map::from_bytes(bytes) {
|
||||||
|
Ok(value) => value,
|
||||||
|
Err(err) => return Err(de::Error::custom(err)),
|
||||||
|
},
|
||||||
|
None => return Err(de::Error::invalid_length(0, &self)),
|
||||||
|
};
|
||||||
|
|
||||||
|
let indexes = match seq.next_element()? {
|
||||||
|
Some(bytes) => match DocIndexes::from_bytes(bytes) {
|
||||||
|
Ok(value) => value,
|
||||||
|
Err(err) => return Err(de::Error::custom(err)),
|
||||||
|
},
|
||||||
|
None => return Err(de::Error::invalid_length(1, &self)),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(PositiveBlob { map, indexes })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
deserializer.deserialize_tuple(2, TupleVisitor)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct PositiveBlobBuilder<W, X> {
|
pub struct PositiveBlobBuilder<W, X> {
|
||||||
map: W,
|
map: W,
|
||||||
indexes: DocIndexesBuilder<X>,
|
indexes: DocIndexesBuilder<X>,
|
||||||
|
@ -7,31 +7,32 @@ use std::{io, mem};
|
|||||||
|
|
||||||
use byteorder::{NativeEndian, WriteBytesExt};
|
use byteorder::{NativeEndian, WriteBytesExt};
|
||||||
use fst::raw::MmapReadOnly;
|
use fst::raw::MmapReadOnly;
|
||||||
|
use serde::ser::{Serialize, Serializer};
|
||||||
|
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
use crate::data::Data;
|
use crate::data::Data;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct DocIds {
|
pub struct DocIds {
|
||||||
doc_ids: Data,
|
data: Data,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocIds {
|
impl DocIds {
|
||||||
pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> io::Result<Self> {
|
pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> io::Result<Self> {
|
||||||
let mmap = MmapReadOnly::open_path(path)?;
|
let mmap = MmapReadOnly::open_path(path)?;
|
||||||
let doc_ids = Data::Mmap(mmap);
|
let data = Data::Mmap(mmap);
|
||||||
Ok(DocIds { doc_ids })
|
Ok(DocIds { data })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn from_bytes(vec: Vec<u8>) -> Result<Self, Box<Error>> {
|
pub fn from_bytes(vec: Vec<u8>) -> Result<Self, Box<Error>> {
|
||||||
// FIXME check if modulo DocumentId
|
// FIXME check if modulo DocumentId
|
||||||
let len = vec.len();
|
let len = vec.len();
|
||||||
let doc_ids = Data::Shared {
|
let data = Data::Shared {
|
||||||
vec: Arc::new(vec),
|
vec: Arc::new(vec),
|
||||||
offset: 0,
|
offset: 0,
|
||||||
len: len
|
len: len
|
||||||
};
|
};
|
||||||
Ok(DocIds { doc_ids })
|
Ok(DocIds { data })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn contains(&self, doc: DocumentId) -> bool {
|
pub fn contains(&self, doc: DocumentId) -> bool {
|
||||||
@ -40,13 +41,19 @@ impl DocIds {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn doc_ids(&self) -> &[DocumentId] {
|
pub fn doc_ids(&self) -> &[DocumentId] {
|
||||||
let slice = &self.doc_ids;
|
let slice = &self.data;
|
||||||
let ptr = slice.as_ptr() as *const DocumentId;
|
let ptr = slice.as_ptr() as *const DocumentId;
|
||||||
let len = slice.len() / mem::size_of::<DocumentId>();
|
let len = slice.len() / mem::size_of::<DocumentId>();
|
||||||
unsafe { from_raw_parts(ptr, len) }
|
unsafe { from_raw_parts(ptr, len) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Serialize for DocIds {
|
||||||
|
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
|
||||||
|
self.data.as_ref().serialize(serializer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct DocIdsBuilder<W> {
|
pub struct DocIdsBuilder<W> {
|
||||||
doc_ids: BTreeSet<DocumentId>, // TODO: prefer a linked-list
|
doc_ids: BTreeSet<DocumentId>, // TODO: prefer a linked-list
|
||||||
wrt: W,
|
wrt: W,
|
||||||
|
@ -8,6 +8,7 @@ use std::mem;
|
|||||||
|
|
||||||
use fst::raw::MmapReadOnly;
|
use fst::raw::MmapReadOnly;
|
||||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||||
|
use serde::ser::{Serialize, Serializer, SerializeTuple};
|
||||||
|
|
||||||
use crate::DocIndex;
|
use crate::DocIndex;
|
||||||
use crate::data::Data;
|
use crate::data::Data;
|
||||||
@ -84,6 +85,15 @@ impl DocIndexes {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Serialize for DocIndexes {
|
||||||
|
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
|
||||||
|
let mut tuple = serializer.serialize_tuple(2)?;
|
||||||
|
tuple.serialize_element(self.ranges.as_ref())?;
|
||||||
|
tuple.serialize_element(self.indexes.as_ref())?;
|
||||||
|
tuple.end()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct DocIndexesBuilder<W> {
|
pub struct DocIndexesBuilder<W> {
|
||||||
keys: BTreeMap<String, u64>,
|
keys: BTreeMap<String, u64>,
|
||||||
indexes: Vec<Vec<DocIndex>>,
|
indexes: Vec<Vec<DocIndex>>,
|
||||||
|
@ -23,6 +23,12 @@ impl Deref for Data {
|
|||||||
type Target = [u8];
|
type Target = [u8];
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
fn deref(&self) -> &Self::Target {
|
||||||
|
self.as_ref()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsRef<[u8]> for Data {
|
||||||
|
fn as_ref(&self) -> &[u8] {
|
||||||
match self {
|
match self {
|
||||||
Data::Shared { vec, offset, len } => {
|
Data::Shared { vec, offset, len } => {
|
||||||
&vec[*offset..offset + len]
|
&vec[*offset..offset + len]
|
||||||
|
@ -22,7 +22,7 @@ use crate::{DocIndex, DocumentId};
|
|||||||
use crate::index::schema::Schema;
|
use crate::index::schema::Schema;
|
||||||
use crate::index::update::Update;
|
use crate::index::update::Update;
|
||||||
use crate::index::identifier::Identifier;
|
use crate::index::identifier::Identifier;
|
||||||
use crate::blob::{PositiveBlobBuilder, BlobInfo, Sign, Blob, blobs_from_blob_infos};
|
use crate::blob::{PositiveBlobBuilder, PositiveBlob, BlobInfo, Sign, Blob, blobs_from_blob_infos};
|
||||||
use crate::tokenizer::{TokenizerBuilder, DefaultBuilder, Tokenizer};
|
use crate::tokenizer::{TokenizerBuilder, DefaultBuilder, Tokenizer};
|
||||||
use crate::rank::{criterion, Config, RankedStream};
|
use crate::rank::{criterion, Config, RankedStream};
|
||||||
use crate::automaton;
|
use crate::automaton;
|
||||||
@ -35,6 +35,45 @@ fn simple_vec_append(key: &[u8], value: Option<&[u8]>, operands: &mut MergeOpera
|
|||||||
output
|
output
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct MergeBuilder {
|
||||||
|
blobs: Vec<Blob>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MergeBuilder {
|
||||||
|
pub fn new() -> MergeBuilder {
|
||||||
|
MergeBuilder { blobs: Vec::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn push(&mut self, blob: Blob) {
|
||||||
|
if blob.sign() == Sign::Negative && self.blobs.is_empty() { return }
|
||||||
|
self.blobs.push(blob);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn merge(self) -> PositiveBlob {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
|
||||||
|
if key != b"data-index" { panic!("The merge operator only allow \"data-index\" merging") }
|
||||||
|
|
||||||
|
let mut merge_builder = MergeBuilder::new();
|
||||||
|
|
||||||
|
if let Some(existing_value) = existing_value {
|
||||||
|
let base: PositiveBlob = bincode::deserialize(existing_value).unwrap(); // FIXME what do we do here ?
|
||||||
|
merge_builder.push(Blob::Positive(base));
|
||||||
|
}
|
||||||
|
|
||||||
|
for bytes in operands {
|
||||||
|
let blob: Blob = bincode::deserialize(bytes).unwrap();
|
||||||
|
merge_builder.push(blob);
|
||||||
|
}
|
||||||
|
|
||||||
|
let blob = merge_builder.merge();
|
||||||
|
// blob.to_vec()
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Index {
|
pub struct Index {
|
||||||
database: rocksdb::DB,
|
database: rocksdb::DB,
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user