Move to zerocopy-lmdb

This commit is contained in:
Clément Renault 2019-10-16 17:05:24 +02:00
parent c332c7bc70
commit 1667e1b32f
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
25 changed files with 450 additions and 684 deletions

View file

@ -1,54 +1,51 @@
use std::sync::Arc;
use rkv::{Value, StoreError};
use crate::{DocumentId, MResult};
use zlmdb::types::{OwnedType, ByteSlice};
use zlmdb::Result as ZResult;
use crate::DocumentId;
use super::BEU64;
/// Handle on the store that associates a document id with the FST bytes of the
/// word set recorded for that document.
// NOTE(review): the field is listed twice, once typed `rkv::SingleStore` and
// once typed `zlmdb::Database<..>`; presumably only the `zlmdb` one is meant
// to remain — confirm.
#[derive(Copy, Clone)]
pub struct DocsWords {
pub(crate) docs_words: rkv::SingleStore,
pub(crate) docs_words: zlmdb::Database<OwnedType<BEU64>, ByteSlice>,
}
impl DocsWords {
/// Stores the raw FST bytes of `words` under the big-endian encoding of the
/// `u64` wrapped by `document_id`.
pub fn put_doc_words(
&self,
writer: &mut rkv::Writer,
writer: &mut zlmdb::RwTxn,
document_id: DocumentId,
words: &fst::Set,
) -> Result<(), rkv::StoreError>
) -> ZResult<()>
{
let document_id_bytes = document_id.0.to_be_bytes();
let document_id = BEU64::new(document_id.0);
let bytes = words.as_fst().as_bytes();
self.docs_words.put(writer, document_id_bytes, &Value::Blob(bytes))
self.docs_words.put(writer, &document_id, bytes)
}
/// Removes the entry keyed by `document_id`.
///
/// The `rkv` lines translate a `NotFound` error into `Ok(false)` and a
/// successful delete into `Ok(true)`; the `zlmdb` lines return the outcome of
/// `delete` unchanged.
pub fn del_doc_words(
&self,
writer: &mut rkv::Writer,
writer: &mut zlmdb::RwTxn,
document_id: DocumentId,
) -> Result<bool, rkv::StoreError>
) -> ZResult<bool>
{
let document_id_bytes = document_id.0.to_be_bytes();
match self.docs_words.delete(writer, document_id_bytes) {
Ok(()) => Ok(true),
Err(StoreError::LmdbError(lmdb::Error::NotFound)) => Ok(false),
Err(e) => Err(e),
}
let document_id = BEU64::new(document_id.0);
self.docs_words.delete(writer, &document_id)
}
/// Reads back the word set stored for `document_id`; yields `Ok(None)` when
/// the store has no entry for it.
///
/// The stored bytes are converted into an `Arc` and handed to
/// `fst::raw::Fst::from_shared_bytes` over their full length.
pub fn doc_words<T: rkv::Readable>(
pub fn doc_words(
&self,
reader: &T,
reader: &zlmdb::RoTxn,
document_id: DocumentId,
) -> MResult<Option<fst::Set>>
) -> ZResult<Option<fst::Set>>
{
let document_id_bytes = document_id.0.to_be_bytes();
match self.docs_words.get(reader, document_id_bytes)? {
Some(Value::Blob(bytes)) => {
let document_id = BEU64::new(document_id.0);
match self.docs_words.get(reader, &document_id)? {
Some(bytes) => {
let len = bytes.len();
let bytes = Arc::from(bytes);
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len)?;
// NOTE(review): this variant panics whenever `from_shared_bytes` returns an
// error, where the `?` variant just above propagates it — confirm intended.
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
Ok(Some(fst::Set::from(fst)))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}

View file

@ -1,102 +1,77 @@
use std::convert::TryFrom;
use meilidb_schema::SchemaAttr;
use zlmdb::types::{OwnedType, ByteSlice};
use zlmdb::Result as ZResult;
use crate::DocumentId;
use super::{document_attribute_into_key, document_attribute_from_key};
use super::DocumentAttrKey;
/// Handle on the store holding the raw bytes of document fields, keyed by the
/// pair (document id, schema attribute).
// NOTE(review): the field is listed twice, once typed `rkv::SingleStore` and
// once typed `zlmdb::Database<..>`; presumably only the `zlmdb` one is meant
// to remain — confirm.
#[derive(Copy, Clone)]
pub struct DocumentsFields {
pub(crate) documents_fields: rkv::SingleStore,
pub(crate) documents_fields: zlmdb::Database<OwnedType<DocumentAttrKey>, ByteSlice>,
}
impl DocumentsFields {
/// Stores `value` as the content of `attribute` for `document_id`.
///
/// The key combines the document id and the attribute
/// (`document_attribute_into_key` in the `rkv` lines, `DocumentAttrKey::new`
/// in the `zlmdb` lines).
pub fn put_document_field(
&self,
writer: &mut rkv::Writer,
writer: &mut zlmdb::RwTxn,
document_id: DocumentId,
attribute: SchemaAttr,
value: &[u8],
) -> Result<(), rkv::StoreError>
) -> ZResult<()>
{
let key = document_attribute_into_key(document_id, attribute);
self.documents_fields.put(writer, key, &rkv::Value::Blob(value))
let key = DocumentAttrKey::new(document_id, attribute);
self.documents_fields.put(writer, &key, value)
}
/// Deletes every field stored for `document_id`.
///
/// The `rkv` lines scan from the document id prefix, collect the keys that
/// still belong to that document, delete them afterwards and return how many
/// keys were collected. The `zlmdb` lines issue one `delete_range` over the
/// inclusive range `SchemaAttr::min()..=SchemaAttr::max()` for that document
/// and return its `usize` result (presumably the number of deleted entries —
/// confirm).
pub fn del_all_document_fields(
&self,
writer: &mut rkv::Writer,
writer: &mut zlmdb::RwTxn,
document_id: DocumentId,
) -> Result<usize, rkv::StoreError>
) -> ZResult<usize>
{
let document_id_bytes = document_id.0.to_be_bytes();
let mut keys_to_delete = Vec::new();
// WARN we can not delete the keys using the iterator
// so we store them and delete them just after
let iter = self.documents_fields.iter_from(writer, document_id_bytes)?;
for result in iter {
let (key, _) = result?;
let array = TryFrom::try_from(key).unwrap();
let (current_document_id, _) = document_attribute_from_key(array);
if current_document_id != document_id { break }
keys_to_delete.push(key.to_owned());
}
let count = keys_to_delete.len();
for key in keys_to_delete {
self.documents_fields.delete(writer, key)?;
}
Ok(count)
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
self.documents_fields.delete_range(writer, start..=end)
}
/// Returns the raw bytes stored for `attribute` of `document_id`, borrowed for
/// the lifetime of `reader`, or `None` when that field is absent.
pub fn document_attribute<'a>(
pub fn document_attribute<'txn>(
&self,
reader: &'a impl rkv::Readable,
reader: &'txn zlmdb::RoTxn,
document_id: DocumentId,
attribute: SchemaAttr,
) -> Result<Option<&'a [u8]>, rkv::StoreError>
) -> ZResult<Option<&'txn [u8]>>
{
let key = document_attribute_into_key(document_id, attribute);
match self.documents_fields.get(reader, key)? {
Some(rkv::Value::Blob(bytes)) => Ok(Some(bytes)),
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
let key = DocumentAttrKey::new(document_id, attribute);
self.documents_fields.get(reader, &key)
}
/// Builds an iterator over the stored fields of `document_id`.
///
/// The `zlmdb` lines bound the scan with the inclusive key range
/// `SchemaAttr::min()..=SchemaAttr::max()` for that document; the `rkv` lines
/// start at the document id prefix and pass `document_id` to the iterator.
pub fn document_fields<'r, T: rkv::Readable>(
pub fn document_fields<'txn>(
&self,
reader: &'r T,
reader: &'txn zlmdb::RoTxn,
document_id: DocumentId,
) -> Result<DocumentFieldsIter<'r>, rkv::StoreError>
) -> ZResult<DocumentFieldsIter<'txn>>
{
let document_id_bytes = document_id.0.to_be_bytes();
let iter = self.documents_fields.iter_from(reader, document_id_bytes)?;
Ok(DocumentFieldsIter { document_id, iter })
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
let iter = self.documents_fields.range(reader, start..=end)?;
Ok(DocumentFieldsIter { iter })
}
}
/// Iterator state built by `DocumentsFields::document_fields`.
// NOTE(review): two headers are listed — a `<'r>` one carrying `document_id`
// plus an `rkv` iterator, and a `<'txn>` one carrying only a `zlmdb::RoRange`;
// presumably only the `<'txn>` form is meant to remain — confirm.
pub struct DocumentFieldsIter<'r> {
document_id: DocumentId,
iter: rkv::store::single::Iter<'r>,
pub struct DocumentFieldsIter<'txn> {
iter: zlmdb::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>,
}
impl<'r> Iterator for DocumentFieldsIter<'r> {
type Item = Result<(SchemaAttr, &'r [u8]), rkv::StoreError>;
impl<'txn> Iterator for DocumentFieldsIter<'txn> {
type Item = ZResult<(SchemaAttr, &'txn [u8])>;
/// Yields the next `(attribute, bytes)` pair and forwards store errors.
///
/// The `rkv` lines decode the raw key and stop once the decoded document id
/// differs from `self.document_id`; the `zlmdb` lines read the attribute
/// straight from the typed key.
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, Some(rkv::Value::Blob(bytes))))) => {
let array = TryFrom::try_from(key).unwrap();
let (current_document_id, attr) = document_attribute_from_key(array);
if current_document_id != self.document_id { return None; }
Some(Ok((key, bytes))) => {
let attr = SchemaAttr(key.attr.get());
Some(Ok((attr, bytes)))
},
Some(Ok((key, data))) => panic!("{:?}, {:?}", key, data),
Some(Err(e)) => Some(Err(e)),
Some(Err(e)) => Some(Err(e.into())),
None => None,
}
}

View file

@ -1,163 +1,142 @@
use std::convert::TryFrom;
use meilidb_schema::SchemaAttr;
use zlmdb::types::OwnedType;
use zlmdb::Result as ZResult;
use crate::DocumentId;
use super::{document_attribute_into_key, document_attribute_from_key};
use super::DocumentAttrKey;
/// Handle on the store holding one `u64` count per (document id, schema
/// attribute) pair.
// NOTE(review): the field is listed twice, once typed `rkv::SingleStore` and
// once typed `zlmdb::Database<..>`; presumably only the `zlmdb` one is meant
// to remain — confirm.
#[derive(Copy, Clone)]
pub struct DocumentsFieldsCounts {
pub(crate) documents_fields_counts: rkv::SingleStore,
pub(crate) documents_fields_counts: zlmdb::Database<OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}
impl DocumentsFieldsCounts {
/// Stores `value` as the count associated with `attribute` of `document_id`.
pub fn put_document_field_count(
&self,
writer: &mut rkv::Writer,
writer: &mut zlmdb::RwTxn,
document_id: DocumentId,
attribute: SchemaAttr,
value: u64,
) -> Result<(), rkv::StoreError>
) -> ZResult<()>
{
let key = document_attribute_into_key(document_id, attribute);
self.documents_fields_counts.put(writer, key, &rkv::Value::U64(value))
let key = DocumentAttrKey::new(document_id, attribute);
self.documents_fields_counts.put(writer, &key, &value)
}
/// Deletes every count stored for `document_id`.
///
/// The `rkv` lines walk `document_fields_counts`, rebuild each key, delete the
/// keys afterwards and return how many were collected. The `zlmdb` lines issue
/// one `delete_range` over `SchemaAttr::min()..=SchemaAttr::max()` for that
/// document and return its `usize` result (presumably the number of deleted
/// entries — confirm).
pub fn del_all_document_fields_counts(
&self,
writer: &mut rkv::Writer,
writer: &mut zlmdb::RwTxn,
document_id: DocumentId,
) -> Result<usize, rkv::StoreError>
) -> ZResult<usize>
{
let mut keys_to_delete = Vec::new();
// WARN we can not delete the keys using the iterator
// so we store them and delete them just after
for result in self.document_fields_counts(writer, document_id)? {
let (attribute, _) = result?;
let key = document_attribute_into_key(document_id, attribute);
keys_to_delete.push(key);
}
let count = keys_to_delete.len();
for key in keys_to_delete {
self.documents_fields_counts.delete(writer, key)?;
}
Ok(count)
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
self.documents_fields_counts.delete_range(writer, start..=end)
}
/// Returns the count stored for `attribute` of `document_id`, or `None` when
/// no entry exists for that pair.
pub fn document_field_count(
&self,
reader: &impl rkv::Readable,
reader: &zlmdb::RoTxn,
document_id: DocumentId,
attribute: SchemaAttr,
) -> Result<Option<u64>, rkv::StoreError>
) -> ZResult<Option<u64>>
{
let key = document_attribute_into_key(document_id, attribute);
match self.documents_fields_counts.get(reader, key)? {
Some(rkv::Value::U64(count)) => Ok(Some(count)),
Some(value) => panic!("invalid type {:?}", value),
let key = DocumentAttrKey::new(document_id, attribute);
match self.documents_fields_counts.get(reader, &key)? {
Some(count) => Ok(Some(count)),
None => Ok(None),
}
}
/// Builds an iterator over the `(attribute, count)` entries of `document_id`.
///
/// The `zlmdb` lines bound the scan with the inclusive key range
/// `SchemaAttr::min()..=SchemaAttr::max()` for that document; the `rkv` lines
/// start at the document id prefix and pass `document_id` to the iterator.
pub fn document_fields_counts<'r, T: rkv::Readable>(
pub fn document_fields_counts<'txn>(
&self,
reader: &'r T,
reader: &'txn zlmdb::RoTxn,
document_id: DocumentId,
) -> Result<DocumentFieldsCountsIter<'r>, rkv::StoreError>
) -> ZResult<DocumentFieldsCountsIter<'txn>>
{
let document_id_bytes = document_id.0.to_be_bytes();
let iter = self.documents_fields_counts.iter_from(reader, document_id_bytes)?;
Ok(DocumentFieldsCountsIter { document_id, iter })
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
let iter = self.documents_fields_counts.range(reader, start..=end)?;
Ok(DocumentFieldsCountsIter { iter })
}
/// Builds an iterator over the document ids present in this store, starting
/// from the first entry and with no id seen yet.
pub fn documents_ids<'r, T: rkv::Readable>(
pub fn documents_ids<'txn>(
&self,
reader: &'r T,
) -> Result<DocumentsIdsIter<'r>, rkv::StoreError>
reader: &'txn zlmdb::RoTxn,
) -> ZResult<DocumentsIdsIter<'txn>>
{
let iter = self.documents_fields_counts.iter_start(reader)?;
let iter = self.documents_fields_counts.iter(reader)?;
Ok(DocumentsIdsIter { last_seen_id: None, iter })
}
/// Builds an iterator over every entry of this store, whatever the document.
pub fn all_documents_fields_counts<'r, T: rkv::Readable>(
pub fn all_documents_fields_counts<'txn>(
&self,
reader: &'r T,
) -> Result<AllDocumentsFieldsCountsIter<'r>, rkv::StoreError>
reader: &'txn zlmdb::RoTxn,
) -> ZResult<AllDocumentsFieldsCountsIter<'txn>>
{
let iter = self.documents_fields_counts.iter_start(reader)?;
let iter = self.documents_fields_counts.iter(reader)?;
Ok(AllDocumentsFieldsCountsIter { iter })
}
}
/// Iterator state built by `DocumentsFieldsCounts::document_fields_counts`.
// NOTE(review): two headers are listed — a `<'r>` one carrying `document_id`
// plus an `rkv` iterator, and a `<'txn>` one carrying only a `zlmdb::RoRange`;
// presumably only the `<'txn>` form is meant to remain — confirm.
pub struct DocumentFieldsCountsIter<'r> {
document_id: DocumentId,
iter: rkv::store::single::Iter<'r>,
pub struct DocumentFieldsCountsIter<'txn> {
iter: zlmdb::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}
/// Yields `(attribute, count)` pairs and forwards store errors.
///
/// The `rkv` lines decode the raw key and stop once the decoded document id
/// differs from `self.document_id`; the `zlmdb` lines read the attribute
/// straight from the typed key.
impl Iterator for DocumentFieldsCountsIter<'_> {
type Item = Result<(SchemaAttr, u64), rkv::StoreError>;
type Item = ZResult<(SchemaAttr, u64)>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, Some(rkv::Value::U64(count))))) => {
let array = TryFrom::try_from(key).unwrap();
let (current_document_id, attr) = document_attribute_from_key(array);
if current_document_id != self.document_id { return None; }
Some(Ok((key, count))) => {
let attr = SchemaAttr(key.attr.get());
Some(Ok((attr, count)))
},
Some(Ok((key, data))) => panic!("{:?}, {:?}", key, data),
Some(Err(e)) => Some(Err(e)),
Some(Err(e)) => Some(Err(e.into())),
None => None,
}
}
}
/// Iterator state built by `DocumentsFieldsCounts::documents_ids`;
/// `last_seen_id` remembers the most recently yielded document id.
// NOTE(review): the header and the `iter` field are each listed twice (`rkv`
// and `zlmdb` forms); presumably only the `zlmdb` form is meant to remain —
// confirm.
pub struct DocumentsIdsIter<'r> {
pub struct DocumentsIdsIter<'txn> {
last_seen_id: Option<DocumentId>,
iter: rkv::store::single::Iter<'r>,
iter: zlmdb::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}
/// Yields a document id each time it differs from the previously yielded one.
///
/// Entries whose id equals `last_seen_id` are skipped, so an id is reported
/// once per run of adjacent entries (presumably entries of one document are
/// adjacent thanks to key ordering — confirm). Store errors are forwarded.
impl Iterator for DocumentsIdsIter<'_> {
type Item = Result<DocumentId, rkv::StoreError>;
type Item = ZResult<DocumentId>;
fn next(&mut self) -> Option<Self::Item> {
for result in &mut self.iter {
match result {
Ok((key, _)) => {
let array = TryFrom::try_from(key).unwrap();
let (document_id, _) = document_attribute_from_key(array);
let document_id = DocumentId(key.docid.get());
if Some(document_id) != self.last_seen_id {
self.last_seen_id = Some(document_id);
return Some(Ok(document_id))
}
},
Err(e) => return Some(Err(e)),
Err(e) => return Some(Err(e.into())),
}
}
None
}
}
/// Iterator state built by `DocumentsFieldsCounts::all_documents_fields_counts`.
// NOTE(review): two headers are listed (`<'r>` with an `rkv` iterator and
// `<'txn>` with a `zlmdb::RoIter`); presumably only the `<'txn>` form is meant
// to remain — confirm.
pub struct AllDocumentsFieldsCountsIter<'r> {
iter: rkv::store::single::Iter<'r>,
pub struct AllDocumentsFieldsCountsIter<'txn> {
iter: zlmdb::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}
impl<'r> Iterator for AllDocumentsFieldsCountsIter<'r> {
type Item = Result<(DocumentId, SchemaAttr, u64), rkv::StoreError>;
type Item = ZResult<(DocumentId, SchemaAttr, u64)>;
/// Yields the next `(document id, attribute, count)` triple and forwards
/// store errors.
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, Some(rkv::Value::U64(count))))) => {
let array = TryFrom::try_from(key).unwrap();
let (document_id, attr) = document_attribute_from_key(array);
Some(Ok((document_id, attr, count)))
Some(Ok((key, count))) => {
let docid = DocumentId(key.docid.get());
let attr = SchemaAttr(key.attr.get());
Some(Ok((docid, attr, count)))
},
Some(Ok((key, data))) => panic!("{:?}, {:?}", key, data),
Some(Err(e)) => Some(Err(e)),
Some(Err(e)) => Some(Err(e.into())),
None => None,
}
}

View file

@ -1,9 +1,8 @@
use std::sync::Arc;
use std::convert::TryInto;
use meilidb_schema::Schema;
use rkv::Value;
use crate::{RankedMap, MResult};
use zlmdb::types::{Str, OwnedType, ByteSlice, Serde};
use zlmdb::Result as ZResult;
use crate::RankedMap;
const CUSTOMS_KEY: &str = "customs-key";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
@ -14,155 +13,80 @@ const WORDS_KEY: &str = "words";
/// Handle on the store holding index-wide values under fixed string keys
/// (words FST, schema, ranked map, synonyms FST, number of documents, customs).
// NOTE(review): the field is listed twice, once typed `rkv::SingleStore` and
// once typed `zlmdb::DynDatabase`; presumably only the `zlmdb` one is meant to
// remain — confirm.
#[derive(Copy, Clone)]
pub struct Main {
pub(crate) main: rkv::SingleStore,
pub(crate) main: zlmdb::DynDatabase,
}
impl Main {
/// Stores the raw bytes of the words `fst` under `WORDS_KEY`.
pub fn put_words_fst(
&self,
writer: &mut rkv::Writer,
fst: &fst::Set,
) -> Result<(), rkv::StoreError>
{
let blob = rkv::Value::Blob(fst.as_fst().as_bytes());
self.main.put(writer, WORDS_KEY, &blob)
pub fn put_words_fst(&self, writer: &mut zlmdb::RwTxn, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes();
self.main.put::<Str, ByteSlice>(writer, WORDS_KEY, bytes)
}
/// Rebuilds the words `fst::Set` from the bytes stored under `WORDS_KEY`, or
/// yields `Ok(None)` when nothing is stored there.
pub fn words_fst(
&self,
reader: &impl rkv::Readable,
) -> MResult<Option<fst::Set>>
{
match self.main.get(reader, WORDS_KEY)? {
Some(Value::Blob(bytes)) => {
pub fn words_fst(&self, reader: &zlmdb::RoTxn) -> ZResult<Option<fst::Set>> {
match self.main.get::<Str, ByteSlice>(reader, WORDS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let bytes = Arc::from(bytes);
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len)?;
// NOTE(review): this variant panics whenever `from_shared_bytes` returns an
// error, where the `?` variant just above propagates it — confirm intended.
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
Ok(Some(fst::Set::from(fst)))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}
/// Stores `schema` under `SCHEMA_KEY`.
///
/// The `rkv` lines serialize it with `bincode::serialize` first; the `zlmdb`
/// lines hand it to the `Serde<Schema>` codec.
pub fn put_schema(
&self,
writer: &mut rkv::Writer,
schema: &Schema,
) -> MResult<()>
{
let bytes = bincode::serialize(schema)?;
let blob = Value::Blob(&bytes[..]);
self.main.put(writer, SCHEMA_KEY, &blob)?;
Ok(())
pub fn put_schema(&self, writer: &mut zlmdb::RwTxn, schema: &Schema) -> ZResult<()> {
self.main.put::<Str, Serde<Schema>>(writer, SCHEMA_KEY, schema)
}
/// Loads the `Schema` stored under `SCHEMA_KEY`, or `None` when absent.
pub fn schema(
&self,
reader: &impl rkv::Readable,
) -> MResult<Option<Schema>>
{
match self.main.get(reader, SCHEMA_KEY)? {
Some(Value::Blob(bytes)) => {
let schema = bincode::deserialize_from(bytes)?;
Ok(Some(schema))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
pub fn schema(&self, reader: &zlmdb::RoTxn) -> ZResult<Option<Schema>> {
self.main.get::<Str, Serde<Schema>>(reader, SCHEMA_KEY)
}
/// Stores `ranked_map` under `RANKED_MAP_KEY`.
///
/// The `rkv` lines write it into a byte buffer with `write_to_bin` first; the
/// `zlmdb` lines hand it to the `Serde<RankedMap>` codec.
pub fn put_ranked_map(
&self,
writer: &mut rkv::Writer,
ranked_map: &RankedMap,
) -> MResult<()>
{
let mut bytes = Vec::new();
ranked_map.write_to_bin(&mut bytes)?;
let blob = Value::Blob(&bytes[..]);
self.main.put(writer, RANKED_MAP_KEY, &blob)?;
Ok(())
pub fn put_ranked_map(&self, writer: &mut zlmdb::RwTxn, ranked_map: &RankedMap) -> ZResult<()> {
// NOTE(review): `ranked_map` is already a reference, so `&ranked_map` is a
// reference to a reference; plain `ranked_map` looks sufficient — confirm.
self.main.put::<Str, Serde<RankedMap>>(writer, RANKED_MAP_KEY, &ranked_map)
}
/// Loads the `RankedMap` stored under `RANKED_MAP_KEY`, or `None` when absent.
pub fn ranked_map(
&self,
reader: &impl rkv::Readable,
) -> MResult<Option<RankedMap>>
{
match self.main.get(reader, RANKED_MAP_KEY)? {
Some(Value::Blob(bytes)) => {
let ranked_map = RankedMap::read_from_bin(bytes)?;
Ok(Some(ranked_map))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
pub fn ranked_map(&self, reader: &zlmdb::RoTxn) -> ZResult<Option<RankedMap>> {
self.main.get::<Str, Serde<RankedMap>>(reader, RANKED_MAP_KEY)
}
/// Stores the raw bytes of the synonyms `fst` under `SYNONYMS_KEY`.
pub fn put_synonyms_fst(
&self,
writer: &mut rkv::Writer,
fst: &fst::Set,
) -> MResult<()>
{
let blob = rkv::Value::Blob(fst.as_fst().as_bytes());
Ok(self.main.put(writer, SYNONYMS_KEY, &blob)?)
pub fn put_synonyms_fst(&self, writer: &mut zlmdb::RwTxn, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes();
self.main.put::<Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)
}
/// Rebuilds the synonyms `fst::Set` from the bytes stored under
/// `SYNONYMS_KEY`, or yields `Ok(None)` when nothing is stored there.
pub fn synonyms_fst(
&self,
reader: &impl rkv::Readable,
) -> MResult<Option<fst::Set>>
{
match self.main.get(reader, SYNONYMS_KEY)? {
Some(Value::Blob(bytes)) => {
pub fn synonyms_fst(&self, reader: &zlmdb::RoTxn) -> ZResult<Option<fst::Set>> {
match self.main.get::<Str, ByteSlice>(reader, SYNONYMS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let bytes = Arc::from(bytes);
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len)?;
// NOTE(review): this variant panics whenever `from_shared_bytes` returns an
// error, where the `?` variant just above propagates it — confirm intended.
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
Ok(Some(fst::Set::from(fst)))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}
/// Reads the current number of documents, applies `f` to it, stores the
/// result under `NUMBER_OF_DOCUMENTS_KEY` and returns the new value.
pub fn put_number_of_documents<F: Fn(u64) -> u64>(
&self,
writer: &mut rkv::Writer,
f: F,
) -> Result<u64, rkv::StoreError>
pub fn put_number_of_documents<F>(&self, writer: &mut zlmdb::RwTxn, f: F) -> ZResult<u64>
where F: Fn(u64) -> u64,
{
let new = self.number_of_documents(writer).map(f)?;
self.main.put(writer, NUMBER_OF_DOCUMENTS_KEY, &Value::Blob(&new.to_be_bytes()))?;
self.main.put::<Str, OwnedType<u64>>(writer, NUMBER_OF_DOCUMENTS_KEY, &new)?;
Ok(new)
}
/// Returns the value stored under `NUMBER_OF_DOCUMENTS_KEY`, or `0` when
/// nothing has been stored yet.
pub fn number_of_documents(
&self,
reader: &impl rkv::Readable,
) -> Result<u64, rkv::StoreError>
{
match self.main.get(reader, NUMBER_OF_DOCUMENTS_KEY)? {
Some(Value::Blob(bytes)) => {
let array = bytes.try_into().unwrap();
Ok(u64::from_be_bytes(array))
},
Some(value) => panic!("invalid type {:?}", value),
// NOTE(review): this signature takes `&zlmdb::RwTxn` although the body only
// reads, while the other `zlmdb` getters of this impl take `&zlmdb::RoTxn`;
// presumably it should accept a read transaction too — confirm.
pub fn number_of_documents(&self, reader: &zlmdb::RwTxn) -> ZResult<u64> {
match self.main.get::<Str, OwnedType<u64>>(reader, NUMBER_OF_DOCUMENTS_KEY)? {
Some(value) => Ok(value),
None => Ok(0),
}
}
/// Stores the `customs` bytes under `CUSTOMS_KEY`.
pub fn put_customs(&self, writer: &mut rkv::Writer, customs: &[u8]) -> MResult<()> {
self.main.put(writer, CUSTOMS_KEY, &Value::Blob(customs))?;
Ok(())
pub fn put_customs(&self, writer: &mut zlmdb::RwTxn, customs: &[u8]) -> ZResult<()> {
self.main.put::<Str, ByteSlice>(writer, CUSTOMS_KEY, customs)
}
/// Returns the bytes stored under `CUSTOMS_KEY`, borrowed for the lifetime of
/// `reader`, or `None` when absent.
pub fn customs<'t>(&self, reader: &'t impl rkv::Readable) -> MResult<Option<&'t [u8]>> {
match self.main.get(reader, CUSTOMS_KEY)? {
Some(Value::Blob(bytes)) => Ok(Some(bytes)),
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
pub fn customs<'txn>(&self, reader: &'txn zlmdb::RoTxn) -> ZResult<Option<&'txn [u8]>> {
self.main.get::<Str, ByteSlice>(reader, CUSTOMS_KEY)
}
}

View file

@ -17,42 +17,28 @@ pub use self::updates::Updates;
pub use self::updates_results::UpdatesResults;
use std::collections::HashSet;
use std::convert::TryFrom;
use meilidb_schema::{Schema, SchemaAttr};
use serde::de;
use zerocopy::{AsBytes, FromBytes};
use zlmdb::Result as ZResult;
use crate::criterion::Criteria;
use crate::serde::Deserializer;
use crate::{update, query_builder::QueryBuilder, DocumentId, MResult, Error};
/// Reports whether the first byte of `bytes` sits at an address that is a
/// multiple of `align`.
///
/// # Panics
///
/// Panics when `align` is zero, because of the remainder by zero.
fn aligned_to(bytes: &[u8], align: usize) -> bool {
    let address = bytes.as_ptr() as usize;
    address % align == 0
}
/// Big-endian `u64` from `zerocopy`.
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
/// Big-endian `u16` from `zerocopy`.
type BEU16 = zerocopy::U16<byteorder::BigEndian>;
// Key helpers for per-document, per-attribute entries. Two forms are listed
// here line by line: free functions working on a `[u8; 10]` array (8
// big-endian bytes of the document id followed by 2 big-endian bytes of the
// attribute) and the `DocumentAttrKey` struct with its `new` constructor,
// which stores the same two values as `BEU64` and `BEU16`.
// NOTE(review): presumably the struct form replaces the array helpers — confirm.
fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -> [u8; 10] {
let document_id_bytes = document_id.0.to_be_bytes();
let attr_bytes = attribute.0.to_be_bytes();
#[derive(Debug, Copy, Clone)]
#[derive(AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentAttrKey { docid: BEU64, attr: BEU16 }
let mut key = [0u8; 10];
key[0..8].copy_from_slice(&document_id_bytes);
key[8..10].copy_from_slice(&attr_bytes);
key
}
// Inverse of `document_attribute_into_key`: splits the array back into the
// document id (bytes 0..8) and the attribute (bytes 8..10).
fn document_attribute_from_key(key: [u8; 10]) -> (DocumentId, SchemaAttr) {
let document_id = {
let array = TryFrom::try_from(&key[0..8]).unwrap();
DocumentId(u64::from_be_bytes(array))
};
let schema_attr = {
let array = TryFrom::try_from(&key[8..8+2]).unwrap();
SchemaAttr(u16::from_be_bytes(array))
};
(document_id, schema_attr)
impl DocumentAttrKey {
// Wraps the inner integers of `docid` and `attr` into their big-endian types.
fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey {
DocumentAttrKey { docid: BEU64::new(docid.0), attr: BEU16::new(attr.0) }
}
}
fn main_name(name: &str) -> String {
@ -102,9 +88,9 @@ pub struct Index {
}
impl Index {
pub fn document<R: rkv::Readable, T: de::DeserializeOwned>(
pub fn document<T: de::DeserializeOwned>(
&self,
reader: &R,
reader: &zlmdb::RoTxn,
attributes: Option<&HashSet<&str>>,
document_id: DocumentId,
) -> MResult<Option<T>>
@ -130,9 +116,9 @@ impl Index {
Ok(T::deserialize(&mut deserializer).map(Some)?)
}
pub fn document_attribute<T: de::DeserializeOwned, R: rkv::Readable>(
pub fn document_attribute<T: de::DeserializeOwned>(
&self,
reader: &R,
reader: &zlmdb::RoTxn,
document_id: DocumentId,
attribute: SchemaAttr,
) -> MResult<Option<T>>
@ -144,12 +130,12 @@ impl Index {
}
}
/// Signals `updates_notifier` (ignoring the outcome of the send) and forwards
/// `schema` to `update::push_schema_update`, returning its `u64` result
/// (presumably the id of the queued update — confirm).
pub fn schema_update(&self, writer: &mut rkv::Writer, schema: Schema) -> MResult<u64> {
pub fn schema_update(&self, writer: &mut zlmdb::RwTxn, schema: Schema) -> MResult<u64> {
let _ = self.updates_notifier.send(());
update::push_schema_update(writer, self.updates, self.updates_results, schema)
}
/// Signals `updates_notifier` (ignoring the outcome of the send) and forwards
/// `customs` to `update::push_customs_update`, returning its `u64` result
/// (presumably the id of the queued update — confirm).
pub fn customs_update(&self, writer: &mut rkv::Writer, customs: Vec<u8>) -> MResult<u64> {
pub fn customs_update(&self, writer: &mut zlmdb::RwTxn, customs: Vec<u8>) -> ZResult<u64> {
let _ = self.updates_notifier.send(());
update::push_customs_update(writer, self.updates, self.updates_results, customs)
}
@ -186,16 +172,16 @@ impl Index {
)
}
/// Returns the id reported by `Updates::last_update_id`, dropping the data
/// that comes with it, or `None` when there is no update.
pub fn current_update_id<T: rkv::Readable>(&self, reader: &T) -> MResult<Option<u64>> {
pub fn current_update_id(&self, reader: &zlmdb::RoTxn) -> MResult<Option<u64>> {
match self.updates.last_update_id(reader)? {
Some((id, _)) => Ok(Some(id)),
None => Ok(None),
}
}
pub fn update_status<T: rkv::Readable>(
pub fn update_status(
&self,
reader: &T,
reader: &zlmdb::RoTxn,
update_id: u64,
) -> MResult<update::UpdateStatus>
{
@ -228,31 +214,10 @@ impl Index {
}
pub fn create(
env: &rkv::Rkv,
env: &zlmdb::Env,
name: &str,
updates_notifier: crossbeam_channel::Sender<()>,
) -> Result<Index, rkv::StoreError>
{
open_options(env, name, rkv::StoreOptions::create(), updates_notifier)
}
pub fn open(
env: &rkv::Rkv,
name: &str,
updates_notifier: crossbeam_channel::Sender<()>,
) -> Result<Index, rkv::StoreError>
{
let mut options = rkv::StoreOptions::default();
options.create = false;
open_options(env, name, options, updates_notifier)
}
fn open_options(
env: &rkv::Rkv,
name: &str,
options: rkv::StoreOptions,
updates_notifier: crossbeam_channel::Sender<()>,
) -> Result<Index, rkv::StoreError>
) -> MResult<Index>
{
// create all the store names
let main_name = main_name(name);
@ -265,14 +230,14 @@ fn open_options(
let updates_results_name = updates_results_name(name);
// open all the stores
let main = env.open_single(main_name.as_str(), options)?;
let postings_lists = env.open_single(postings_lists_name.as_str(), options)?;
let documents_fields = env.open_single(documents_fields_name.as_str(), options)?;
let documents_fields_counts = env.open_single(documents_fields_counts_name.as_str(), options)?;
let synonyms = env.open_single(synonyms_name.as_str(), options)?;
let docs_words = env.open_single(docs_words_name.as_str(), options)?;
let updates = env.open_single(updates_name.as_str(), options)?;
let updates_results = env.open_single(updates_results_name.as_str(), options)?;
let main = env.create_dyn_database(Some(&main_name))?;
let postings_lists = env.create_database(Some(&postings_lists_name))?;
let documents_fields = env.create_database(Some(&documents_fields_name))?;
let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
let synonyms = env.create_database(Some(&synonyms_name))?;
let docs_words = env.create_database(Some(&docs_words_name))?;
let updates = env.create_database(Some(&updates_name))?;
let updates_results = env.create_database(Some(&updates_results_name))?;
Ok(Index {
main: Main { main },
@ -286,3 +251,66 @@ fn open_options(
updates_notifier,
})
}
/// Looks up every store of the index called `name` in `env`.
///
/// Yields `Ok(None)` as soon as one of the eight lookups reports a missing
/// database, `Ok(Some(index))` when all of them are found, and forwards any
/// error raised by a lookup.
pub fn open(
    env: &zlmdb::Env,
    name: &str,
    updates_notifier: crossbeam_channel::Sender<()>,
) -> MResult<Option<Index>> {
    // Unwraps the `Option` produced by a lookup; a missing database ends
    // `open` early with `Ok(None)`.
    macro_rules! found_or_none {
        ($lookup:expr) => {
            match $lookup {
                Some(database) => database,
                None => return Ok(None),
            }
        };
    }

    // derive the name of each store from the index name
    let main_name = main_name(name);
    let postings_lists_name = postings_lists_name(name);
    let documents_fields_name = documents_fields_name(name);
    let documents_fields_counts_name = documents_fields_counts_name(name);
    let synonyms_name = synonyms_name(name);
    let docs_words_name = docs_words_name(name);
    let updates_name = updates_name(name);
    let updates_results_name = updates_results_name(name);

    // look the stores up one after the other
    let main = found_or_none!(env.open_dyn_database(Some(&main_name))?);
    let postings_lists = found_or_none!(env.open_database(Some(&postings_lists_name))?);
    let documents_fields = found_or_none!(env.open_database(Some(&documents_fields_name))?);
    let documents_fields_counts =
        found_or_none!(env.open_database(Some(&documents_fields_counts_name))?);
    let synonyms = found_or_none!(env.open_database(Some(&synonyms_name))?);
    let docs_words = found_or_none!(env.open_database(Some(&docs_words_name))?);
    let updates = found_or_none!(env.open_database(Some(&updates_name))?);
    let updates_results = found_or_none!(env.open_database(Some(&updates_results_name))?);

    let index = Index {
        main: Main { main },
        postings_lists: PostingsLists { postings_lists },
        documents_fields: DocumentsFields { documents_fields },
        documents_fields_counts: DocumentsFieldsCounts { documents_fields_counts },
        synonyms: Synonyms { synonyms },
        docs_words: DocsWords { docs_words },
        updates: Updates { updates },
        updates_results: UpdatesResults { updates_results },
        updates_notifier,
    };
    Ok(Some(index))
}

View file

@ -1,81 +1,39 @@
use std::borrow::Cow;
use std::{mem, ptr};
use zerocopy::{AsBytes, LayoutVerified};
use rkv::StoreError;
use sdset::{Set, SetBuf};
use zlmdb::types::{ByteSlice, CowSlice};
use zlmdb::Result as ZResult;
use crate::DocIndex;
use crate::store::aligned_to;
/// Handle on the store that associates a word (as bytes) with its list of
/// `DocIndex` entries.
// NOTE(review): the field is listed twice, once typed `rkv::SingleStore` and
// once typed `zlmdb::Database<..>`; presumably only the `zlmdb` one is meant
// to remain — confirm.
#[derive(Copy, Clone)]
pub struct PostingsLists {
pub(crate) postings_lists: rkv::SingleStore,
pub(crate) postings_lists: zlmdb::Database<ByteSlice, CowSlice<DocIndex>>,
}
impl PostingsLists {
/// Stores `words_indexes` as the postings list of `word`.
///
/// The `rkv` lines take a plain `&[DocIndex]` and store its bytes; the `zlmdb`
/// lines take a `&Set<DocIndex>` and pass it to the database directly.
pub fn put_postings_list(
&self,
writer: &mut rkv::Writer,
writer: &mut zlmdb::RwTxn,
word: &[u8],
words_indexes: &[DocIndex],
) -> Result<(), rkv::StoreError>
words_indexes: &Set<DocIndex>,
) -> ZResult<()>
{
let blob = rkv::Value::Blob(words_indexes.as_bytes());
self.postings_lists.put(writer, word, &blob)
self.postings_lists.put(writer, word, words_indexes)
}
/// Removes the postings list stored for `word`.
///
/// The `rkv` lines translate a `NotFound` error into `Ok(false)` and a
/// successful delete into `Ok(true)`; the `zlmdb` lines return the outcome of
/// `delete` unchanged.
pub fn del_postings_list(
&self,
writer: &mut rkv::Writer,
word: &[u8],
) -> Result<bool, rkv::StoreError>
{
match self.postings_lists.delete(writer, word) {
Ok(()) => Ok(true),
Err(StoreError::LmdbError(lmdb::Error::NotFound)) => Ok(false),
Err(e) => Err(e),
}
pub fn del_postings_list(&self, writer: &mut zlmdb::RwTxn, word: &[u8]) -> ZResult<bool> {
self.postings_lists.delete(writer, word)
}
/// Returns the postings list stored for `word`, borrowed when possible and
/// owned otherwise, or `None` when the word has no entry.
///
/// The `rkv` lines reinterpret the stored bytes with `LayoutVerified` and fall
/// back to copying them into a `Vec<DocIndex>` when only the alignment is
/// wrong; the `zlmdb` lines receive a `Cow` from the database and wrap either
/// side in a set type.
pub fn postings_list<'a>(
pub fn postings_list<'txn>(
&self,
reader: &'a impl rkv::Readable,
reader: &'txn zlmdb::RoTxn,
word: &[u8],
) -> Result<Option<Cow<'a, sdset::Set<DocIndex>>>, rkv::StoreError>
) -> ZResult<Option<Cow<'txn, Set<DocIndex>>>>
{
let bytes = match self.postings_lists.get(reader, word)? {
Some(rkv::Value::Blob(bytes)) => bytes,
Some(value) => panic!("invalid type {:?}", value),
None => return Ok(None),
};
match LayoutVerified::new_slice(bytes) {
Some(layout) => {
let set = sdset::Set::new(layout.into_slice()).unwrap();
Ok(Some(Cow::Borrowed(set)))
},
None => {
let len = bytes.len();
let elem_size = mem::size_of::<DocIndex>();
// ensure that it is the alignment that is wrong
// and the length is valid
if len % elem_size == 0 && !aligned_to(bytes, mem::align_of::<DocIndex>()) {
let elems = len / elem_size;
let mut vec = Vec::<DocIndex>::with_capacity(elems);
unsafe {
let dst = vec.as_mut_ptr() as *mut u8;
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, len);
vec.set_len(elems);
}
let setbuf = sdset::SetBuf::new(vec).unwrap();
return Ok(Some(Cow::Owned(setbuf)))
}
Ok(None)
},
// NOTE(review): the `new_unchecked` constructors below replace the
// `Set::new(..).unwrap()` / `SetBuf::new(..).unwrap()` calls above, so
// presumably the stored slice is trusted to already satisfy the set
// invariants — confirm.
match self.postings_lists.get(reader, word)? {
Some(Cow::Borrowed(slice)) => Ok(Some(Cow::Borrowed(Set::new_unchecked(slice)))),
Some(Cow::Owned(vec)) => Ok(Some(Cow::Owned(SetBuf::new_unchecked(vec)))),
None => Ok(None),
}
}
}

View file

@ -1,51 +1,36 @@
use std::sync::Arc;
use rkv::StoreError;
use crate::error::MResult;
use zlmdb::types::ByteSlice;
use zlmdb::Result as ZResult;
/// Handle on the store that associates a word (as bytes) with the FST bytes of
/// its synonyms set.
// NOTE(review): the field is listed twice, once typed `rkv::SingleStore` and
// once typed `zlmdb::Database<..>`; presumably only the `zlmdb` one is meant
// to remain — confirm.
#[derive(Copy, Clone)]
pub struct Synonyms {
pub(crate) synonyms: rkv::SingleStore,
pub(crate) synonyms: zlmdb::Database<ByteSlice, ByteSlice>,
}
impl Synonyms {
/// Stores the raw FST bytes of `synonyms` as the entry of `word`.
pub fn put_synonyms(
&self,
writer: &mut rkv::Writer,
writer: &mut zlmdb::RwTxn,
word: &[u8],
synonyms: &fst::Set,
) -> Result<(), rkv::StoreError>
) -> ZResult<()>
{
let blob = rkv::Value::Blob(synonyms.as_fst().as_bytes());
self.synonyms.put(writer, word, &blob)
let bytes = synonyms.as_fst().as_bytes();
self.synonyms.put(writer, word, bytes)
}
/// Removes the synonyms entry stored for `word`.
///
/// The `rkv` lines translate a `NotFound` error into `Ok(false)` and a
/// successful delete into `Ok(true)`; the `zlmdb` lines return the outcome of
/// `delete` unchanged.
pub fn del_synonyms(
&self,
writer: &mut rkv::Writer,
word: &[u8],
) -> Result<bool, rkv::StoreError>
{
match self.synonyms.delete(writer, word) {
Ok(()) => Ok(true),
Err(StoreError::LmdbError(lmdb::Error::NotFound)) => Ok(false),
Err(e) => Err(e),
}
pub fn del_synonyms(&self, writer: &mut zlmdb::RwTxn, word: &[u8]) -> ZResult<bool> {
self.synonyms.delete(writer, word)
}
/// Rebuilds the synonyms `fst::Set` stored for `word`, or yields `Ok(None)`
/// when the word has no entry.
pub fn synonyms(
&self,
reader: &impl rkv::Readable,
word: &[u8],
) -> MResult<Option<fst::Set>>
{
pub fn synonyms(&self, reader: &zlmdb::RoTxn, word: &[u8]) -> ZResult<Option<fst::Set>> {
match self.synonyms.get(reader, word)? {
Some(rkv::Value::Blob(bytes)) => {
Some(bytes) => {
let len = bytes.len();
let bytes = Arc::from(bytes);
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len)?;
// NOTE(review): this variant panics whenever `from_shared_bytes` returns an
// error, where the `?` variant just above propagates it — confirm intended.
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
Ok(Some(fst::Set::from(fst)))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}

View file

@ -1,100 +1,56 @@
use std::convert::TryInto;
use rkv::Value;
use crate::{update::Update, MResult};
use zlmdb::types::{OwnedType, Serde};
use zlmdb::Result as ZResult;
use crate::update::Update;
use super::BEU64;
/// Handle on the store of pending `Update` values, keyed by the big-endian
/// encoding of their `u64` update id.
// NOTE(review): the field is listed twice, once typed `rkv::SingleStore` and
// once typed `zlmdb::Database<..>`; presumably only the `zlmdb` one is meant
// to remain — confirm.
#[derive(Copy, Clone)]
pub struct Updates {
pub(crate) updates: rkv::SingleStore,
pub(crate) updates: zlmdb::Database<OwnedType<BEU64>, Serde<Update>>,
}
impl Updates {
// Returns the id and the data of the last entry of the updates store, or
// `None` when the store is empty. The `rkv` lines walk the whole store and
// keep the final entry; the `zlmdb` lines ask the database for `last`.
// TODO we should use the MDB_LAST op but
// it is not exposed by the rkv library
pub fn last_update_id<'a>(
&self,
reader: &'a impl rkv::Readable,
) -> Result<Option<(u64, Option<Value<'a>>)>, rkv::StoreError>
{
let mut last = None;
let iter = self.updates.iter_start(reader)?;
for result in iter {
let (key, data) = result?;
last = Some((key, data));
// TODO do not trigger deserialize if possible
pub fn last_update_id(&self, reader: &zlmdb::RoTxn) -> ZResult<Option<(u64, Update)>> {
match self.updates.last(reader)? {
Some((key, data)) => Ok(Some((key.get(), data))),
None => Ok(None),
}
let (last_key, last_data) = match last {
Some(entry) => entry,
None => return Ok(None),
};
let array = last_key.try_into().unwrap();
let number = u64::from_be_bytes(array);
Ok(Some((number, last_data)))
}
/// Returns the id and the data of the first entry of the updates store, or
/// `None` when the store is empty.
fn first_update_id<'a>(
&self,
reader: &'a impl rkv::Readable,
) -> Result<Option<(u64, Option<Value<'a>>)>, rkv::StoreError>
{
let mut iter = self.updates.iter_start(reader)?;
let (first_key, first_data) = match iter.next() {
Some(result) => result?,
None => return Ok(None),
};
let array = first_key.try_into().unwrap();
let number = u64::from_be_bytes(array);
Ok(Some((number, first_data)))
// TODO do not trigger deserialize if possible
fn first_update_id(&self, reader: &zlmdb::RoTxn) -> ZResult<Option<(u64, Update)>> {
match self.updates.first(reader)? {
Some((key, data)) => Ok(Some((key.get(), data))),
None => Ok(None),
}
}
pub fn contains(
&self,
reader: &impl rkv::Readable,
update_id: u64,
) -> Result<bool, rkv::StoreError>
{
let update_id_bytes = update_id.to_be_bytes();
self.updates.get(reader, update_id_bytes).map(|v| v.is_some())
// TODO do not trigger deserialize if possible
pub fn contains(&self, reader: &zlmdb::RoTxn, update_id: u64) -> ZResult<bool> {
let update_id = BEU64::new(update_id);
self.updates.get(reader, &update_id).map(|v| v.is_some())
}
pub fn put_update(
&self,
writer: &mut rkv::Writer,
writer: &mut zlmdb::RwTxn,
update_id: u64,
update: &Update,
) -> MResult<()>
) -> ZResult<()>
{
let update_id_bytes = update_id.to_be_bytes();
let update = serde_json::to_vec(&update)?;
let blob = Value::Blob(&update);
self.updates.put(writer, update_id_bytes, &blob)?;
Ok(())
// TODO prefer using serde_json?
let update_id = BEU64::new(update_id);
self.updates.put(writer, &update_id, update)
}
pub fn pop_front(
&self,
writer: &mut rkv::Writer,
) -> MResult<Option<(u64, Update)>>
{
let (first_id, first_data) = match self.first_update_id(writer)? {
Some(entry) => entry,
None => return Ok(None),
};
match first_data {
Some(Value::Blob(bytes)) => {
let update = serde_json::from_slice(&bytes)?;
// remove it from the database now
let first_id_bytes = first_id.to_be_bytes();
self.updates.delete(writer, first_id_bytes)?;
Ok(Some((first_id, update)))
pub fn pop_front(&self, writer: &mut zlmdb::RwTxn) -> ZResult<Option<(u64, Update)>> {
match self.first_update_id(writer)? {
Some((update_id, update)) => {
let key = BEU64::new(update_id);
self.updates.delete(writer, &key)?;
Ok(Some((update_id, update)))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
None => Ok(None)
}
}
}

View file

@ -1,67 +1,39 @@
use std::convert::TryInto;
use rkv::Value;
use crate::{update::UpdateResult, MResult};
use zlmdb::types::{OwnedType, Serde};
use zlmdb::Result as ZResult;
use crate::update::UpdateResult;
use super::BEU64;
// NOTE(review): this span is a diff rendering without +/- markers; the two
// `updates_results` field lines look like the pre-change (rkv) and post-change
// (zlmdb) declarations of the same field -- confirm against the commit.
/// Wrapper around the database that stores `UpdateResult` values keyed by
/// update id.
#[derive(Copy, Clone)]
pub struct UpdatesResults {
pub(crate) updates_results: rkv::SingleStore,
// Typed database: keys are `OwnedType<BEU64>`, values are `Serde<UpdateResult>`.
pub(crate) updates_results: zlmdb::Database<OwnedType<BEU64>, Serde<UpdateResult>>,
}
// NOTE(review): this impl is a diff rendering without +/- markers. Lines that
// mention `rkv`, `Value`, `MResult`, `bincode` or `*_bytes` appear to be the
// removed side; lines that mention `zlmdb`, `ZResult` or `BEU64` appear to be
// the added side.
impl UpdatesResults {
// TODO we should use the MDB_LAST op but
// it is not exposed by the rkv library
// rkv variant: walks the iterator from the start and keeps the last
// `(key, data)` pair seen, then decodes the key with `u64::from_be_bytes`.
pub fn last_update_id<'a>(
&self,
reader: &'a impl rkv::Readable,
) -> Result<Option<(u64, Option<Value<'a>>)>, rkv::StoreError>
{
let mut last = None;
let iter = self.updates_results.iter_start(reader)?;
for result in iter {
let (key, data) = result?;
last = Some((key, data));
/// Returns the entry produced by `self.updates_results.last(reader)` as
/// `(key.get(), data)`, or `Ok(None)` when there is no such entry.
/// Errors from the lookup are propagated with `?`.
pub fn last_update_id(&self, reader: &zlmdb::RoTxn) -> ZResult<Option<(u64, UpdateResult)>> {
match self.updates_results.last(reader)? {
Some((key, data)) => Ok(Some((key.get(), data))),
None => Ok(None),
}
let (last_key, last_data) = match last {
Some(entry) => entry,
None => return Ok(None),
};
let array = last_key.try_into().unwrap();
let number = u64::from_be_bytes(array);
Ok(Some((number, last_data)))
}
/// Stores `update_result` under `update_id`.
// The rkv lines below serialize with `bincode::serialize` and write a
// `Value::Blob`; the zlmdb lines hand `update_result` straight to `put`.
// NOTE(review): the encoding is now whatever `Serde<UpdateResult>` uses --
// confirm it can still read data written by the bincode path.
pub fn put_update_result(
&self,
writer: &mut rkv::Writer,
writer: &mut zlmdb::RwTxn,
update_id: u64,
update_result: &UpdateResult,
) -> MResult<()>
) -> ZResult<()>
{
let update_id_bytes = update_id.to_be_bytes();
let update_result = bincode::serialize(&update_result)?;
let blob = Value::Blob(&update_result);
self.updates_results.put(writer, update_id_bytes, &blob)?;
Ok(())
let update_id = BEU64::new(update_id);
self.updates_results.put(writer, &update_id, update_result)
}
/// Looks up the result stored under `update_id` and returns whatever `get`
/// yields: `Ok(Some(result))` when present, `Ok(None)` when absent.
// The rkv lines below deserialize the blob with `bincode::deserialize` and
// panic on a non-blob value; the zlmdb lines return the `get` result directly.
pub fn update_result(
&self,
reader: &impl rkv::Readable,
reader: &zlmdb::RoTxn,
update_id: u64,
) -> MResult<Option<UpdateResult>>
) -> ZResult<Option<UpdateResult>>
{
let update_id_bytes = update_id.to_be_bytes();
match self.updates_results.get(reader, update_id_bytes)? {
Some(Value::Blob(bytes)) => {
let update_result = bincode::deserialize(&bytes)?;
Ok(Some(update_result))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
let update_id = BEU64::new(update_id);
self.updates_results.get(reader, &update_id)
}
}