feat: Introduce Tree wrappers for each index component

This commit is contained in:
Clément Renault 2019-05-06 14:13:09 +02:00
parent 6eb25687f8
commit 0c18026240
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
5 changed files with 186 additions and 26 deletions

View File

@ -1,4 +1,5 @@
use std::collections::HashSet;
use std::convert::TryInto;
use std::io::{self, Cursor, BufRead};
use std::iter::FromIterator;
use std::path::Path;
@ -8,15 +9,17 @@ use std::{error, fmt};
use arc_swap::{ArcSwap, Lease};
use byteorder::{ReadBytesExt, BigEndian};
use hashbrown::HashMap;
use meilidb_core::{criterion::Criteria, QueryBuilder, DocumentId};
use meilidb_core::{criterion::Criteria, QueryBuilder, DocumentId, DocIndex};
use rmp_serde::decode::{Error as RmpError};
use sdset::SetBuf;
use serde::de;
use sled::IVec;
use zerocopy::{AsBytes, LayoutVerified};
use crate::{Schema, SchemaAttr, RankedMap};
use crate::serde::{extract_document_id, Serializer, Deserializer, SerializerError};
use crate::indexer::{Indexer, WordIndexTree};
use crate::document_attr_key::DocumentAttrKey;
pub type WordIndex = meilidb_core::Index<WordIndexTree>;
@ -27,6 +30,7 @@ pub enum Error {
WordIndexMissing,
MissingDocumentId,
SledError(sled::Error),
FstError(fst::Error),
BincodeError(bincode::Error),
SerializerError(SerializerError),
}
@ -37,6 +41,12 @@ impl From<sled::Error> for Error {
}
}
impl From<fst::Error> for Error {
fn from(error: fst::Error) -> Error {
Error::FstError(error)
}
}
impl From<bincode::Error> for Error {
fn from(error: bincode::Error) -> Error {
Error::BincodeError(error)
@ -58,6 +68,7 @@ impl fmt::Display for Error {
WordIndexMissing => write!(f, "this index does not have a word index"),
MissingDocumentId => write!(f, "document id is missing"),
SledError(e) => write!(f, "sled error; {}", e),
FstError(e) => write!(f, "fst error; {}", e),
BincodeError(e) => write!(f, "bincode error; {}", e),
SerializerError(e) => write!(f, "serializer error; {}", e),
}
@ -180,6 +191,102 @@ impl Database {
}
}
/// Groups the three tree wrappers that make up one index: the main tree,
/// the words tree and the documents tree.
// NOTE(review): nothing in the visible code constructs this type yet;
// presumably it is meant to replace `RawIndex` — confirm.
struct RawIndex2 {
/// Wrapper exposing the "schema", "words" and "ranked-map" entries.
main: MainIndex,
/// Wrapper exposing the per-word `DocIndex` sets.
words: WordsIndex,
/// Wrapper exposing the stored document fields.
documents: DocumentsIndex,
}
/// Wrapper around the sled tree that holds the index-wide values, each one
/// stored under a fixed key (`"schema"`, `"words"`, `"ranked-map"`).
struct MainIndex(Arc<sled::Tree>);

impl MainIndex {
    /// Loads the value stored under `"schema"` and decodes it with
    /// `Schema::read_from_bin`.
    ///
    /// Returns `Ok(None)` when the key is absent. Lookup and decoding
    /// failures are converted into `Error` through `?`.
    fn schema(&self) -> Result<Option<Schema>, Error> {
        let bytes = match self.0.get("schema")? {
            Some(bytes) => bytes,
            None => return Ok(None),
        };

        let schema = Schema::read_from_bin(bytes.as_ref())?;
        Ok(Some(schema))
    }

    /// Loads the value stored under `"words"` and builds an `fst::Set` on top
    /// of it.
    ///
    /// Returns `Ok(None)` when the key is absent.
    fn words_set(&self) -> Result<Option<fst::Set>, Error> {
        let bytes = match self.0.get("words")? {
            Some(bytes) => bytes,
            None => return Ok(None),
        };

        // The length must be read before `bytes` is consumed by `into()`.
        let len = bytes.len();
        let raw_fst = fst::raw::Fst::from_shared_bytes(bytes.into(), 0, len)?;
        Ok(Some(fst::Set::from(raw_fst)))
    }

    /// Loads the value stored under `"ranked-map"` and decodes it with
    /// `bincode::deserialize`.
    ///
    /// Returns `Ok(None)` when the key is absent.
    fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
        let bytes = match self.0.get("ranked-map")? {
            Some(bytes) => bytes,
            None => return Ok(None),
        };

        let ranked_map: RankedMap = bincode::deserialize(bytes.as_ref())?;
        Ok(Some(ranked_map))
    }
}
/// Wrapper around the sled tree that maps a word to its `DocIndex` entries.
struct WordsIndex(Arc<sled::Tree>);

impl WordsIndex {
    /// Fetches the `DocIndex` set stored for `word`.
    ///
    /// Returns `Ok(None)` when the word has no entry.
    ///
    /// # Panics
    ///
    /// Panics with "invalid layout" when `LayoutVerified::new_slice` rejects
    /// the stored bytes.
    fn doc_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Error> {
        let bytes = match self.0.get(word)? {
            Some(bytes) => bytes,
            None => return Ok(None),
        };

        let layout = LayoutVerified::new_slice(bytes.as_ref()).expect("invalid layout");
        // The entries are copied out of the sled buffer into an owned vector.
        // NOTE(review): `new_unchecked` presumably relies on the stored data
        // already being sorted and deduplicated — confirm against the writer.
        let doc_indexes = layout.into_slice().to_vec();
        Ok(Some(SetBuf::new_unchecked(doc_indexes)))
    }
}
/// Wrapper around the sled tree that stores document fields, keyed by the
/// big-endian encoding of a `DocumentAttrKey`.
struct DocumentsIndex(Arc<sled::Tree>);

impl DocumentsIndex {
    /// Fetches the raw value stored for attribute `attr` of document `id`.
    ///
    /// Returns `Ok(None)` when no value is stored under that key.
    fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result<Option<IVec>, Error> {
        let key = DocumentAttrKey::new(id, attr).to_be_bytes();
        let value = self.0.get(key)?;
        Ok(value)
    }

    /// Returns an iterator over the stored fields of document `id`.
    ///
    /// The scan covers the inclusive key range from
    /// `(id, SchemaAttr::min())` to `(id, SchemaAttr::max())`.
    fn document_fields(&self, id: DocumentId) -> DocumentFieldsIter {
        let first_key = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes();
        let last_key = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes();

        DocumentFieldsIter(self.0.range(first_key..=last_key))
    }
}
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
impl<'a> Iterator for DocumentFieldsIter<'a> {
type Item = Result<(SchemaAttr, IVec), Error>;
fn next(&mut self) -> Option<Self::Item> {
match self.0.next() {
Some(Ok((key, value))) => {
let slice: &[u8] = key.as_ref();
let array = slice.try_into().unwrap();
let key = DocumentAttrKey::from_be_bytes(array);
Some(Ok((key.attribute, value)))
},
Some(Err(e)) => Some(Err(Error::SledError(e))),
None => None,
}
}
}
#[derive(Clone)]
pub struct RawIndex {
schema: Schema,
@ -294,23 +401,6 @@ impl RawIndex {
}
}
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
impl<'a> Iterator for DocumentFieldsIter<'a> {
type Item = Result<(DocumentId, SchemaAttr, IVec), Error>;
fn next(&mut self) -> Option<Self::Item> {
match self.0.next() {
Some(Ok((key, value))) => {
let (id, attr) = extract_document_key(key).unwrap();
Some(Ok((id, attr, value)))
},
Some(Err(e)) => Some(Err(Error::SledError(e))),
None => None,
}
}
}
#[derive(Clone)]
pub struct Index(RawIndex);

View File

@ -0,0 +1,69 @@
use meilidb_core::DocumentId;
use crate::schema::SchemaAttr;
/// Key identifying one attribute of one document.
///
/// The key can be encoded to, and decoded from, a fixed 10-byte big-endian
/// representation: the 8 bytes of the document id followed by the 2 bytes of
/// the attribute. Because both parts are unsigned and big-endian, comparing
/// two encoded keys byte by byte gives the same order as the derived `Ord`
/// (document id first, then attribute).
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DocumentAttrKey {
    pub document_id: DocumentId,
    pub attribute: SchemaAttr,
}

impl DocumentAttrKey {
    /// Builds a key from a document id and an attribute.
    pub fn new(document_id: DocumentId, attribute: SchemaAttr) -> DocumentAttrKey {
        DocumentAttrKey { document_id, attribute }
    }

    /// Encodes the key as 10 bytes: the big-endian document id (8 bytes)
    /// followed by the big-endian attribute (2 bytes).
    pub fn to_be_bytes(self) -> [u8; 10] {
        const ID_LEN: usize = std::mem::size_of::<u64>();

        let mut output = [0u8; 10];

        // Splitting at the id length yields an 8-byte and a 2-byte slice, so
        // both `copy_from_slice` calls get exactly matching lengths.
        let (id_part, attr_part) = output.split_at_mut(ID_LEN);
        id_part.copy_from_slice(&self.document_id.0.to_be_bytes());
        attr_part.copy_from_slice(&self.attribute.0.to_be_bytes());

        output
    }

    /// Decodes a key from the 10-byte representation produced by
    /// [`DocumentAttrKey::to_be_bytes`].
    pub fn from_be_bytes(bytes: [u8; 10]) -> DocumentAttrKey {
        const ID_LEN: usize = std::mem::size_of::<u64>();
        const ATTR_LEN: usize = std::mem::size_of::<u16>();

        // The input is exactly ID_LEN + ATTR_LEN bytes long, so both slices
        // below have the length of their destination array.
        let mut document_id = [0u8; ID_LEN];
        document_id.copy_from_slice(&bytes[..ID_LEN]);

        let mut attribute = [0u8; ATTR_LEN];
        attribute.copy_from_slice(&bytes[ID_LEN..]);

        DocumentAttrKey {
            document_id: DocumentId(u64::from_be_bytes(document_id)),
            attribute: SchemaAttr(u16::from_be_bytes(attribute)),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding a key and decoding the resulting bytes must give back the
    /// same key.
    #[test]
    fn to_from_be_bytes() {
        let original = DocumentAttrKey::new(DocumentId(67578308), SchemaAttr(3456));

        let encoded = original.to_be_bytes();
        let decoded = DocumentAttrKey::from_be_bytes(encoded);

        assert_eq!(original, decoded);
    }
}

View File

@ -1,4 +1,5 @@
mod database;
mod document_attr_key;
mod indexer;
mod number;
mod ranked_map;

View File

@ -186,12 +186,16 @@ impl Schema {
pub struct SchemaAttr(pub u16);
impl SchemaAttr {
pub fn new(value: u16) -> SchemaAttr {
pub const fn new(value: u16) -> SchemaAttr {
SchemaAttr(value)
}
pub fn min() -> SchemaAttr {
SchemaAttr(0)
pub const fn min() -> SchemaAttr {
SchemaAttr(u16::min_value())
}
pub const fn max() -> SchemaAttr {
SchemaAttr(u16::max_value())
}
pub fn next(self) -> Option<SchemaAttr> {
@ -201,10 +205,6 @@ impl SchemaAttr {
pub fn prev(self) -> Option<SchemaAttr> {
self.0.checked_sub(1).map(SchemaAttr)
}
pub fn max() -> SchemaAttr {
SchemaAttr(u16::MAX)
}
}
impl fmt::Display for SchemaAttr {

View File

@ -45,7 +45,7 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
},
}
});
let iter = document_attributes.filter_map(|(_, attr, value)| {
let iter = document_attributes.filter_map(|(attr, value)| {
if self.fields.map_or(true, |f| f.contains(&attr)) {
let attribute_name = self.raw_index.schema().attribute_name(attr);
Some((attribute_name, Value::new(value)))