Make the project be a workspace

Clément Renault 2019-10-04 10:26:32 +02:00
parent 3476939b7e
commit 62a0aefe44
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
40 changed files with 49 additions and 42 deletions


@@ -0,0 +1,51 @@
use std::sync::Arc;
use rkv::Value;
use crate::{DocumentId, MResult};
#[derive(Copy, Clone)]
pub struct DocsWords {
pub(crate) docs_words: rkv::SingleStore,
}
impl DocsWords {
pub fn put_doc_words(
&self,
writer: &mut rkv::Writer,
document_id: DocumentId,
words: &fst::Set,
) -> Result<(), rkv::StoreError>
{
let document_id_bytes = document_id.0.to_be_bytes();
let bytes = words.as_fst().as_bytes();
self.docs_words.put(writer, document_id_bytes, &Value::Blob(bytes))
}
pub fn del_doc_words(
&self,
writer: &mut rkv::Writer,
document_id: DocumentId,
) -> Result<(), rkv::StoreError>
{
let document_id_bytes = document_id.0.to_be_bytes();
self.docs_words.delete(writer, document_id_bytes)
}
pub fn doc_words<T: rkv::Readable>(
&self,
reader: &T,
document_id: DocumentId,
) -> MResult<Option<fst::Set>>
{
let document_id_bytes = document_id.0.to_be_bytes();
match self.docs_words.get(reader, document_id_bytes)? {
Some(Value::Blob(bytes)) => {
let len = bytes.len();
let bytes = Arc::from(bytes);
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}
}
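
A note on the keys of this store: document ids are written with to_be_bytes(), and LMDB (which rkv wraps) compares keys lexicographically, so the on-disk order matches numeric order. A minimal standalone sketch of that property, using only std (DocumentId and rkv are left out on purpose):

// Sketch: big-endian document-id keys sort the same way numerically
// and lexicographically, which is the property the store relies on.
fn main() {
    let small = 1u64.to_be_bytes();
    let large = 256u64.to_be_bytes();
    // Byte-wise (lexicographic) comparison agrees with 1 < 256.
    assert!(small < large);
    // A little-endian encoding would not preserve that order here.
    assert!(1u64.to_le_bytes() > 256u64.to_le_bytes());
    println!("big-endian keys preserve numeric ordering");
}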


@@ -0,0 +1,117 @@
use std::convert::TryFrom;
use meilidb_schema::SchemaAttr;
use crate::DocumentId;
#[derive(Copy, Clone)]
pub struct DocumentsFields {
pub(crate) documents_fields: rkv::SingleStore,
}
fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -> [u8; 10] {
let document_id_bytes = document_id.0.to_be_bytes();
let attr_bytes = attribute.0.to_be_bytes();
let mut key = [0u8; 10];
key[0..8].copy_from_slice(&document_id_bytes);
key[8..10].copy_from_slice(&attr_bytes);
key
}
impl DocumentsFields {
pub fn put_document_field(
&self,
writer: &mut rkv::Writer,
document_id: DocumentId,
attribute: SchemaAttr,
value: &[u8],
) -> Result<(), rkv::StoreError>
{
let key = document_attribute_into_key(document_id, attribute);
self.documents_fields.put(writer, key, &rkv::Value::Blob(value))
}
pub fn del_all_document_fields(
&self,
writer: &mut rkv::Writer,
document_id: DocumentId,
) -> Result<usize, rkv::StoreError>
{
let document_id_bytes = document_id.0.to_be_bytes();
let mut keys_to_delete = Vec::new();
// WARN: we cannot delete the keys while iterating over them,
// so we collect them first and delete them afterwards
let iter = self.documents_fields.iter_from(writer, document_id_bytes)?;
for result in iter {
let (key, _) = result?;
let current_document_id = {
let bytes = key.get(0..8).unwrap();
let array = TryFrom::try_from(bytes).unwrap();
DocumentId(u64::from_be_bytes(array))
};
if current_document_id != document_id { break }
keys_to_delete.push(key.to_owned());
}
let count = keys_to_delete.len();
for key in keys_to_delete {
self.documents_fields.delete(writer, key)?;
}
Ok(count)
}
pub fn document_field<'a>(
&self,
reader: &'a impl rkv::Readable,
document_id: DocumentId,
attribute: SchemaAttr,
) -> Result<Option<&'a [u8]>, rkv::StoreError>
{
let key = document_attribute_into_key(document_id, attribute);
match self.documents_fields.get(reader, key)? {
Some(rkv::Value::Blob(bytes)) => Ok(Some(bytes)),
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}
pub fn document_fields<'r, T: rkv::Readable>(
&self,
reader: &'r T,
document_id: DocumentId,
) -> Result<DocumentFieldsIter<'r, T>, rkv::StoreError>
{
let document_id_bytes = document_id.0.to_be_bytes();
let iter = self.documents_fields.iter_from(reader, document_id_bytes)?;
Ok(DocumentFieldsIter { reader, document_id, iter })
}
}
pub struct DocumentFieldsIter<'r, T> {
reader: &'r T,
document_id: DocumentId,
iter: rkv::store::single::Iter<'r>,
}
impl<'r, T: rkv::Readable + 'r> Iterator for DocumentFieldsIter<'r, T> {
type Item = Result<(SchemaAttr, &'r [u8]), rkv::StoreError>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, Some(rkv::Value::Blob(bytes))))) => {
let key_bytes = key.get(8..8+2).unwrap();
let array = TryFrom::try_from(key_bytes).unwrap();
let attr = u16::from_be_bytes(array);
let attr = SchemaAttr::new(attr);
Some(Ok((attr, bytes)))
},
Some(Ok((key, data))) => panic!("{:?}, {:?}", key, data),
Some(Err(e)) => Some(Err(e)),
None => None,
}
}
}
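
A standalone sketch of the 10-byte key layout built by document_attribute_into_key, with plain u64 and u16 values standing in for DocumentId and SchemaAttr (as the field accesses above show, they are newtypes over those integers). All fields of one document share the same 8-byte prefix, which is what document_fields() relies on when it seeds iter_from() with the document id:

use std::convert::TryInto;

// Same layout as document_attribute_into_key: 8 bytes of document id
// followed by 2 bytes of attribute, both big-endian.
fn make_key(document_id: u64, attribute: u16) -> [u8; 10] {
    let mut key = [0u8; 10];
    key[0..8].copy_from_slice(&document_id.to_be_bytes());
    key[8..10].copy_from_slice(&attribute.to_be_bytes());
    key
}

// Inverse of the layout, as used by DocumentFieldsIter::next to recover
// the attribute from bytes 8..10 of the key.
fn split_key(key: &[u8; 10]) -> (u64, u16) {
    let document_id = u64::from_be_bytes(key[0..8].try_into().unwrap());
    let attribute = u16::from_be_bytes(key[8..10].try_into().unwrap());
    (document_id, attribute)
}

fn main() {
    let key = make_key(42, 3);
    assert_eq!(split_key(&key), (42, 3));
    // Every field of document 42 shares the same 8-byte prefix, so an
    // iterator seeded with the document id visits them contiguously.
    let first = make_key(42, 0);
    let last = make_key(42, u16::MAX);
    assert_eq!(first[0..8], last[0..8]);
}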


@@ -0,0 +1,99 @@
use std::sync::Arc;
use std::convert::TryInto;
use rkv::Value;
use crate::{RankedMap, MResult};
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map";
const SCHEMA_KEY: &str = "schema";
const SYNONYMS_KEY: &str = "synonyms";
const WORDS_KEY: &str = "words";
#[derive(Copy, Clone)]
pub struct Main {
pub(crate) main: rkv::SingleStore,
}
impl Main {
pub fn put_words_fst(
&self,
writer: &mut rkv::Writer,
fst: &fst::Set,
) -> Result<(), rkv::StoreError>
{
let blob = rkv::Value::Blob(fst.as_fst().as_bytes());
self.main.put(writer, WORDS_KEY, &blob)
}
pub fn words_fst(
&self,
reader: &impl rkv::Readable,
) -> MResult<Option<fst::Set>>
{
match self.main.get(reader, WORDS_KEY)? {
Some(Value::Blob(bytes)) => {
let len = bytes.len();
let bytes = Arc::from(bytes);
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}
pub fn put_ranked_map(
&self,
writer: &mut rkv::Writer,
ranked_map: &RankedMap,
) -> MResult<()>
{
let mut bytes = Vec::new();
ranked_map.write_to_bin(&mut bytes)?;
let blob = Value::Blob(&bytes[..]);
self.main.put(writer, RANKED_MAP_KEY, &blob)?;
Ok(())
}
pub fn ranked_map(
&self,
reader: &impl rkv::Readable,
) -> MResult<Option<RankedMap>>
{
match self.main.get(reader, RANKED_MAP_KEY)? {
Some(Value::Blob(bytes)) => {
let ranked_map = RankedMap::read_from_bin(bytes)?;
Ok(Some(ranked_map))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}
pub fn put_number_of_documents<F: Fn(u64) -> u64>(
&self,
writer: &mut rkv::Writer,
f: F,
) -> Result<u64, rkv::StoreError>
{
let new = self.number_of_documents(writer).map(f)?;
self.main.put(writer, NUMBER_OF_DOCUMENTS_KEY, &Value::Blob(&new.to_be_bytes()))?;
Ok(new)
}
pub fn number_of_documents(
&self,
reader: &impl rkv::Readable,
) -> Result<u64, rkv::StoreError>
{
match self.main.get(reader, NUMBER_OF_DOCUMENTS_KEY)? {
Some(Value::Blob(bytes)) => {
let array = bytes.try_into().unwrap();
Ok(u64::from_be_bytes(array))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(0),
}
}
}
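
The documents counter is stored as 8 big-endian bytes under NUMBER_OF_DOCUMENTS_KEY, and a missing key reads back as 0. A standalone sketch of that round-trip and of the read-modify-write pattern that put_number_of_documents implements, with an Option<Vec<u8>> standing in for the rkv blob (an assumption, no LMDB is involved here):

use std::convert::TryInto;

// The counter is absent until the first write; reading it back then
// yields 0, exactly like number_of_documents() above.
fn read_counter(stored: Option<&[u8]>) -> u64 {
    match stored {
        Some(bytes) => u64::from_be_bytes(bytes.try_into().unwrap()),
        None => 0,
    }
}

fn write_counter(value: u64) -> Vec<u8> {
    value.to_be_bytes().to_vec()
}

fn main() {
    // Simulate three calls to put_number_of_documents(writer, |n| n + 1).
    let mut stored: Option<Vec<u8>> = None;
    for _ in 0..3 {
        let new = read_counter(stored.as_deref()) + 1;
        stored = Some(write_counter(new));
    }
    assert_eq!(read_counter(stored.as_deref()), 3);
}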


@@ -0,0 +1,99 @@
mod docs_words;
mod documents_fields;
mod main;
mod postings_lists;
mod synonyms;
mod updates;
mod updates_results;
pub use self::docs_words::DocsWords;
pub use self::documents_fields::{DocumentsFields, DocumentFieldsIter};
pub use self::main::Main;
pub use self::postings_lists::PostingsLists;
pub use self::synonyms::Synonyms;
pub use self::updates::Updates;
pub use self::updates_results::UpdatesResults;
fn aligned_to(bytes: &[u8], align: usize) -> bool {
(bytes as *const _ as *const () as usize) % align == 0
}
fn postings_lists_name(name: &str) -> String {
format!("{}-postings-lists", name)
}
fn documents_fields_name(name: &str) -> String {
format!("{}-documents-fields", name)
}
fn synonyms_name(name: &str) -> String {
format!("{}-synonyms", name)
}
fn docs_words_name(name: &str) -> String {
format!("{}-docs-words", name)
}
fn updates_name(name: &str) -> String {
format!("{}-updates", name)
}
fn updates_results_name(name: &str) -> String {
format!("{}-updates-results", name)
}
#[derive(Copy, Clone)]
pub struct Index {
pub main: Main,
pub postings_lists: PostingsLists,
pub documents_fields: DocumentsFields,
pub synonyms: Synonyms,
pub docs_words: DocsWords,
pub updates: Updates,
pub updates_results: UpdatesResults,
}
pub fn create(env: &rkv::Rkv, name: &str) -> Result<Index, rkv::StoreError> {
open_options(env, name, rkv::StoreOptions::create())
}
pub fn open(env: &rkv::Rkv, name: &str) -> Result<Index, rkv::StoreError> {
let mut options = rkv::StoreOptions::default();
options.create = false;
open_options(env, name, options)
}
fn open_options(
env: &rkv::Rkv,
name: &str,
options: rkv::StoreOptions,
) -> Result<Index, rkv::StoreError>
{
// derive all the database names from the index name
let main_name = name;
let postings_lists_name = postings_lists_name(name);
let documents_fields_name = documents_fields_name(name);
let synonyms_name = synonyms_name(name);
let docs_words_name = docs_words_name(name);
let updates_name = updates_name(name);
let updates_results_name = updates_results_name(name);
// open all the databases
let main = env.open_single(main_name, options)?;
let postings_lists = env.open_single(postings_lists_name.as_str(), options)?;
let documents_fields = env.open_single(documents_fields_name.as_str(), options)?;
let synonyms = env.open_single(synonyms_name.as_str(), options)?;
let docs_words = env.open_single(docs_words_name.as_str(), options)?;
let updates = env.open_single(updates_name.as_str(), options)?;
let updates_results = env.open_single(updates_results_name.as_str(), options)?;
Ok(Index {
main: Main { main },
postings_lists: PostingsLists { postings_lists },
documents_fields: DocumentsFields { documents_fields },
synonyms: Synonyms { synonyms },
docs_words: DocsWords { docs_words },
updates: Updates { updates },
updates_results: UpdatesResults { updates_results },
})
}
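
One rkv environment therefore hosts seven single stores per index, all named after the index. A small standalone sketch of the naming scheme produced by the helper functions above (the index name "movies" is only an example):

fn main() {
    let name = "movies";
    let databases = [
        name.to_string(),                          // main
        format!("{}-postings-lists", name),
        format!("{}-documents-fields", name),
        format!("{}-synonyms", name),
        format!("{}-docs-words", name),
        format!("{}-updates", name),
        format!("{}-updates-results", name),
    ];
    // create() opens them with StoreOptions::create(), open() with
    // options.create = false, but the names are the same either way.
    for database in &databases {
        println!("{}", database);
    }
}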


@@ -0,0 +1,75 @@
use std::borrow::Cow;
use std::{mem, ptr};
use zerocopy::{AsBytes, LayoutVerified};
use crate::DocIndex;
use crate::store::aligned_to;
#[derive(Copy, Clone)]
pub struct PostingsLists {
pub(crate) postings_lists: rkv::SingleStore,
}
impl PostingsLists {
pub fn put_postings_list(
&self,
writer: &mut rkv::Writer,
word: &[u8],
words_indexes: &[DocIndex],
) -> Result<(), rkv::StoreError>
{
let blob = rkv::Value::Blob(words_indexes.as_bytes());
self.postings_lists.put(writer, word, &blob)
}
pub fn del_postings_list(
&self,
writer: &mut rkv::Writer,
word: &[u8],
) -> Result<(), rkv::StoreError>
{
self.postings_lists.delete(writer, word)
}
pub fn postings_list<'a>(
&self,
reader: &'a impl rkv::Readable,
word: &[u8],
) -> Result<Option<Cow<'a, sdset::Set<DocIndex>>>, rkv::StoreError>
{
let bytes = match self.postings_lists.get(reader, word)? {
Some(rkv::Value::Blob(bytes)) => bytes,
Some(value) => panic!("invalid type {:?}", value),
None => return Ok(None),
};
match LayoutVerified::new_slice(bytes) {
Some(layout) => {
let set = sdset::Set::new(layout.into_slice()).unwrap();
Ok(Some(Cow::Borrowed(set)))
},
None => {
let len = bytes.len();
let elem_size = mem::size_of::<DocIndex>();
// ensure that only the alignment is wrong
// and that the length is a whole number of elements
if len % elem_size == 0 && !aligned_to(bytes, mem::align_of::<DocIndex>()) {
let elems = len / elem_size;
let mut vec = Vec::<DocIndex>::with_capacity(elems);
unsafe {
let dst = vec.as_mut_ptr() as *mut u8;
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, len);
vec.set_len(elems);
}
let setbuf = sdset::SetBuf::new(vec).unwrap();
return Ok(Some(Cow::Owned(setbuf)))
}
Ok(None)
},
}
}
}
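
LayoutVerified::new_slice returns None when the blob handed back by LMDB is not aligned for DocIndex, and the fallback above copies the bytes into a properly aligned Vec. A standalone sketch of that situation; a #[repr(C)] pair of u32 fields stands in for DocIndex here (an assumption, the real type's layout is defined elsewhere in the crate):

use std::{mem, ptr};

// Stand-in for DocIndex: any Copy type with alignment greater than 1.
#[derive(Clone, Copy)]
#[repr(C)]
struct Elem {
    a: u32,
    b: u32,
}

fn aligned_to(bytes: &[u8], align: usize) -> bool {
    (bytes.as_ptr() as usize) % align == 0
}

fn main() {
    // Look at a buffer from an odd offset so it is (very likely) not
    // aligned for Elem, although its length is a whole number of elements.
    let backing = vec![0u8; mem::size_of::<Elem>() * 4 + 1];
    let bytes = &backing[1..];
    assert_eq!(bytes.len() % mem::size_of::<Elem>(), 0);

    if !aligned_to(bytes, mem::align_of::<Elem>()) {
        // Same fallback as postings_list(): copy into an aligned Vec
        // instead of reinterpreting the bytes in place.
        let elems = bytes.len() / mem::size_of::<Elem>();
        let mut vec = vec![Elem { a: 0, b: 0 }; elems];
        unsafe {
            ptr::copy_nonoverlapping(bytes.as_ptr(), vec.as_mut_ptr() as *mut u8, bytes.len());
        }
        println!("copied {} elements into an aligned Vec", vec.len());
    } else {
        println!("buffer happened to be aligned; a zero-copy view would be fine");
    }
}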


@@ -0,0 +1,23 @@
#[derive(Copy, Clone)]
pub struct Synonyms {
pub(crate) synonyms: rkv::SingleStore,
}
impl Synonyms {
pub fn synonyms_fst(
&self,
reader: &impl rkv::Readable,
) -> Result<fst::Set, rkv::StoreError>
{
Ok(fst::Set::default())
}
pub fn alternatives_to(
&self,
reader: &impl rkv::Readable,
word: &[u8],
) -> Result<Option<fst::Set>, rkv::StoreError>
{
unimplemented!()
}
}


@@ -0,0 +1,82 @@
use std::convert::TryInto;
use rkv::Value;
use crate::{update::Update, MResult};
#[derive(Copy, Clone)]
pub struct Updates {
pub(crate) updates: rkv::SingleStore,
}
impl Updates {
// TODO we should use the MDB_LAST op but
// it is not exposed by the rkv library
fn last_update_id<'a>(
&self,
reader: &'a impl rkv::Readable,
) -> Result<Option<(u64, Option<Value<'a>>)>, rkv::StoreError>
{
let mut last = None;
let iter = self.updates.iter_start(reader)?;
for result in iter {
let (key, data) = result?;
last = Some((key, data));
}
let (last_key, last_data) = match last {
Some(entry) => entry,
None => return Ok(None),
};
let array = last_key.try_into().unwrap();
let number = u64::from_be_bytes(array);
Ok(Some((number, last_data)))
}
pub fn contains(
&self,
reader: &impl rkv::Readable,
update_id: u64,
) -> Result<bool, rkv::StoreError>
{
let update_id_bytes = update_id.to_be_bytes();
self.updates.get(reader, update_id_bytes).map(|v| v.is_some())
}
pub fn push_back(
&self,
writer: &mut rkv::Writer,
update: &Update,
) -> MResult<u64>
{
let last_update_id = self.last_update_id(writer)?;
let last_update_id = last_update_id.map_or(0, |(n, _)| n + 1);
let last_update_id_bytes = last_update_id.to_be_bytes();
let update = rmp_serde::to_vec_named(&update)?;
let blob = Value::Blob(&update);
self.updates.put(writer, last_update_id_bytes, &blob)?;
Ok(last_update_id)
}
pub fn pop_back(
&self,
writer: &mut rkv::Writer,
) -> MResult<Option<(u64, Update)>>
{
let (last_id, last_data) = match self.last_update_id(writer)? {
Some(entry) => entry,
None => return Ok(None),
};
match last_data {
Some(Value::Blob(bytes)) => {
let update = rmp_serde::from_read_ref(&bytes)?;
Ok(Some((last_id, update)))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}
}
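
A standalone sketch of the id allocation done by push_back(): walk to the last key and use last + 1, or 0 when the queue is empty, which is also why the TODO above asks for MDB_LAST. A BTreeMap with big-endian byte keys stands in for the rkv store (an assumption; its key ordering behaves the same way):

use std::collections::BTreeMap;

fn next_update_id(queue: &BTreeMap<[u8; 8], Vec<u8>>) -> u64 {
    // Walk to the last key, like last_update_id() does with iter_start().
    match queue.keys().last() {
        Some(key) => u64::from_be_bytes(*key) + 1,
        None => 0,
    }
}

fn main() {
    let mut queue: BTreeMap<[u8; 8], Vec<u8>> = BTreeMap::new();
    for payload in &["first", "second", "third"] {
        let id = next_update_id(&queue);
        queue.insert(id.to_be_bytes(), payload.as_bytes().to_vec());
    }
    // Ids are dense: 0, 1 and 2 so far, so the next one is 3.
    assert_eq!(next_update_id(&queue), 3);
}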


@@ -0,0 +1,41 @@
use rkv::Value;
use crate::{update::UpdateResult, MResult};
#[derive(Copy, Clone)]
pub struct UpdatesResults {
pub(crate) updates_results: rkv::SingleStore,
}
impl UpdatesResults {
pub fn put_update_result(
&self,
writer: &mut rkv::Writer,
update_id: u64,
update_result: &UpdateResult,
) -> MResult<()>
{
let update_id_bytes = update_id.to_be_bytes();
let update_result = bincode::serialize(&update_result)?;
let blob = Value::Blob(&update_result);
self.updates_results.put(writer, update_id_bytes, &blob)?;
Ok(())
}
pub fn update_result(
&self,
reader: &impl rkv::Readable,
update_id: u64,
) -> MResult<Option<UpdateResult>>
{
let update_id_bytes = update_id.to_be_bytes();
match self.updates_results.get(reader, update_id_bytes)? {
Some(Value::Blob(bytes)) => {
let update_result = bincode::deserialize(&bytes)?;
Ok(Some(update_result))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}
}
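
Update results are stored as bincode blobs keyed by the big-endian update id. A standalone sketch of the value-side round-trip performed by put_update_result and update_result, assuming the serde and bincode crates the store already uses; FakeUpdateResult is a made-up stand-in for UpdateResult:

use serde::{Deserialize, Serialize};

// Stand-in for UpdateResult; the real type lives in the update module.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct FakeUpdateResult {
    update_id: u64,
    error: Option<String>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let result = FakeUpdateResult { update_id: 7, error: None };
    let bytes = bincode::serialize(&result)?;                    // write path
    let back: FakeUpdateResult = bincode::deserialize(&bytes)?;  // read path
    assert_eq!(back, result);
    Ok(())
}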