mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-25 06:00:08 +01:00
feat: Introduce Tree wrappers for each index component
This commit is contained in:
parent
6eb25687f8
commit
0c18026240
@ -1,4 +1,5 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
use std::convert::TryInto;
|
||||||
use std::io::{self, Cursor, BufRead};
|
use std::io::{self, Cursor, BufRead};
|
||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
@ -8,15 +9,17 @@ use std::{error, fmt};
|
|||||||
use arc_swap::{ArcSwap, Lease};
|
use arc_swap::{ArcSwap, Lease};
|
||||||
use byteorder::{ReadBytesExt, BigEndian};
|
use byteorder::{ReadBytesExt, BigEndian};
|
||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
use meilidb_core::{criterion::Criteria, QueryBuilder, DocumentId};
|
use meilidb_core::{criterion::Criteria, QueryBuilder, DocumentId, DocIndex};
|
||||||
use rmp_serde::decode::{Error as RmpError};
|
use rmp_serde::decode::{Error as RmpError};
|
||||||
use sdset::SetBuf;
|
use sdset::SetBuf;
|
||||||
use serde::de;
|
use serde::de;
|
||||||
use sled::IVec;
|
use sled::IVec;
|
||||||
|
use zerocopy::{AsBytes, LayoutVerified};
|
||||||
|
|
||||||
use crate::{Schema, SchemaAttr, RankedMap};
|
use crate::{Schema, SchemaAttr, RankedMap};
|
||||||
use crate::serde::{extract_document_id, Serializer, Deserializer, SerializerError};
|
use crate::serde::{extract_document_id, Serializer, Deserializer, SerializerError};
|
||||||
use crate::indexer::{Indexer, WordIndexTree};
|
use crate::indexer::{Indexer, WordIndexTree};
|
||||||
|
use crate::document_attr_key::DocumentAttrKey;
|
||||||
|
|
||||||
pub type WordIndex = meilidb_core::Index<WordIndexTree>;
|
pub type WordIndex = meilidb_core::Index<WordIndexTree>;
|
||||||
|
|
||||||
@ -27,6 +30,7 @@ pub enum Error {
|
|||||||
WordIndexMissing,
|
WordIndexMissing,
|
||||||
MissingDocumentId,
|
MissingDocumentId,
|
||||||
SledError(sled::Error),
|
SledError(sled::Error),
|
||||||
|
FstError(fst::Error),
|
||||||
BincodeError(bincode::Error),
|
BincodeError(bincode::Error),
|
||||||
SerializerError(SerializerError),
|
SerializerError(SerializerError),
|
||||||
}
|
}
|
||||||
@ -37,6 +41,12 @@ impl From<sled::Error> for Error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<fst::Error> for Error {
|
||||||
|
fn from(error: fst::Error) -> Error {
|
||||||
|
Error::FstError(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<bincode::Error> for Error {
|
impl From<bincode::Error> for Error {
|
||||||
fn from(error: bincode::Error) -> Error {
|
fn from(error: bincode::Error) -> Error {
|
||||||
Error::BincodeError(error)
|
Error::BincodeError(error)
|
||||||
@ -58,6 +68,7 @@ impl fmt::Display for Error {
|
|||||||
WordIndexMissing => write!(f, "this index does not have a word index"),
|
WordIndexMissing => write!(f, "this index does not have a word index"),
|
||||||
MissingDocumentId => write!(f, "document id is missing"),
|
MissingDocumentId => write!(f, "document id is missing"),
|
||||||
SledError(e) => write!(f, "sled error; {}", e),
|
SledError(e) => write!(f, "sled error; {}", e),
|
||||||
|
FstError(e) => write!(f, "fst error; {}", e),
|
||||||
BincodeError(e) => write!(f, "bincode error; {}", e),
|
BincodeError(e) => write!(f, "bincode error; {}", e),
|
||||||
SerializerError(e) => write!(f, "serializer error; {}", e),
|
SerializerError(e) => write!(f, "serializer error; {}", e),
|
||||||
}
|
}
|
||||||
@ -180,6 +191,102 @@ impl Database {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct RawIndex2 {
|
||||||
|
main: MainIndex,
|
||||||
|
words: WordsIndex,
|
||||||
|
documents: DocumentsIndex,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct MainIndex(Arc<sled::Tree>);
|
||||||
|
|
||||||
|
impl MainIndex {
|
||||||
|
fn schema(&self) -> Result<Option<Schema>, Error> {
|
||||||
|
match self.0.get("schema")? {
|
||||||
|
Some(bytes) => {
|
||||||
|
let schema = Schema::read_from_bin(bytes.as_ref())?;
|
||||||
|
Ok(Some(schema))
|
||||||
|
},
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn words_set(&self) -> Result<Option<fst::Set>, Error> {
|
||||||
|
match self.0.get("words")? {
|
||||||
|
Some(bytes) => {
|
||||||
|
let len = bytes.len();
|
||||||
|
let value = bytes.into();
|
||||||
|
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
|
||||||
|
Ok(Some(fst::Set::from(fst)))
|
||||||
|
},
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
|
||||||
|
match self.0.get("ranked-map")? {
|
||||||
|
Some(bytes) => {
|
||||||
|
let ranked_map = bincode::deserialize(bytes.as_ref())?;
|
||||||
|
Ok(Some(ranked_map))
|
||||||
|
},
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct WordsIndex(Arc<sled::Tree>);
|
||||||
|
|
||||||
|
impl WordsIndex {
|
||||||
|
fn doc_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Error> {
|
||||||
|
match self.0.get(word)? {
|
||||||
|
Some(bytes) => {
|
||||||
|
let layout = LayoutVerified::new_slice(bytes.as_ref()).expect("invalid layout");
|
||||||
|
let slice = layout.into_slice();
|
||||||
|
let setbuf = SetBuf::new_unchecked(slice.to_vec());
|
||||||
|
Ok(Some(setbuf))
|
||||||
|
},
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DocumentsIndex(Arc<sled::Tree>);
|
||||||
|
|
||||||
|
impl DocumentsIndex {
|
||||||
|
fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result<Option<IVec>, Error> {
|
||||||
|
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
||||||
|
self.0.get(key).map_err(Into::into)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn document_fields(&self, id: DocumentId) -> DocumentFieldsIter {
|
||||||
|
let start = DocumentAttrKey::new(id, SchemaAttr::min());
|
||||||
|
let start = start.to_be_bytes();
|
||||||
|
|
||||||
|
let end = DocumentAttrKey::new(id, SchemaAttr::max());
|
||||||
|
let end = end.to_be_bytes();
|
||||||
|
|
||||||
|
DocumentFieldsIter(self.0.range(start..=end))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
|
||||||
|
|
||||||
|
impl<'a> Iterator for DocumentFieldsIter<'a> {
|
||||||
|
type Item = Result<(SchemaAttr, IVec), Error>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
match self.0.next() {
|
||||||
|
Some(Ok((key, value))) => {
|
||||||
|
let slice: &[u8] = key.as_ref();
|
||||||
|
let array = slice.try_into().unwrap();
|
||||||
|
let key = DocumentAttrKey::from_be_bytes(array);
|
||||||
|
Some(Ok((key.attribute, value)))
|
||||||
|
},
|
||||||
|
Some(Err(e)) => Some(Err(Error::SledError(e))),
|
||||||
|
None => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct RawIndex {
|
pub struct RawIndex {
|
||||||
schema: Schema,
|
schema: Schema,
|
||||||
@ -294,23 +401,6 @@ impl RawIndex {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
|
|
||||||
|
|
||||||
impl<'a> Iterator for DocumentFieldsIter<'a> {
|
|
||||||
type Item = Result<(DocumentId, SchemaAttr, IVec), Error>;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
match self.0.next() {
|
|
||||||
Some(Ok((key, value))) => {
|
|
||||||
let (id, attr) = extract_document_key(key).unwrap();
|
|
||||||
Some(Ok((id, attr, value)))
|
|
||||||
},
|
|
||||||
Some(Err(e)) => Some(Err(Error::SledError(e))),
|
|
||||||
None => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Index(RawIndex);
|
pub struct Index(RawIndex);
|
||||||
|
|
||||||
|
69
meilidb-data/src/document_attr_key.rs
Normal file
69
meilidb-data/src/document_attr_key.rs
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
use meilidb_core::DocumentId;
|
||||||
|
use crate::schema::SchemaAttr;
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct DocumentAttrKey {
|
||||||
|
pub document_id: DocumentId,
|
||||||
|
pub attribute: SchemaAttr,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DocumentAttrKey {
|
||||||
|
pub fn new(document_id: DocumentId, attribute: SchemaAttr) -> DocumentAttrKey {
|
||||||
|
DocumentAttrKey { document_id, attribute }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_be_bytes(self) -> [u8; 10] {
|
||||||
|
let mut output = [0u8; 10];
|
||||||
|
|
||||||
|
let document_id = self.document_id.0.to_be_bytes();
|
||||||
|
let attribute = self.attribute.0.to_be_bytes();
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
use std::{mem::size_of, ptr::copy_nonoverlapping};
|
||||||
|
|
||||||
|
let output = output.as_mut_ptr();
|
||||||
|
copy_nonoverlapping(document_id.as_ptr(), output, size_of::<u64>());
|
||||||
|
|
||||||
|
let output = output.add(size_of::<u64>());
|
||||||
|
copy_nonoverlapping(attribute.as_ptr(), output, size_of::<u16>());
|
||||||
|
}
|
||||||
|
|
||||||
|
output
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_be_bytes(bytes: [u8; 10]) -> DocumentAttrKey {
|
||||||
|
let document_id;
|
||||||
|
let attribute;
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
use std::ptr::read_unaligned;
|
||||||
|
|
||||||
|
let pointer = bytes.as_ptr() as *const _;
|
||||||
|
let document_id_bytes = read_unaligned(pointer);
|
||||||
|
document_id = u64::from_be_bytes(document_id_bytes);
|
||||||
|
|
||||||
|
let pointer = pointer.add(1) as *const _;
|
||||||
|
let attribute_bytes = read_unaligned(pointer);
|
||||||
|
attribute = u16::from_be_bytes(attribute_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
DocumentAttrKey {
|
||||||
|
document_id: DocumentId(document_id),
|
||||||
|
attribute: SchemaAttr(attribute),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A key serialized with `to_be_bytes` must decode back to an equal key.
    #[test]
    fn to_from_be_bytes() {
        let key = DocumentAttrKey::new(DocumentId(67578308), SchemaAttr(3456));
        let decoded = DocumentAttrKey::from_be_bytes(key.to_be_bytes());

        assert_eq!(key, decoded);
    }
}
|
@ -1,4 +1,5 @@
|
|||||||
mod database;
|
mod database;
|
||||||
|
mod document_attr_key;
|
||||||
mod indexer;
|
mod indexer;
|
||||||
mod number;
|
mod number;
|
||||||
mod ranked_map;
|
mod ranked_map;
|
||||||
|
@ -186,12 +186,16 @@ impl Schema {
|
|||||||
pub struct SchemaAttr(pub u16);
|
pub struct SchemaAttr(pub u16);
|
||||||
|
|
||||||
impl SchemaAttr {
|
impl SchemaAttr {
|
||||||
pub fn new(value: u16) -> SchemaAttr {
|
pub const fn new(value: u16) -> SchemaAttr {
|
||||||
SchemaAttr(value)
|
SchemaAttr(value)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn min() -> SchemaAttr {
|
pub const fn min() -> SchemaAttr {
|
||||||
SchemaAttr(0)
|
SchemaAttr(u16::min_value())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn max() -> SchemaAttr {
|
||||||
|
SchemaAttr(u16::max_value())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn next(self) -> Option<SchemaAttr> {
|
pub fn next(self) -> Option<SchemaAttr> {
|
||||||
@ -201,10 +205,6 @@ impl SchemaAttr {
|
|||||||
pub fn prev(self) -> Option<SchemaAttr> {
|
pub fn prev(self) -> Option<SchemaAttr> {
|
||||||
self.0.checked_sub(1).map(SchemaAttr)
|
self.0.checked_sub(1).map(SchemaAttr)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn max() -> SchemaAttr {
|
|
||||||
SchemaAttr(u16::MAX)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for SchemaAttr {
|
impl fmt::Display for SchemaAttr {
|
||||||
|
@ -45,7 +45,7 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
let iter = document_attributes.filter_map(|(_, attr, value)| {
|
let iter = document_attributes.filter_map(|(attr, value)| {
|
||||||
if self.fields.map_or(true, |f| f.contains(&attr)) {
|
if self.fields.map_or(true, |f| f.contains(&attr)) {
|
||||||
let attribute_name = self.raw_index.schema().attribute_name(attr);
|
let attribute_name = self.raw_index.schema().attribute_name(attr);
|
||||||
Some((attribute_name, Value::new(value)))
|
Some((attribute_name, Value::new(value)))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user