mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
feat: Introduce Tree wrappers for each index component
This commit is contained in:
parent
6eb25687f8
commit
0c18026240
@ -1,4 +1,5 @@
|
||||
use std::collections::HashSet;
|
||||
use std::convert::TryInto;
|
||||
use std::io::{self, Cursor, BufRead};
|
||||
use std::iter::FromIterator;
|
||||
use std::path::Path;
|
||||
@ -8,15 +9,17 @@ use std::{error, fmt};
|
||||
use arc_swap::{ArcSwap, Lease};
|
||||
use byteorder::{ReadBytesExt, BigEndian};
|
||||
use hashbrown::HashMap;
|
||||
use meilidb_core::{criterion::Criteria, QueryBuilder, DocumentId};
|
||||
use meilidb_core::{criterion::Criteria, QueryBuilder, DocumentId, DocIndex};
|
||||
use rmp_serde::decode::{Error as RmpError};
|
||||
use sdset::SetBuf;
|
||||
use serde::de;
|
||||
use sled::IVec;
|
||||
use zerocopy::{AsBytes, LayoutVerified};
|
||||
|
||||
use crate::{Schema, SchemaAttr, RankedMap};
|
||||
use crate::serde::{extract_document_id, Serializer, Deserializer, SerializerError};
|
||||
use crate::indexer::{Indexer, WordIndexTree};
|
||||
use crate::document_attr_key::DocumentAttrKey;
|
||||
|
||||
pub type WordIndex = meilidb_core::Index<WordIndexTree>;
|
||||
|
||||
@ -27,6 +30,7 @@ pub enum Error {
|
||||
WordIndexMissing,
|
||||
MissingDocumentId,
|
||||
SledError(sled::Error),
|
||||
FstError(fst::Error),
|
||||
BincodeError(bincode::Error),
|
||||
SerializerError(SerializerError),
|
||||
}
|
||||
@ -37,6 +41,12 @@ impl From<sled::Error> for Error {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<fst::Error> for Error {
|
||||
fn from(error: fst::Error) -> Error {
|
||||
Error::FstError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bincode::Error> for Error {
|
||||
fn from(error: bincode::Error) -> Error {
|
||||
Error::BincodeError(error)
|
||||
@ -58,6 +68,7 @@ impl fmt::Display for Error {
|
||||
WordIndexMissing => write!(f, "this index does not have a word index"),
|
||||
MissingDocumentId => write!(f, "document id is missing"),
|
||||
SledError(e) => write!(f, "sled error; {}", e),
|
||||
FstError(e) => write!(f, "fst error; {}", e),
|
||||
BincodeError(e) => write!(f, "bincode error; {}", e),
|
||||
SerializerError(e) => write!(f, "serializer error; {}", e),
|
||||
}
|
||||
@ -180,6 +191,102 @@ impl Database {
|
||||
}
|
||||
}
|
||||
|
||||
struct RawIndex2 {
|
||||
main: MainIndex,
|
||||
words: WordsIndex,
|
||||
documents: DocumentsIndex,
|
||||
}
|
||||
|
||||
struct MainIndex(Arc<sled::Tree>);
|
||||
|
||||
impl MainIndex {
|
||||
fn schema(&self) -> Result<Option<Schema>, Error> {
|
||||
match self.0.get("schema")? {
|
||||
Some(bytes) => {
|
||||
let schema = Schema::read_from_bin(bytes.as_ref())?;
|
||||
Ok(Some(schema))
|
||||
},
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn words_set(&self) -> Result<Option<fst::Set>, Error> {
|
||||
match self.0.get("words")? {
|
||||
Some(bytes) => {
|
||||
let len = bytes.len();
|
||||
let value = bytes.into();
|
||||
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
|
||||
Ok(Some(fst::Set::from(fst)))
|
||||
},
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
|
||||
match self.0.get("ranked-map")? {
|
||||
Some(bytes) => {
|
||||
let ranked_map = bincode::deserialize(bytes.as_ref())?;
|
||||
Ok(Some(ranked_map))
|
||||
},
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct WordsIndex(Arc<sled::Tree>);
|
||||
|
||||
impl WordsIndex {
|
||||
fn doc_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Error> {
|
||||
match self.0.get(word)? {
|
||||
Some(bytes) => {
|
||||
let layout = LayoutVerified::new_slice(bytes.as_ref()).expect("invalid layout");
|
||||
let slice = layout.into_slice();
|
||||
let setbuf = SetBuf::new_unchecked(slice.to_vec());
|
||||
Ok(Some(setbuf))
|
||||
},
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct DocumentsIndex(Arc<sled::Tree>);
|
||||
|
||||
impl DocumentsIndex {
|
||||
fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result<Option<IVec>, Error> {
|
||||
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
||||
self.0.get(key).map_err(Into::into)
|
||||
}
|
||||
|
||||
fn document_fields(&self, id: DocumentId) -> DocumentFieldsIter {
|
||||
let start = DocumentAttrKey::new(id, SchemaAttr::min());
|
||||
let start = start.to_be_bytes();
|
||||
|
||||
let end = DocumentAttrKey::new(id, SchemaAttr::max());
|
||||
let end = end.to_be_bytes();
|
||||
|
||||
DocumentFieldsIter(self.0.range(start..=end))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
|
||||
|
||||
impl<'a> Iterator for DocumentFieldsIter<'a> {
|
||||
type Item = Result<(SchemaAttr, IVec), Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.0.next() {
|
||||
Some(Ok((key, value))) => {
|
||||
let slice: &[u8] = key.as_ref();
|
||||
let array = slice.try_into().unwrap();
|
||||
let key = DocumentAttrKey::from_be_bytes(array);
|
||||
Some(Ok((key.attribute, value)))
|
||||
},
|
||||
Some(Err(e)) => Some(Err(Error::SledError(e))),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RawIndex {
|
||||
schema: Schema,
|
||||
@ -294,23 +401,6 @@ impl RawIndex {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
|
||||
|
||||
impl<'a> Iterator for DocumentFieldsIter<'a> {
|
||||
type Item = Result<(DocumentId, SchemaAttr, IVec), Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.0.next() {
|
||||
Some(Ok((key, value))) => {
|
||||
let (id, attr) = extract_document_key(key).unwrap();
|
||||
Some(Ok((id, attr, value)))
|
||||
},
|
||||
Some(Err(e)) => Some(Err(Error::SledError(e))),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Index(RawIndex);
|
||||
|
||||
|
69
meilidb-data/src/document_attr_key.rs
Normal file
69
meilidb-data/src/document_attr_key.rs
Normal file
@ -0,0 +1,69 @@
|
||||
use meilidb_core::DocumentId;
|
||||
use crate::schema::SchemaAttr;
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct DocumentAttrKey {
|
||||
pub document_id: DocumentId,
|
||||
pub attribute: SchemaAttr,
|
||||
}
|
||||
|
||||
impl DocumentAttrKey {
|
||||
pub fn new(document_id: DocumentId, attribute: SchemaAttr) -> DocumentAttrKey {
|
||||
DocumentAttrKey { document_id, attribute }
|
||||
}
|
||||
|
||||
pub fn to_be_bytes(self) -> [u8; 10] {
|
||||
let mut output = [0u8; 10];
|
||||
|
||||
let document_id = self.document_id.0.to_be_bytes();
|
||||
let attribute = self.attribute.0.to_be_bytes();
|
||||
|
||||
unsafe {
|
||||
use std::{mem::size_of, ptr::copy_nonoverlapping};
|
||||
|
||||
let output = output.as_mut_ptr();
|
||||
copy_nonoverlapping(document_id.as_ptr(), output, size_of::<u64>());
|
||||
|
||||
let output = output.add(size_of::<u64>());
|
||||
copy_nonoverlapping(attribute.as_ptr(), output, size_of::<u16>());
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
pub fn from_be_bytes(bytes: [u8; 10]) -> DocumentAttrKey {
|
||||
let document_id;
|
||||
let attribute;
|
||||
|
||||
unsafe {
|
||||
use std::ptr::read_unaligned;
|
||||
|
||||
let pointer = bytes.as_ptr() as *const _;
|
||||
let document_id_bytes = read_unaligned(pointer);
|
||||
document_id = u64::from_be_bytes(document_id_bytes);
|
||||
|
||||
let pointer = pointer.add(1) as *const _;
|
||||
let attribute_bytes = read_unaligned(pointer);
|
||||
attribute = u16::from_be_bytes(attribute_bytes);
|
||||
}
|
||||
|
||||
DocumentAttrKey {
|
||||
document_id: DocumentId(document_id),
|
||||
attribute: SchemaAttr(attribute),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding a key and decoding the result must give back the same key.
    #[test]
    fn to_from_be_bytes() {
        let key = DocumentAttrKey::new(DocumentId(67578308), SchemaAttr(3456));
        let decoded = DocumentAttrKey::from_be_bytes(key.to_be_bytes());

        assert_eq!(key, decoded);
    }
}
|
@ -1,4 +1,5 @@
|
||||
mod database;
|
||||
mod document_attr_key;
|
||||
mod indexer;
|
||||
mod number;
|
||||
mod ranked_map;
|
||||
|
@ -186,12 +186,16 @@ impl Schema {
|
||||
pub struct SchemaAttr(pub u16);
|
||||
|
||||
impl SchemaAttr {
|
||||
pub fn new(value: u16) -> SchemaAttr {
|
||||
pub const fn new(value: u16) -> SchemaAttr {
|
||||
SchemaAttr(value)
|
||||
}
|
||||
|
||||
pub fn min() -> SchemaAttr {
|
||||
SchemaAttr(0)
|
||||
pub const fn min() -> SchemaAttr {
|
||||
SchemaAttr(u16::min_value())
|
||||
}
|
||||
|
||||
pub const fn max() -> SchemaAttr {
|
||||
SchemaAttr(u16::max_value())
|
||||
}
|
||||
|
||||
pub fn next(self) -> Option<SchemaAttr> {
|
||||
@ -201,10 +205,6 @@ impl SchemaAttr {
|
||||
pub fn prev(self) -> Option<SchemaAttr> {
|
||||
self.0.checked_sub(1).map(SchemaAttr)
|
||||
}
|
||||
|
||||
pub fn max() -> SchemaAttr {
|
||||
SchemaAttr(u16::MAX)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SchemaAttr {
|
||||
|
@ -45,7 +45,7 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
|
||||
},
|
||||
}
|
||||
});
|
||||
let iter = document_attributes.filter_map(|(_, attr, value)| {
|
||||
let iter = document_attributes.filter_map(|(attr, value)| {
|
||||
if self.fields.map_or(true, |f| f.contains(&attr)) {
|
||||
let attribute_name = self.raw_index.schema().attribute_name(attr);
|
||||
Some((attribute_name, Value::new(value)))
|
||||
|
Loading…
x
Reference in New Issue
Block a user