mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
feat: Introduce the DocumentsAddition type
This commit is contained in:
parent
42e39f6eb5
commit
e67ada8823
@ -87,8 +87,7 @@ where S: Store,
|
|||||||
{
|
{
|
||||||
fn query_all(&self, query: &str) -> Result<Vec<RawDocument>, S::Error> {
|
fn query_all(&self, query: &str) -> Result<Vec<RawDocument>, S::Error> {
|
||||||
let automatons = generate_automatons(query);
|
let automatons = generate_automatons(query);
|
||||||
let words = self.store.words()?;
|
let words = self.store.words()?.as_fst();
|
||||||
let words = words.as_fst();
|
|
||||||
|
|
||||||
let mut stream = {
|
let mut stream = {
|
||||||
let mut op_builder = fst::raw::OpBuilder::new();
|
let mut op_builder = fst::raw::OpBuilder::new();
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::{HashSet, HashMap};
|
||||||
|
use std::collections::hash_map::Entry;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::io::{self, Cursor, BufRead};
|
use std::io::{self, Cursor, BufRead};
|
||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
@ -8,21 +9,19 @@ use std::{error, fmt};
|
|||||||
|
|
||||||
use arc_swap::{ArcSwap, Lease};
|
use arc_swap::{ArcSwap, Lease};
|
||||||
use byteorder::{ReadBytesExt, BigEndian};
|
use byteorder::{ReadBytesExt, BigEndian};
|
||||||
use hashbrown::HashMap;
|
use meilidb_core::{criterion::Criteria, QueryBuilder, Store, DocumentId, DocIndex};
|
||||||
use meilidb_core::{criterion::Criteria, QueryBuilder, DocumentId, DocIndex};
|
|
||||||
use rmp_serde::decode::{Error as RmpError};
|
use rmp_serde::decode::{Error as RmpError};
|
||||||
use sdset::SetBuf;
|
use sdset::{Set, SetBuf, SetOperation, duo::Union};
|
||||||
use serde::de;
|
use serde::de;
|
||||||
use sled::IVec;
|
use sled::IVec;
|
||||||
use zerocopy::{AsBytes, LayoutVerified};
|
use zerocopy::{AsBytes, LayoutVerified};
|
||||||
|
use fst::{SetBuilder, set::OpBuilder};
|
||||||
|
|
||||||
use crate::{Schema, SchemaAttr, RankedMap};
|
use crate::{Schema, SchemaAttr, RankedMap};
|
||||||
use crate::serde::{extract_document_id, Serializer, Deserializer, SerializerError};
|
use crate::serde::{extract_document_id, Serializer, Deserializer, SerializerError};
|
||||||
use crate::indexer::{Indexer, WordIndexTree};
|
use crate::indexer::Indexer;
|
||||||
use crate::document_attr_key::DocumentAttrKey;
|
use crate::document_attr_key::DocumentAttrKey;
|
||||||
|
|
||||||
pub type WordIndex = meilidb_core::Index<WordIndexTree>;
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
SchemaDiffer,
|
SchemaDiffer,
|
||||||
@ -97,106 +96,113 @@ impl Database {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let indexes: HashSet<&str> = match self.inner.get("indexes")? {
|
let mut cache = self.cache.write().unwrap();
|
||||||
Some(bytes) => bincode::deserialize(&bytes)?,
|
let index = match cache.entry(name.to_string()) {
|
||||||
None => return Ok(None),
|
Entry::Occupied(occupied) => {
|
||||||
|
occupied.get().clone()
|
||||||
|
},
|
||||||
|
Entry::Vacant(vacant) => {
|
||||||
|
let bytes = match self.inner.get("indexes")? {
|
||||||
|
Some(bytes) => bytes,
|
||||||
|
None => return Ok(None),
|
||||||
|
};
|
||||||
|
|
||||||
|
let indexes: HashSet<&str> = bincode::deserialize(&bytes)?;
|
||||||
|
if indexes.get(name).is_none() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let main = {
|
||||||
|
let tree = self.inner.open_tree(name)?;
|
||||||
|
MainIndex(tree)
|
||||||
|
};
|
||||||
|
|
||||||
|
let words = {
|
||||||
|
let tree_name = format!("{}-words", name);
|
||||||
|
let tree = self.inner.open_tree(tree_name)?;
|
||||||
|
WordsIndex(tree)
|
||||||
|
};
|
||||||
|
|
||||||
|
let documents = {
|
||||||
|
let tree_name = format!("{}-documents", name);
|
||||||
|
let tree = self.inner.open_tree(tree_name)?;
|
||||||
|
DocumentsIndex(tree)
|
||||||
|
};
|
||||||
|
|
||||||
|
let raw_index = RawIndex { main, words, documents };
|
||||||
|
let index = Index::from_raw(raw_index)?;
|
||||||
|
|
||||||
|
vacant.insert(Arc::new(index)).clone()
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
if indexes.get(name).is_none() {
|
|
||||||
return Ok(None);
|
|
||||||
}
|
|
||||||
|
|
||||||
let main = {
|
|
||||||
let tree = self.inner.open_tree(name)?;
|
|
||||||
MainIndex(tree)
|
|
||||||
};
|
|
||||||
|
|
||||||
let words = {
|
|
||||||
let tree_name = format!("{}-words", name);
|
|
||||||
let tree = self.inner.open_tree(tree_name)?;
|
|
||||||
WordsIndex(tree)
|
|
||||||
};
|
|
||||||
|
|
||||||
let documents = {
|
|
||||||
let tree_name = format!("{}-documents", name);
|
|
||||||
let tree = self.inner.open_tree(tree_name)?;
|
|
||||||
DocumentsIndex(tree)
|
|
||||||
};
|
|
||||||
|
|
||||||
let raw_index = RawIndex { main, words, documents };
|
|
||||||
let index = Arc::new(Index(raw_index));
|
|
||||||
|
|
||||||
{
|
|
||||||
let cache = self.cache.write().unwrap();
|
|
||||||
cache.insert(name.to_string(), index.clone());
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Some(index))
|
Ok(Some(index))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn create_index(&self, name: &str, schema: Schema) -> Result<Arc<Index>, Error> {
|
pub fn create_index(&self, name: &str, schema: Schema) -> Result<Arc<Index>, Error> {
|
||||||
{
|
let mut cache = self.cache.write().unwrap();
|
||||||
let cache = self.cache.read().unwrap();
|
|
||||||
if let Some(index) = cache.get(name).cloned() {
|
|
||||||
// TODO check if schemas are the same
|
|
||||||
return Ok(index)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut indexes: HashSet<&str> = match self.inner.get("indexes")? {
|
let index = match cache.entry(name.to_string()) {
|
||||||
Some(bytes) => bincode::deserialize(&bytes)?,
|
Entry::Occupied(occupied) => {
|
||||||
None => HashSet::new(),
|
occupied.get().clone()
|
||||||
|
},
|
||||||
|
Entry::Vacant(vacant) => {
|
||||||
|
let bytes = self.inner.get("indexes")?;
|
||||||
|
let bytes = bytes.as_ref();
|
||||||
|
|
||||||
|
let mut indexes: HashSet<&str> = match bytes {
|
||||||
|
Some(bytes) => bincode::deserialize(bytes)?,
|
||||||
|
None => HashSet::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let new_insertion = indexes.insert(name);
|
||||||
|
|
||||||
|
let main = {
|
||||||
|
let tree = self.inner.open_tree(name)?;
|
||||||
|
MainIndex(tree)
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(prev_schema) = main.schema()? {
|
||||||
|
if prev_schema != schema {
|
||||||
|
return Err(Error::SchemaDiffer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let words = {
|
||||||
|
let tree_name = format!("{}-words", name);
|
||||||
|
let tree = self.inner.open_tree(tree_name)?;
|
||||||
|
WordsIndex(tree)
|
||||||
|
};
|
||||||
|
|
||||||
|
let documents = {
|
||||||
|
let tree_name = format!("{}-documents", name);
|
||||||
|
let tree = self.inner.open_tree(tree_name)?;
|
||||||
|
DocumentsIndex(tree)
|
||||||
|
};
|
||||||
|
|
||||||
|
let raw_index = RawIndex { main, words, documents };
|
||||||
|
let index = Index::from_raw(raw_index)?;
|
||||||
|
|
||||||
|
vacant.insert(Arc::new(index)).clone()
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let new_insertion = indexes.insert(name);
|
|
||||||
|
|
||||||
let main = {
|
|
||||||
let tree = self.inner.open_tree(name)?;
|
|
||||||
MainIndex(tree)
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(prev_schema) = main.schema()? {
|
|
||||||
if prev_schema != schema {
|
|
||||||
return Err(Error::SchemaDiffer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let words = {
|
|
||||||
let tree_name = format!("{}-words", name);
|
|
||||||
let tree = self.inner.open_tree(tree_name)?;
|
|
||||||
WordsIndex(tree)
|
|
||||||
};
|
|
||||||
|
|
||||||
let documents = {
|
|
||||||
let tree_name = format!("{}-documents", name);
|
|
||||||
let tree = self.inner.open_tree(tree_name)?;
|
|
||||||
DocumentsIndex(tree)
|
|
||||||
};
|
|
||||||
|
|
||||||
let raw_index = RawIndex { main, words, documents };
|
|
||||||
let index = Arc::new(Index(raw_index));
|
|
||||||
|
|
||||||
{
|
|
||||||
let cache = self.cache.write().unwrap();
|
|
||||||
cache.insert(name.to_string(), index.clone());
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(index)
|
Ok(index)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct RawIndex {
|
pub struct RawIndex {
|
||||||
main: MainIndex,
|
pub main: MainIndex,
|
||||||
words: WordsIndex,
|
pub words: WordsIndex,
|
||||||
documents: DocumentsIndex,
|
pub documents: DocumentsIndex,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct MainIndex(Arc<sled::Tree>);
|
pub struct MainIndex(Arc<sled::Tree>);
|
||||||
|
|
||||||
impl MainIndex {
|
impl MainIndex {
|
||||||
fn schema(&self) -> Result<Option<Schema>, Error> {
|
pub fn schema(&self) -> Result<Option<Schema>, Error> {
|
||||||
match self.0.get("schema")? {
|
match self.0.get("schema")? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
let schema = Schema::read_from_bin(bytes.as_ref())?;
|
let schema = Schema::read_from_bin(bytes.as_ref())?;
|
||||||
@ -206,7 +212,7 @@ impl MainIndex {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn words_set(&self) -> Result<Option<fst::Set>, Error> {
|
pub fn words_set(&self) -> Result<Option<fst::Set>, Error> {
|
||||||
match self.0.get("words")? {
|
match self.0.get("words")? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
let len = bytes.len();
|
let len = bytes.len();
|
||||||
@ -218,7 +224,12 @@ impl MainIndex {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
|
pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> {
|
||||||
|
self.0.set("words", value.as_fst().as_bytes())?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
|
||||||
match self.0.get("ranked-map")? {
|
match self.0.get("ranked-map")? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?;
|
let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?;
|
||||||
@ -227,13 +238,20 @@ impl MainIndex {
|
|||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn set_ranked_map(&self, value: &RankedMap) -> Result<(), Error> {
|
||||||
|
let mut bytes = Vec::new();
|
||||||
|
value.write_to_bin(&mut bytes)?;
|
||||||
|
self.0.set("ranked_map", bytes)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct WordsIndex(Arc<sled::Tree>);
|
pub struct WordsIndex(Arc<sled::Tree>);
|
||||||
|
|
||||||
impl WordsIndex {
|
impl WordsIndex {
|
||||||
fn doc_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Error> {
|
pub fn doc_indexes(&self, word: &[u8]) -> sled::Result<Option<SetBuf<DocIndex>>> {
|
||||||
match self.0.get(word)? {
|
match self.0.get(word)? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
let layout = LayoutVerified::new_slice(bytes.as_ref()).expect("invalid layout");
|
let layout = LayoutVerified::new_slice(bytes.as_ref()).expect("invalid layout");
|
||||||
@ -244,18 +262,33 @@ impl WordsIndex {
|
|||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn set_doc_indexes(&self, word: &[u8], set: Option<&Set<DocIndex>>) -> sled::Result<()> {
|
||||||
|
match set {
|
||||||
|
Some(set) => self.0.set(word, set.as_bytes())?,
|
||||||
|
None => self.0.del(word)?,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct DocumentsIndex(Arc<sled::Tree>);
|
pub struct DocumentsIndex(Arc<sled::Tree>);
|
||||||
|
|
||||||
impl DocumentsIndex {
|
impl DocumentsIndex {
|
||||||
fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result<Option<IVec>, Error> {
|
pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> sled::Result<Option<IVec>> {
|
||||||
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
||||||
self.0.get(key).map_err(Into::into)
|
self.0.get(key)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn document_fields(&self, id: DocumentId) -> DocumentFieldsIter {
|
pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) -> sled::Result<()> {
|
||||||
|
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
||||||
|
self.0.set(key, value)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn document_fields(&self, id: DocumentId) -> DocumentFieldsIter {
|
||||||
let start = DocumentAttrKey::new(id, SchemaAttr::min());
|
let start = DocumentAttrKey::new(id, SchemaAttr::min());
|
||||||
let start = start.to_be_bytes();
|
let start = start.to_be_bytes();
|
||||||
|
|
||||||
@ -269,7 +302,7 @@ impl DocumentsIndex {
|
|||||||
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
|
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
|
||||||
|
|
||||||
impl<'a> Iterator for DocumentFieldsIter<'a> {
|
impl<'a> Iterator for DocumentFieldsIter<'a> {
|
||||||
type Item = Result<(SchemaAttr, IVec), Error>;
|
type Item = sled::Result<(SchemaAttr, IVec)>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
match self.0.next() {
|
match self.0.next() {
|
||||||
@ -279,51 +312,72 @@ impl<'a> Iterator for DocumentFieldsIter<'a> {
|
|||||||
let key = DocumentAttrKey::from_be_bytes(array);
|
let key = DocumentAttrKey::from_be_bytes(array);
|
||||||
Some(Ok((key.attribute, value)))
|
Some(Ok((key.attribute, value)))
|
||||||
},
|
},
|
||||||
Some(Err(e)) => Some(Err(Error::SledError(e))),
|
Some(Err(e)) => Some(Err(e)),
|
||||||
None => None,
|
None => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Index(RawIndex);
|
pub struct Index(ArcSwap<InnerIndex>);
|
||||||
|
|
||||||
|
pub struct InnerIndex {
|
||||||
|
pub words: fst::Set,
|
||||||
|
pub schema: Schema,
|
||||||
|
pub ranked_map: RankedMap,
|
||||||
|
pub raw: RawIndex, // TODO this will be a snapshot in the future
|
||||||
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
pub fn query_builder(&self) -> QueryBuilder<Lease<Arc<WordIndex>>> {
|
fn from_raw(raw: RawIndex) -> Result<Index, Error> {
|
||||||
let word_index = self.word_index();
|
let words = match raw.main.words_set()? {
|
||||||
QueryBuilder::new(word_index)
|
Some(words) => words,
|
||||||
|
None => fst::Set::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let schema = match raw.main.schema()? {
|
||||||
|
Some(schema) => schema,
|
||||||
|
None => return Err(Error::SchemaMissing),
|
||||||
|
};
|
||||||
|
|
||||||
|
let ranked_map = match raw.main.ranked_map()? {
|
||||||
|
Some(map) => map,
|
||||||
|
None => RankedMap::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let inner = InnerIndex { words, schema, ranked_map, raw };
|
||||||
|
let index = Index(ArcSwap::new(Arc::new(inner)));
|
||||||
|
|
||||||
|
Ok(index)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn query_builder(&self) -> QueryBuilder<IndexLease> {
|
||||||
|
let lease = IndexLease(self.0.lease());
|
||||||
|
QueryBuilder::new(lease)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn query_builder_with_criteria<'c>(
|
pub fn query_builder_with_criteria<'c>(
|
||||||
&self,
|
&self,
|
||||||
criteria: Criteria<'c>,
|
criteria: Criteria<'c>,
|
||||||
) -> QueryBuilder<'c, Lease<Arc<WordIndex>>>
|
) -> QueryBuilder<'c, IndexLease>
|
||||||
{
|
{
|
||||||
let word_index = self.word_index();
|
let lease = IndexLease(self.0.lease());
|
||||||
QueryBuilder::with_criteria(word_index, criteria)
|
QueryBuilder::with_criteria(lease, criteria)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn schema(&self) -> &Schema {
|
pub fn lease_inner(&self) -> Lease<Arc<InnerIndex>> {
|
||||||
self.0.schema()
|
self.0.lease()
|
||||||
}
|
|
||||||
|
|
||||||
pub fn word_index(&self) -> Lease<Arc<WordIndex>> {
|
|
||||||
self.0.word_index()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn ranked_map(&self) -> Lease<Arc<RankedMap>> {
|
|
||||||
self.0.ranked_map()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn documents_addition(&self) -> DocumentsAddition {
|
pub fn documents_addition(&self) -> DocumentsAddition {
|
||||||
let index = self.0.clone();
|
let ranked_map = self.0.lease().ranked_map.clone();
|
||||||
let ranked_map = self.0.ranked_map().clone();
|
DocumentsAddition::new(self, ranked_map)
|
||||||
DocumentsAddition::from_raw(index, ranked_map)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn documents_deletion(&self) -> DocumentsDeletion {
|
pub fn documents_deletion(&self) -> DocumentsDeletion {
|
||||||
let index = self.0.clone();
|
// let index = self.0.clone();
|
||||||
DocumentsDeletion::from_raw(index)
|
// DocumentsDeletion::from_raw(index)
|
||||||
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn document<T>(
|
pub fn document<T>(
|
||||||
@ -333,17 +387,18 @@ impl Index {
|
|||||||
) -> Result<Option<T>, RmpError>
|
) -> Result<Option<T>, RmpError>
|
||||||
where T: de::DeserializeOwned,
|
where T: de::DeserializeOwned,
|
||||||
{
|
{
|
||||||
let fields = match fields {
|
let schema = &self.lease_inner().schema;
|
||||||
Some(fields) => {
|
let fields = fields
|
||||||
let iter = fields.iter().filter_map(|n| self.0.schema().attribute(n));
|
.map(|fields| {
|
||||||
Some(HashSet::from_iter(iter))
|
fields
|
||||||
},
|
.into_iter()
|
||||||
None => None,
|
.filter_map(|name| schema.attribute(name))
|
||||||
};
|
.collect()
|
||||||
|
});
|
||||||
|
|
||||||
let mut deserializer = Deserializer {
|
let mut deserializer = Deserializer {
|
||||||
document_id: id,
|
document_id: id,
|
||||||
raw_index: &self.0,
|
index: &self,
|
||||||
fields: fields.as_ref(),
|
fields: fields.as_ref(),
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -353,21 +408,35 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DocumentsAddition {
|
pub struct IndexLease(Lease<Arc<InnerIndex>>);
|
||||||
inner: RawIndex,
|
|
||||||
|
impl Store for IndexLease {
|
||||||
|
type Error = Error;
|
||||||
|
|
||||||
|
fn words(&self) -> Result<&fst::Set, Self::Error> {
|
||||||
|
Ok(&self.0.words)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
||||||
|
Ok(self.0.raw.words.doc_indexes(word)?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DocumentsAddition<'a> {
|
||||||
|
inner: &'a Index,
|
||||||
indexer: Indexer,
|
indexer: Indexer,
|
||||||
ranked_map: RankedMap,
|
ranked_map: RankedMap,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocumentsAddition {
|
impl<'a> DocumentsAddition<'a> {
|
||||||
pub fn from_raw(inner: RawIndex, ranked_map: RankedMap) -> DocumentsAddition {
|
fn new(inner: &'a Index, ranked_map: RankedMap) -> DocumentsAddition<'a> {
|
||||||
DocumentsAddition { inner, indexer: Indexer::new(), ranked_map }
|
DocumentsAddition { inner, indexer: Indexer::new(), ranked_map }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn update_document<D>(&mut self, document: D) -> Result<(), Error>
|
pub fn update_document<D>(&mut self, document: D) -> Result<(), Error>
|
||||||
where D: serde::Serialize,
|
where D: serde::Serialize,
|
||||||
{
|
{
|
||||||
let schema = self.inner.schema();
|
let schema = &self.inner.lease_inner().schema;
|
||||||
let identifier = schema.identifier_name();
|
let identifier = schema.identifier_name();
|
||||||
|
|
||||||
let document_id = match extract_document_id(identifier, &document)? {
|
let document_id = match extract_document_id(identifier, &document)? {
|
||||||
@ -375,6 +444,12 @@ impl DocumentsAddition {
|
|||||||
None => return Err(Error::MissingDocumentId),
|
None => return Err(Error::MissingDocumentId),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// 1. remove the previous document match indexes
|
||||||
|
let mut documents_deletion = DocumentsDeletion::new(self.inner);
|
||||||
|
documents_deletion.delete_document(document_id);
|
||||||
|
documents_deletion.finalize()?;
|
||||||
|
|
||||||
|
// 2. index the document fields
|
||||||
let serializer = Serializer {
|
let serializer = Serializer {
|
||||||
schema,
|
schema,
|
||||||
index: &self.inner,
|
index: &self.inner,
|
||||||
@ -388,30 +463,70 @@ impl DocumentsAddition {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn finalize(self) -> sled::Result<()> {
|
pub fn finalize(self) -> Result<(), Error> {
|
||||||
|
let lease_inner = self.inner.lease_inner();
|
||||||
|
let main = &lease_inner.raw.main;
|
||||||
|
let words = &lease_inner.raw.words;
|
||||||
|
|
||||||
let delta_index = self.indexer.build();
|
let delta_index = self.indexer.build();
|
||||||
|
let mut delta_words_builder = SetBuilder::memory();
|
||||||
|
|
||||||
let index = self.inner.word_index();
|
for (word, delta_set) in delta_index {
|
||||||
let new_index = index.insert_indexes(delta_index)?;
|
delta_words_builder.insert(&word).unwrap();
|
||||||
|
|
||||||
let new_index = Arc::from(new_index);
|
let set = match words.doc_indexes(&word)? {
|
||||||
self.inner.update_word_index(new_index);
|
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
|
||||||
|
None => delta_set,
|
||||||
|
};
|
||||||
|
|
||||||
|
words.set_doc_indexes(&word, Some(&set))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let delta_words = delta_words_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let words = match main.words_set()? {
|
||||||
|
Some(words) => {
|
||||||
|
let op = OpBuilder::new()
|
||||||
|
.add(words.stream())
|
||||||
|
.add(delta_words.stream())
|
||||||
|
.r#union();
|
||||||
|
|
||||||
|
let mut words_builder = SetBuilder::memory();
|
||||||
|
words_builder.extend_stream(op).unwrap();
|
||||||
|
words_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
None => delta_words,
|
||||||
|
};
|
||||||
|
|
||||||
|
main.set_words_set(&words)?;
|
||||||
|
main.set_ranked_map(&self.ranked_map)?;
|
||||||
|
|
||||||
|
// update the "consistent" view of the Index
|
||||||
|
let ranked_map = self.ranked_map;
|
||||||
|
let schema = lease_inner.schema.clone();
|
||||||
|
let raw = lease_inner.raw.clone();
|
||||||
|
|
||||||
|
let inner = InnerIndex { words, schema, ranked_map, raw };
|
||||||
|
self.inner.0.store(Arc::new(inner));
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DocumentsDeletion {
|
pub struct DocumentsDeletion<'a> {
|
||||||
inner: RawIndex,
|
inner: &'a Index,
|
||||||
documents: Vec<DocumentId>,
|
documents: Vec<DocumentId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocumentsDeletion {
|
impl<'a> DocumentsDeletion<'a> {
|
||||||
pub fn from_raw(inner: RawIndex) -> DocumentsDeletion {
|
fn new(inner: &'a Index) -> DocumentsDeletion {
|
||||||
DocumentsDeletion {
|
DocumentsDeletion { inner, documents: Vec::new() }
|
||||||
inner,
|
|
||||||
documents: Vec::new(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_document(&mut self, id: DocumentId) {
|
pub fn delete_document(&mut self, id: DocumentId) {
|
||||||
@ -423,13 +538,16 @@ impl DocumentsDeletion {
|
|||||||
self.documents.dedup();
|
self.documents.dedup();
|
||||||
|
|
||||||
let idset = SetBuf::new_unchecked(self.documents);
|
let idset = SetBuf::new_unchecked(self.documents);
|
||||||
let index = self.inner.word_index();
|
|
||||||
|
|
||||||
let new_index = index.remove_documents(&idset)?;
|
// let index = self.inner.word_index();
|
||||||
let new_index = Arc::from(new_index);
|
|
||||||
|
|
||||||
self.inner.update_word_index(new_index);
|
// let new_index = index.remove_documents(&idset)?;
|
||||||
|
// let new_index = Arc::from(new_index);
|
||||||
|
|
||||||
Ok(())
|
// self.inner.update_word_index(new_index);
|
||||||
|
|
||||||
|
// Ok(())
|
||||||
|
|
||||||
|
unimplemented!("documents deletion finalize")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,78 +1,13 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use deunicode::deunicode_with_tofu;
|
use deunicode::deunicode_with_tofu;
|
||||||
use meilidb_core::{DocumentId, DocIndex, Store};
|
use meilidb_core::{DocumentId, DocIndex, Store};
|
||||||
use meilidb_tokenizer::{is_cjk, Tokenizer, SeqTokenizer, Token};
|
use meilidb_tokenizer::{is_cjk, Tokenizer, SeqTokenizer, Token};
|
||||||
use sdset::{Set, SetBuf};
|
use sdset::SetBuf;
|
||||||
use sled::Tree;
|
|
||||||
use zerocopy::{AsBytes, LayoutVerified};
|
|
||||||
|
|
||||||
use crate::SchemaAttr;
|
use crate::SchemaAttr;
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct WordIndexTree(pub Arc<Tree>);
|
|
||||||
|
|
||||||
impl Store for WordIndexTree {
|
|
||||||
type Error = sled::Error;
|
|
||||||
|
|
||||||
fn get_fst(&self) -> Result<fst::Set, Self::Error> {
|
|
||||||
match self.0.get("fst")? {
|
|
||||||
Some(bytes) => {
|
|
||||||
let bytes: Arc<[u8]> = bytes.into();
|
|
||||||
let len = bytes.len();
|
|
||||||
let raw = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
|
|
||||||
Ok(fst::Set::from(raw))
|
|
||||||
},
|
|
||||||
None => Ok(fst::Set::default()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn set_fst(&self, set: &fst::Set) -> Result<(), Self::Error> {
|
|
||||||
let bytes = set.as_fst().to_vec();
|
|
||||||
self.0.set("fst", bytes)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
|
||||||
let mut word_bytes = Vec::from("word-");
|
|
||||||
word_bytes.extend_from_slice(word);
|
|
||||||
|
|
||||||
match self.0.get(word_bytes)? {
|
|
||||||
Some(bytes) => {
|
|
||||||
let layout = LayoutVerified::new_slice(bytes.as_ref()).unwrap();
|
|
||||||
let slice = layout.into_slice();
|
|
||||||
let setbuf = SetBuf::new_unchecked(slice.to_vec());
|
|
||||||
Ok(Some(setbuf))
|
|
||||||
},
|
|
||||||
None => Ok(None),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn set_indexes(&self, word: &[u8], indexes: &Set<DocIndex>) -> Result<(), Self::Error> {
|
|
||||||
let mut word_bytes = Vec::from("word-");
|
|
||||||
word_bytes.extend_from_slice(word);
|
|
||||||
|
|
||||||
let slice = indexes.as_slice();
|
|
||||||
let bytes = slice.as_bytes();
|
|
||||||
|
|
||||||
self.0.set(word_bytes, bytes)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn del_indexes(&self, word: &[u8]) -> Result<(), Self::Error> {
|
|
||||||
let mut word_bytes = Vec::from("word-");
|
|
||||||
word_bytes.extend_from_slice(word);
|
|
||||||
|
|
||||||
self.0.del(word_bytes)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
type Word = Vec<u8>; // TODO make it be a SmallVec
|
type Word = Vec<u8>; // TODO make it be a SmallVec
|
||||||
|
|
||||||
pub struct Indexer {
|
pub struct Indexer {
|
||||||
@ -115,6 +50,7 @@ impl Indexer {
|
|||||||
pub fn build(self) -> BTreeMap<Word, SetBuf<DocIndex>> {
|
pub fn build(self) -> BTreeMap<Word, SetBuf<DocIndex>> {
|
||||||
self.indexed.into_iter().map(|(word, mut indexes)| {
|
self.indexed.into_iter().map(|(word, mut indexes)| {
|
||||||
indexes.sort_unstable();
|
indexes.sort_unstable();
|
||||||
|
indexes.dedup();
|
||||||
(word, SetBuf::new_unchecked(indexes))
|
(word, SetBuf::new_unchecked(indexes))
|
||||||
}).collect()
|
}).collect()
|
||||||
}
|
}
|
||||||
|
@ -6,12 +6,12 @@ use rmp_serde::decode::{Deserializer as RmpDeserializer, ReadReader};
|
|||||||
use rmp_serde::decode::{Error as RmpError};
|
use rmp_serde::decode::{Error as RmpError};
|
||||||
use serde::{de, forward_to_deserialize_any};
|
use serde::{de, forward_to_deserialize_any};
|
||||||
|
|
||||||
use crate::database::RawIndex;
|
use crate::database::Index;
|
||||||
use crate::SchemaAttr;
|
use crate::SchemaAttr;
|
||||||
|
|
||||||
pub struct Deserializer<'a> {
|
pub struct Deserializer<'a> {
|
||||||
pub document_id: DocumentId,
|
pub document_id: DocumentId,
|
||||||
pub raw_index: &'a RawIndex,
|
pub index: &'a Index,
|
||||||
pub fields: Option<&'a HashSet<SchemaAttr>>,
|
pub fields: Option<&'a HashSet<SchemaAttr>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -26,15 +26,18 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
|
|||||||
}
|
}
|
||||||
|
|
||||||
forward_to_deserialize_any! {
|
forward_to_deserialize_any! {
|
||||||
bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
|
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
|
||||||
bytes byte_buf unit_struct tuple_struct
|
bytes byte_buf option unit unit_struct newtype_struct seq tuple
|
||||||
identifier tuple ignored_any option newtype_struct enum struct
|
tuple_struct struct enum identifier ignored_any
|
||||||
}
|
}
|
||||||
|
|
||||||
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
where V: de::Visitor<'de>
|
where V: de::Visitor<'de>
|
||||||
{
|
{
|
||||||
let document_attributes = self.raw_index.get_document_fields(self.document_id);
|
let schema = &self.index.lease_inner().schema;
|
||||||
|
let documents = &self.index.lease_inner().raw.documents;
|
||||||
|
|
||||||
|
let document_attributes = documents.document_fields(self.document_id);
|
||||||
let document_attributes = document_attributes.filter_map(|result| {
|
let document_attributes = document_attributes.filter_map(|result| {
|
||||||
match result {
|
match result {
|
||||||
Ok(value) => Some(value),
|
Ok(value) => Some(value),
|
||||||
@ -45,9 +48,10 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
let iter = document_attributes.filter_map(|(attr, value)| {
|
let iter = document_attributes.filter_map(|(attr, value)| {
|
||||||
if self.fields.map_or(true, |f| f.contains(&attr)) {
|
if self.fields.map_or(true, |f| f.contains(&attr)) {
|
||||||
let attribute_name = self.raw_index.schema().attribute_name(attr);
|
let attribute_name = schema.attribute_name(attr);
|
||||||
Some((attribute_name, Value::new(value)))
|
Some((attribute_name, Value::new(value)))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use meilidb_core::DocumentId;
|
use meilidb_core::DocumentId;
|
||||||
use serde::ser;
|
use serde::ser;
|
||||||
|
|
||||||
use crate::database::RawIndex;
|
use crate::database::Index;
|
||||||
use crate::ranked_map::RankedMap;
|
use crate::ranked_map::RankedMap;
|
||||||
use crate::indexer::Indexer as RawIndexer;
|
use crate::indexer::Indexer as RawIndexer;
|
||||||
use crate::schema::Schema;
|
use crate::schema::Schema;
|
||||||
@ -9,7 +9,7 @@ use super::{SerializerError, ConvertToString, ConvertToNumber, Indexer};
|
|||||||
|
|
||||||
pub struct Serializer<'a> {
|
pub struct Serializer<'a> {
|
||||||
pub schema: &'a Schema,
|
pub schema: &'a Schema,
|
||||||
pub index: &'a RawIndex,
|
pub index: &'a Index,
|
||||||
pub indexer: &'a mut RawIndexer,
|
pub indexer: &'a mut RawIndexer,
|
||||||
pub ranked_map: &'a mut RankedMap,
|
pub ranked_map: &'a mut RankedMap,
|
||||||
pub document_id: DocumentId,
|
pub document_id: DocumentId,
|
||||||
@ -171,7 +171,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
|
|||||||
pub struct MapSerializer<'a> {
|
pub struct MapSerializer<'a> {
|
||||||
schema: &'a Schema,
|
schema: &'a Schema,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
index: &'a RawIndex,
|
index: &'a Index,
|
||||||
indexer: &'a mut RawIndexer,
|
indexer: &'a mut RawIndexer,
|
||||||
ranked_map: &'a mut RankedMap,
|
ranked_map: &'a mut RankedMap,
|
||||||
current_key_name: Option<String>,
|
current_key_name: Option<String>,
|
||||||
@ -224,7 +224,7 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
|
|||||||
pub struct StructSerializer<'a> {
|
pub struct StructSerializer<'a> {
|
||||||
schema: &'a Schema,
|
schema: &'a Schema,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
index: &'a RawIndex,
|
index: &'a Index,
|
||||||
indexer: &'a mut RawIndexer,
|
indexer: &'a mut RawIndexer,
|
||||||
ranked_map: &'a mut RankedMap,
|
ranked_map: &'a mut RankedMap,
|
||||||
}
|
}
|
||||||
@ -259,7 +259,7 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
|
|||||||
fn serialize_value<T: ?Sized>(
|
fn serialize_value<T: ?Sized>(
|
||||||
schema: &Schema,
|
schema: &Schema,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
index: &RawIndex,
|
index: &Index,
|
||||||
indexer: &mut RawIndexer,
|
indexer: &mut RawIndexer,
|
||||||
ranked_map: &mut RankedMap,
|
ranked_map: &mut RankedMap,
|
||||||
key: &str,
|
key: &str,
|
||||||
@ -272,7 +272,7 @@ where T: ser::Serialize,
|
|||||||
|
|
||||||
if props.is_stored() {
|
if props.is_stored() {
|
||||||
let value = rmp_serde::to_vec_named(value)?;
|
let value = rmp_serde::to_vec_named(value)?;
|
||||||
index.set_document_attribute(document_id, attr, value)?;
|
index.lease_inner().raw.documents.set_document_field(document_id, attr, value)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if props.is_indexed() {
|
if props.is_indexed() {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user