2019-04-18 13:58:35 +02:00
|
|
|
use std::collections::HashSet;
|
|
|
|
use std::io::{self, Cursor, BufRead};
|
|
|
|
use std::iter::FromIterator;
|
2019-04-08 15:19:57 +02:00
|
|
|
use std::path::Path;
|
2019-04-16 10:47:52 +02:00
|
|
|
use std::sync::Arc;
|
2019-04-22 15:26:43 +02:00
|
|
|
use std::{error, fmt};
|
2019-04-08 15:19:57 +02:00
|
|
|
|
2019-04-16 10:47:52 +02:00
|
|
|
use arc_swap::{ArcSwap, Lease};
|
2019-04-18 14:23:09 +02:00
|
|
|
use byteorder::{ReadBytesExt, BigEndian};
|
2019-04-16 10:47:52 +02:00
|
|
|
use hashbrown::HashMap;
|
2019-04-18 14:11:00 +02:00
|
|
|
use meilidb_core::criterion::Criteria;
|
|
|
|
use meilidb_core::QueryBuilder;
|
2019-04-11 14:51:17 +02:00
|
|
|
use meilidb_core::shared_data_cursor::{FromSharedDataCursor, SharedDataCursor};
|
|
|
|
use meilidb_core::write_to_bytes::WriteToBytes;
|
2019-04-16 10:47:52 +02:00
|
|
|
use meilidb_core::{DocumentId, Index as WordIndex};
|
2019-04-18 13:58:35 +02:00
|
|
|
use rmp_serde::decode::{Error as RmpError};
|
2019-04-19 13:41:52 +02:00
|
|
|
use sdset::SetBuf;
|
2019-04-18 14:23:09 +02:00
|
|
|
use serde::de;
|
2019-04-11 14:51:17 +02:00
|
|
|
use sled::IVec;
|
|
|
|
|
2019-04-16 12:06:40 +02:00
|
|
|
use crate::{Schema, SchemaAttr, RankedMap};
|
2019-04-21 22:40:21 +02:00
|
|
|
use crate::serde::{extract_document_id, Serializer, Deserializer, SerializerError};
|
2019-04-19 15:50:53 +02:00
|
|
|
use crate::indexer::Indexer;
|
2019-04-08 15:19:57 +02:00
|
|
|
|
|
|
|
/// Every error the database/index layer can surface to callers.
#[derive(Debug)]
pub enum Error {
    /// `create_index` was called for an existing index with a different schema.
    SchemaDiffer,
    /// The sled tree of an index lacks its "schema" entry.
    SchemaMissing,
    /// The sled tree of an index lacks its "word-index" entry.
    WordIndexMissing,
    /// A document to be added does not carry its identifier attribute.
    MissingDocumentId,
    /// Underlying sled storage failure.
    SledError(sled::Error),
    /// bincode (de)serialization failed (used for the ranked map).
    BincodeError(bincode::Error),
    /// Document (de)serialization failed.
    SerializerError(SerializerError),
}
|
|
|
|
|
|
|
|
// Allows `?` on sled operations in functions returning `Result<_, Error>`.
impl From<sled::Error> for Error {
    fn from(error: sled::Error) -> Error {
        Error::SledError(error)
    }
}
|
|
|
|
|
|
|
|
// Allows `?` on bincode (de)serialization in functions returning `Result<_, Error>`.
impl From<bincode::Error> for Error {
    fn from(error: bincode::Error) -> Error {
        Error::BincodeError(error)
    }
}
|
|
|
|
|
2019-04-21 22:40:21 +02:00
|
|
|
// Allows `?` on document (de)serialization in functions returning `Result<_, Error>`.
impl From<SerializerError> for Error {
    fn from(error: SerializerError) -> Error {
        Error::SerializerError(error)
    }
}
|
|
|
|
|
2019-04-22 15:26:43 +02:00
|
|
|
impl fmt::Display for Error {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
use self::Error::*;
|
|
|
|
match self {
|
|
|
|
SchemaDiffer => write!(f, "schemas differ"),
|
|
|
|
SchemaMissing => write!(f, "this index does not have a schema"),
|
|
|
|
WordIndexMissing => write!(f, "this index does not have a word index"),
|
|
|
|
MissingDocumentId => write!(f, "document id is missing"),
|
|
|
|
SledError(e) => write!(f, "sled error; {}", e),
|
|
|
|
BincodeError(e) => write!(f, "bincode error; {}", e),
|
|
|
|
SerializerError(e) => write!(f, "serializer error; {}", e),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Marker impl: `Debug` + `Display` are enough, no source/cause chaining is exposed.
impl error::Error for Error { }
|
|
|
|
|
2019-04-08 15:19:57 +02:00
|
|
|
/// Returns the sled tree name under which the index `name` is stored
/// (the raw bytes of `"index-"` followed by the index name).
fn index_name(name: &str) -> Vec<u8> {
    let mut bytes = b"index-".to_vec();
    bytes.extend_from_slice(name.as_bytes());
    bytes
}
|
2019-03-29 17:01:10 +01:00
|
|
|
|
2019-04-12 18:46:36 +02:00
|
|
|
fn document_key(id: DocumentId, attr: SchemaAttr) -> Vec<u8> {
|
|
|
|
let DocumentId(document_id) = id;
|
|
|
|
let SchemaAttr(schema_attr) = attr;
|
|
|
|
|
|
|
|
let mut bytes = Vec::new();
|
|
|
|
bytes.extend_from_slice(b"document-");
|
|
|
|
bytes.extend_from_slice(&document_id.to_be_bytes()[..]);
|
|
|
|
bytes.extend_from_slice(&schema_attr.to_be_bytes()[..]);
|
|
|
|
bytes
|
|
|
|
}
|
|
|
|
|
2019-04-18 13:58:35 +02:00
|
|
|
/// Extension for `Cursor` used when parsing the prefixed sled keys.
trait CursorExt {
    /// If the bytes at the current position start with `needle`, advances
    /// the cursor past it and returns `true`; otherwise leaves the cursor
    /// untouched and returns `false`.
    fn consume_if_eq(&mut self, needle: &[u8]) -> bool;
}

impl<T: AsRef<[u8]>> CursorExt for Cursor<T> {
    fn consume_if_eq(&mut self, needle: &[u8]) -> bool {
        let position = self.position() as usize;
        let slice = self.get_ref().as_ref();

        // `Cursor::set_position` accepts positions beyond the end of the
        // underlying buffer, so indexing `slice[position..]` could panic.
        // A checked `get` treats an out-of-range position as a mismatch.
        let matches = slice
            .get(position..)
            .map_or(false, |rest| rest.starts_with(needle));

        if matches {
            self.consume(needle.len());
        }
        matches
    }
}
|
|
|
|
|
|
|
|
fn extract_document_key(key: Vec<u8>) -> io::Result<(DocumentId, SchemaAttr)> {
|
|
|
|
let mut key = Cursor::new(key);
|
|
|
|
|
|
|
|
if !key.consume_if_eq(b"document-") {
|
|
|
|
return Err(io::Error::from(io::ErrorKind::InvalidData))
|
|
|
|
}
|
|
|
|
|
|
|
|
let document_id = key.read_u64::<BigEndian>().map(DocumentId)?;
|
|
|
|
let schema_attr = key.read_u16::<BigEndian>().map(SchemaAttr)?;
|
|
|
|
|
|
|
|
Ok((document_id, schema_attr))
|
|
|
|
}
|
|
|
|
|
2019-03-29 17:01:10 +01:00
|
|
|
#[derive(Clone)]
pub struct Database {
    // Cache of already opened indexes, swapped atomically (RCU) so lookups
    // never take a lock; clones of `Database` share the same cache.
    opened: Arc<ArcSwap<HashMap<String, RawIndex>>>,
    // The underlying sled database; each index lives in its own sled tree.
    inner: sled::Db,
}
|
2019-03-29 17:01:10 +01:00
|
|
|
|
|
|
|
impl Database {
    /// Opens (or creates) the sled database at `path`, with no index opened yet.
    pub fn start_default<P: AsRef<Path>>(path: P) -> Result<Database, Error> {
        let inner = sled::Db::start_default(path)?;
        let opened = Arc::new(ArcSwap::new(Arc::new(HashMap::new())));
        Ok(Database { opened, inner })
    }

    /// Returns the index named `name`, opening and caching it on first
    /// access; `Ok(None)` when no such index was ever created.
    pub fn open_index(&self, name: &str) -> Result<Option<Index>, Error> {
        // check if the index was already opened
        if let Some(raw_index) = self.opened.lease().get(name) {
            return Ok(Some(Index(raw_index.clone())))
        }

        // The index exists on disk iff a sled tree with the prefixed name exists.
        let raw_name = index_name(name);
        if self.inner.tree_names().into_iter().any(|tn| tn == raw_name) {
            let tree = self.inner.open_tree(raw_name)?;
            let raw_index = RawIndex::from_raw(tree)?;

            // Publish the opened index: copy the cache map, insert, swap it in.
            // NOTE(review): two threads racing here may each build a RawIndex
            // for the same name; the rcu retry keeps the map itself consistent,
            // but confirm a duplicate handle is harmless.
            self.opened.rcu(|opened| {
                let mut opened = HashMap::clone(opened);
                opened.insert(name.to_string(), raw_index.clone());
                opened
            });

            return Ok(Some(Index(raw_index)))
        }

        Ok(None)
    }

    /// Creates the index named `name` with `schema`, or returns the existing
    /// index; fails with `SchemaDiffer` if it exists under another schema.
    pub fn create_index(&self, name: String, schema: Schema) -> Result<Index, Error> {
        match self.open_index(&name)? {
            Some(index) => {
                // Re-creating an index is only allowed with the exact same schema.
                if index.schema() != &schema {
                    return Err(Error::SchemaDiffer);
                }

                Ok(index)
            },
            None => {
                let raw_name = index_name(&name);
                let tree = self.inner.open_tree(raw_name)?;
                let raw_index = RawIndex::new_from_raw(tree, schema)?;

                // Same copy-on-write publication as in `open_index`.
                self.opened.rcu(|opened| {
                    let mut opened = HashMap::clone(opened);
                    opened.insert(name.clone(), raw_index.clone());
                    opened
                });

                Ok(Index(raw_index))
            },
        }
    }
}
|
|
|
|
|
2019-04-11 14:51:17 +02:00
|
|
|
#[derive(Clone)]
pub struct RawIndex {
    // The schema is fixed for the lifetime of the opened index.
    schema: Schema,
    // In-memory snapshot of the word index, swapped atomically on updates.
    word_index: Arc<ArcSwap<WordIndex>>,
    // In-memory snapshot of the ranked map, swapped atomically on updates.
    ranked_map: Arc<ArcSwap<RankedMap>>,
    // The sled tree persisting this index ("schema", "word-index",
    // "ranked-map" and the per-document attribute keys).
    inner: Arc<sled::Tree>,
}
|
|
|
|
|
2019-04-18 13:58:35 +02:00
|
|
|
impl RawIndex {
    /// Loads an existing index from its sled tree: reads and decodes the
    /// stored schema, word index and ranked map.
    fn from_raw(inner: Arc<sled::Tree>) -> Result<RawIndex, Error> {
        let schema = {
            let bytes = inner.get("schema")?;
            let bytes = bytes.ok_or(Error::SchemaMissing)?;
            Schema::read_from_bin(bytes.as_ref())?
        };

        let bytes = inner.get("word-index")?;
        let bytes = bytes.ok_or(Error::WordIndexMissing)?;
        let word_index = {
            let len = bytes.len();
            // Move the sled value into shared bytes so the word index can be
            // built from a shared-data cursor over them.
            let bytes: Arc<[u8]> = Into::into(bytes);
            let mut cursor = SharedDataCursor::from_shared_bytes(bytes, 0, len);

            // TODO must handle this error
            // NOTE(review): a corrupted "word-index" value panics here
            // instead of surfacing an `Error` — worth fixing upstream.
            let word_index = WordIndex::from_shared_data_cursor(&mut cursor).unwrap();

            Arc::new(ArcSwap::new(Arc::new(word_index)))
        };

        let ranked_map = {
            // The ranked map is optional on disk; absence means an empty map.
            let map = match inner.get("ranked-map")? {
                Some(bytes) => bincode::deserialize(bytes.as_ref())?,
                None => RankedMap::default(),
            };

            Arc::new(ArcSwap::new(Arc::new(map)))
        };

        Ok(RawIndex { schema, word_index, ranked_map, inner })
    }

    /// Initializes a brand new index in `inner`: persists `schema` and an
    /// empty word index; the default ranked map is NOT persisted here.
    fn new_from_raw(inner: Arc<sled::Tree>, schema: Schema) -> Result<RawIndex, Error> {
        let mut schema_bytes = Vec::new();
        schema.write_to_bin(&mut schema_bytes)?;
        inner.set("schema", schema_bytes)?;

        let word_index = WordIndex::default();
        inner.set("word-index", word_index.into_bytes())?;
        let word_index = Arc::new(ArcSwap::new(Arc::new(word_index)));

        let ranked_map = Arc::new(ArcSwap::new(Arc::new(RankedMap::default())));

        Ok(RawIndex { schema, word_index, ranked_map, inner })
    }

    /// The schema this index was opened/created with.
    pub fn schema(&self) -> &Schema {
        &self.schema
    }

    /// A cheap lease on the current word index snapshot.
    pub fn word_index(&self) -> Lease<Arc<WordIndex>> {
        self.word_index.lease()
    }

    /// A cheap lease on the current ranked map snapshot.
    pub fn ranked_map(&self) -> Lease<Arc<RankedMap>> {
        self.ranked_map.lease()
    }

    /// Persists `word_index` under "word-index", then atomically swaps the
    /// in-memory snapshot so subsequent readers observe the new index.
    pub fn update_word_index(&self, word_index: Arc<WordIndex>) -> sled::Result<()> {
        let data = word_index.into_bytes();
        self.inner.set("word-index", data).map(drop)?;
        self.word_index.store(word_index);

        Ok(())
    }

    /// Swaps the in-memory ranked map snapshot.
    /// NOTE(review): unlike `update_word_index`, this does NOT persist the
    /// map under "ranked-map" — confirm whether that is intentional.
    pub fn update_ranked_map(&self, ranked_map: Arc<RankedMap>) {
        self.ranked_map.store(ranked_map)
    }

    /// Stores `value` as attribute `attr` of document `id`, returning the
    /// value previously stored under that key, if any.
    pub fn set_document_attribute<V>(
        &self,
        id: DocumentId,
        attr: SchemaAttr,
        value: V,
    ) -> Result<Option<IVec>, sled::Error>
    where IVec: From<V>,
    {
        let key = document_key(id, attr);
        Ok(self.inner.set(key, value)?)
    }

    /// Fetches the value stored for attribute `attr` of document `id`.
    pub fn get_document_attribute(
        &self,
        id: DocumentId,
        attr: SchemaAttr
    ) -> Result<Option<IVec>, sled::Error>
    {
        let key = document_key(id, attr);
        Ok(self.inner.get(key)?)
    }

    /// Iterates over every stored attribute of document `id`, relying on
    /// `document_key`'s big-endian layout for a contiguous key range scan.
    pub fn get_document_fields(&self, id: DocumentId) -> DocumentFieldsIter {
        let start = document_key(id, SchemaAttr::min());
        let end = document_key(id, SchemaAttr::max());
        DocumentFieldsIter(self.inner.range(start..=end))
    }

    /// Deletes the value stored for attribute `attr` of document `id`,
    /// returning the removed value, if any.
    pub fn del_document_attribute(
        &self,
        id: DocumentId,
        attr: SchemaAttr
    ) -> Result<Option<IVec>, sled::Error>
    {
        let key = document_key(id, attr);
        Ok(self.inner.del(key)?)
    }
}
|
2019-04-18 13:58:35 +02:00
|
|
|
|
|
|
|
/// Streams `(document id, attribute, value)` entries out of a sled range scan.
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
|
|
|
|
|
|
|
|
impl<'a> Iterator for DocumentFieldsIter<'a> {
|
|
|
|
type Item = Result<(DocumentId, SchemaAttr, IVec), Error>;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
match self.0.next() {
|
|
|
|
Some(Ok((key, value))) => {
|
|
|
|
let (id, attr) = extract_document_key(key).unwrap();
|
|
|
|
Some(Ok((id, attr, value)))
|
|
|
|
},
|
|
|
|
Some(Err(e)) => Some(Err(Error::SledError(e))),
|
|
|
|
None => None,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Public handle on an opened index; a cheap clone-able wrapper around the
/// shared `RawIndex`.
#[derive(Clone)]
pub struct Index(RawIndex);
|
|
|
|
|
|
|
|
impl Index {
|
2019-04-18 14:11:00 +02:00
|
|
|
pub fn query_builder(&self) -> QueryBuilder<Lease<Arc<WordIndex>>> {
|
|
|
|
let word_index = self.word_index();
|
|
|
|
QueryBuilder::new(word_index)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn query_builder_with_criteria<'c>(
|
|
|
|
&self,
|
|
|
|
criteria: Criteria<'c>,
|
|
|
|
) -> QueryBuilder<'c, Lease<Arc<WordIndex>>>
|
|
|
|
{
|
|
|
|
let word_index = self.word_index();
|
|
|
|
QueryBuilder::with_criteria(word_index, criteria)
|
|
|
|
}
|
|
|
|
|
2019-04-18 13:58:35 +02:00
|
|
|
pub fn schema(&self) -> &Schema {
|
|
|
|
self.0.schema()
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn word_index(&self) -> Lease<Arc<WordIndex>> {
|
|
|
|
self.0.word_index()
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn ranked_map(&self) -> Lease<Arc<RankedMap>> {
|
|
|
|
self.0.ranked_map()
|
|
|
|
}
|
|
|
|
|
2019-04-19 13:41:52 +02:00
|
|
|
pub fn documents_addition(&self) -> DocumentsAddition {
|
|
|
|
let index = self.0.clone();
|
|
|
|
DocumentsAddition::from_raw(index)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn documents_deletion(&self) -> DocumentsDeletion {
|
|
|
|
let index = self.0.clone();
|
|
|
|
DocumentsDeletion::from_raw(index)
|
|
|
|
}
|
|
|
|
|
2019-04-18 13:58:35 +02:00
|
|
|
pub fn document<T>(
|
|
|
|
&self,
|
|
|
|
fields: Option<&HashSet<&str>>,
|
|
|
|
id: DocumentId,
|
|
|
|
) -> Result<Option<T>, RmpError>
|
|
|
|
where T: de::DeserializeOwned,
|
|
|
|
{
|
|
|
|
let fields = match fields {
|
|
|
|
Some(fields) => {
|
|
|
|
let iter = fields.iter().filter_map(|n| self.0.schema().attribute(n));
|
|
|
|
Some(HashSet::from_iter(iter))
|
|
|
|
},
|
|
|
|
None => None,
|
|
|
|
};
|
|
|
|
|
|
|
|
let mut deserializer = Deserializer {
|
|
|
|
document_id: id,
|
|
|
|
raw_index: &self.0,
|
|
|
|
fields: fields.as_ref(),
|
|
|
|
};
|
|
|
|
|
|
|
|
// TODO: currently we return an error if all document fields are missing,
|
|
|
|
// returning None would have been better
|
|
|
|
T::deserialize(&mut deserializer).map(Some)
|
|
|
|
}
|
|
|
|
}
|
2019-04-19 13:41:52 +02:00
|
|
|
|
2019-04-19 15:50:53 +02:00
|
|
|
/// Batched insertion/update of documents: stage changes with
/// `update_document`, apply them with `finalize`.
pub struct DocumentsAddition {
    inner: RawIndex,
    // Accumulates the words indexed from the staged documents.
    indexer: Indexer,
}
|
2019-04-19 13:41:52 +02:00
|
|
|
|
|
|
|
impl DocumentsAddition {
|
|
|
|
pub fn from_raw(inner: RawIndex) -> DocumentsAddition {
|
2019-04-19 15:50:53 +02:00
|
|
|
DocumentsAddition { inner, indexer: Indexer::new() }
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn update_document<D>(&mut self, document: D) -> Result<(), Error>
|
|
|
|
where D: serde::Serialize,
|
|
|
|
{
|
2019-04-21 22:40:21 +02:00
|
|
|
let schema = self.inner.schema();
|
|
|
|
let identifier = schema.identifier_name();
|
|
|
|
|
|
|
|
let document_id = match extract_document_id(identifier, &document)? {
|
|
|
|
Some(id) => id,
|
|
|
|
None => return Err(Error::MissingDocumentId),
|
|
|
|
};
|
|
|
|
|
|
|
|
let serializer = Serializer {
|
|
|
|
schema,
|
|
|
|
index: &self.inner,
|
|
|
|
indexer: &mut self.indexer,
|
|
|
|
document_id,
|
|
|
|
};
|
|
|
|
|
|
|
|
document.serialize(serializer)?;
|
|
|
|
|
|
|
|
Ok(())
|
2019-04-19 15:50:53 +02:00
|
|
|
}
|
|
|
|
pub fn finalize(self) -> sled::Result<()> {
|
2019-04-21 22:40:21 +02:00
|
|
|
let delta_index = self.indexer.build();
|
|
|
|
|
|
|
|
let index = self.inner.word_index();
|
|
|
|
let new_index = index.r#union(&delta_index);
|
|
|
|
|
|
|
|
let new_index = Arc::from(new_index);
|
|
|
|
self.inner.update_word_index(new_index)?;
|
|
|
|
|
|
|
|
Ok(())
|
2019-04-19 13:41:52 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Batched removal of documents: stage ids with `delete_document`,
/// apply them with `finalize`.
pub struct DocumentsDeletion {
    inner: RawIndex,
    // Ids staged for deletion; sorted and deduplicated in `finalize`.
    documents: Vec<DocumentId>,
}
|
|
|
|
|
|
|
|
impl DocumentsDeletion {
|
|
|
|
pub fn from_raw(inner: RawIndex) -> DocumentsDeletion {
|
|
|
|
DocumentsDeletion { inner, documents: Vec::new() }
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn delete_document(&mut self, id: DocumentId) {
|
|
|
|
self.documents.push(id);
|
|
|
|
}
|
|
|
|
|
2019-04-19 15:50:53 +02:00
|
|
|
pub fn finalize(mut self) -> Result<(), Error> {
|
2019-04-19 13:41:52 +02:00
|
|
|
self.documents.sort_unstable();
|
|
|
|
self.documents.dedup();
|
|
|
|
|
|
|
|
let idset = SetBuf::new_unchecked(self.documents);
|
|
|
|
let index = self.inner.word_index();
|
|
|
|
|
|
|
|
let new_index = index.remove_documents(&idset);
|
|
|
|
let new_index = Arc::from(new_index);
|
|
|
|
|
|
|
|
self.inner.update_word_index(new_index)?;
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|