feat: Introduce the DocumentsAddition type

This commit is contained in:
Clément Renault 2019-05-09 14:23:39 +02:00
parent 42e39f6eb5
commit e67ada8823
No known key found for this signature in database
GPG key ID: 0151CDAB43460DAE
5 changed files with 289 additions and 232 deletions

View file

@ -1,78 +1,13 @@
use std::collections::BTreeMap;
use std::convert::TryFrom;
use std::sync::Arc;
use deunicode::deunicode_with_tofu;
use meilidb_core::{DocumentId, DocIndex, Store};
use meilidb_tokenizer::{is_cjk, Tokenizer, SeqTokenizer, Token};
use sdset::{Set, SetBuf};
use sled::Tree;
use zerocopy::{AsBytes, LayoutVerified};
use sdset::SetBuf;
use crate::SchemaAttr;
/// Handle on the sled `Tree` that backs the word index.
///
/// The tree is held behind an `Arc`, so cloning this handle only clones the
/// `Arc` and every clone refers to the same underlying tree.
#[derive(Clone)]
pub struct WordIndexTree(pub Arc<Tree>);
/// Word-index storage on top of a sled tree.
///
/// The word FST lives under the `"fst"` key; the document indexes of each
/// word live under the key made of the `"word-"` prefix followed by the raw
/// word bytes (see `word_key`).
impl Store for WordIndexTree {
    type Error = sled::Error;

    /// Reads the word set stored under the `"fst"` key.
    ///
    /// Returns the default (empty) set when the key is absent.
    ///
    /// # Panics
    ///
    /// Panics when `Fst::from_shared_bytes` rejects the stored bytes.
    fn get_fst(&self) -> Result<fst::Set, Self::Error> {
        let stored = match self.0.get("fst")? {
            Some(stored) => stored,
            None => return Ok(fst::Set::default()),
        };

        // The FST is built over the whole shared buffer: offset 0, full length.
        let shared: Arc<[u8]> = stored.into();
        let length = shared.len();
        let raw = fst::raw::Fst::from_shared_bytes(shared, 0, length).unwrap();

        Ok(fst::Set::from(raw))
    }

    /// Writes the bytes of the given word set under the `"fst"` key.
    fn set_fst(&self, set: &fst::Set) -> Result<(), Self::Error> {
        self.0.set("fst", set.as_fst().to_vec())?;
        Ok(())
    }

    /// Reads the document indexes stored for `word`, or `None` when the word
    /// has no entry.
    ///
    /// The stored bytes are reinterpreted as a slice of `DocIndex` and copied
    /// into an owned `SetBuf` without any ordering check (`new_unchecked`).
    ///
    /// # Panics
    ///
    /// Panics when `LayoutVerified::new_slice` rejects the stored bytes
    /// (presumably a size or alignment mismatch with `DocIndex` -- TODO confirm
    /// against the zerocopy documentation).
    fn get_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
        let stored = match self.0.get(word_key(word))? {
            Some(stored) => stored,
            None => return Ok(None),
        };

        let layout = LayoutVerified::new_slice(stored.as_ref()).unwrap();
        let doc_indexes = layout.into_slice().to_vec();

        Ok(Some(SetBuf::new_unchecked(doc_indexes)))
    }

    /// Stores the raw bytes of `indexes` as the entry of `word`.
    fn set_indexes(&self, word: &[u8], indexes: &Set<DocIndex>) -> Result<(), Self::Error> {
        let raw = indexes.as_slice().as_bytes();
        self.0.set(word_key(word), raw)?;
        Ok(())
    }

    /// Deletes the entry of `word` from the tree.
    fn del_indexes(&self, word: &[u8]) -> Result<(), Self::Error> {
        self.0.del(word_key(word))?;
        Ok(())
    }
}

/// Builds the tree key of a word: the `"word-"` prefix followed by the word bytes.
fn word_key(word: &[u8]) -> Vec<u8> {
    let mut key = Vec::from("word-");
    key.extend_from_slice(word);
    key
}
/// Raw bytes of an indexed word; used as the key type of the map returned by
/// `Indexer::build`.
type Word = Vec<u8>; // TODO make it be a SmallVec
pub struct Indexer {
@ -115,6 +50,7 @@ impl Indexer {
/// Consumes the indexer and returns, for every indexed word, its set of
/// document indexes.
///
/// Each word's indexes are sorted and deduplicated before being wrapped in a
/// `SetBuf`, which is why the unchecked constructor is used.
pub fn build(self) -> BTreeMap<Word, SetBuf<DocIndex>> {
    let mut built = BTreeMap::new();

    for (word, mut doc_indexes) in self.indexed {
        doc_indexes.sort_unstable();
        doc_indexes.dedup();
        built.insert(word, SetBuf::new_unchecked(doc_indexes));
    }

    built
}