diff --git a/meilidb-core/src/index.rs b/meilidb-core/src/index.rs deleted file mode 100644 index 87b59e682..000000000 --- a/meilidb-core/src/index.rs +++ /dev/null @@ -1,134 +0,0 @@ -use std::collections::BTreeMap; -use fst::{set, IntoStreamer, Streamer}; -use sdset::{Set, SetBuf, SetOperation}; -use sdset::duo::{Union, DifferenceByKey}; -use crate::{DocIndex, DocumentId}; - -pub type Word = Vec; // TODO should be a smallvec - -pub trait Store: Clone { - type Error: std::error::Error; - - fn get_fst(&self) -> Result; - fn set_fst(&self, set: &fst::Set) -> Result<(), Self::Error>; - - fn get_indexes(&self, word: &[u8]) -> Result>, Self::Error>; - fn set_indexes(&self, word: &[u8], indexes: &Set) -> Result<(), Self::Error>; - fn del_indexes(&self, word: &[u8]) -> Result<(), Self::Error>; -} - -pub struct Index { - pub set: fst::Set, - pub store: S, -} - -impl Index -where S: Store, -{ - pub fn from_store(store: S) -> Result, S::Error> { - let set = store.get_fst()?; - Ok(Index { set, store }) - } - - pub fn remove_documents(&self, documents: &Set) -> Result, S::Error> { - let mut buffer = Vec::new(); - let mut builder = fst::SetBuilder::memory(); - let mut stream = self.into_stream(); - - while let Some((input, result)) = stream.next() { - let indexes = match result? { - Some(indexes) => indexes, - None => continue, - }; - - let op = DifferenceByKey::new(&indexes, documents, |x| x.document_id, |x| *x); - buffer.clear(); - op.extend_vec(&mut buffer); - - if buffer.is_empty() { - self.store.del_indexes(input)?; - } else { - builder.insert(input).unwrap(); - let indexes = Set::new_unchecked(&buffer); - self.store.set_indexes(input, indexes)?; - } - } - - let set = builder.into_inner().and_then(fst::Set::from_bytes).unwrap(); - self.store.set_fst(&set)?; - - Ok(Index { set, store: self.store.clone() }) - } - - pub fn insert_indexes(&self, map: BTreeMap>) -> Result, S::Error> { - let mut buffer = Vec::new(); - let mut builder = fst::SetBuilder::memory(); - let set = fst::Set::from_iter(map.keys()).unwrap(); - let mut union_ = self.set.op().add(&set).r#union(); - - while let Some(input) = union_.next() { - let remote = self.store.get_indexes(input)?; - let locale = map.get(input); - - match (remote, locale) { - (Some(remote), Some(locale)) => { - buffer.clear(); - Union::new(&remote, &locale).extend_vec(&mut buffer); - let indexes = Set::new_unchecked(&buffer); - - if !indexes.is_empty() { - self.store.set_indexes(input, indexes)?; - builder.insert(input).unwrap(); - } else { - self.store.del_indexes(input)?; - } - }, - (None, Some(locale)) => { - self.store.set_indexes(input, &locale)?; - builder.insert(input).unwrap(); - }, - (Some(_), None) => { - builder.insert(input).unwrap(); - }, - (None, None) => unreachable!(), - } - } - - let set = builder.into_inner().and_then(fst::Set::from_bytes).unwrap(); - self.store.set_fst(&set)?; - - Ok(Index { set, store: self.store.clone() }) - } -} - -pub struct Stream<'m, S> { - set_stream: set::Stream<'m>, - store: &'m S, -} - -impl<'m, 'a, S> Streamer<'a> for Stream<'m, S> -where S: 'a + Store, -{ - type Item = (&'a [u8], Result>, S::Error>); - - fn next(&'a mut self) -> Option { - match self.set_stream.next() { - Some(input) => Some((input, self.store.get_indexes(input))), - None => None, - } - } -} - -impl<'m, 'a, S> IntoStreamer<'a> for &'m Index -where S: 'a + Store, -{ - type Item = (&'a [u8], Result>, S::Error>); - type Into = Stream<'m, S>; - - fn into_stream(self) -> Self::Into { - Stream { - set_stream: self.set.into_stream(), - store: &self.store, - } - } -} diff --git a/meilidb-core/src/lib.rs b/meilidb-core/src/lib.rs index 783740b1b..838d787dd 100644 --- a/meilidb-core/src/lib.rs +++ b/meilidb-core/src/lib.rs @@ -1,18 +1,18 @@ -pub mod criterion; -mod index; mod automaton; -mod query_builder; mod distinct_map; +mod query_builder; +mod store; +pub mod criterion; use std::sync::Arc; -use serde::{Serialize, Deserialize}; -use slice_group_by::GroupBy; use rayon::slice::ParallelSliceMut; +use serde::{Serialize, Deserialize}; +use slice_group_by::GroupBy; use zerocopy::{AsBytes, FromBytes}; -pub use self::index::{Index, Store}; pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder}; +pub use self::store::Store; /// Represent an internally generated document unique identifier. /// diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs index b5ff3a530..25cd10b2a 100644 --- a/meilidb-core/src/query_builder.rs +++ b/meilidb-core/src/query_builder.rs @@ -1,5 +1,5 @@ use std::hash::Hash; -use std::ops::{Range, Deref}; +use std::ops::Range; use std::rc::Rc; use std::time::Instant; use std::{cmp, mem}; @@ -15,7 +15,7 @@ use crate::automaton::{self, DfaExt, AutomatonExt}; use crate::distinct_map::{DistinctMap, BufferedDistinctMap}; use crate::criterion::Criteria; use crate::raw_documents_from_matches; -use crate::{Match, DocumentId, Index, Store, RawDocument, Document}; +use crate::{Match, DocumentId, Store, RawDocument, Document}; fn generate_automatons(query: &str) -> Vec { let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace); @@ -35,37 +35,37 @@ fn generate_automatons(query: &str) -> Vec { automatons } -pub struct QueryBuilder<'c, I, FI = fn(DocumentId) -> bool> { - index: I, +pub struct QueryBuilder<'c, S, FI = fn(DocumentId) -> bool> { + store: S, criteria: Criteria<'c>, searchable_attrs: Option>, filter: Option, } -impl<'c, I> QueryBuilder<'c, I, fn(DocumentId) -> bool> { - pub fn new(index: I) -> Self { - QueryBuilder::with_criteria(index, Criteria::default()) +impl<'c, S> QueryBuilder<'c, S, fn(DocumentId) -> bool> { + pub fn new(store: S) -> Self { + QueryBuilder::with_criteria(store, Criteria::default()) } - pub fn with_criteria(index: I, criteria: Criteria<'c>) -> Self { - QueryBuilder { index, criteria, searchable_attrs: None, filter: None } + pub fn with_criteria(store: S, criteria: Criteria<'c>) -> Self { + QueryBuilder { store, criteria, searchable_attrs: None, filter: None } } } -impl<'c, I, FI> QueryBuilder<'c, I, FI> +impl<'c, S, FI> QueryBuilder<'c, S, FI> { - pub fn with_filter(self, function: F) -> QueryBuilder<'c, I, F> + pub fn with_filter(self, function: F) -> QueryBuilder<'c, S, F> where F: Fn(DocumentId) -> bool, { QueryBuilder { - index: self.index, + store: self.store, criteria: self.criteria, searchable_attrs: self.searchable_attrs, filter: Some(function) } } - pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'c, I, FI, F> + pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'c, S, FI, F> where F: Fn(DocumentId) -> Option, K: Hash + Eq, { @@ -82,18 +82,18 @@ impl<'c, I, FI> QueryBuilder<'c, I, FI> } } -impl<'c, I, FI, S> QueryBuilder<'c, I, FI> -where I: Deref>, - S: Store, +impl<'c, S, FI> QueryBuilder<'c, S, FI> +where S: Store, { fn query_all(&self, query: &str) -> Result, S::Error> { let automatons = generate_automatons(query); - let fst = self.index.set.as_fst(); + let words = self.store.words()?; + let words = words.as_fst(); let mut stream = { let mut op_builder = fst::raw::OpBuilder::new(); for automaton in &automatons { - let stream = fst.search(automaton); + let stream = words.search(automaton); op_builder.push(stream); } op_builder.r#union() @@ -107,7 +107,7 @@ where I: Deref>, let distance = automaton.eval(input).to_u8(); let is_exact = distance == 0 && input.len() == automaton.query_len(); - let doc_indexes = self.index.store.get_indexes(input)?; + let doc_indexes = self.store.word_indexes(input)?; let doc_indexes = doc_indexes.expect("word doc-indexes not found"); for di in doc_indexes.as_slice() { @@ -137,10 +137,9 @@ where I: Deref>, } } -impl<'c, I, FI, S> QueryBuilder<'c, I, FI> -where I: Deref>, +impl<'c, S, FI> QueryBuilder<'c, S, FI> +where S: Store, FI: Fn(DocumentId) -> bool, - S: Store, { pub fn query(self, query: &str, range: Range) -> Result, S::Error> { // We delegate the filter work to the distinct query builder, @@ -215,12 +214,11 @@ impl<'c, I, FI, FD> DistinctQueryBuilder<'c, I, FI, FD> } } -impl<'c, I, FI, FD, K, S> DistinctQueryBuilder<'c, I, FI, FD> -where I: Deref>, +impl<'c, S, FI, FD, K> DistinctQueryBuilder<'c, S, FI, FD> +where S: Store, FI: Fn(DocumentId) -> bool, FD: Fn(DocumentId) -> Option, K: Hash + Eq, - S: Store, { pub fn query(self, query: &str, range: Range) -> Result, S::Error> { let start = Instant::now(); diff --git a/meilidb-core/src/store.rs b/meilidb-core/src/store.rs new file mode 100644 index 000000000..14e95f0cc --- /dev/null +++ b/meilidb-core/src/store.rs @@ -0,0 +1,23 @@ +use std::error::Error; +use fst::Set; +use sdset::SetBuf; +use crate::DocIndex; + +pub trait Store { + type Error: Error; + + fn words(&self) -> Result<&Set, Self::Error>; + fn word_indexes(&self, word: &[u8]) -> Result>, Self::Error>; +} + +impl Store for &'_ T where T: Store { + type Error = T::Error; + + fn words(&self) -> Result<&Set, Self::Error> { + (*self).words() + } + + fn word_indexes(&self, word: &[u8]) -> Result>, Self::Error> { + (*self).word_indexes(word) + } +}