Port all tests to the TempDatabase struct

This commit is contained in:
Clément Renault 2019-10-08 16:16:30 +02:00
parent c514692233
commit 175461c13a
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 986 additions and 825 deletions

View File

@ -9,6 +9,7 @@ use levenshtein_automata::DFA;
use meilidb_tokenizer::{split_query_string, is_cjk}; use meilidb_tokenizer::{split_query_string, is_cjk};
use crate::store; use crate::store;
use crate::error::MResult;
use self::dfa::{build_dfa, build_prefix_dfa}; use self::dfa::{build_dfa, build_prefix_dfa};
use self::query_enhancer::QueryEnhancerBuilder; use self::query_enhancer::QueryEnhancerBuilder;
@ -24,11 +25,18 @@ impl AutomatonProducer {
pub fn new( pub fn new(
reader: &impl rkv::Readable, reader: &impl rkv::Readable,
query: &str, query: &str,
main_store: store::Main,
synonyms_store: store::Synonyms, synonyms_store: store::Synonyms,
) -> (AutomatonProducer, QueryEnhancer) ) -> MResult<(AutomatonProducer, QueryEnhancer)>
{ {
let (automatons, query_enhancer) = generate_automatons(reader, query, synonyms_store).unwrap(); let (automatons, query_enhancer) = generate_automatons(
(AutomatonProducer { automatons }, query_enhancer) reader,
query,
main_store,
synonyms_store,
)?;
Ok((AutomatonProducer { automatons }, query_enhancer))
} }
pub fn into_iter(self) -> vec::IntoIter<Vec<Automaton>> { pub fn into_iter(self) -> vec::IntoIter<Vec<Automaton>> {
@ -102,12 +110,16 @@ pub fn normalize_str(string: &str) -> String {
fn generate_automatons( fn generate_automatons(
reader: &impl rkv::Readable, reader: &impl rkv::Readable,
query: &str, query: &str,
main_store: store::Main,
synonym_store: store::Synonyms, synonym_store: store::Synonyms,
) -> Result<(Vec<Vec<Automaton>>, QueryEnhancer), rkv::StoreError> ) -> MResult<(Vec<Vec<Automaton>>, QueryEnhancer)>
{ {
let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace); let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect(); let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
let synonyms = synonym_store.synonyms_fst(reader)?; let synonyms = match main_store.synonyms_fst(reader)? {
Some(synonym) => synonym,
None => fst::Set::default(),
};
let mut automaton_index = 0; let mut automaton_index = 0;
let mut automatons = Vec::new(); let mut automatons = Vec::new();
@ -157,7 +169,7 @@ fn generate_automatons(
let base_nb_words = split_query_string(base).count(); let base_nb_words = split_query_string(base).count();
if ngram_nb_words != base_nb_words { continue } if ngram_nb_words != base_nb_words { continue }
if let Some(synonyms) = synonym_store.alternatives_to(reader, base.as_bytes())? { if let Some(synonyms) = synonym_store.synonyms(reader, base.as_bytes())? {
let mut stream = synonyms.into_stream(); let mut stream = synonyms.into_stream();
while let Some(synonyms) = stream.next() { while let Some(synonyms) = stream.next() {

File diff suppressed because it is too large Load Diff

View File

@ -99,6 +99,33 @@ impl Main {
} }
} }
/// Stores the raw bytes of the given synonyms set as a blob under
/// `SYNONYMS_KEY` in the main store.
///
/// # Errors
///
/// Any error returned by the underlying store `put` is converted and
/// propagated to the caller.
pub fn put_synonyms_fst(
    &self,
    writer: &mut rkv::Writer,
    fst: &fst::Set,
) -> MResult<()>
{
    let raw_bytes = fst.as_fst().as_bytes();
    self.main.put(writer, SYNONYMS_KEY, &rkv::Value::Blob(raw_bytes))?;
    Ok(())
}
/// Reads the synonyms set stored under `SYNONYMS_KEY` in the main store.
///
/// Returns `Ok(None)` when nothing is stored under that key; otherwise the
/// stored blob is wrapped into a shared buffer and turned into an `fst::Set`.
///
/// # Errors
///
/// Propagates failures of the store lookup and of the fst construction.
///
/// # Panics
///
/// Panics if the value found under `SYNONYMS_KEY` is not a blob.
pub fn synonyms_fst(
    &self,
    reader: &impl rkv::Readable,
) -> MResult<Option<fst::Set>>
{
    let blob = match self.main.get(reader, SYNONYMS_KEY)? {
        None => return Ok(None),
        Some(Value::Blob(bytes)) => bytes,
        Some(value) => panic!("invalid type {:?}", value),
    };

    // The length is captured before the slice is moved into the shared buffer.
    let blob_len = blob.len();
    let shared = Arc::from(blob);
    let raw = fst::raw::Fst::from_shared_bytes(shared, 0, blob_len)?;
    Ok(Some(fst::Set::from(raw)))
}
pub fn put_number_of_documents<F: Fn(u64) -> u64>( pub fn put_number_of_documents<F: Fn(u64) -> u64>(
&self, &self,
writer: &mut rkv::Writer, writer: &mut rkv::Writer,

View File

@ -1,23 +1,38 @@
use std::sync::Arc;
use crate::error::MResult;
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
pub struct Synonyms { pub struct Synonyms {
pub(crate) synonyms: rkv::SingleStore, pub(crate) synonyms: rkv::SingleStore,
} }
impl Synonyms { impl Synonyms {
pub fn synonyms_fst( pub fn put_synonyms(
&self, &self,
reader: &impl rkv::Readable, writer: &mut rkv::Writer,
) -> Result<fst::Set, rkv::StoreError> word: &[u8],
synonyms: &fst::Set,
) -> Result<(), rkv::StoreError>
{ {
Ok(fst::Set::default()) let blob = rkv::Value::Blob(synonyms.as_fst().as_bytes());
self.synonyms.put(writer, word, &blob)
} }
pub fn alternatives_to( pub fn synonyms(
&self, &self,
reader: &impl rkv::Readable, reader: &impl rkv::Readable,
word: &[u8], word: &[u8],
) -> Result<Option<fst::Set>, rkv::StoreError> ) -> MResult<Option<fst::Set>>
{ {
unimplemented!() match self.synonyms.get(reader, word)? {
Some(rkv::Value::Blob(bytes)) => {
let len = bytes.len();
let bytes = Arc::from(bytes);
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
} }
} }