From 18736bdcd0fb5e7444f724595204ac8ff7b13b44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Thu, 13 Jun 2019 16:20:01 +0200
Subject: [PATCH] feat: Introduce the synonyms concept to the Store trait

---
 meilidb-core/src/query_builder.rs | 66 +++++++++++++++++++++++++++----
 meilidb-core/src/store.rs         | 11 ++++++
 2 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index 6e8194e2f..7c089c8dc 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -349,7 +349,7 @@ mod tests {
     use std::iter::FromIterator;
 
     use sdset::SetBuf;
-    use fst::Set;
+    use fst::{Set, IntoStreamer};
 
     use crate::DocIndex;
     use crate::store::Store;
@@ -357,18 +357,46 @@ mod tests {
     #[derive(Default)]
     struct InMemorySetStore {
         set: Set,
+        synonyms: Set,
         indexes: HashMap<Vec<u8>, SetBuf<DocIndex>>,
+        alternatives: HashMap<Vec<u8>, Set>,
     }
 
-    impl Store for InMemorySetStore {
-        type Error = std::io::Error;
+    fn set_from_stream<'f, I, S>(stream: I) -> Set
+    where
+        I: for<'a> fst::IntoStreamer<'a, Into=S, Item=&'a [u8]>,
+        S: 'f + for<'a> fst::Streamer<'a, Item=&'a [u8]>,
+    {
+        let mut builder = fst::SetBuilder::memory();
+        builder.extend_stream(stream);
+        builder.into_inner().and_then(Set::from_bytes).unwrap()
+    }
 
-        fn words(&self) -> Result<&Set, Self::Error> {
-            Ok(&self.set)
-        }
+    fn insert_key(set: &Set, key: &[u8]) -> Set {
+        let unique_key = {
+            let mut builder = fst::SetBuilder::memory();
+            builder.insert(key);
+            builder.into_inner().and_then(Set::from_bytes).unwrap()
+        };
 
-        fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
-            Ok(self.indexes.get(word).cloned())
+        let union_ = set.op().add(unique_key.into_stream()).r#union();
+
+        set_from_stream(union_)
+    }
+
+    fn sdset_into_fstset(set: &sdset::Set<&str>) -> Set {
+        let mut builder = fst::SetBuilder::memory();
+        builder.extend_iter(set.into_iter());
+        builder.into_inner().and_then(Set::from_bytes).unwrap()
+    }
+
+    impl InMemorySetStore {
+        pub fn add_synonym(&mut self, word: &str, new: SetBuf<&str>) {
+            let alternatives = self.alternatives.entry(word.as_bytes().to_vec()).or_default();
+            let new = sdset_into_fstset(&new);
+            *alternatives = set_from_stream(alternatives.op().add(new.into_stream()).r#union());
+
+            self.synonyms = insert_key(&self.synonyms, word.as_bytes());
         }
     }
 
@@ -384,11 +412,33 @@ mod tests {
 
             InMemorySetStore {
                 set: Set::from_iter(tree).unwrap(),
+                synonyms: Set::default(),
                 indexes: map,
+                alternatives: HashMap::new(),
             }
         }
     }
 
+    impl Store for InMemorySetStore {
+        type Error = std::io::Error;
+
+        fn words(&self) -> Result<&Set, Self::Error> {
+            Ok(&self.set)
+        }
+
+        fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
+            Ok(self.indexes.get(word).cloned())
+        }
+
+        fn synonyms(&self) -> Result<&Set, Self::Error> {
+            Ok(&self.synonyms)
+        }
+
+        fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
+            Ok(self.alternatives.get(word).map(|s| Set::from_bytes(s.as_fst().to_vec()).unwrap()))
+        }
+    }
+
     const fn doc_index(document_id: u64, word_index: u16) -> DocIndex {
         DocIndex {
             document_id: DocumentId(document_id),
diff --git a/meilidb-core/src/store.rs b/meilidb-core/src/store.rs
index 14e95f0cc..6e429a1b4 100644
--- a/meilidb-core/src/store.rs
+++ b/meilidb-core/src/store.rs
@@ -8,6 +8,9 @@ pub trait Store {
 
     fn words(&self) -> Result<&Set, Self::Error>;
     fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>;
+
+    fn synonyms(&self) -> Result<&Set, Self::Error>;
+    fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error>;
 }
 
 impl<T> Store for &'_ T where T: Store {
@@ -20,4 +23,12 @@ impl<T> Store for &'_ T where T: Store {
     fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
         (*self).word_indexes(word)
     }
+
+    fn synonyms(&self) -> Result<&Set, Self::Error> {
+        (*self).synonyms()
+    }
+
+    fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
+        (*self).alternatives_to(word)
+    }
 }