mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 22:14:32 +01:00
feat: Introduce the synonyms concept to the Store trait
This commit is contained in:
parent
e8b2e86007
commit
18736bdcd0
@ -349,7 +349,7 @@ mod tests {
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use sdset::SetBuf;
|
||||
use fst::Set;
|
||||
use fst::{Set, IntoStreamer};
|
||||
|
||||
use crate::DocIndex;
|
||||
use crate::store::Store;
|
||||
@ -357,18 +357,46 @@ mod tests {
|
||||
#[derive(Default)]
|
||||
struct InMemorySetStore {
|
||||
set: Set,
|
||||
synonyms: Set,
|
||||
indexes: HashMap<Vec<u8>, SetBuf<DocIndex>>,
|
||||
alternatives: HashMap<Vec<u8>, Set>,
|
||||
}
|
||||
|
||||
impl Store for InMemorySetStore {
|
||||
type Error = std::io::Error;
|
||||
fn set_from_stream<'f, I, S>(stream: I) -> Set
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=&'a [u8]>,
|
||||
S: 'f + for<'a> fst::Streamer<'a, Item=&'a [u8]>,
|
||||
{
|
||||
let mut builder = fst::SetBuilder::memory();
|
||||
builder.extend_stream(stream);
|
||||
builder.into_inner().and_then(Set::from_bytes).unwrap()
|
||||
}
|
||||
|
||||
fn words(&self) -> Result<&Set, Self::Error> {
|
||||
Ok(&self.set)
|
||||
}
|
||||
fn insert_key(set: &Set, key: &[u8]) -> Set {
|
||||
let unique_key = {
|
||||
let mut builder = fst::SetBuilder::memory();
|
||||
builder.insert(key);
|
||||
builder.into_inner().and_then(Set::from_bytes).unwrap()
|
||||
};
|
||||
|
||||
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
||||
Ok(self.indexes.get(word).cloned())
|
||||
let union_ = set.op().add(unique_key.into_stream()).r#union();
|
||||
|
||||
set_from_stream(union_)
|
||||
}
|
||||
|
||||
fn sdset_into_fstset(set: &sdset::Set<&str>) -> Set {
|
||||
let mut builder = fst::SetBuilder::memory();
|
||||
builder.extend_iter(set.into_iter());
|
||||
builder.into_inner().and_then(Set::from_bytes).unwrap()
|
||||
}
|
||||
|
||||
impl InMemorySetStore {
|
||||
pub fn add_synonym(&mut self, word: &str, new: SetBuf<&str>) {
|
||||
let alternatives = self.alternatives.entry(word.as_bytes().to_vec()).or_default();
|
||||
let new = sdset_into_fstset(&new);
|
||||
*alternatives = set_from_stream(alternatives.op().add(new.into_stream()).r#union());
|
||||
|
||||
self.synonyms = insert_key(&self.synonyms, word.as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
@ -384,11 +412,33 @@ mod tests {
|
||||
|
||||
InMemorySetStore {
|
||||
set: Set::from_iter(tree).unwrap(),
|
||||
synonyms: Set::default(),
|
||||
indexes: map,
|
||||
alternatives: HashMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Store for InMemorySetStore {
|
||||
type Error = std::io::Error;
|
||||
|
||||
fn words(&self) -> Result<&Set, Self::Error> {
|
||||
Ok(&self.set)
|
||||
}
|
||||
|
||||
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
||||
Ok(self.indexes.get(word).cloned())
|
||||
}
|
||||
|
||||
fn synonyms(&self) -> Result<&Set, Self::Error> {
|
||||
Ok(&self.synonyms)
|
||||
}
|
||||
|
||||
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
|
||||
Ok(self.alternatives.get(word).map(|s| Set::from_bytes(s.as_fst().to_vec()).unwrap()))
|
||||
}
|
||||
}
|
||||
|
||||
const fn doc_index(document_id: u64, word_index: u16) -> DocIndex {
|
||||
DocIndex {
|
||||
document_id: DocumentId(document_id),
|
||||
|
@ -8,6 +8,9 @@ pub trait Store {
|
||||
|
||||
fn words(&self) -> Result<&Set, Self::Error>;
|
||||
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>;
|
||||
|
||||
fn synonyms(&self) -> Result<&Set, Self::Error>;
|
||||
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error>;
|
||||
}
|
||||
|
||||
impl<T> Store for &'_ T where T: Store {
|
||||
@ -20,4 +23,12 @@ impl<T> Store for &'_ T where T: Store {
|
||||
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
||||
(*self).word_indexes(word)
|
||||
}
|
||||
|
||||
fn synonyms(&self) -> Result<&Set, Self::Error> {
|
||||
(*self).synonyms()
|
||||
}
|
||||
|
||||
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
|
||||
(*self).alternatives_to(word)
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user