mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 22:14:32 +01:00
feat: Create types to edit synonyms and keep them in the database
This commit is contained in:
parent
0633f16b4d
commit
a76c00a787
@ -120,11 +120,12 @@ impl<'a> DocumentsAddition<'a> {
|
||||
|
||||
// update the "consistent" view of the Index
|
||||
let ranked_map = self.ranked_map;
|
||||
let synonyms = fst::Set::from_bytes(lease_inner.synonyms.as_fst().to_vec()).unwrap(); // clone()
|
||||
let schema = lease_inner.schema.clone();
|
||||
let raw = lease_inner.raw.clone();
|
||||
lease_inner.raw.compact();
|
||||
|
||||
let inner = InnerIndex { words, schema, ranked_map, raw };
|
||||
let inner = InnerIndex { words, synonyms, schema, ranked_map, raw };
|
||||
self.inner.0.store(Arc::new(inner));
|
||||
|
||||
Ok(())
|
||||
|
@ -119,11 +119,12 @@ impl<'a> DocumentsDeletion<'a> {
|
||||
|
||||
// update the "consistent" view of the Index
|
||||
let ranked_map = lease_inner.ranked_map.clone();
|
||||
let synonyms = fst::Set::from_bytes(lease_inner.synonyms.as_fst().to_vec()).unwrap(); // clone()
|
||||
let schema = lease_inner.schema.clone();
|
||||
let raw = lease_inner.raw.clone();
|
||||
lease_inner.raw.compact();
|
||||
|
||||
let inner = InnerIndex { words, schema, ranked_map, raw };
|
||||
let inner = InnerIndex { words, synonyms, schema, ranked_map, raw };
|
||||
self.inner.0.store(Arc::new(inner));
|
||||
|
||||
Ok(())
|
||||
|
@ -13,7 +13,11 @@ use crate::ranked_map::RankedMap;
|
||||
use crate::serde::Deserializer;
|
||||
|
||||
use super::{Error, CustomSettings};
|
||||
use super::{RawIndex, DocumentsAddition, DocumentsDeletion};
|
||||
use super::{
|
||||
RawIndex,
|
||||
DocumentsAddition, DocumentsDeletion,
|
||||
SynonymsAddition, SynonymsDeletion,
|
||||
};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct IndexStats {
|
||||
@ -27,6 +31,7 @@ pub struct Index(pub ArcSwap<InnerIndex>);
|
||||
|
||||
pub struct InnerIndex {
|
||||
pub words: fst::Set,
|
||||
pub synonyms: fst::Set,
|
||||
pub schema: Schema,
|
||||
pub ranked_map: RankedMap,
|
||||
pub raw: RawIndex, // TODO this will be a snapshot in the future
|
||||
@ -39,6 +44,11 @@ impl Index {
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
|
||||
let synonyms = match raw.main.synonyms_set()? {
|
||||
Some(synonyms) => synonyms,
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
|
||||
let schema = match raw.main.schema()? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
@ -49,7 +59,7 @@ impl Index {
|
||||
None => RankedMap::default(),
|
||||
};
|
||||
|
||||
let inner = InnerIndex { words, schema, ranked_map, raw };
|
||||
let inner = InnerIndex { words, synonyms, schema, ranked_map, raw };
|
||||
let index = Index(ArcSwap::new(Arc::new(inner)));
|
||||
|
||||
Ok(index)
|
||||
@ -101,6 +111,14 @@ impl Index {
|
||||
DocumentsDeletion::new(self, ranked_map)
|
||||
}
|
||||
|
||||
/// Creates a `SynonymsAddition` builder bound to this index.
///
/// The builder only buffers the synonyms it is given; the index is updated
/// when `SynonymsAddition::finalize` is called.
pub fn synonyms_addition(&self) -> SynonymsAddition {
|
||||
SynonymsAddition::new(self)
|
||||
}
|
||||
|
||||
/// Creates a `SynonymsDeletion` builder bound to this index.
///
/// The builder only buffers the synonyms to remove; the index is updated
/// when `SynonymsDeletion::finalize` is called.
pub fn synonyms_deletion(&self) -> SynonymsDeletion {
|
||||
SynonymsDeletion::new(self)
|
||||
}
|
||||
|
||||
pub fn document<T>(
|
||||
&self,
|
||||
fields: Option<&HashSet<&str>>,
|
||||
@ -141,4 +159,12 @@ impl Store for IndexLease {
|
||||
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
||||
Ok(self.0.raw.words.doc_indexes(word)?)
|
||||
}
|
||||
|
||||
/// Borrows the `synonyms` set held by the wrapped inner index.
///
/// This reads the in-memory field and never fails; the `Result` wrapper
/// presumably exists to satisfy the `Store` trait signature — confirm
/// against the trait definition.
fn synonyms(&self) -> Result<&fst::Set, Self::Error> {
|
||||
Ok(&self.0.synonyms)
|
||||
}
|
||||
|
||||
/// Looks up the set of alternatives registered for `word`.
///
/// Delegates to `raw.synonyms.alternatives_to`, so unlike `synonyms` this
/// goes through the raw synonyms store rather than the in-memory set.
/// Returns `Ok(None)` when that lookup finds nothing for `word`.
fn alternatives_to(&self, word: &[u8]) -> Result<Option<fst::Set>, Self::Error> {
|
||||
Ok(self.0.raw.synonyms.alternatives_to(word)?)
|
||||
}
|
||||
}
|
||||
|
@ -44,6 +44,22 @@ impl MainIndex {
|
||||
self.0.set("words", value.as_fst().as_bytes()).map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn synonyms_set(&self) -> Result<Option<fst::Set>, Error> {
|
||||
match self.0.get_pinned("synonyms")? {
|
||||
Some(bytes) => {
|
||||
let len = bytes.len();
|
||||
let value = Arc::from(bytes.as_ref());
|
||||
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
|
||||
Ok(Some(fst::Set::from(fst)))
|
||||
},
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Stores `value` under the `"synonyms"` key as the raw bytes of its
/// underlying fst.
///
/// # Errors
///
/// Returns the error of the underlying `set` call, converted into `Error`.
pub fn set_synonyms_set(&self, value: &fst::Set) -> Result<(), Error> {
|
||||
self.0.set("synonyms", value.as_fst().as_bytes()).map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
|
||||
match self.0.get_pinned("ranked-map")? {
|
||||
Some(bytes) => {
|
||||
|
@ -13,6 +13,9 @@ mod error;
|
||||
mod index;
|
||||
mod main_index;
|
||||
mod raw_index;
|
||||
mod synonyms_addition;
|
||||
mod synonyms_deletion;
|
||||
mod synonyms_index;
|
||||
mod words_index;
|
||||
|
||||
pub use self::error::Error;
|
||||
@ -22,11 +25,14 @@ pub use self::custom_settings::CustomSettings;
|
||||
use self::docs_words_index::DocsWordsIndex;
|
||||
use self::documents_addition::DocumentsAddition;
|
||||
use self::documents_deletion::DocumentsDeletion;
|
||||
use self::synonyms_addition::SynonymsAddition;
|
||||
use self::synonyms_deletion::SynonymsDeletion;
|
||||
use self::documents_index::DocumentsIndex;
|
||||
use self::index::InnerIndex;
|
||||
use self::main_index::MainIndex;
|
||||
use self::raw_index::{RawIndex, InnerRawIndex};
|
||||
use self::words_index::WordsIndex;
|
||||
use self::synonyms_index::SynonymsIndex;
|
||||
|
||||
pub struct Database {
|
||||
cache: RwLock<HashMap<String, Arc<Index>>>,
|
||||
@ -99,6 +105,12 @@ impl Database {
|
||||
MainIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(name)))
|
||||
};
|
||||
|
||||
let synonyms = {
|
||||
let cf_name = format!("{}-synonyms", name);
|
||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
||||
SynonymsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let words = {
|
||||
let cf_name = format!("{}-words", name);
|
||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
||||
@ -123,7 +135,7 @@ impl Database {
|
||||
CustomSettings(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let raw_index = RawIndex { main, words, docs_words, documents, custom };
|
||||
let raw_index = RawIndex { main, synonyms, words, docs_words, documents, custom };
|
||||
let index = Index::from_raw(raw_index)?;
|
||||
|
||||
vacant.insert(Arc::new(index)).clone()
|
||||
@ -154,6 +166,12 @@ impl Database {
|
||||
|
||||
main.set_schema(&schema)?;
|
||||
|
||||
let synonyms = {
|
||||
let cf_name = format!("{}-synonyms", name);
|
||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
||||
SynonymsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let words = {
|
||||
let cf_name = format!("{}-words", name);
|
||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
||||
@ -182,7 +200,7 @@ impl Database {
|
||||
indexes.insert(name.to_string());
|
||||
self.set_indexes(&indexes)?;
|
||||
|
||||
let raw_index = RawIndex { main, words, docs_words, documents, custom };
|
||||
let raw_index = RawIndex { main, synonyms, words, docs_words, documents, custom };
|
||||
let index = Index::from_raw(raw_index)?;
|
||||
|
||||
vacant.insert(Arc::new(index)).clone()
|
||||
|
@ -1,9 +1,10 @@
|
||||
use std::sync::Arc;
|
||||
use super::{MainIndex, WordsIndex, DocsWordsIndex, DocumentsIndex, CustomSettings};
|
||||
use super::{MainIndex, SynonymsIndex, WordsIndex, DocsWordsIndex, DocumentsIndex, CustomSettings};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RawIndex {
|
||||
pub main: MainIndex,
|
||||
pub synonyms: SynonymsIndex,
|
||||
pub words: WordsIndex,
|
||||
pub docs_words: DocsWordsIndex,
|
||||
pub documents: DocumentsIndex,
|
||||
@ -13,6 +14,7 @@ pub struct RawIndex {
|
||||
impl RawIndex {
|
||||
pub(crate) fn compact(&self) {
|
||||
self.main.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
||||
self.synonyms.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
||||
self.words.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
||||
self.docs_words.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
||||
self.documents.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
||||
|
83
meilidb-data/src/database/synonyms_addition.rs
Normal file
83
meilidb-data/src/database/synonyms_addition.rs
Normal file
@ -0,0 +1,83 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use fst::{SetBuilder, set::OpBuilder};
|
||||
use sdset::SetBuf;
|
||||
|
||||
use crate::database::index::InnerIndex;
|
||||
use super::{Error, Index};
|
||||
|
||||
/// A pending batch of synonyms to add to an `Index`.
///
/// Entries are buffered in memory and only applied by `finalize`.
pub struct SynonymsAddition<'a> {
|
||||
// Index that `finalize` reads from and updates.
inner: &'a Index,
|
||||
// Buffered additions: synonym word -> alternatives queued for it.
// A BTreeMap keeps the synonym words in sorted order when iterated.
synonyms: BTreeMap<String, Vec<String>>,
|
||||
}
|
||||
|
||||
impl<'a> SynonymsAddition<'a> {
|
||||
pub fn new(inner: &'a Index) -> SynonymsAddition<'a> {
|
||||
SynonymsAddition { inner, synonyms: BTreeMap::new() }
|
||||
}
|
||||
|
||||
pub fn add_synonym<I>(&mut self, synonym: String, alternatives: I)
|
||||
where I: Iterator<Item=String>,
|
||||
{
|
||||
self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives);
|
||||
}
|
||||
|
||||
pub fn finalize(self) -> Result<(), Error> {
|
||||
let lease_inner = self.inner.lease_inner();
|
||||
let synonyms = &lease_inner.raw.synonyms;
|
||||
let main = &lease_inner.raw.main;
|
||||
|
||||
let mut synonyms_builder = SetBuilder::memory();
|
||||
|
||||
for (synonym, mut alternatives) in self.synonyms {
|
||||
synonyms_builder.insert(&synonym).unwrap();
|
||||
|
||||
let alternatives = {
|
||||
alternatives.iter_mut().for_each(|s| *s = s.to_lowercase());
|
||||
let alternatives = SetBuf::from_dirty(alternatives);
|
||||
|
||||
let mut alternatives_builder = SetBuilder::memory();
|
||||
alternatives_builder.extend_iter(alternatives).unwrap();
|
||||
alternatives_builder.into_inner().unwrap()
|
||||
};
|
||||
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
|
||||
}
|
||||
|
||||
let delta_synonyms = synonyms_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
let synonyms = match main.synonyms_set()? {
|
||||
Some(synonyms) => {
|
||||
let op = OpBuilder::new()
|
||||
.add(synonyms.stream())
|
||||
.add(delta_synonyms.stream())
|
||||
.r#union();
|
||||
|
||||
let mut synonyms_builder = SetBuilder::memory();
|
||||
synonyms_builder.extend_stream(op).unwrap();
|
||||
synonyms_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap()
|
||||
},
|
||||
None => delta_synonyms,
|
||||
};
|
||||
|
||||
main.set_synonyms_set(&synonyms)?;
|
||||
|
||||
// update the "consistent" view of the Index
|
||||
let words = main.words_set()?.unwrap_or_default();
|
||||
let ranked_map = lease_inner.ranked_map.clone();;
|
||||
let schema = lease_inner.schema.clone();
|
||||
let raw = lease_inner.raw.clone();
|
||||
lease_inner.raw.compact();
|
||||
|
||||
let inner = InnerIndex { words, synonyms, schema, ranked_map, raw };
|
||||
self.inner.0.store(Arc::new(inner));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
71
meilidb-data/src/database/synonyms_deletion.rs
Normal file
71
meilidb-data/src/database/synonyms_deletion.rs
Normal file
@ -0,0 +1,71 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use fst::{SetBuilder, set::OpBuilder};
|
||||
|
||||
use crate::database::index::InnerIndex;
|
||||
use super::{Error, Index};
|
||||
|
||||
/// A pending batch of synonyms to remove from an `Index`.
///
/// Entries are buffered in memory and only applied by `finalize`.
pub struct SynonymsDeletion<'a> {
|
||||
// Index that `finalize` reads from and updates.
inner: &'a Index,
|
||||
// Synonym words whose alternatives must be deleted; a BTreeSet keeps
// them sorted and unique when iterated.
synonyms: BTreeSet<String>,
|
||||
}
|
||||
|
||||
impl<'a> SynonymsDeletion<'a> {
|
||||
pub fn new(inner: &'a Index) -> SynonymsDeletion<'a> {
|
||||
SynonymsDeletion { inner, synonyms: BTreeSet::new() }
|
||||
}
|
||||
|
||||
pub fn delete_alternatives_of<I>(&mut self, synonym: String) {
|
||||
self.synonyms.insert(synonym);
|
||||
}
|
||||
|
||||
pub fn finalize(self) -> Result<(), Error> {
|
||||
let lease_inner = self.inner.lease_inner();
|
||||
let synonyms = &lease_inner.raw.synonyms;
|
||||
let main = &lease_inner.raw.main;
|
||||
|
||||
let mut synonyms_builder = SetBuilder::memory();
|
||||
|
||||
for synonym in self.synonyms {
|
||||
synonyms_builder.insert(&synonym).unwrap();
|
||||
synonyms.del_alternatives_of(synonym.as_bytes())?;
|
||||
}
|
||||
|
||||
let delta_synonyms = synonyms_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
let synonyms = match main.synonyms_set()? {
|
||||
Some(synonyms) => {
|
||||
let op = OpBuilder::new()
|
||||
.add(synonyms.stream())
|
||||
.add(delta_synonyms.stream())
|
||||
.difference();
|
||||
|
||||
let mut synonyms_builder = SetBuilder::memory();
|
||||
synonyms_builder.extend_stream(op).unwrap();
|
||||
synonyms_builder
|
||||
.into_inner()
|
||||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap()
|
||||
},
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
|
||||
main.set_synonyms_set(&synonyms)?;
|
||||
|
||||
// update the "consistent" view of the Index
|
||||
let words = main.words_set()?.unwrap_or_default();
|
||||
let ranked_map = lease_inner.ranked_map.clone();
|
||||
let schema = lease_inner.schema.clone();
|
||||
let raw = lease_inner.raw.clone();
|
||||
lease_inner.raw.compact();
|
||||
|
||||
let inner = InnerIndex { words, synonyms, schema, ranked_map, raw };
|
||||
self.inner.0.store(Arc::new(inner));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
23
meilidb-data/src/database/synonyms_index.rs
Normal file
23
meilidb-data/src/database/synonyms_index.rs
Normal file
@ -0,0 +1,23 @@
|
||||
use crate::database::raw_index::InnerRawIndex;
|
||||
|
||||
/// Key-value store mapping a word (as bytes) to the serialized fst set of
/// its alternatives, backed by an `InnerRawIndex`.
#[derive(Clone)]
|
||||
pub struct SynonymsIndex(pub(crate) InnerRawIndex);
|
||||
|
||||
impl SynonymsIndex {
|
||||
pub fn alternatives_to(&self, word: &[u8]) -> Result<Option<fst::Set>, rocksdb::Error> {
|
||||
match self.0.get(word)? {
|
||||
Some(vector) => Ok(Some(fst::Set::from_bytes(vector.to_vec()).unwrap())),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_alternatives_to(&self, word: &[u8], value: Vec<u8>) -> Result<(), rocksdb::Error> {
|
||||
self.0.set(word, value)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn del_alternatives_of(&self, word: &[u8]) -> Result<(), rocksdb::Error> {
|
||||
self.0.delete(word)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user