Introduce the getters and setters for the words prefixes FST

This commit is contained in:
Clément Renault 2021-02-03 10:30:33 +01:00
parent 48b470140b
commit b3a21d5a50
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 26 additions and 1 deletions

View File

@ -311,6 +311,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
env: _env, env: _env,
main, main,
word_docids, word_docids,
word_prefix_docids,
docid_word_positions, docid_word_positions,
word_pair_proximity_docids, word_pair_proximity_docids,
facet_field_id_value_docids, facet_field_id_value_docids,

View File

@ -27,6 +27,7 @@ pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
pub const WORDS_FST_KEY: &str = "words-fst"; pub const WORDS_FST_KEY: &str = "words-fst";
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
#[derive(Clone)] #[derive(Clone)]
pub struct Index { pub struct Index {
@ -36,6 +37,8 @@ pub struct Index {
pub main: PolyDatabase, pub main: PolyDatabase,
/// A word and all the documents ids containing the word. /// A word and all the documents ids containing the word.
pub word_docids: Database<Str, RoaringBitmapCodec>, pub word_docids: Database<Str, RoaringBitmapCodec>,
/// A prefix of a word and all the documents ids containing this prefix.
pub word_prefix_docids: Database<Str, RoaringBitmapCodec>,
/// Maps a word and a document id (u32) to all the positions where the given word appears. /// Maps a word and a document id (u32) to all the positions where the given word appears.
pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>, pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
/// Maps the proximity between a pair of words with all the docids where this relation appears. /// Maps the proximity between a pair of words with all the docids where this relation appears.
@ -50,11 +53,12 @@ pub struct Index {
impl Index { impl Index {
pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result<Index> { pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result<Index> {
options.max_dbs(7); options.max_dbs(8);
let env = options.open(path)?; let env = options.open(path)?;
let main = env.create_poly_database(Some("main"))?; let main = env.create_poly_database(Some("main"))?;
let word_docids = env.create_database(Some("word-docids"))?; let word_docids = env.create_database(Some("word-docids"))?;
let word_prefix_docids = env.create_database(Some("word-prefix-docids"))?;
let docid_word_positions = env.create_database(Some("docid-word-positions"))?; let docid_word_positions = env.create_database(Some("docid-word-positions"))?;
let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?; let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?;
let facet_field_id_value_docids = env.create_database(Some("facet-field-id-value-docids"))?; let facet_field_id_value_docids = env.create_database(Some("facet-field-id-value-docids"))?;
@ -65,6 +69,7 @@ impl Index {
env, env,
main, main,
word_docids, word_docids,
word_prefix_docids,
docid_word_positions, docid_word_positions,
word_pair_proximity_docids, word_pair_proximity_docids,
facet_field_id_value_docids, facet_field_id_value_docids,
@ -328,6 +333,23 @@ impl Index {
} }
} }
/* words prefixes fst */
/// Stores the words prefixes dictionary of the engine.
///
/// The raw bytes of the given FST set are written into the `main` database
/// under the `WORDS_PREFIXES_FST_KEY` entry, using the provided write transaction.
///
/// # Errors
///
/// Returns the `heed` error raised by the underlying `put` operation, if any.
pub fn put_words_prefixes_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
    // The set is persisted as the byte representation of its underlying FST.
    let raw_fst = fst.as_fst().as_bytes();
    self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_PREFIXES_FST_KEY, raw_fst)
}
/// Loads the words prefixes dictionary of the engine.
///
/// The bytes stored under `WORDS_PREFIXES_FST_KEY` in the `main` database are
/// wrapped into an FST set that borrows them for the lifetime of the read
/// transaction. When no entry is stored, a default set with owned data is
/// returned instead.
///
/// # Errors
///
/// Propagates any error from the database lookup, from building the set out
/// of the stored bytes, or from the data mapping step.
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<fst::Set<Cow<'t, [u8]>>> {
    let stored = self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_PREFIXES_FST_KEY)?;
    let prefixes: fst::Set<Cow<'t, [u8]>> = match stored {
        // Entry present: keep borrowing the bytes from the transaction.
        Some(bytes) => fst::Set::new(bytes)?.map_data(Cow::Borrowed)?,
        // No entry yet: fall back to the default set, which owns its data.
        None => fst::Set::default().map_data(Cow::Owned)?,
    };
    Ok(prefixes)
}
/* documents */
/// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing. /// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
pub fn documents<'t>( pub fn documents<'t>(
&self, &self,

View File

@ -22,6 +22,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
env: _env, env: _env,
main: _main, main: _main,
word_docids, word_docids,
word_prefix_docids,
docid_word_positions, docid_word_positions,
word_pair_proximity_docids, word_pair_proximity_docids,
facet_field_id_value_docids, facet_field_id_value_docids,

View File

@ -79,6 +79,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
env: _env, env: _env,
main: _main, main: _main,
word_docids, word_docids,
word_prefix_docids,
docid_word_positions, docid_word_positions,
word_pair_proximity_docids, word_pair_proximity_docids,
facet_field_id_value_docids, facet_field_id_value_docids,