Split position DB into fid and relative position DB

This commit is contained in:
Loïc Lecrenier 2023-03-23 09:22:01 +01:00
parent 56b7209f26
commit 9b2653427d
11 changed files with 162 additions and 135 deletions

View file

@ -19,12 +19,12 @@ use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{ScriptLanguageCodec, StrRefCodec};
use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
Search, StrBEU32Codec, U8StrStrCodec, BEU16, BEU32,
Search, U8StrStrCodec, BEU16, BEU32,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@ -76,7 +76,9 @@ pub mod db_name {
pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids";
pub const PREFIX_WORD_PAIR_PROXIMITY_DOCIDS: &str = "prefix-word-pair-proximity-docids";
pub const WORD_POSITION_DOCIDS: &str = "word-position-docids";
pub const WORD_FIELD_ID_DOCIDS: &str = "word-field-id-docids";
pub const WORD_PREFIX_POSITION_DOCIDS: &str = "word-prefix-position-docids";
pub const WORD_PREFIX_FIELD_ID_DOCIDS: &str = "word-prefix-field-id-docids";
pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";
pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
@ -118,11 +120,16 @@ pub struct Index {
pub prefix_word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
/// Maps the word and the position with the docids that corresponds to it.
pub word_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
pub word_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
/// Maps the word and the field id with the docids that corresponds to it.
pub word_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
/// Maps the field id and the word count with the docids that corresponds to it.
pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
/// Maps the position of a word prefix with all the docids where this prefix appears.
pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
/// Maps the word and the field id with the docids that corresponds to it.
pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
/// Maps the script and language with all the docids that corresponds to it.
pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>,
@ -153,7 +160,7 @@ impl Index {
) -> Result<Index> {
use db_name::*;
options.max_dbs(19);
options.max_dbs(21);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
@ -170,8 +177,10 @@ impl Index {
let prefix_word_pair_proximity_docids =
env.create_database(Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
let word_position_docids = env.create_database(Some(WORD_POSITION_DOCIDS))?;
let word_fid_docids = env.create_database(Some(WORD_FIELD_ID_DOCIDS))?;
let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?;
let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
@ -196,7 +205,9 @@ impl Index {
word_prefix_pair_proximity_docids,
prefix_word_pair_proximity_docids,
word_position_docids,
word_fid_docids,
word_prefix_position_docids,
word_prefix_fid_docids,
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,