Prefer using constants for the database names

Kerollmops 2021-06-15 11:06:42 +02:00
parent 78fe4259a9
commit 28c004aa2c
5 changed files with 183 additions and 152 deletions
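
In short, this commit replaces the raw string literals used for the LMDB database names and for the keys of the main database with shared constants, now exposed as milli::index::db_name and milli::index::main_key, so the infos tool and the update code all refer to the same names. A minimal sketch of the resulting pattern, assuming the public db_name module from this diff and simplifying the value codecs to ByteSlice (milli uses its own codecs), could look like this:

// Sketch only: open the environment and two of the databases through the
// shared name constants instead of repeating the raw strings.
use heed::types::{ByteSlice, Str};
use heed::{Database, EnvOpenOptions, PolyDatabase};
use milli::index::db_name::{MAIN, WORD_DOCIDS};

fn open_main_and_word_docids(
    path: &std::path::Path,
) -> heed::Result<(PolyDatabase, Database<Str, ByteSlice>)> {
    let mut options = EnvOpenOptions::new();
    // 14 named databases, matching Index::new in the diff below.
    options.max_dbs(14);
    let env = options.open(path)?;
    // MAIN still evaluates to "main", so existing environments keep opening.
    let main = env.create_poly_database(Some(MAIN))?;
    // WORD_DOCIDS is "word-docids"; a typo in the constant name is now a
    // compile error instead of silently creating a different database.
    let word_docids = env.create_database::<Str, ByteSlice>(Some(WORD_DOCIDS))?;
    Ok((main, word_docids))
}

The same idea applies to the main-database keys: call sites now pass main_key::PRIMARY_KEY_KEY, main_key::WORDS_FST_KEY, and so on instead of in-place literals, which is what most of the hunks below change.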


@@ -5,55 +5,41 @@ use std::{str, io, fmt};
use anyhow::Context;
use byte_unit::Byte;
use heed::EnvOpenOptions;
use milli::facet::FacetType;
use milli::{Index, TreeLevel};
use structopt::StructOpt;
use milli::facet::FacetType;
use milli::index::db_name::*;
use milli::{Index, TreeLevel};
use Command::*;
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
const MAIN_DB_NAME: &str = "main";
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
const WORD_PREFIX_DOCIDS_DB_NAME: &str = "word-prefix-docids";
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
const WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-pair-proximity-docids";
const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-prefix-pair-proximity-docids";
const WORD_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-level-position-docids";
const WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-prefix-level-position-docids";
const FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME: &str = "field-id-word-count-docids";
const FACET_ID_F64_DOCIDS_DB_NAME: &str = "facet-id-f64-docids";
const FACET_ID_STRING_DOCIDS_DB_NAME: &str = "facet-id-string-docids";
const FIELD_ID_DOCID_FACET_F64S_DB_NAME: &str = "field-id-docid-facet-f64s";
const FIELD_ID_DOCID_FACET_STRINGS_DB_NAME: &str = "field-id-docid-facet-strings";
const DOCUMENTS_DB_NAME: &str = "documents";
const ALL_DATABASE_NAMES: &[&str] = &[
MAIN_DB_NAME,
WORD_DOCIDS_DB_NAME,
WORD_PREFIX_DOCIDS_DB_NAME,
DOCID_WORD_POSITIONS_DB_NAME,
WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME,
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME,
WORD_LEVEL_POSITION_DOCIDS_DB_NAME,
WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME,
FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME,
FACET_ID_F64_DOCIDS_DB_NAME,
FACET_ID_STRING_DOCIDS_DB_NAME,
FIELD_ID_DOCID_FACET_F64S_DB_NAME,
FIELD_ID_DOCID_FACET_STRINGS_DB_NAME,
DOCUMENTS_DB_NAME,
MAIN,
WORD_DOCIDS,
WORD_PREFIX_DOCIDS,
DOCID_WORD_POSITIONS,
WORD_PAIR_PROXIMITY_DOCIDS,
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS,
WORD_LEVEL_POSITION_DOCIDS,
WORD_PREFIX_LEVEL_POSITION_DOCIDS,
FIELD_ID_WORD_COUNT_DOCIDS,
FACET_ID_F64_DOCIDS,
FACET_ID_STRING_DOCIDS,
FIELD_ID_DOCID_FACET_F64S,
FIELD_ID_DOCID_FACET_STRINGS,
DOCUMENTS,
];
const POSTINGS_DATABASE_NAMES: &[&str] = &[
WORD_DOCIDS_DB_NAME,
WORD_PREFIX_DOCIDS_DB_NAME,
DOCID_WORD_POSITIONS_DB_NAME,
WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME,
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME,
WORD_DOCIDS,
WORD_PREFIX_DOCIDS,
DOCID_WORD_POSITIONS,
WORD_PAIR_PROXIMITY_DOCIDS,
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS,
];
#[derive(Debug, StructOpt)]
@@ -944,21 +930,21 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
for name in names {
let database = match name.as_str() {
MAIN_DB_NAME => &main,
WORD_PREFIX_DOCIDS_DB_NAME => word_prefix_docids.as_polymorph(),
WORD_DOCIDS_DB_NAME => word_docids.as_polymorph(),
DOCID_WORD_POSITIONS_DB_NAME => docid_word_positions.as_polymorph(),
WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => word_pair_proximity_docids.as_polymorph(),
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => word_prefix_pair_proximity_docids.as_polymorph(),
WORD_LEVEL_POSITION_DOCIDS_DB_NAME => word_level_position_docids.as_polymorph(),
WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME => word_prefix_level_position_docids.as_polymorph(),
FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME => field_id_word_count_docids.as_polymorph(),
FACET_ID_F64_DOCIDS_DB_NAME => facet_id_f64_docids.as_polymorph(),
FACET_ID_STRING_DOCIDS_DB_NAME => facet_id_string_docids.as_polymorph(),
FIELD_ID_DOCID_FACET_F64S_DB_NAME => field_id_docid_facet_f64s.as_polymorph(),
FIELD_ID_DOCID_FACET_STRINGS_DB_NAME => field_id_docid_facet_strings.as_polymorph(),
MAIN => &main,
WORD_PREFIX_DOCIDS => word_prefix_docids.as_polymorph(),
WORD_DOCIDS => word_docids.as_polymorph(),
DOCID_WORD_POSITIONS => docid_word_positions.as_polymorph(),
WORD_PAIR_PROXIMITY_DOCIDS => word_pair_proximity_docids.as_polymorph(),
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS => word_prefix_pair_proximity_docids.as_polymorph(),
WORD_LEVEL_POSITION_DOCIDS => word_level_position_docids.as_polymorph(),
WORD_PREFIX_LEVEL_POSITION_DOCIDS => word_prefix_level_position_docids.as_polymorph(),
FIELD_ID_WORD_COUNT_DOCIDS => field_id_word_count_docids.as_polymorph(),
FACET_ID_F64_DOCIDS => facet_id_f64_docids.as_polymorph(),
FACET_ID_STRING_DOCIDS => facet_id_string_docids.as_polymorph(),
FIELD_ID_DOCID_FACET_F64S => field_id_docid_facet_f64s.as_polymorph(),
FIELD_ID_DOCID_FACET_STRINGS => field_id_docid_facet_strings.as_polymorph(),
DOCUMENTS_DB_NAME => documents.as_polymorph(),
DOCUMENTS => documents.as_polymorph(),
unknown => anyhow::bail!("unknown database {:?}", unknown),
};
@@ -1039,27 +1025,27 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
}
match name {
WORD_DOCIDS_DB_NAME => {
WORD_DOCIDS => {
let db = index.word_docids.as_polymorph();
compute_stats::<RoaringBitmapCodec>(*db, rtxn, name)
},
WORD_PREFIX_DOCIDS_DB_NAME => {
WORD_PREFIX_DOCIDS => {
let db = index.word_prefix_docids.as_polymorph();
compute_stats::<RoaringBitmapCodec>(*db, rtxn, name)
},
DOCID_WORD_POSITIONS_DB_NAME => {
DOCID_WORD_POSITIONS => {
let db = index.docid_word_positions.as_polymorph();
compute_stats::<BoRoaringBitmapCodec>(*db, rtxn, name)
},
WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => {
WORD_PAIR_PROXIMITY_DOCIDS => {
let db = index.word_pair_proximity_docids.as_polymorph();
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
},
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => {
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS => {
let db = index.word_prefix_pair_proximity_docids.as_polymorph();
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
},
FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME => {
FIELD_ID_WORD_COUNT_DOCIDS => {
let db = index.field_id_word_count_docids.as_polymorph();
compute_stats::<CboRoaringBitmapCodec>(*db, rtxn, name)
},


@@ -21,25 +21,44 @@ use crate::heed_codec::facet::{
};
use crate::fields_ids_map::FieldsIdsMap;
pub const CRITERIA_KEY: &str = "criteria";
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
pub const PRIMARY_KEY_KEY: &str = "primary-key";
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
pub const STOP_WORDS_KEY: &str = "stop-words";
pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
pub const SYNONYMS_KEY: &str = "synonyms";
pub const WORDS_FST_KEY: &str = "words-fst";
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
const CREATED_AT_KEY: &str = "created-at";
const UPDATED_AT_KEY: &str = "updated-at";
pub mod main_key {
pub const CRITERIA_KEY: &str = "criteria";
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
pub const PRIMARY_KEY_KEY: &str = "primary-key";
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
pub const STOP_WORDS_KEY: &str = "stop-words";
pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
pub const SYNONYMS_KEY: &str = "synonyms";
pub const WORDS_FST_KEY: &str = "words-fst";
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
pub const CREATED_AT_KEY: &str = "created-at";
pub const UPDATED_AT_KEY: &str = "updated-at";
}
pub mod db_name {
pub const MAIN: &str = "main";
pub const WORD_DOCIDS: &str = "word-docids";
pub const WORD_PREFIX_DOCIDS: &str = "word-prefix-docids";
pub const DOCID_WORD_POSITIONS: &str = "docid-word-positions";
pub const WORD_PAIR_PROXIMITY_DOCIDS: &str = "word-pair-proximity-docids";
pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids";
pub const WORD_LEVEL_POSITION_DOCIDS: &str = "word-level-position-docids";
pub const WORD_PREFIX_LEVEL_POSITION_DOCIDS: &str = "word-prefix-level-position-docids";
pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";
pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const DOCUMENTS: &str = "documents";
}
#[derive(Clone)]
pub struct Index {
@@ -85,23 +104,25 @@ pub struct Index {
impl Index {
pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> Result<Index> {
use db_name::*;
options.max_dbs(14);
let env = options.open(path)?;
let main = env.create_poly_database(Some("main"))?;
let word_docids = env.create_database(Some("word-docids"))?;
let word_prefix_docids = env.create_database(Some("word-prefix-docids"))?;
let docid_word_positions = env.create_database(Some("docid-word-positions"))?;
let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?;
let word_prefix_pair_proximity_docids = env.create_database(Some("word-prefix-pair-proximity-docids"))?;
let word_level_position_docids = env.create_database(Some("word-level-position-docids"))?;
let field_id_word_count_docids = env.create_database(Some("field-id-word-count-docids"))?;
let word_prefix_level_position_docids = env.create_database(Some("word-prefix-level-position-docids"))?;
let facet_id_f64_docids = env.create_database(Some("facet-id-f64-docids"))?;
let facet_id_string_docids = env.create_database(Some("facet-id-string-docids"))?;
let field_id_docid_facet_f64s = env.create_database(Some("field-id-docid-facet-f64s"))?;
let field_id_docid_facet_strings = env.create_database(Some("field-id-docid-facet-strings"))?;
let documents = env.create_database(Some("documents"))?;
let main = env.create_poly_database(Some(MAIN))?;
let word_docids = env.create_database(Some(WORD_DOCIDS))?;
let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?;
let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
let word_prefix_pair_proximity_docids = env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
let word_level_position_docids = env.create_database(Some(WORD_LEVEL_POSITION_DOCIDS))?;
let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
let word_prefix_level_position_docids = env.create_database(Some(WORD_PREFIX_LEVEL_POSITION_DOCIDS))?;
let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
let field_id_docid_facet_strings = env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?;
let documents = env.create_database(Some(DOCUMENTS))?;
Index::initialize_creation_dates(&env, main)?;
@@ -127,10 +148,10 @@ impl Index {
fn initialize_creation_dates(env: &heed::Env, main: PolyDatabase) -> heed::Result<()> {
let mut txn = env.write_txn()?;
// The db was just created, we update its metadata with the relevant information.
if main.get::<_, Str, SerdeJson<DateTime<Utc>>>(&txn, CREATED_AT_KEY)?.is_none() {
if main.get::<_, Str, SerdeJson<DateTime<Utc>>>(&txn, main_key::CREATED_AT_KEY)?.is_none() {
let now = Utc::now();
main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, UPDATED_AT_KEY, &now)?;
main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, CREATED_AT_KEY, &now)?;
main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, main_key::UPDATED_AT_KEY, &now)?;
main.put::<_, Str, SerdeJson<DateTime<Utc>>>(&mut txn, main_key::CREATED_AT_KEY, &now)?;
txn.commit()?;
}
Ok(())
@@ -164,17 +185,17 @@ impl Index {
/// Writes the documents ids that corresponds to the user-ids-documents-ids FST.
pub fn put_documents_ids(&self, wtxn: &mut RwTxn, docids: &RoaringBitmap) -> heed::Result<()> {
self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, DOCUMENTS_IDS_KEY, docids)
self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids)
}
/// Returns the internal documents ids.
pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
Ok(self.main.get::<_, Str, RoaringBitmapCodec>(rtxn, DOCUMENTS_IDS_KEY)?.unwrap_or_default())
Ok(self.main.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?.unwrap_or_default())
}
/// Returns the number of documents indexed in the database.
pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result<u64> {
let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, DOCUMENTS_IDS_KEY)?;
let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?;
Ok(count.unwrap_or_default())
}
@@ -183,17 +204,17 @@ impl Index {
/// Writes the documents primary key, this is the field name that is used to store the id.
pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
self.set_updated_at(wtxn, &Utc::now())?;
self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, &primary_key)
self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, &primary_key)
}
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
pub fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, PRIMARY_KEY_KEY)
self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY)
}
/// Returns the documents primary key, `None` if it hasn't been defined.
pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> {
self.main.get::<_, Str, Str>(rtxn, PRIMARY_KEY_KEY)
self.main.get::<_, Str, Str>(rtxn, main_key::PRIMARY_KEY_KEY)
}
/* external documents ids */
@@ -208,16 +229,16 @@ impl Index {
let ExternalDocumentsIds { hard, soft } = external_documents_ids;
let hard = hard.as_fst().as_bytes();
let soft = soft.as_fst().as_bytes();
self.main.put::<_, Str, ByteSlice>(wtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY, hard)?;
self.main.put::<_, Str, ByteSlice>(wtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY, soft)?;
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::HARD_EXTERNAL_DOCUMENTS_IDS_KEY, hard)?;
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::SOFT_EXTERNAL_DOCUMENTS_IDS_KEY, soft)?;
Ok(())
}
/// Returns the external documents ids map which associate the external ids
/// with the internal ids (i.e. `u32`).
pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> Result<ExternalDocumentsIds<'t>> {
let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?;
let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?;
let hard = self.main.get::<_, Str, ByteSlice>(rtxn, main_key::HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?;
let soft = self.main.get::<_, Str, ByteSlice>(rtxn, main_key::SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?;
let hard = match hard {
Some(hard) => fst::Map::new(hard)?.map_data(Cow::Borrowed)?,
None => fst::Map::default().map_data(Cow::Owned)?,
@@ -234,13 +255,16 @@ impl Index {
/// Writes the fields ids map which associate the documents keys with an internal field id
/// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
pub fn put_fields_ids_map(&self, wtxn: &mut RwTxn, map: &FieldsIdsMap) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<FieldsIdsMap>>(wtxn, FIELDS_IDS_MAP_KEY, map)
self.main.put::<_, Str, SerdeJson<FieldsIdsMap>>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map)
}
/// Returns the fields ids map which associate the documents keys with an internal field id
/// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result<FieldsIdsMap> {
Ok(self.main.get::<_, Str, SerdeJson<FieldsIdsMap>>(rtxn, FIELDS_IDS_MAP_KEY)?.unwrap_or_default())
Ok(self.main.get::<_, Str, SerdeJson<FieldsIdsMap>>(
rtxn,
main_key::FIELDS_IDS_MAP_KEY,
)?.unwrap_or_default())
}
/* fields distribution */
@@ -248,13 +272,16 @@ impl Index {
/// Writes the fields distribution which associates every field name with
/// the number of times it occurs in the documents.
pub fn put_fields_distribution(&self, wtxn: &mut RwTxn, distribution: &FieldsDistribution) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<FieldsDistribution>>(wtxn, FIELDS_DISTRIBUTION_KEY, distribution)
self.main.put::<_, Str, SerdeJson<FieldsDistribution>>(wtxn, main_key::FIELDS_DISTRIBUTION_KEY, distribution)
}
/// Returns the fields distribution which associates every field name with
/// the number of times it occurs in the documents.
pub fn fields_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldsDistribution> {
Ok(self.main.get::<_, Str, SerdeJson<FieldsDistribution>>(rtxn, FIELDS_DISTRIBUTION_KEY)?.unwrap_or_default())
Ok(self.main.get::<_, Str, SerdeJson<FieldsDistribution>>(
rtxn,
main_key::FIELDS_DISTRIBUTION_KEY,
)?.unwrap_or_default())
}
/* displayed fields */
@@ -262,19 +289,19 @@ impl Index {
/// Writes the fields that must be displayed in the defined order.
/// There must be not be any duplicate field id.
pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, DISPLAYED_FIELDS_KEY, &fields)
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::DISPLAYED_FIELDS_KEY, &fields)
}
/// Deletes the displayed fields ids, this will make the engine to display
/// all the documents attributes in the order of the `FieldsIdsMap`.
pub fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, DISPLAYED_FIELDS_KEY)
self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY)
}
/// Returns the displayed fields in the order they were set by the user. If it returns
/// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`.
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, DISPLAYED_FIELDS_KEY)
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::DISPLAYED_FIELDS_KEY)
}
pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<Option<Vec<FieldId>>> {
@@ -291,18 +318,18 @@ impl Index {
/// Writes the searchable fields, when this list is specified, only these are indexed.
pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, SEARCHABLE_FIELDS_KEY, &fields)
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::SEARCHABLE_FIELDS_KEY, &fields)
}
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
pub fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, SEARCHABLE_FIELDS_KEY)
self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
}
/// Returns the searchable fields, those are the fields that are indexed,
/// if the searchable fields aren't there it means that **all** the fields are indexed.
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, SEARCHABLE_FIELDS_KEY)
self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
}
/// Identical to `searchable_fields`, but returns the ids instead.
@@ -328,17 +355,20 @@ impl Index {
/// Writes the filterable fields names in the database.
pub fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet<String>) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FILTERABLE_FIELDS_KEY, fields)
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields)
}
/// Deletes the filterable fields ids in the database.
pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, FILTERABLE_FIELDS_KEY)
self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY)
}
/// Returns the filterable fields names.
pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FILTERABLE_FIELDS_KEY)?.unwrap_or_default())
Ok(self.main.get::<_, Str, SerdeJson<_>>(
rtxn,
main_key::FILTERABLE_FIELDS_KEY,
)?.unwrap_or_default())
}
/// Same as `filterable_fields`, but returns ids instead.
@@ -409,9 +439,9 @@ impl Index {
docids: &RoaringBitmap,
) -> heed::Result<()>
{
let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()]
.copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()]
.copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
*buffer.last_mut().unwrap() = field_id;
self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids)
}
@@ -423,9 +453,9 @@ impl Index {
field_id: FieldId,
) -> heed::Result<RoaringBitmap>
{
let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()]
.copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()]
.copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
*buffer.last_mut().unwrap() = field_id;
match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? {
Some(docids) => Ok(docids),
@@ -441,9 +471,9 @@ impl Index {
docids: &RoaringBitmap,
) -> heed::Result<()>
{
let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()]
.copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()]
.copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
*buffer.last_mut().unwrap() = field_id;
self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids)
}
@@ -455,9 +485,9 @@ impl Index {
field_id: FieldId,
) -> heed::Result<RoaringBitmap>
{
let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()]
.copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()]
.copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
*buffer.last_mut().unwrap() = field_id;
match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? {
Some(docids) => Ok(docids),
@@ -468,29 +498,29 @@ impl Index {
/* distinct field */
pub(crate) fn put_distinct_field(&self, wtxn: &mut RwTxn, distinct_field: &str) -> heed::Result<()> {
self.main.put::<_, Str, Str>(wtxn, DISTINCT_FIELD_KEY, distinct_field)
self.main.put::<_, Str, Str>(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field)
}
pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
self.main.get::<_, Str, Str>(rtxn, DISTINCT_FIELD_KEY)
self.main.get::<_, Str, Str>(rtxn, main_key::DISTINCT_FIELD_KEY)
}
pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, DISTINCT_FIELD_KEY)
self.main.delete::<_, Str>(wtxn, main_key::DISTINCT_FIELD_KEY)
}
/* criteria */
pub fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, CRITERIA_KEY, &criteria)
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
}
pub fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, CRITERIA_KEY)
self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY)
}
pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> {
match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, CRITERIA_KEY)? {
match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, main_key::CRITERIA_KEY)? {
Some(criteria) => Ok(criteria),
None => Ok(default_criteria()),
}
@@ -500,12 +530,12 @@ impl Index {
/// Writes the FST which is the words dictionary of the engine.
pub fn put_words_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_FST_KEY, fst.as_fst().as_bytes())
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes())
}
/// Returns the FST which is the words dictionary of the engine.
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? {
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_FST_KEY)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
}
@@ -514,15 +544,15 @@ impl Index {
/* stop words */
pub fn put_stop_words<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
self.main.put::<_, Str, ByteSlice>(wtxn, STOP_WORDS_KEY, fst.as_fst().as_bytes())
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes())
}
pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY)
self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY)
}
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<fst::Set<&'t [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? {
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::STOP_WORDS_KEY)? {
Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
None => Ok(None),
}
@@ -530,19 +560,29 @@ impl Index {
/* synonyms */
pub fn put_synonyms(&self, wtxn: &mut RwTxn, synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, SYNONYMS_KEY, synonyms)
pub fn put_synonyms(
&self,
wtxn: &mut RwTxn,
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
) -> heed::Result<()>
{
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
}
pub fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, SYNONYMS_KEY)
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
}
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
Ok(self.main.get::<_, Str, SerdeBincode<_>>(rtxn, SYNONYMS_KEY)?.unwrap_or_default())
Ok(self.main.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::SYNONYMS_KEY)?.unwrap_or_default())
}
pub fn words_synonyms<S: AsRef<str>>(&self, rtxn: &RoTxn, words: &[S]) -> heed::Result<Option<Vec<Vec<String>>>> {
pub fn words_synonyms<S: AsRef<str>>(
&self,
rtxn: &RoTxn,
words: &[S],
) -> heed::Result<Option<Vec<Vec<String>>>>
{
let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
Ok(self.synonyms(rtxn)?.remove(&words))
}
@@ -551,12 +591,12 @@ impl Index {
/// Writes the FST which is the words prefixes dictionnary of the engine.
pub fn put_words_prefixes_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes())
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes())
}
/// Returns the FST which is the words prefixes dictionnary of the engine.
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_PREFIXES_FST_KEY)? {
match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
None => Ok(fst::Set::default().map_data(Cow::Owned)?),
}
@@ -613,7 +653,7 @@ impl Index {
/// Returns the index creation time.
pub fn created_at(&self, rtxn: &RoTxn) -> heed::Result<DateTime<Utc>> {
let time = self.main
.get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, CREATED_AT_KEY)?
.get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, main_key::CREATED_AT_KEY)?
.expect("Index without creation time");
Ok(time)
}
@@ -621,13 +661,13 @@ impl Index {
/// Returns the index last updated time.
pub fn updated_at(&self, rtxn: &RoTxn) -> heed::Result<DateTime<Utc>> {
let time = self.main
.get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, UPDATED_AT_KEY)?
.get::<_, Str, SerdeJson<DateTime<Utc>>>(rtxn, main_key::UPDATED_AT_KEY)?
.expect("Index without update time");
Ok(time)
}
pub(crate) fn set_updated_at(&self, wtxn: &mut RwTxn, time: &DateTime<Utc>) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<DateTime<Utc>>>(wtxn, UPDATED_AT_KEY, &time)
self.main.put::<_, Str, SerdeJson<DateTime<Utc>>>(wtxn, main_key::UPDATED_AT_KEY, &time)
}
}


@@ -9,6 +9,7 @@ use serde_json::Value;
use crate::error::{InternalError, UserError};
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::index::{db_name, main_key};
use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds, Result};
use super::ClearDocuments;
@@ -78,7 +79,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let primary_key = self.index.primary_key(self.wtxn)?.ok_or_else(|| {
InternalError::DatabaseMissingEntry { db_name: "main", key: Some("primary-key") }
InternalError::DatabaseMissingEntry {
db_name: db_name::MAIN,
key: Some(main_key::PRIMARY_KEY_KEY),
}
})?;
let id_field = fields_ids_map.id(primary_key).expect(r#"the field "id" to be present"#);


@@ -32,7 +32,7 @@ const LMDB_MAX_KEY_LENGTH: usize = 511;
const ONE_KILOBYTE: usize = 1024 * 1024;
const MAX_POSITION: usize = 1000;
const WORDS_FST_KEY: &[u8] = crate::index::WORDS_FST_KEY.as_bytes();
const WORDS_FST_KEY: &[u8] = crate::index::main_key::WORDS_FST_KEY.as_bytes();
pub struct Readers {
pub main: Reader<FileFuse>,


@@ -11,6 +11,7 @@ use roaring::RoaringBitmap;
use serde_json::{Map, Value};
use crate::error::{Error, UserError, InternalError};
use crate::index::db_name;
use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
use crate::{BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution};
@@ -411,7 +412,7 @@ impl Transform<'_, '_> {
let key = BEU32::new(docid);
let base_obkv = self.index.documents.get(&self.rtxn, &key)?
.ok_or(InternalError::DatabaseMissingEntry {
db_name: "documents",
db_name: db_name::DOCUMENTS,
key: None,
})?;
let update_obkv = obkv::KvReader::new(update_obkv);