From 28c004aa2cd8cb16610aa322e449955c5cf523ce Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 15 Jun 2021 11:06:42 +0200 Subject: [PATCH] Prefer using constant for the database names --- infos/src/main.rs | 100 ++++---- milli/src/index.rs | 224 +++++++++++------- milli/src/update/delete_documents.rs | 6 +- milli/src/update/index_documents/store.rs | 2 +- milli/src/update/index_documents/transform.rs | 3 +- 5 files changed, 183 insertions(+), 152 deletions(-) diff --git a/infos/src/main.rs b/infos/src/main.rs index d6aa1f854..b0c304de0 100644 --- a/infos/src/main.rs +++ b/infos/src/main.rs @@ -5,55 +5,41 @@ use std::{str, io, fmt}; use anyhow::Context; use byte_unit::Byte; use heed::EnvOpenOptions; -use milli::facet::FacetType; -use milli::{Index, TreeLevel}; use structopt::StructOpt; +use milli::facet::FacetType; +use milli::index::db_name::*; +use milli::{Index, TreeLevel}; + use Command::*; #[cfg(target_os = "linux")] #[global_allocator] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; -const MAIN_DB_NAME: &str = "main"; -const WORD_DOCIDS_DB_NAME: &str = "word-docids"; -const WORD_PREFIX_DOCIDS_DB_NAME: &str = "word-prefix-docids"; -const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions"; -const WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-pair-proximity-docids"; -const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-prefix-pair-proximity-docids"; -const WORD_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-level-position-docids"; -const WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-prefix-level-position-docids"; -const FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME: &str = "field-id-word-count-docids"; -const FACET_ID_F64_DOCIDS_DB_NAME: &str = "facet-id-f64-docids"; -const FACET_ID_STRING_DOCIDS_DB_NAME: &str = "facet-id-string-docids"; -const FIELD_ID_DOCID_FACET_F64S_DB_NAME: &str = "field-id-docid-facet-f64s"; -const FIELD_ID_DOCID_FACET_STRINGS_DB_NAME: &str = "field-id-docid-facet-strings"; - -const DOCUMENTS_DB_NAME: &str = "documents"; - const ALL_DATABASE_NAMES: &[&str] = &[ - MAIN_DB_NAME, - WORD_DOCIDS_DB_NAME, - WORD_PREFIX_DOCIDS_DB_NAME, - DOCID_WORD_POSITIONS_DB_NAME, - WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME, - WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME, - WORD_LEVEL_POSITION_DOCIDS_DB_NAME, - WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME, - FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME, - FACET_ID_F64_DOCIDS_DB_NAME, - FACET_ID_STRING_DOCIDS_DB_NAME, - FIELD_ID_DOCID_FACET_F64S_DB_NAME, - FIELD_ID_DOCID_FACET_STRINGS_DB_NAME, - DOCUMENTS_DB_NAME, + MAIN, + WORD_DOCIDS, + WORD_PREFIX_DOCIDS, + DOCID_WORD_POSITIONS, + WORD_PAIR_PROXIMITY_DOCIDS, + WORD_PREFIX_PAIR_PROXIMITY_DOCIDS, + WORD_LEVEL_POSITION_DOCIDS, + WORD_PREFIX_LEVEL_POSITION_DOCIDS, + FIELD_ID_WORD_COUNT_DOCIDS, + FACET_ID_F64_DOCIDS, + FACET_ID_STRING_DOCIDS, + FIELD_ID_DOCID_FACET_F64S, + FIELD_ID_DOCID_FACET_STRINGS, + DOCUMENTS, ]; const POSTINGS_DATABASE_NAMES: &[&str] = &[ - WORD_DOCIDS_DB_NAME, - WORD_PREFIX_DOCIDS_DB_NAME, - DOCID_WORD_POSITIONS_DB_NAME, - WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME, - WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME, + WORD_DOCIDS, + WORD_PREFIX_DOCIDS, + DOCID_WORD_POSITIONS, + WORD_PAIR_PROXIMITY_DOCIDS, + WORD_PREFIX_PAIR_PROXIMITY_DOCIDS, ]; #[derive(Debug, StructOpt)] @@ -944,21 +930,21 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec) -> a for name in names { let database = match name.as_str() { - MAIN_DB_NAME => &main, - WORD_PREFIX_DOCIDS_DB_NAME => word_prefix_docids.as_polymorph(), - WORD_DOCIDS_DB_NAME => word_docids.as_polymorph(), - DOCID_WORD_POSITIONS_DB_NAME => docid_word_positions.as_polymorph(), - WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => word_pair_proximity_docids.as_polymorph(), - WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => word_prefix_pair_proximity_docids.as_polymorph(), - WORD_LEVEL_POSITION_DOCIDS_DB_NAME => word_level_position_docids.as_polymorph(), - WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME => word_prefix_level_position_docids.as_polymorph(), - FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME => field_id_word_count_docids.as_polymorph(), - FACET_ID_F64_DOCIDS_DB_NAME => facet_id_f64_docids.as_polymorph(), - FACET_ID_STRING_DOCIDS_DB_NAME => facet_id_string_docids.as_polymorph(), - FIELD_ID_DOCID_FACET_F64S_DB_NAME => field_id_docid_facet_f64s.as_polymorph(), - FIELD_ID_DOCID_FACET_STRINGS_DB_NAME => field_id_docid_facet_strings.as_polymorph(), + MAIN => &main, + WORD_PREFIX_DOCIDS => word_prefix_docids.as_polymorph(), + WORD_DOCIDS => word_docids.as_polymorph(), + DOCID_WORD_POSITIONS => docid_word_positions.as_polymorph(), + WORD_PAIR_PROXIMITY_DOCIDS => word_pair_proximity_docids.as_polymorph(), + WORD_PREFIX_PAIR_PROXIMITY_DOCIDS => word_prefix_pair_proximity_docids.as_polymorph(), + WORD_LEVEL_POSITION_DOCIDS => word_level_position_docids.as_polymorph(), + WORD_PREFIX_LEVEL_POSITION_DOCIDS => word_prefix_level_position_docids.as_polymorph(), + FIELD_ID_WORD_COUNT_DOCIDS => field_id_word_count_docids.as_polymorph(), + FACET_ID_F64_DOCIDS => facet_id_f64_docids.as_polymorph(), + FACET_ID_STRING_DOCIDS => facet_id_string_docids.as_polymorph(), + FIELD_ID_DOCID_FACET_F64S => field_id_docid_facet_f64s.as_polymorph(), + FIELD_ID_DOCID_FACET_STRINGS => field_id_docid_facet_strings.as_polymorph(), - DOCUMENTS_DB_NAME => documents.as_polymorph(), + DOCUMENTS => documents.as_polymorph(), unknown => anyhow::bail!("unknown database {:?}", unknown), }; @@ -1039,27 +1025,27 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu } match name { - WORD_DOCIDS_DB_NAME => { + WORD_DOCIDS => { let db = index.word_docids.as_polymorph(); compute_stats::(*db, rtxn, name) }, - WORD_PREFIX_DOCIDS_DB_NAME => { + WORD_PREFIX_DOCIDS => { let db = index.word_prefix_docids.as_polymorph(); compute_stats::(*db, rtxn, name) }, - DOCID_WORD_POSITIONS_DB_NAME => { + DOCID_WORD_POSITIONS => { let db = index.docid_word_positions.as_polymorph(); compute_stats::(*db, rtxn, name) }, - WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => { + WORD_PAIR_PROXIMITY_DOCIDS => { let db = index.word_pair_proximity_docids.as_polymorph(); compute_stats::(*db, rtxn, name) }, - WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => { + WORD_PREFIX_PAIR_PROXIMITY_DOCIDS => { let db = index.word_prefix_pair_proximity_docids.as_polymorph(); compute_stats::(*db, rtxn, name) }, - FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME => { + FIELD_ID_WORD_COUNT_DOCIDS => { let db = index.field_id_word_count_docids.as_polymorph(); compute_stats::(*db, rtxn, name) }, diff --git a/milli/src/index.rs b/milli/src/index.rs index 9ebe34a2e..f3411564b 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,25 +21,44 @@ use crate::heed_codec::facet::{ }; use crate::fields_ids_map::FieldsIdsMap; -pub const CRITERIA_KEY: &str = "criteria"; -pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; -pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key"; -pub const DOCUMENTS_IDS_KEY: &str = "documents-ids"; -pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields"; -pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution"; -pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map"; -pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; -pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids"; -pub const PRIMARY_KEY_KEY: &str = "primary-key"; -pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields"; -pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; -pub const STOP_WORDS_KEY: &str = "stop-words"; -pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids"; -pub const SYNONYMS_KEY: &str = "synonyms"; -pub const WORDS_FST_KEY: &str = "words-fst"; -pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; -const CREATED_AT_KEY: &str = "created-at"; -const UPDATED_AT_KEY: &str = "updated-at"; +pub mod main_key { + pub const CRITERIA_KEY: &str = "criteria"; + pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; + pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key"; + pub const DOCUMENTS_IDS_KEY: &str = "documents-ids"; + pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields"; + pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution"; + pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map"; + pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; + pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids"; + pub const PRIMARY_KEY_KEY: &str = "primary-key"; + pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields"; + pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; + pub const STOP_WORDS_KEY: &str = "stop-words"; + pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids"; + pub const SYNONYMS_KEY: &str = "synonyms"; + pub const WORDS_FST_KEY: &str = "words-fst"; + pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; + pub const CREATED_AT_KEY: &str = "created-at"; + pub const UPDATED_AT_KEY: &str = "updated-at"; +} + +pub mod db_name { + pub const MAIN: &str = "main"; + pub const WORD_DOCIDS: &str = "word-docids"; + pub const WORD_PREFIX_DOCIDS: &str = "word-prefix-docids"; + pub const DOCID_WORD_POSITIONS: &str = "docid-word-positions"; + pub const WORD_PAIR_PROXIMITY_DOCIDS: &str = "word-pair-proximity-docids"; + pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids"; + pub const WORD_LEVEL_POSITION_DOCIDS: &str = "word-level-position-docids"; + pub const WORD_PREFIX_LEVEL_POSITION_DOCIDS: &str = "word-prefix-level-position-docids"; + pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids"; + pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids"; + pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; + pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; + pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; + pub const DOCUMENTS: &str = "documents"; +} #[derive(Clone)] pub struct Index { @@ -85,23 +104,25 @@ pub struct Index { impl Index { pub fn new>(mut options: heed::EnvOpenOptions, path: P) -> Result { + use db_name::*; + options.max_dbs(14); let env = options.open(path)?; - let main = env.create_poly_database(Some("main"))?; - let word_docids = env.create_database(Some("word-docids"))?; - let word_prefix_docids = env.create_database(Some("word-prefix-docids"))?; - let docid_word_positions = env.create_database(Some("docid-word-positions"))?; - let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?; - let word_prefix_pair_proximity_docids = env.create_database(Some("word-prefix-pair-proximity-docids"))?; - let word_level_position_docids = env.create_database(Some("word-level-position-docids"))?; - let field_id_word_count_docids = env.create_database(Some("field-id-word-count-docids"))?; - let word_prefix_level_position_docids = env.create_database(Some("word-prefix-level-position-docids"))?; - let facet_id_f64_docids = env.create_database(Some("facet-id-f64-docids"))?; - let facet_id_string_docids = env.create_database(Some("facet-id-string-docids"))?; - let field_id_docid_facet_f64s = env.create_database(Some("field-id-docid-facet-f64s"))?; - let field_id_docid_facet_strings = env.create_database(Some("field-id-docid-facet-strings"))?; - let documents = env.create_database(Some("documents"))?; + let main = env.create_poly_database(Some(MAIN))?; + let word_docids = env.create_database(Some(WORD_DOCIDS))?; + let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?; + let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?; + let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?; + let word_prefix_pair_proximity_docids = env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?; + let word_level_position_docids = env.create_database(Some(WORD_LEVEL_POSITION_DOCIDS))?; + let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?; + let word_prefix_level_position_docids = env.create_database(Some(WORD_PREFIX_LEVEL_POSITION_DOCIDS))?; + let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?; + let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; + let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; + let field_id_docid_facet_strings = env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?; + let documents = env.create_database(Some(DOCUMENTS))?; Index::initialize_creation_dates(&env, main)?; @@ -127,10 +148,10 @@ impl Index { fn initialize_creation_dates(env: &heed::Env, main: PolyDatabase) -> heed::Result<()> { let mut txn = env.write_txn()?; // The db was just created, we update its metadata with the relevant information. - if main.get::<_, Str, SerdeJson>>(&txn, CREATED_AT_KEY)?.is_none() { + if main.get::<_, Str, SerdeJson>>(&txn, main_key::CREATED_AT_KEY)?.is_none() { let now = Utc::now(); - main.put::<_, Str, SerdeJson>>(&mut txn, UPDATED_AT_KEY, &now)?; - main.put::<_, Str, SerdeJson>>(&mut txn, CREATED_AT_KEY, &now)?; + main.put::<_, Str, SerdeJson>>(&mut txn, main_key::UPDATED_AT_KEY, &now)?; + main.put::<_, Str, SerdeJson>>(&mut txn, main_key::CREATED_AT_KEY, &now)?; txn.commit()?; } Ok(()) @@ -164,17 +185,17 @@ impl Index { /// Writes the documents ids that corresponds to the user-ids-documents-ids FST. pub fn put_documents_ids(&self, wtxn: &mut RwTxn, docids: &RoaringBitmap) -> heed::Result<()> { - self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, DOCUMENTS_IDS_KEY, docids) + self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids) } /// Returns the internal documents ids. pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result { - Ok(self.main.get::<_, Str, RoaringBitmapCodec>(rtxn, DOCUMENTS_IDS_KEY)?.unwrap_or_default()) + Ok(self.main.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?.unwrap_or_default()) } /// Returns the number of documents indexed in the database. pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result { - let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, DOCUMENTS_IDS_KEY)?; + let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?; Ok(count.unwrap_or_default()) } @@ -183,17 +204,17 @@ impl Index { /// Writes the documents primary key, this is the field name that is used to store the id. pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { self.set_updated_at(wtxn, &Utc::now())?; - self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, &primary_key) + self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, &primary_key) } /// Deletes the primary key of the documents, this can be done to reset indexes settings. pub fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, PRIMARY_KEY_KEY) + self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY) } /// Returns the documents primary key, `None` if it hasn't been defined. pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result> { - self.main.get::<_, Str, Str>(rtxn, PRIMARY_KEY_KEY) + self.main.get::<_, Str, Str>(rtxn, main_key::PRIMARY_KEY_KEY) } /* external documents ids */ @@ -208,16 +229,16 @@ impl Index { let ExternalDocumentsIds { hard, soft } = external_documents_ids; let hard = hard.as_fst().as_bytes(); let soft = soft.as_fst().as_bytes(); - self.main.put::<_, Str, ByteSlice>(wtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY, hard)?; - self.main.put::<_, Str, ByteSlice>(wtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY, soft)?; + self.main.put::<_, Str, ByteSlice>(wtxn, main_key::HARD_EXTERNAL_DOCUMENTS_IDS_KEY, hard)?; + self.main.put::<_, Str, ByteSlice>(wtxn, main_key::SOFT_EXTERNAL_DOCUMENTS_IDS_KEY, soft)?; Ok(()) } /// Returns the external documents ids map which associate the external ids /// with the internal ids (i.e. `u32`). pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> Result> { - let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?; - let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?; + let hard = self.main.get::<_, Str, ByteSlice>(rtxn, main_key::HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?; + let soft = self.main.get::<_, Str, ByteSlice>(rtxn, main_key::SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?; let hard = match hard { Some(hard) => fst::Map::new(hard)?.map_data(Cow::Borrowed)?, None => fst::Map::default().map_data(Cow::Owned)?, @@ -234,13 +255,16 @@ impl Index { /// Writes the fields ids map which associate the documents keys with an internal field id /// (i.e. `u8`), this field id is used to identify fields in the obkv documents. pub fn put_fields_ids_map(&self, wtxn: &mut RwTxn, map: &FieldsIdsMap) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>(wtxn, FIELDS_IDS_MAP_KEY, map) + self.main.put::<_, Str, SerdeJson>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map) } /// Returns the fields ids map which associate the documents keys with an internal field id /// (i.e. `u8`), this field id is used to identify fields in the obkv documents. pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result { - Ok(self.main.get::<_, Str, SerdeJson>(rtxn, FIELDS_IDS_MAP_KEY)?.unwrap_or_default()) + Ok(self.main.get::<_, Str, SerdeJson>( + rtxn, + main_key::FIELDS_IDS_MAP_KEY, + )?.unwrap_or_default()) } /* fields distribution */ @@ -248,13 +272,16 @@ impl Index { /// Writes the fields distribution which associates every field name with /// the number of times it occurs in the documents. pub fn put_fields_distribution(&self, wtxn: &mut RwTxn, distribution: &FieldsDistribution) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>(wtxn, FIELDS_DISTRIBUTION_KEY, distribution) + self.main.put::<_, Str, SerdeJson>(wtxn, main_key::FIELDS_DISTRIBUTION_KEY, distribution) } /// Returns the fields distribution which associates every field name with /// the number of times it occurs in the documents. pub fn fields_distribution(&self, rtxn: &RoTxn) -> heed::Result { - Ok(self.main.get::<_, Str, SerdeJson>(rtxn, FIELDS_DISTRIBUTION_KEY)?.unwrap_or_default()) + Ok(self.main.get::<_, Str, SerdeJson>( + rtxn, + main_key::FIELDS_DISTRIBUTION_KEY, + )?.unwrap_or_default()) } /* displayed fields */ @@ -262,19 +289,19 @@ impl Index { /// Writes the fields that must be displayed in the defined order. /// There must be not be any duplicate field id. pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, DISPLAYED_FIELDS_KEY, &fields) + self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::DISPLAYED_FIELDS_KEY, &fields) } /// Deletes the displayed fields ids, this will make the engine to display /// all the documents attributes in the order of the `FieldsIdsMap`. pub fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, DISPLAYED_FIELDS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY) } /// Returns the displayed fields in the order they were set by the user. If it returns /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`. pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { - self.main.get::<_, Str, SerdeBincode>>(rtxn, DISPLAYED_FIELDS_KEY) + self.main.get::<_, Str, SerdeBincode>>(rtxn, main_key::DISPLAYED_FIELDS_KEY) } pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> heed::Result>> { @@ -291,18 +318,18 @@ impl Index { /// Writes the searchable fields, when this list is specified, only these are indexed. pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, SEARCHABLE_FIELDS_KEY, &fields) + self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::SEARCHABLE_FIELDS_KEY, &fields) } /// Deletes the searchable fields, when no fields are specified, all fields are indexed. pub fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, SEARCHABLE_FIELDS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY) } /// Returns the searchable fields, those are the fields that are indexed, /// if the searchable fields aren't there it means that **all** the fields are indexed. pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { - self.main.get::<_, Str, SerdeBincode>>(rtxn, SEARCHABLE_FIELDS_KEY) + self.main.get::<_, Str, SerdeBincode>>(rtxn, main_key::SEARCHABLE_FIELDS_KEY) } /// Identical to `searchable_fields`, but returns the ids instead. @@ -328,17 +355,20 @@ impl Index { /// Writes the filterable fields names in the database. pub fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(wtxn, FILTERABLE_FIELDS_KEY, fields) + self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields) } /// Deletes the filterable fields ids in the database. pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, FILTERABLE_FIELDS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY) } /// Returns the filterable fields names. pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result> { - Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FILTERABLE_FIELDS_KEY)?.unwrap_or_default()) + Ok(self.main.get::<_, Str, SerdeJson<_>>( + rtxn, + main_key::FILTERABLE_FIELDS_KEY, + )?.unwrap_or_default()) } /// Same as `filterable_fields`, but returns ids instead. @@ -409,9 +439,9 @@ impl Index { docids: &RoaringBitmap, ) -> heed::Result<()> { - let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids) } @@ -423,9 +453,9 @@ impl Index { field_id: FieldId, ) -> heed::Result { - let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? { Some(docids) => Ok(docids), @@ -441,9 +471,9 @@ impl Index { docids: &RoaringBitmap, ) -> heed::Result<()> { - let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids) } @@ -455,9 +485,9 @@ impl Index { field_id: FieldId, ) -> heed::Result { - let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? { Some(docids) => Ok(docids), @@ -468,29 +498,29 @@ impl Index { /* distinct field */ pub(crate) fn put_distinct_field(&self, wtxn: &mut RwTxn, distinct_field: &str) -> heed::Result<()> { - self.main.put::<_, Str, Str>(wtxn, DISTINCT_FIELD_KEY, distinct_field) + self.main.put::<_, Str, Str>(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field) } pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result> { - self.main.get::<_, Str, Str>(rtxn, DISTINCT_FIELD_KEY) + self.main.get::<_, Str, Str>(rtxn, main_key::DISTINCT_FIELD_KEY) } pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, DISTINCT_FIELD_KEY) + self.main.delete::<_, Str>(wtxn, main_key::DISTINCT_FIELD_KEY) } /* criteria */ pub fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, CRITERIA_KEY, &criteria) + self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria) } pub fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, CRITERIA_KEY) + self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY) } pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result> { - match self.main.get::<_, Str, SerdeJson>>(rtxn, CRITERIA_KEY)? { + match self.main.get::<_, Str, SerdeJson>>(rtxn, main_key::CRITERIA_KEY)? { Some(criteria) => Ok(criteria), None => Ok(default_criteria()), } @@ -500,12 +530,12 @@ impl Index { /// Writes the FST which is the words dictionary of the engine. pub fn put_words_fst>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_FST_KEY, fst.as_fst().as_bytes()) + self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes()) } /// Returns the FST which is the words dictionary of the engine. pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? { + match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), } @@ -514,15 +544,15 @@ impl Index { /* stop words */ pub fn put_stop_words>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>(wtxn, STOP_WORDS_KEY, fst.as_fst().as_bytes()) + self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes()) } pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY) } pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? { + match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::STOP_WORDS_KEY)? { Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), None => Ok(None), } @@ -530,19 +560,29 @@ impl Index { /* synonyms */ - pub fn put_synonyms(&self, wtxn: &mut RwTxn, synonyms: &HashMap, Vec>>) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, SYNONYMS_KEY, synonyms) + pub fn put_synonyms( + &self, + wtxn: &mut RwTxn, + synonyms: &HashMap, Vec>>, + ) -> heed::Result<()> + { + self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms) } pub fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, SYNONYMS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY) } pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result, Vec>>> { - Ok(self.main.get::<_, Str, SerdeBincode<_>>(rtxn, SYNONYMS_KEY)?.unwrap_or_default()) + Ok(self.main.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::SYNONYMS_KEY)?.unwrap_or_default()) } - pub fn words_synonyms>(&self, rtxn: &RoTxn, words: &[S]) -> heed::Result>>> { + pub fn words_synonyms>( + &self, + rtxn: &RoTxn, + words: &[S], + ) -> heed::Result>>> + { let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect(); Ok(self.synonyms(rtxn)?.remove(&words)) } @@ -551,12 +591,12 @@ impl Index { /// Writes the FST which is the words prefixes dictionnary of the engine. pub fn put_words_prefixes_fst>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes()) + self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes()) } /// Returns the FST which is the words prefixes dictionnary of the engine. pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_PREFIXES_FST_KEY)? { + match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), } @@ -613,7 +653,7 @@ impl Index { /// Returns the index creation time. pub fn created_at(&self, rtxn: &RoTxn) -> heed::Result> { let time = self.main - .get::<_, Str, SerdeJson>>(rtxn, CREATED_AT_KEY)? + .get::<_, Str, SerdeJson>>(rtxn, main_key::CREATED_AT_KEY)? .expect("Index without creation time"); Ok(time) } @@ -621,13 +661,13 @@ impl Index { /// Returns the index last updated time. pub fn updated_at(&self, rtxn: &RoTxn) -> heed::Result> { let time = self.main - .get::<_, Str, SerdeJson>>(rtxn, UPDATED_AT_KEY)? + .get::<_, Str, SerdeJson>>(rtxn, main_key::UPDATED_AT_KEY)? .expect("Index without update time"); Ok(time) } pub(crate) fn set_updated_at(&self, wtxn: &mut RwTxn, time: &DateTime) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>>(wtxn, UPDATED_AT_KEY, &time) + self.main.put::<_, Str, SerdeJson>>(wtxn, main_key::UPDATED_AT_KEY, &time) } } diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 6792d6278..ceba7bf01 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -9,6 +9,7 @@ use serde_json::Value; use crate::error::{InternalError, UserError}; use crate::heed_codec::CboRoaringBitmapCodec; +use crate::index::{db_name, main_key}; use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds, Result}; use super::ClearDocuments; @@ -78,7 +79,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let primary_key = self.index.primary_key(self.wtxn)?.ok_or_else(|| { - InternalError::DatabaseMissingEntry { db_name: "main", key: Some("primary-key") } + InternalError::DatabaseMissingEntry { + db_name: db_name::MAIN, + key: Some(main_key::PRIMARY_KEY_KEY), + } })?; let id_field = fields_ids_map.id(primary_key).expect(r#"the field "id" to be present"#); diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs index e5e55682e..94ae12108 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -32,7 +32,7 @@ const LMDB_MAX_KEY_LENGTH: usize = 511; const ONE_KILOBYTE: usize = 1024 * 1024; const MAX_POSITION: usize = 1000; -const WORDS_FST_KEY: &[u8] = crate::index::WORDS_FST_KEY.as_bytes(); +const WORDS_FST_KEY: &[u8] = crate::index::main_key::WORDS_FST_KEY.as_bytes(); pub struct Readers { pub main: Reader, diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 82003eddc..c44130d7e 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -11,6 +11,7 @@ use roaring::RoaringBitmap; use serde_json::{Map, Value}; use crate::error::{Error, UserError, InternalError}; +use crate::index::db_name; use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::{BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; @@ -411,7 +412,7 @@ impl Transform<'_, '_> { let key = BEU32::new(docid); let base_obkv = self.index.documents.get(&self.rtxn, &key)? .ok_or(InternalError::DatabaseMissingEntry { - db_name: "documents", + db_name: db_name::DOCUMENTS, key: None, })?; let update_obkv = obkv::KvReader::new(update_obkv);