From e63fdf2b224b6961148600729dd389481b2272e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 30 Oct 2020 10:56:35 +0100 Subject: [PATCH] Move the heed env into the index itself to ease the usage of the library --- src/index.rs | 32 ++++++++++++++++++++++++-------- src/subcommand/infos.rs | 12 +++++------- src/subcommand/search.rs | 10 ++++------ src/subcommand/serve.rs | 20 +++++++------------- src/update/clear_documents.rs | 1 + src/update/delete_documents.rs | 1 + 6 files changed, 42 insertions(+), 34 deletions(-) diff --git a/src/index.rs b/src/index.rs index f62a14ec8..a7b55b552 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,4 +1,5 @@ use std::borrow::Cow; +use std::path::Path; use anyhow::Context; use heed::types::*; @@ -20,6 +21,8 @@ pub const USERS_IDS_DOCUMENTS_IDS_KEY: &str = "users-ids-documents-ids"; #[derive(Clone)] pub struct Index { + /// The LMDB environment which this index is associated with. + pub env: heed::Env, /// Contains many different types (e.g. the fields ids map). pub main: PolyDatabase, /// A word and all the documents ids containing the word. 
@@ -33,14 +36,27 @@ pub struct Index { } impl Index { - pub fn new(env: &heed::Env) -> anyhow::Result<Index> { - Ok(Index { - main: env.create_poly_database(Some("main"))?, - word_docids: env.create_database(Some("word-docids"))?, - docid_word_positions: env.create_database(Some("docid-word-positions"))?, - word_pair_proximity_docids: env.create_database(Some("word-pair-proximity-docids"))?, - documents: env.create_database(Some("documents"))?, - }) + pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result<Index> { + options.max_dbs(5); + + let env = options.open(path)?; + let main = env.create_poly_database(Some("main"))?; + let word_docids = env.create_database(Some("word-docids"))?; + let docid_word_positions = env.create_database(Some("docid-word-positions"))?; + let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?; + let documents = env.create_database(Some("documents"))?; + + Ok(Index { env, main, word_docids, docid_word_positions, word_pair_proximity_docids, documents }) + } + + /// Create a write transaction to be able to write into the index. + pub fn write_txn(&self) -> heed::Result<heed::RwTxn> { + self.env.write_txn() + } + + /// Create a read transaction to be able to read the index. + pub fn read_txn(&self) -> heed::Result<heed::RoTxn> { + self.env.read_txn() } /// Writes the documents ids that corresponds to the user-ids-documents-ids FST. diff --git a/src/subcommand/infos.rs b/src/subcommand/infos.rs index 63dae5a95..0a4dabeba 100644 --- a/src/subcommand/infos.rs +++ b/src/subcommand/infos.rs @@ -130,20 +130,18 @@ enum Command { } pub fn run(opt: Opt) -> anyhow::Result<()> { - let env = EnvOpenOptions::new() - .map_size(opt.database_size) - .max_dbs(10) - .open(&opt.database)?; - stderrlog::new() .verbosity(opt.verbose) .show_level(false) .timestamp(stderrlog::Timestamp::Off) .init()?; + let mut options = EnvOpenOptions::new(); + options.map_size(opt.database_size); + // Open the LMDB database. 
- let index = Index::new(&env)?; - let rtxn = env.read_txn()?; + let index = Index::new(options, opt.database)?; + let rtxn = index.read_txn()?; match opt.command { MostCommonWords { limit } => most_common_words(&index, &rtxn, limit), diff --git a/src/subcommand/search.rs b/src/subcommand/search.rs index 0b7341c30..358805a0b 100644 --- a/src/subcommand/search.rs +++ b/src/subcommand/search.rs @@ -40,14 +40,12 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { .init()?; std::fs::create_dir_all(&opt.database)?; - let env = EnvOpenOptions::new() - .map_size(opt.database_size) - .max_dbs(10) - .open(&opt.database)?; + let mut options = EnvOpenOptions::new(); + options.map_size(opt.database_size); // Open the LMDB database. - let index = Index::new(&env)?; - let rtxn = env.read_txn()?; + let index = Index::new(options, &opt.database)?; + let rtxn = index.read_txn()?; let stdin = io::stdin(); let lines = match opt.query { diff --git a/src/subcommand/serve.rs b/src/subcommand/serve.rs index 7fa021de9..bd88c3661 100644 --- a/src/subcommand/serve.rs +++ b/src/subcommand/serve.rs @@ -181,13 +181,11 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { .init()?; create_dir_all(&opt.database)?; - let env = EnvOpenOptions::new() - .map_size(opt.database_size) - .max_dbs(10) - .open(&opt.database)?; + let mut options = EnvOpenOptions::new(); + options.map_size(opt.database_size); // Open the LMDB database. - let index = Index::new(&env)?; + let index = Index::new(options, &opt.database)?; // Setup the LMDB based update database. 
let mut update_store_options = EnvOpenOptions::new(); @@ -198,7 +196,6 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { let (update_status_sender, _) = broadcast::channel(100); let update_status_sender_cloned = update_status_sender.clone(); - let env_cloned = env.clone(); let index_cloned = index.clone(); let indexer_opt_cloned = opt.indexer.clone(); let update_store = UpdateStore::open( @@ -226,7 +223,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { let result: anyhow::Result<()> = match meta { UpdateMeta::DocumentsAddition => { // We must use the write transaction of the update here. - let mut wtxn = env_cloned.write_txn()?; + let mut wtxn = index_cloned.write_txn()?; let mut builder = update_builder.index_documents(&mut wtxn, &index_cloned); let replace_documents = true; @@ -283,7 +280,6 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { // Expose an HTML page to debug the search in a browser let db_name_cloned = db_name.clone(); let lmdb_path_cloned = lmdb_path.clone(); - let env_cloned = env.clone(); let index_cloned = index.clone(); let dash_html_route = warp::filters::method::get() .and(warp::filters::path::end()) @@ -296,7 +292,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { .len() as usize; // And the number of documents in the database. - let rtxn = env_cloned.clone().read_txn().unwrap(); + let rtxn = index_cloned.clone().read_txn().unwrap(); let docs_count = index_cloned.clone().number_of_documents(&rtxn).unwrap() as usize; IndexTemplate { db_name: db_name_cloned.clone(), db_size, docs_count } @@ -304,7 +300,6 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { let update_store_cloned = update_store.clone(); let lmdb_path_cloned = lmdb_path.clone(); - let env_cloned = env.clone(); let index_cloned = index.clone(); let updates_list_or_html_route = warp::filters::method::get() .and(warp::header("Accept")) @@ -335,7 +330,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { .len() as usize; // And the number of documents in the database. 
- let rtxn = env_cloned.clone().read_txn().unwrap(); + let rtxn = index_cloned.clone().read_txn().unwrap(); let docs_count = index_cloned.clone().number_of_documents(&rtxn).unwrap() as usize; let template = UpdatesTemplate { @@ -418,14 +413,13 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { query: Option, } - let env_cloned = env.clone(); let disable_highlighting = opt.disable_highlighting; let query_route = warp::filters::method::post() .and(warp::path!("query")) .and(warp::body::json()) .map(move |query: QueryBody| { let before_search = Instant::now(); - let rtxn = env_cloned.read_txn().unwrap(); + let rtxn = index.read_txn().unwrap(); let mut search = index.search(&rtxn); if let Some(query) = query.query { diff --git a/src/update/clear_documents.rs b/src/update/clear_documents.rs index a19692e19..4e68aac4c 100644 --- a/src/update/clear_documents.rs +++ b/src/update/clear_documents.rs @@ -13,6 +13,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { pub fn execute(self) -> anyhow::Result { let Index { + env: _env, main: _main, word_docids, docid_word_positions, diff --git a/src/update/delete_documents.rs b/src/update/delete_documents.rs index eb0bcd429..e9c3370de 100644 --- a/src/update/delete_documents.rs +++ b/src/update/delete_documents.rs @@ -69,6 +69,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let id_field = fields_ids_map.id("id").expect(r#"the field "id" to be present"#); let Index { + env: _env, main: _main, word_docids, docid_word_positions,