use std::fs::{create_dir_all, remove_dir_all}; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Duration; use anyhow::{bail, Context}; use chrono::{DateTime, Utc}; use dashmap::{mapref::entry::Entry, DashMap}; use heed::{ types::{ByteSlice, SerdeJson, Str}, Database, Env, EnvOpenOptions, RoTxn, RwTxn, }; use log::{error, info}; use milli::Index; use rayon::ThreadPool; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::update_handler::UpdateHandler; use super::{UpdateMeta, UpdateResult}; use crate::option::IndexerOpts; type UpdateStore = super::update_store::UpdateStore; #[derive(Serialize, Deserialize, Debug, PartialEq)] pub struct IndexMeta { update_store_size: u64, index_store_size: u64, pub uuid: Uuid, pub created_at: DateTime, pub updated_at: DateTime, } impl IndexMeta { fn open( &self, path: impl AsRef, thread_pool: Arc, indexer_options: &IndexerOpts, ) -> anyhow::Result<(Arc, Arc)> { let update_path = make_update_db_path(&path, &self.uuid); let index_path = make_index_db_path(&path, &self.uuid); create_dir_all(&update_path)?; create_dir_all(&index_path)?; let mut options = EnvOpenOptions::new(); options.map_size(self.index_store_size as usize); let index = Arc::new(Index::new(options, index_path)?); let mut options = EnvOpenOptions::new(); options.map_size(self.update_store_size as usize); let handler = UpdateHandler::new(indexer_options, index.clone(), thread_pool)?; let update_store = UpdateStore::open(options, update_path, handler)?; Ok((index, update_store)) } } pub struct IndexStore { env: Env, name_to_uuid: Database, uuid_to_index: DashMap, Arc)>, uuid_to_index_meta: Database>, thread_pool: Arc, indexer_options: IndexerOpts, } impl IndexStore { pub fn new(path: impl AsRef, indexer_options: IndexerOpts) -> anyhow::Result { let env = EnvOpenOptions::new() .map_size(4096 * 100) .max_dbs(2) .open(path)?; let uuid_to_index = DashMap::new(); let name_to_uuid = open_or_create_database(&env, Some("name_to_uid"))?; let uuid_to_index_meta = open_or_create_database(&env, Some("uid_to_index_db"))?; let thread_pool = rayon::ThreadPoolBuilder::new() .num_threads(indexer_options.indexing_jobs.unwrap_or(0)) .build()?; let thread_pool = Arc::new(thread_pool); Ok(Self { env, name_to_uuid, uuid_to_index, uuid_to_index_meta, thread_pool, indexer_options, }) } pub fn delete(&self, index_uid: impl AsRef) -> anyhow::Result<()> { // we remove the references to the index from the index map so it is not accessible anymore let mut txn = self.env.write_txn()?; let uuid = self .index_uuid(&txn, &index_uid)? .with_context(|| format!("Index {:?} doesn't exist", index_uid.as_ref()))?; self.name_to_uuid.delete(&mut txn, index_uid.as_ref())?; self.uuid_to_index_meta.delete(&mut txn, uuid.as_bytes())?; txn.commit()?; // If the index was loaded (i.e it is present in the uuid_to_index map), then we need to // close it. The process goes as follow: // // 1) We want to remove any pending updates from the store. // 2) We try to get ownership on the update store so we can close it. It may take a // couple of tries, but since the update store event loop only has a weak reference to // itself, and we are the only other function holding a reference to it otherwise, we will // get it eventually. // 3) We request a closing of the update store. // 4) We can take ownership on the index, and close it. // 5) We remove all the files from the file system. let index_uid = index_uid.as_ref().to_string(); let path = self.env.path().to_owned(); if let Some((_, (index, updates))) = self.uuid_to_index.remove(&uuid) { std::thread::spawn(move || { info!("Preparing for {:?} deletion.", index_uid); // this error is non fatal, but may delay the deletion. if let Err(e) = updates.abort_pendings() { error!( "error aborting pending updates when deleting index {:?}: {}", index_uid, e ); } let updates = get_arc_ownership_blocking(updates); let close_event = updates.prepare_for_closing(); close_event.wait(); info!("closed update store for {:?}", index_uid); let index = get_arc_ownership_blocking(index); let close_event = index.prepare_for_closing(); close_event.wait(); let update_path = make_update_db_path(&path, &uuid); let index_path = make_index_db_path(&path, &uuid); if let Err(e) = remove_dir_all(index_path) { error!("error removing index {:?}: {}", index_uid, e); } if let Err(e) = remove_dir_all(update_path) { error!("error removing index {:?}: {}", index_uid, e); } info!("index {:?} deleted.", index_uid); }); } Ok(()) } fn index_uuid(&self, txn: &RoTxn, name: impl AsRef) -> anyhow::Result> { match self.name_to_uuid.get(txn, name.as_ref())? { Some(bytes) => { let uuid = Uuid::from_slice(bytes)?; Ok(Some(uuid)) } None => Ok(None), } } fn retrieve_index( &self, txn: &RoTxn, uid: Uuid, ) -> anyhow::Result, Arc)>> { match self.uuid_to_index.entry(uid.clone()) { Entry::Vacant(entry) => match self.uuid_to_index_meta.get(txn, uid.as_bytes())? { Some(meta) => { let path = self.env.path(); let (index, updates) = meta.open(path, self.thread_pool.clone(), &self.indexer_options)?; entry.insert((index.clone(), updates.clone())); Ok(Some((index, updates))) } None => Ok(None), }, Entry::Occupied(entry) => { let (index, updates) = entry.get(); Ok(Some((index.clone(), updates.clone()))) } } } fn get_index_txn( &self, txn: &RoTxn, name: impl AsRef, ) -> anyhow::Result, Arc)>> { match self.index_uuid(&txn, name)? { Some(uid) => self.retrieve_index(&txn, uid), None => Ok(None), } } pub fn index( &self, name: impl AsRef, ) -> anyhow::Result, Arc)>> { let txn = self.env.read_txn()?; self.get_index_txn(&txn, name) } /// Use this function to perform an update on an index. /// This function also puts a lock on what index is allowed to perform an update. pub fn update_index(&self, name: impl AsRef, f: F) -> anyhow::Result<(T, IndexMeta)> where F: FnOnce(&Index) -> anyhow::Result, { let mut txn = self.env.write_txn()?; let (index, _) = self .get_index_txn(&txn, &name)? .with_context(|| format!("Index {:?} doesn't exist", name.as_ref()))?; let result = f(index.as_ref()); match result { Ok(ret) => { let meta = self.update_meta(&mut txn, name, |meta| meta.updated_at = Utc::now())?; txn.commit()?; Ok((ret, meta)) } Err(e) => Err(e), } } pub fn index_with_meta( &self, name: impl AsRef, ) -> anyhow::Result, IndexMeta)>> { let txn = self.env.read_txn()?; let uuid = self.index_uuid(&txn, &name)?; match uuid { Some(uuid) => { let meta = self .uuid_to_index_meta .get(&txn, uuid.as_bytes())? .with_context(|| { format!("unable to retrieve metadata for index {:?}", name.as_ref()) })?; let (index, _) = self .retrieve_index(&txn, uuid)? .with_context(|| format!("unable to retrieve index {:?}", name.as_ref()))?; Ok(Some((index, meta))) } None => Ok(None), } } fn update_meta( &self, txn: &mut RwTxn, name: impl AsRef, f: F, ) -> anyhow::Result where F: FnOnce(&mut IndexMeta), { let uuid = self .index_uuid(txn, &name)? .with_context(|| format!("Index {:?} doesn't exist", name.as_ref()))?; let mut meta = self .uuid_to_index_meta .get(txn, uuid.as_bytes())? .with_context(|| format!("couldn't retrieve metadata for index {:?}", name.as_ref()))?; f(&mut meta); self.uuid_to_index_meta.put(txn, uuid.as_bytes(), &meta)?; Ok(meta) } pub fn get_or_create_index( &self, name: impl AsRef, update_size: u64, index_size: u64, ) -> anyhow::Result<(Arc, Arc)> { let mut txn = self.env.write_txn()?; match self.get_index_txn(&txn, name.as_ref())? { Some(res) => Ok(res), None => { let uuid = Uuid::new_v4(); let (index, updates, _) = self.create_index_txn(&mut txn, uuid, name, update_size, index_size)?; // If we fail to commit the transaction, we must delete the database from the // file-system. if let Err(e) = txn.commit() { self.clean_db(uuid); return Err(e)?; } Ok((index, updates)) } } } // Remove all the files and data associated with a db uuid. fn clean_db(&self, uuid: Uuid) { let update_db_path = make_update_db_path(self.env.path(), &uuid); let index_db_path = make_index_db_path(self.env.path(), &uuid); remove_dir_all(update_db_path).expect("Failed to clean database"); remove_dir_all(index_db_path).expect("Failed to clean database"); self.uuid_to_index.remove(&uuid); } fn create_index_txn( &self, txn: &mut RwTxn, uuid: Uuid, name: impl AsRef, update_store_size: u64, index_store_size: u64, ) -> anyhow::Result<(Arc, Arc, IndexMeta)> { let created_at = Utc::now(); let updated_at = created_at; let meta = IndexMeta { update_store_size, index_store_size, uuid: uuid.clone(), created_at, updated_at, }; self.name_to_uuid.put(txn, name.as_ref(), uuid.as_bytes())?; self.uuid_to_index_meta.put(txn, uuid.as_bytes(), &meta)?; let path = self.env.path(); let (index, update_store) = match meta.open(path, self.thread_pool.clone(), &self.indexer_options) { Ok(res) => res, Err(e) => { self.clean_db(uuid); return Err(e); } }; self.uuid_to_index .insert(uuid, (index.clone(), update_store.clone())); Ok((index, update_store, meta)) } /// Same as `get_or_create`, but returns an error if the index already exists. pub fn create_index( &self, name: impl AsRef, update_size: u64, index_size: u64, ) -> anyhow::Result<(Arc, Arc, IndexMeta)> { let uuid = Uuid::new_v4(); let mut txn = self.env.write_txn()?; if self.name_to_uuid.get(&txn, name.as_ref())?.is_some() { bail!("index {:?} already exists", name.as_ref()) } let result = self.create_index_txn(&mut txn, uuid, name, update_size, index_size)?; // If we fail to commit the transaction, we must delete the database from the // file-system. if let Err(e) = txn.commit() { self.clean_db(uuid); return Err(e)?; } Ok(result) } /// Returns each index associated with its metadata: /// (index_name, IndexMeta, primary_key) /// This method will force all the indexes to be loaded. pub fn list_indexes(&self) -> anyhow::Result)>> { let txn = self.env.read_txn()?; let metas = self.name_to_uuid.iter(&txn)?.filter_map(|entry| { entry .map_err(|e| { error!("error decoding entry while listing indexes: {}", e); e }) .ok() }); let mut indexes = Vec::new(); for (name, uuid) in metas { // get index to retrieve primary key let (index, _) = self .get_index_txn(&txn, name)? .with_context(|| format!("could not load index {:?}", name))?; let primary_key = index.primary_key(&index.read_txn()?)?.map(String::from); // retieve meta let meta = self .uuid_to_index_meta .get(&txn, &uuid)? .with_context(|| format!("could not retieve meta for index {:?}", name))?; indexes.push((name.to_owned(), meta, primary_key)); } Ok(indexes) } } // Loops on an arc to get ownership on the wrapped value. This method sleeps 100ms before retrying. fn get_arc_ownership_blocking(mut item: Arc) -> T { loop { match Arc::try_unwrap(item) { Ok(item) => return item, Err(item_arc) => { item = item_arc; std::thread::sleep(Duration::from_millis(100)); continue; } } } } fn open_or_create_database( env: &Env, name: Option<&str>, ) -> anyhow::Result> { match env.open_database::(name)? { Some(db) => Ok(db), None => Ok(env.create_database::(name)?), } } fn make_update_db_path(path: impl AsRef, uuid: &Uuid) -> PathBuf { let mut path = path.as_ref().to_path_buf(); path.push(format!("update{}", uuid)); path } fn make_index_db_path(path: impl AsRef, uuid: &Uuid) -> PathBuf { let mut path = path.as_ref().to_path_buf(); path.push(format!("index{}", uuid)); path } #[cfg(test)] mod test { use super::*; use std::path::PathBuf; #[test] fn test_make_update_db_path() { let uuid = Uuid::new_v4(); assert_eq!( make_update_db_path("/home", &uuid), PathBuf::from(format!("/home/update{}", uuid)) ); } #[test] fn test_make_index_db_path() { let uuid = Uuid::new_v4(); assert_eq!( make_index_db_path("/home", &uuid), PathBuf::from(format!("/home/index{}", uuid)) ); } mod index_store { use super::*; #[test] fn test_index_uuid() { let temp = tempfile::tempdir().unwrap(); let store = IndexStore::new(temp, IndexerOpts::default()).unwrap(); let name = "foobar"; let txn = store.env.read_txn().unwrap(); // name is not found if the uuid in not present in the db assert!(store.index_uuid(&txn, &name).unwrap().is_none()); drop(txn); // insert an uuid in the the name_to_uuid_db: let uuid = Uuid::new_v4(); let mut txn = store.env.write_txn().unwrap(); store .name_to_uuid .put(&mut txn, &name, uuid.as_bytes()) .unwrap(); txn.commit().unwrap(); // check that the uuid is there let txn = store.env.read_txn().unwrap(); assert_eq!(store.index_uuid(&txn, &name).unwrap(), Some(uuid)); } #[test] fn test_retrieve_index() { let temp = tempfile::tempdir().unwrap(); let store = IndexStore::new(temp, IndexerOpts::default()).unwrap(); let uuid = Uuid::new_v4(); let txn = store.env.read_txn().unwrap(); assert!(store.retrieve_index(&txn, uuid).unwrap().is_none()); let created_at = Utc::now(); let updated_at = created_at; let meta = IndexMeta { update_store_size: 4096 * 100, index_store_size: 4096 * 100, uuid: uuid.clone(), created_at, updated_at, }; let mut txn = store.env.write_txn().unwrap(); store .uuid_to_index_meta .put(&mut txn, uuid.as_bytes(), &meta) .unwrap(); txn.commit().unwrap(); // the index cache should be empty assert!(store.uuid_to_index.is_empty()); let txn = store.env.read_txn().unwrap(); assert!(store.retrieve_index(&txn, uuid).unwrap().is_some()); assert_eq!(store.uuid_to_index.len(), 1); } #[test] fn test_index() { let temp = tempfile::tempdir().unwrap(); let store = IndexStore::new(temp, IndexerOpts::default()).unwrap(); let name = "foobar"; assert!(store.index(&name).unwrap().is_none()); let created_at = Utc::now(); let updated_at = created_at; let uuid = Uuid::new_v4(); let meta = IndexMeta { update_store_size: 4096 * 100, index_store_size: 4096 * 100, uuid: uuid.clone(), created_at, updated_at, }; let mut txn = store.env.write_txn().unwrap(); store .name_to_uuid .put(&mut txn, &name, uuid.as_bytes()) .unwrap(); store .uuid_to_index_meta .put(&mut txn, uuid.as_bytes(), &meta) .unwrap(); txn.commit().unwrap(); assert!(store.index(&name).unwrap().is_some()); } #[test] fn test_get_or_create_index() { let temp = tempfile::tempdir().unwrap(); let store = IndexStore::new(temp, IndexerOpts::default()).unwrap(); let name = "foobar"; let update_store_size = 4096 * 100; let index_store_size = 4096 * 100; store .get_or_create_index(&name, update_store_size, index_store_size) .unwrap(); let txn = store.env.read_txn().unwrap(); let uuid = store.name_to_uuid.get(&txn, &name).unwrap(); assert_eq!(store.uuid_to_index.len(), 1); assert!(uuid.is_some()); let uuid = Uuid::from_slice(uuid.unwrap()).unwrap(); let meta = store .uuid_to_index_meta .get(&txn, uuid.as_bytes()) .unwrap() .unwrap(); assert_eq!(meta.update_store_size, update_store_size); assert_eq!(meta.index_store_size, index_store_size); assert_eq!(meta.uuid, uuid); } #[test] fn test_create_index() { let temp = tempfile::tempdir().unwrap(); let store = IndexStore::new(temp, IndexerOpts::default()).unwrap(); let name = "foobar"; let update_store_size = 4096 * 100; let index_store_size = 4096 * 100; let uuid = Uuid::new_v4(); let mut txn = store.env.write_txn().unwrap(); store .create_index_txn(&mut txn, uuid, name, update_store_size, index_store_size) .unwrap(); let uuid = store.name_to_uuid.get(&txn, &name).unwrap(); assert_eq!(store.uuid_to_index.len(), 1); assert!(uuid.is_some()); let uuid = Uuid::from_slice(uuid.unwrap()).unwrap(); let meta = store .uuid_to_index_meta .get(&txn, uuid.as_bytes()) .unwrap() .unwrap(); assert_eq!(meta.update_store_size, update_store_size); assert_eq!(meta.index_store_size, index_store_size); assert_eq!(meta.uuid, uuid); } } }