use std::collections::HashSet;
use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;

use heed::types::{SerdeBincode, Str};
use heed::{CompactionOption, Database, Env};
use serde::{Deserialize, Serialize};
use uuid::Uuid;

use super::error::{IndexResolverError, Result};
use crate::tasks::task::TaskId;
use crate::EnvSizer;
/// A single index entry as serialized into a dump's `data.jsonl` file
/// (one JSON object per line; see `HeedMetaStore::dump` / `load_dump`).
#[derive(Serialize, Deserialize)]
pub struct DumpEntry {
    /// The user-facing index uid.
    pub uid: String,
    /// The metadata associated with that index.
    pub index_meta: IndexMeta,
}
/// Subdirectory (relative to a snapshot or dump root) holding the uuid database.
const UUIDS_DB_PATH: &str = "index_uuids";
/// Storage of the mapping between user-facing index uids and their metadata.
#[async_trait::async_trait]
#[cfg_attr(test, mockall::automock)]
pub trait IndexMetaStore: Sized {
    /// Return `uid` together with its metadata, or `None` if no entry exists.
    async fn get(&self, uid: String) -> Result<(String, Option<IndexMeta>)>;
    /// Remove the entry for `uid`, returning its metadata if it existed.
    async fn delete(&self, uid: String) -> Result<Option<IndexMeta>>;
    /// List every `(uid, meta)` entry in the store.
    async fn list(&self) -> Result<Vec<(String, IndexMeta)>>;
    // Create a new entry for `name`. Return an error if the entry already exists.
    async fn insert(&self, name: String, meta: IndexMeta) -> Result<()>;
    /// Snapshot the store under `path` and return the set of all index uuids.
    async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
    /// On-disk size of the store, in bytes.
    async fn get_size(&self) -> Result<u64>;
    /// Dump the store's content under `path` (see `DumpEntry`).
    async fn dump(&self, path: PathBuf) -> Result<()>;
}
/// Metadata stored for each index uid.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct IndexMeta {
    /// Internal uuid identifying the index.
    pub uuid: Uuid,
    /// Id of the task that created this index.
    pub creation_task_id: TaskId,
}
/// `IndexMetaStore` implementation backed by a heed (LMDB) database.
#[derive(Clone)]
pub struct HeedMetaStore {
    // Shared environment; the last clone to drop closes it (see the `Drop` impl).
    env: Arc<Env>,
    // Maps index uid (string key) to its bincode-serialized `IndexMeta`.
    db: Database<Str, SerdeBincode<IndexMeta>>,
}
impl Drop for HeedMetaStore {
    fn drop(&mut self) {
        // If this is the last handle to the environment, schedule it for closing.
        // NOTE(review): a clone of the Arc taken between the count check and
        // `prepare_for_closing` would race this — assumed not to happen because
        // drops occur at shutdown; confirm against callers.
        if Arc::strong_count(&self.env) == 1 {
            self.env.as_ref().clone().prepare_for_closing();
        }
    }
}
impl HeedMetaStore {
|
2021-12-15 17:55:39 +01:00
|
|
|
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
|
2021-09-27 16:48:03 +02:00
|
|
|
let db = env.create_database(Some("uuids"))?;
|
2021-09-24 11:53:11 +02:00
|
|
|
Ok(Self { env, db })
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
fn get(&self, name: &str) -> Result<Option<IndexMeta>> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let env = self.env.clone();
|
|
|
|
let db = self.db;
|
|
|
|
let txn = env.read_txn()?;
|
|
|
|
match db.get(&txn, name)? {
|
2021-12-02 16:03:26 +01:00
|
|
|
Some(meta) => Ok(Some(meta)),
|
2021-09-24 11:53:11 +02:00
|
|
|
None => Ok(None),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
fn delete(&self, uid: String) -> Result<Option<IndexMeta>> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let env = self.env.clone();
|
|
|
|
let db = self.db;
|
|
|
|
let mut txn = env.write_txn()?;
|
|
|
|
match db.get(&txn, &uid)? {
|
2021-12-02 16:03:26 +01:00
|
|
|
Some(meta) => {
|
2021-09-24 11:53:11 +02:00
|
|
|
db.delete(&mut txn, &uid)?;
|
|
|
|
txn.commit()?;
|
2021-12-02 16:03:26 +01:00
|
|
|
Ok(Some(meta))
|
2021-09-24 11:53:11 +02:00
|
|
|
}
|
|
|
|
None => Ok(None),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
fn list(&self) -> Result<Vec<(String, IndexMeta)>> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let env = self.env.clone();
|
|
|
|
let db = self.db;
|
|
|
|
let txn = env.read_txn()?;
|
|
|
|
let mut entries = Vec::new();
|
|
|
|
for entry in db.iter(&txn)? {
|
2021-12-02 16:03:26 +01:00
|
|
|
let (name, meta) = entry?;
|
|
|
|
entries.push((name.to_string(), meta))
|
2021-09-24 11:53:11 +02:00
|
|
|
}
|
|
|
|
Ok(entries)
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
pub(crate) fn insert(&self, name: String, meta: IndexMeta) -> Result<()> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let env = self.env.clone();
|
|
|
|
let db = self.db;
|
|
|
|
let mut txn = env.write_txn()?;
|
|
|
|
|
|
|
|
if db.get(&txn, &name)?.is_some() {
|
2021-10-26 19:36:48 +02:00
|
|
|
return Err(IndexResolverError::IndexAlreadyExists(name));
|
2021-09-24 11:53:11 +02:00
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
db.put(&mut txn, &name, &meta)?;
|
2021-09-24 11:53:11 +02:00
|
|
|
txn.commit()?;
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
2021-09-24 11:53:11 +02:00
|
|
|
// Write transaction to acquire a lock on the database.
|
2021-12-02 16:03:26 +01:00
|
|
|
let txn = self.env.write_txn()?;
|
2021-09-24 11:53:11 +02:00
|
|
|
let mut entries = HashSet::new();
|
2021-12-02 16:03:26 +01:00
|
|
|
for entry in self.db.iter(&txn)? {
|
|
|
|
let (_, IndexMeta { uuid, .. }) = entry?;
|
2021-09-24 11:53:11 +02:00
|
|
|
entries.insert(uuid);
|
|
|
|
}
|
|
|
|
|
|
|
|
// only perform snapshot if there are indexes
|
|
|
|
if !entries.is_empty() {
|
|
|
|
path.push(UUIDS_DB_PATH);
|
|
|
|
create_dir_all(&path).unwrap();
|
|
|
|
path.push("data.mdb");
|
2021-12-02 16:03:26 +01:00
|
|
|
self.env.copy_to_path(path, CompactionOption::Enabled)?;
|
2021-09-24 11:53:11 +02:00
|
|
|
}
|
|
|
|
Ok(entries)
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
fn get_size(&self) -> Result<u64> {
|
2021-09-24 11:53:11 +02:00
|
|
|
Ok(self.env.size())
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
pub fn dump(&self, path: PathBuf) -> Result<()> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let dump_path = path.join(UUIDS_DB_PATH);
|
|
|
|
create_dir_all(&dump_path)?;
|
|
|
|
let dump_file_path = dump_path.join("data.jsonl");
|
|
|
|
let mut dump_file = File::create(&dump_file_path)?;
|
|
|
|
|
|
|
|
let txn = self.env.read_txn()?;
|
|
|
|
for entry in self.db.iter(&txn)? {
|
2021-12-02 16:03:26 +01:00
|
|
|
let (uid, index_meta) = entry?;
|
2021-09-24 11:53:11 +02:00
|
|
|
let uid = uid.to_string();
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
let entry = DumpEntry { uid, index_meta };
|
2021-09-24 11:53:11 +02:00
|
|
|
serde_json::to_writer(&mut dump_file, &entry)?;
|
|
|
|
dump_file.write_all(b"\n").unwrap();
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
Ok(())
|
2021-09-24 11:53:11 +02:00
|
|
|
}
|
|
|
|
|
2021-12-15 17:55:39 +01:00
|
|
|
pub fn load_dump(src: impl AsRef<Path>, env: Arc<heed::Env>) -> Result<()> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
|
|
|
|
let indexes = File::open(&src_indexes)?;
|
|
|
|
let mut indexes = BufReader::new(indexes);
|
|
|
|
let mut line = String::new();
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
let db = Self::new(env)?;
|
2021-09-24 11:53:11 +02:00
|
|
|
let mut txn = db.env.write_txn()?;
|
|
|
|
|
|
|
|
loop {
|
|
|
|
match indexes.read_line(&mut line) {
|
|
|
|
Ok(0) => break,
|
|
|
|
Ok(_) => {
|
2021-12-02 16:03:26 +01:00
|
|
|
let DumpEntry { uid, index_meta } = serde_json::from_str(&line)?;
|
|
|
|
db.db.put(&mut txn, &uid, &index_meta)?;
|
2021-09-24 11:53:11 +02:00
|
|
|
}
|
|
|
|
Err(e) => return Err(e.into()),
|
|
|
|
}
|
|
|
|
|
|
|
|
line.clear();
|
|
|
|
}
|
|
|
|
txn.commit()?;
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
#[async_trait::async_trait]
|
2021-12-02 16:03:26 +01:00
|
|
|
impl IndexMetaStore for HeedMetaStore {
|
|
|
|
async fn get(&self, name: String) -> Result<(String, Option<IndexMeta>)> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let this = self.clone();
|
2021-12-02 16:03:26 +01:00
|
|
|
tokio::task::spawn_blocking(move || this.get(&name).map(|res| (name, res))).await?
|
2021-09-24 11:53:11 +02:00
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
async fn delete(&self, uid: String) -> Result<Option<IndexMeta>> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let this = self.clone();
|
|
|
|
tokio::task::spawn_blocking(move || this.delete(uid)).await?
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
async fn list(&self) -> Result<Vec<(String, IndexMeta)>> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let this = self.clone();
|
|
|
|
tokio::task::spawn_blocking(move || this.list()).await?
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
async fn insert(&self, name: String, meta: IndexMeta) -> Result<()> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let this = self.clone();
|
2021-12-02 16:03:26 +01:00
|
|
|
tokio::task::spawn_blocking(move || this.insert(name, meta)).await?
|
2021-09-24 11:53:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
|
|
|
let this = self.clone();
|
|
|
|
tokio::task::spawn_blocking(move || this.snapshot(path)).await?
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn get_size(&self) -> Result<u64> {
|
|
|
|
self.get_size()
|
|
|
|
}
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
async fn dump(&self, path: PathBuf) -> Result<()> {
|
2021-09-24 11:53:11 +02:00
|
|
|
let this = self.clone();
|
2021-12-02 16:03:26 +01:00
|
|
|
Ok(tokio::task::spawn_blocking(move || this.dump(path)).await??)
|
2021-09-24 11:53:11 +02:00
|
|
|
}
|
|
|
|
}