2021-05-26 20:42:09 +02:00
|
|
|
use std::{collections::HashSet, io::{BufReader, BufRead, Write}};
|
2021-05-24 16:05:43 +02:00
|
|
|
use std::fs::{create_dir_all, File};
|
2021-05-10 20:23:12 +02:00
|
|
|
use std::path::{Path, PathBuf};
|
2021-03-23 11:00:50 +01:00
|
|
|
|
|
|
|
use heed::{
|
|
|
|
types::{ByteSlice, Str},
|
2021-03-24 11:29:11 +01:00
|
|
|
CompactionOption, Database, Env, EnvOpenOptions,
|
2021-03-23 11:00:50 +01:00
|
|
|
};
|
|
|
|
use uuid::Uuid;
|
2021-05-26 20:42:09 +02:00
|
|
|
use serde::{Serialize, Deserialize};
|
2021-03-23 11:00:50 +01:00
|
|
|
|
2021-05-24 16:05:43 +02:00
|
|
|
use super::{Result, UuidResolverError, UUID_STORE_SIZE};
|
2021-04-09 14:41:24 +02:00
|
|
|
use crate::helpers::EnvSizer;
|
2021-03-23 11:00:50 +01:00
|
|
|
|
2021-05-26 20:42:09 +02:00
|
|
|
#[derive(Serialize, Deserialize)]
|
|
|
|
struct DumpEntry {
|
|
|
|
uuid: Uuid,
|
|
|
|
uid: String,
|
|
|
|
}
|
|
|
|
|
2021-03-23 11:00:50 +01:00
|
|
|
#[async_trait::async_trait]
|
2021-05-26 20:42:09 +02:00
|
|
|
pub trait UuidStore: Sized {
|
2021-03-23 11:00:50 +01:00
|
|
|
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return
|
|
|
|
// the uuid otherwise.
|
|
|
|
async fn create_uuid(&self, uid: String, err: bool) -> Result<Uuid>;
|
|
|
|
async fn get_uuid(&self, uid: String) -> Result<Option<Uuid>>;
|
|
|
|
async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
|
|
|
|
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
|
2021-03-25 14:21:05 +01:00
|
|
|
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
|
2021-04-22 10:14:29 +02:00
|
|
|
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
2021-04-09 14:41:24 +02:00
|
|
|
async fn get_size(&self) -> Result<u64>;
|
2021-05-24 16:05:43 +02:00
|
|
|
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
2021-03-23 11:00:50 +01:00
|
|
|
}
|
|
|
|
|
2021-05-10 20:23:12 +02:00
|
|
|
#[derive(Clone)]
|
2021-03-23 11:00:50 +01:00
|
|
|
pub struct HeedUuidStore {
|
|
|
|
env: Env,
|
|
|
|
db: Database<Str, ByteSlice>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl HeedUuidStore {
|
|
|
|
pub fn new(path: impl AsRef<Path>) -> anyhow::Result<Self> {
|
|
|
|
let path = path.as_ref().join("index_uuids");
|
|
|
|
create_dir_all(&path)?;
|
|
|
|
let mut options = EnvOpenOptions::new();
|
|
|
|
options.map_size(UUID_STORE_SIZE); // 1GB
|
|
|
|
let env = options.open(path)?;
|
2021-05-26 22:52:06 +02:00
|
|
|
let db = env.create_database(None)?; Ok(Self { env, db }) } pub fn create_uuid(&self, name: String, err: bool) -> Result<Uuid> { let env = self.env.clone(); let db = self.db; let mut txn = env.write_txn()?;
|
2021-05-10 20:23:12 +02:00
|
|
|
match db.get(&txn, &name)? {
|
|
|
|
Some(uuid) => {
|
|
|
|
if err {
|
2021-05-24 16:05:43 +02:00
|
|
|
Err(UuidResolverError::NameAlreadyExist)
|
2021-05-10 20:23:12 +02:00
|
|
|
} else {
|
|
|
|
let uuid = Uuid::from_slice(uuid)?;
|
2021-03-23 11:00:50 +01:00
|
|
|
Ok(uuid)
|
|
|
|
}
|
|
|
|
}
|
2021-05-10 20:23:12 +02:00
|
|
|
None => {
|
|
|
|
let uuid = Uuid::new_v4();
|
|
|
|
db.put(&mut txn, &name, uuid.as_bytes())?;
|
|
|
|
txn.commit()?;
|
|
|
|
Ok(uuid)
|
|
|
|
}
|
|
|
|
}
|
2021-05-26 20:42:09 +02:00
|
|
|
} pub fn get_uuid(&self, name: String) -> Result<Option<Uuid>> { let env = self.env.clone(); let db = self.db;
|
2021-05-10 20:23:12 +02:00
|
|
|
let txn = env.read_txn()?;
|
|
|
|
match db.get(&txn, &name)? {
|
|
|
|
Some(uuid) => {
|
|
|
|
let uuid = Uuid::from_slice(uuid)?;
|
|
|
|
Ok(Some(uuid))
|
2021-03-23 11:00:50 +01:00
|
|
|
}
|
2021-05-10 20:23:12 +02:00
|
|
|
None => Ok(None),
|
|
|
|
}
|
2021-03-23 11:00:50 +01:00
|
|
|
}
|
|
|
|
|
2021-05-10 20:23:12 +02:00
|
|
|
pub fn delete(&self, uid: String) -> Result<Option<Uuid>> {
|
2021-03-23 11:00:50 +01:00
|
|
|
let env = self.env.clone();
|
|
|
|
let db = self.db;
|
2021-05-10 20:23:12 +02:00
|
|
|
let mut txn = env.write_txn()?;
|
|
|
|
match db.get(&txn, &uid)? {
|
|
|
|
Some(uuid) => {
|
|
|
|
let uuid = Uuid::from_slice(uuid)?;
|
|
|
|
db.delete(&mut txn, &uid)?;
|
|
|
|
txn.commit()?;
|
|
|
|
Ok(Some(uuid))
|
2021-03-23 11:00:50 +01:00
|
|
|
}
|
2021-05-10 20:23:12 +02:00
|
|
|
None => Ok(None),
|
|
|
|
}
|
2021-03-23 11:00:50 +01:00
|
|
|
}
|
|
|
|
|
2021-05-10 20:23:12 +02:00
|
|
|
pub fn list(&self) -> Result<Vec<(String, Uuid)>> {
|
2021-03-23 11:00:50 +01:00
|
|
|
let env = self.env.clone();
|
|
|
|
let db = self.db;
|
2021-05-10 20:23:12 +02:00
|
|
|
let txn = env.read_txn()?;
|
|
|
|
let mut entries = Vec::new();
|
|
|
|
for entry in db.iter(&txn)? {
|
|
|
|
let (name, uuid) = entry?;
|
|
|
|
let uuid = Uuid::from_slice(uuid)?;
|
|
|
|
entries.push((name.to_owned(), uuid))
|
|
|
|
}
|
|
|
|
Ok(entries)
|
2021-03-23 11:00:50 +01:00
|
|
|
}
|
|
|
|
|
2021-05-10 20:23:12 +02:00
|
|
|
pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
|
2021-03-25 14:21:05 +01:00
|
|
|
let env = self.env.clone();
|
|
|
|
let db = self.db;
|
2021-05-10 20:23:12 +02:00
|
|
|
let mut txn = env.write_txn()?;
|
|
|
|
db.put(&mut txn, &name, uuid.as_bytes())?;
|
|
|
|
txn.commit()?;
|
|
|
|
Ok(())
|
2021-03-25 14:21:05 +01:00
|
|
|
}
|
|
|
|
|
2021-05-10 20:24:14 +02:00
|
|
|
pub fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
2021-03-23 11:00:50 +01:00
|
|
|
let env = self.env.clone();
|
|
|
|
let db = self.db;
|
2021-05-10 20:23:12 +02:00
|
|
|
// Write transaction to acquire a lock on the database.
|
|
|
|
let txn = env.write_txn()?;
|
|
|
|
let mut entries = HashSet::new();
|
|
|
|
for entry in db.iter(&txn)? {
|
|
|
|
let (_, uuid) = entry?;
|
|
|
|
let uuid = Uuid::from_slice(uuid)?;
|
|
|
|
entries.insert(uuid);
|
|
|
|
}
|
2021-03-23 11:00:50 +01:00
|
|
|
|
2021-05-10 20:23:12 +02:00
|
|
|
// only perform snapshot if there are indexes
|
|
|
|
if !entries.is_empty() {
|
|
|
|
path.push("index_uuids");
|
|
|
|
create_dir_all(&path).unwrap();
|
|
|
|
path.push("data.mdb");
|
|
|
|
env.copy_to_path(path, CompactionOption::Enabled)?;
|
|
|
|
}
|
|
|
|
Ok(entries)
|
2021-03-23 11:00:50 +01:00
|
|
|
}
|
2021-04-09 14:41:24 +02:00
|
|
|
|
2021-05-10 20:23:12 +02:00
|
|
|
pub fn get_size(&self) -> Result<u64> {
|
2021-04-09 14:41:24 +02:00
|
|
|
Ok(self.env.size())
|
|
|
|
}
|
2021-05-24 16:05:43 +02:00
|
|
|
|
|
|
|
pub fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
|
|
|
let dump_path = path.join("index_uuids");
|
|
|
|
create_dir_all(&dump_path)?;
|
|
|
|
let dump_file_path = dump_path.join("data.jsonl");
|
|
|
|
let mut dump_file = File::create(&dump_file_path)?;
|
|
|
|
let mut uuids = HashSet::new();
|
|
|
|
|
|
|
|
let txn = self.env.read_txn()?;
|
|
|
|
for entry in self.db.iter(&txn)? {
|
2021-05-26 20:42:09 +02:00
|
|
|
let (uid, uuid) = entry?;
|
2021-05-26 22:52:06 +02:00
|
|
|
let uid = uid.to_string();
|
|
|
|
let uuid = Uuid::from_slice(uuid)?;
|
|
|
|
|
2021-05-26 20:42:09 +02:00
|
|
|
let entry = DumpEntry {
|
|
|
|
uuid, uid
|
|
|
|
};
|
|
|
|
serde_json::to_writer(&mut dump_file, &entry)?;
|
|
|
|
dump_file.write(b"\n").unwrap();
|
2021-05-26 22:52:06 +02:00
|
|
|
|
|
|
|
uuids.insert(uuid);
|
2021-05-24 16:05:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
Ok(uuids)
|
|
|
|
}
|
2021-05-26 22:52:06 +02:00
|
|
|
|
|
|
|
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
|
|
|
let uuid_resolver_path = dst.as_ref().join("uuid_resolver/");
|
|
|
|
std::fs::create_dir_all(&uuid_resolver_path)?;
|
|
|
|
|
|
|
|
let src_indexes = src.as_ref().join("index_uuids/data.jsonl");
|
|
|
|
let indexes = File::open(&src_indexes)?;
|
|
|
|
let mut indexes = BufReader::new(indexes);
|
|
|
|
let mut line = String::new();
|
|
|
|
|
|
|
|
let db = Self::new(dst)?;
|
|
|
|
let mut txn = db.env.write_txn()?;
|
|
|
|
|
|
|
|
loop {
|
|
|
|
match indexes.read_line(&mut line) {
|
|
|
|
Ok(0) => break,
|
|
|
|
Ok(_) => {
|
|
|
|
let DumpEntry { uuid, uid } = serde_json::from_str(&line)?;
|
|
|
|
db.db.put(&mut txn, &uid, uuid.as_bytes())?;
|
|
|
|
}
|
|
|
|
Err(e) => Err(e)?,
|
|
|
|
}
|
|
|
|
|
|
|
|
line.clear();
|
|
|
|
}
|
|
|
|
txn.commit()?;
|
|
|
|
|
|
|
|
db.env.prepare_for_closing().wait();
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
2021-03-23 11:00:50 +01:00
|
|
|
}
|
2021-05-10 20:23:12 +02:00
|
|
|
|
|
|
|
#[async_trait::async_trait]
|
|
|
|
impl UuidStore for HeedUuidStore {
|
|
|
|
async fn create_uuid(&self, name: String, err: bool) -> Result<Uuid> {
|
|
|
|
let this = self.clone();
|
|
|
|
tokio::task::spawn_blocking(move || this.create_uuid(name, err)).await?
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
|
|
|
|
let this = self.clone();
|
|
|
|
tokio::task::spawn_blocking(move || this.get_uuid(name)).await?
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn delete(&self, uid: String) -> Result<Option<Uuid>> {
|
|
|
|
let this = self.clone();
|
|
|
|
tokio::task::spawn_blocking(move || this.delete(uid)).await?
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
|
|
|
|
let this = self.clone();
|
|
|
|
tokio::task::spawn_blocking(move || this.list()).await?
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
|
|
|
|
let this = self.clone();
|
|
|
|
tokio::task::spawn_blocking(move || this.insert(name, uuid)).await?
|
|
|
|
}
|
|
|
|
|
2021-05-10 20:24:14 +02:00
|
|
|
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
2021-05-10 20:23:12 +02:00
|
|
|
let this = self.clone();
|
|
|
|
tokio::task::spawn_blocking(move || this.snapshot(path)).await?
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn get_size(&self) -> Result<u64> {
|
|
|
|
self.get_size()
|
|
|
|
}
|
2021-05-24 16:05:43 +02:00
|
|
|
|
|
|
|
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
|
|
|
let this = self.clone();
|
|
|
|
tokio::task::spawn_blocking(move || this.dump(path)).await?
|
|
|
|
}
|
2021-05-10 20:23:12 +02:00
|
|
|
}
|