introduce index resolver

This commit is contained in:
mpostma 2021-09-24 11:53:11 +02:00
parent 5353be74c3
commit 42a6260b65
23 changed files with 833 additions and 193 deletions

View file

@ -0,0 +1,63 @@
use std::fmt;
use meilisearch_error::{Code, ErrorCode};
use tokio::sync::mpsc::error::SendError as MpscSendError;
use tokio::sync::oneshot::error::RecvError as OneshotRecvError;
use crate::{error::MilliError, index::error::IndexError};
pub type Result<T> = std::result::Result<T, IndexResolverError>;
#[derive(thiserror::Error, Debug)]
pub enum IndexResolverError {
#[error("{0}")]
IndexError(#[from] IndexError),
#[error("Index already exists")]
IndexAlreadyExists,
#[error("Index {0} not found")]
UnexistingIndex(String),
#[error("A primary key is already present. It's impossible to update it")]
ExistingPrimaryKey,
#[error("Internal Error: {0}")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}")]
Milli(#[from] milli::Error),
#[error("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")]
BadlyFormatted(String),
}
impl<T> From<MpscSendError<T>> for IndexResolverError
where T: Send + Sync + 'static + fmt::Debug
{
fn from(other: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::Internal(Box::new(other))
}
}
impl From<OneshotRecvError> for IndexResolverError {
fn from(other: tokio::sync::oneshot::error::RecvError) -> Self {
Self::Internal(Box::new(other))
}
}
internal_error!(
IndexResolverError: heed::Error,
uuid::Error,
std::io::Error,
tokio::task::JoinError,
serde_json::Error
);
impl ErrorCode for IndexResolverError {
fn error_code(&self) -> Code {
match self {
IndexResolverError::IndexError(e) => e.error_code(),
IndexResolverError::IndexAlreadyExists => Code::IndexAlreadyExists,
IndexResolverError::UnexistingIndex(_) => Code::IndexNotFound,
IndexResolverError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
IndexResolverError::Internal(_) => Code::Internal,
IndexResolverError::Milli(e) => MilliError(e).error_code(),
IndexResolverError::BadlyFormatted(_) => Code::InvalidIndexUid,
}
}
}

View file

@ -0,0 +1,116 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use milli::update::UpdateBuilder;
use tokio::fs;
use tokio::sync::RwLock;
use tokio::task::spawn_blocking;
use uuid::Uuid;
use super::error::{IndexResolverError, Result};
use crate::index::Index;
use crate::index::update_handler::UpdateHandler;
use crate::index_controller::update_file_store::UpdateFileStore;
use crate::options::IndexerOpts;
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
#[async_trait::async_trait]
pub trait IndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>;
async fn get(&self, uuid: Uuid) -> Result<Option<Index>>;
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>;
}
pub struct MapIndexStore {
index_store: AsyncMap<Uuid, Index>,
path: PathBuf,
index_size: usize,
update_file_store: Arc<UpdateFileStore>,
update_handler: Arc<UpdateHandler>,
}
impl MapIndexStore {
pub fn new(path: impl AsRef<Path>, index_size: usize, indexer_opts: &IndexerOpts) -> anyhow::Result<Self> {
let update_handler = Arc::new(UpdateHandler::new(indexer_opts)?);
let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap());
let path = path.as_ref().join("indexes/");
let index_store = Arc::new(RwLock::new(HashMap::new()));
Ok(Self {
index_store,
path,
index_size,
update_file_store,
update_handler,
})
}
}
#[async_trait::async_trait]
impl IndexStore for MapIndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> {
// We need to keep the lock until we are sure the db file has been opened correclty, to
// ensure that another db is not created at the same time.
let mut lock = self.index_store.write().await;
if let Some(index) = lock.get(&uuid) {
return Ok(index.clone());
}
let path = self.path.join(format!("index-{}", uuid));
if path.exists() {
return Err(IndexResolverError::IndexAlreadyExists);
}
let index_size = self.index_size;
let file_store = self.update_file_store.clone();
let update_handler = self.update_handler.clone();
let index = spawn_blocking(move || -> Result<Index> {
let index = Index::open(path, index_size, file_store, uuid, update_handler)?;
if let Some(primary_key) = primary_key {
let mut txn = index.write_txn()?;
let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index);
builder.set_primary_key(primary_key);
builder.execute(|_, _| ())?;
txn.commit()?;
}
Ok(index)
})
.await??;
lock.insert(uuid, index.clone());
Ok(index)
}
async fn get(&self, uuid: Uuid) -> Result<Option<Index>> {
let guard = self.index_store.read().await;
match guard.get(&uuid) {
Some(index) => Ok(Some(index.clone())),
None => {
// drop the guard here so we can perform the write after without deadlocking;
drop(guard);
let path = self.path.join(format!("index-{}", uuid));
if !path.exists() {
return Ok(None);
}
let index_size = self.index_size;
let file_store = self.update_file_store.clone();
let update_handler = self.update_handler.clone();
let index = spawn_blocking(move || Index::open(path, index_size, file_store, uuid, update_handler)).await??;
self.index_store.write().await.insert(uuid, index.clone());
Ok(Some(index))
}
}
}
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> {
let db_path = self.path.join(format!("index-{}", uuid));
fs::remove_dir_all(db_path).await?;
let index = self.index_store.write().await.remove(&uuid);
Ok(index)
}
}

View file

@ -0,0 +1,37 @@
use std::{collections::HashSet, path::PathBuf};
use tokio::sync::oneshot;
use uuid::Uuid;
use crate::index::Index;
use super::error::Result;
pub enum IndexResolverMsg {
Get {
uid: String,
ret: oneshot::Sender<Result<Index>>,
},
Delete {
uid: String,
ret: oneshot::Sender<Result<Index>>,
},
List {
ret: oneshot::Sender<Result<Vec<(String, Index)>>>,
},
Insert {
uuid: Uuid,
name: String,
ret: oneshot::Sender<Result<()>>,
},
SnapshotRequest {
path: PathBuf,
ret: oneshot::Sender<Result<HashSet<Index>>>,
},
GetSize {
ret: oneshot::Sender<Result<u64>>,
},
DumpRequest {
path: PathBuf,
ret: oneshot::Sender<Result<HashSet<Index>>>,
},
}

View file

@ -0,0 +1,117 @@
pub mod uuid_store;
mod index_store;
//mod message;
pub mod error;
use std::path::Path;
use uuid::Uuid;
use uuid_store::{UuidStore, HeedUuidStore};
use index_store::{IndexStore, MapIndexStore};
use error::{Result, IndexResolverError};
use crate::{index::Index, options::IndexerOpts};
pub type HardStateIndexResolver = IndexResolver<HeedUuidStore, MapIndexStore>;
pub fn create_index_resolver(path: impl AsRef<Path>, index_size: usize, indexer_opts: &IndexerOpts) -> anyhow::Result<HardStateIndexResolver> {
let uuid_store = HeedUuidStore::new(&path)?;
let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?;
Ok(IndexResolver::new(uuid_store, index_store))
}
pub struct IndexResolver<U, I> {
index_uuid_store: U,
index_store: I,
}
impl<U, I> IndexResolver<U ,I>
where U: UuidStore,
I: IndexStore,
{
pub fn new(
index_uuid_store: U,
index_store: I,
) -> Self {
Self {
index_uuid_store,
index_store,
}
}
pub async fn dump(&self, _path: impl AsRef<Path>) -> Result<Vec<Uuid>> {
todo!()
}
pub async fn get_size(&self) -> Result<u64> {
todo!()
}
pub async fn perform_snapshot(&self, _path: impl AsRef<Path>) -> Result<()> {
todo!()
}
pub async fn create_index(&self, uid: String, primary_key: Option<String>) -> Result<(Uuid, Index)> {
let uuid = Uuid::new_v4();
let index = self.index_store.create(uuid, primary_key).await?;
self.index_uuid_store.insert(uid, uuid).await?;
Ok((uuid, index))
}
pub async fn list(&self) -> Result<Vec<(String, Index)>> {
let uuids = self.index_uuid_store.list().await?;
let mut indexes = Vec::new();
for (name, uuid) in uuids {
match self.index_store.get(uuid).await? {
Some(index) => {
indexes.push((name, index))
},
None => {
// we found an unexisting index, we remove it from the uuid store
let _ = self.index_uuid_store.delete(name).await;
},
}
}
Ok(indexes)
}
pub async fn delete_index(&self, uid: String) -> Result<()> {
match self.index_uuid_store.delete(uid.clone()).await? {
Some(uuid) => {
let _ = self.index_store.delete(uuid).await;
Ok(())
}
None => Err(IndexResolverError::UnexistingIndex(uid)),
}
}
pub async fn get_index_by_uuid(&self, uuid: Uuid) -> Result<Index> {
// TODO: Handle this error better.
self.index_store.get(uuid).await?.ok_or(IndexResolverError::UnexistingIndex(String::new()))
}
pub async fn get_index(&self, uid: String) -> Result<Index> {
match self.index_uuid_store.get_uuid(uid).await? {
(name, Some(uuid)) => {
match self.index_store.get(uuid).await? {
Some(index) => Ok(index),
None => {
// For some reason we got a uuid to an unexisting index, we return an error,
// and remove the uuid from th uuid store.
let _ = self.index_uuid_store.delete(name.clone()).await;
Err(IndexResolverError::UnexistingIndex(name))
},
}
}
(name, _) => Err(IndexResolverError::UnexistingIndex(name))
}
}
pub async fn get_uuid(&self, uid: String) -> Result<Uuid> {
match self.index_uuid_store.get_uuid(uid).await? {
(_, Some(uuid)) => Ok(uuid),
(name, _) => Err(IndexResolverError::UnexistingIndex(name))
}
}
}

View file

@ -0,0 +1,226 @@
use std::collections::HashSet;
use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use heed::types::{ByteSlice, Str};
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::error::{Result, IndexResolverError};
use crate::EnvSizer;
const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB
#[derive(Serialize, Deserialize)]
struct DumpEntry {
uuid: Uuid,
uid: String,
}
const UUIDS_DB_PATH: &str = "index_uuids";
#[async_trait::async_trait]
pub trait UuidStore: Sized {
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return
// the uuid otherwise.
async fn get_uuid(&self, uid: String) -> Result<(String, Option<Uuid>)>;
async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>;
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
}
#[derive(Clone)]
pub struct HeedUuidStore {
env: Env,
db: Database<Str, ByteSlice>,
}
impl HeedUuidStore {
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().join(UUIDS_DB_PATH);
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(UUID_STORE_SIZE); // 1GB
let env = options.open(path)?;
let db = env.create_database(None)?;
Ok(Self { env, db })
}
pub fn get_uuid(&self, name: &str) -> Result<Option<Uuid>> {
let env = self.env.clone();
let db = self.db;
let txn = env.read_txn()?;
match db.get(&txn, name)? {
Some(uuid) => {
let uuid = Uuid::from_slice(uuid)?;
Ok(Some(uuid))
}
None => Ok(None),
}
}
pub fn delete(&self, uid: String) -> Result<Option<Uuid>> {
let env = self.env.clone();
let db = self.db;
let mut txn = env.write_txn()?;
match db.get(&txn, &uid)? {
Some(uuid) => {
let uuid = Uuid::from_slice(uuid)?;
db.delete(&mut txn, &uid)?;
txn.commit()?;
Ok(Some(uuid))
}
None => Ok(None),
}
}
pub fn list(&self) -> Result<Vec<(String, Uuid)>> {
let env = self.env.clone();
let db = self.db;
let txn = env.read_txn()?;
let mut entries = Vec::new();
for entry in db.iter(&txn)? {
let (name, uuid) = entry?;
let uuid = Uuid::from_slice(uuid)?;
entries.push((name.to_owned(), uuid))
}
Ok(entries)
}
pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
let env = self.env.clone();
let db = self.db;
let mut txn = env.write_txn()?;
if db.get(&txn, &name)?.is_some() {
return Err(IndexResolverError::IndexAlreadyExists);
}
db.put(&mut txn, &name, uuid.as_bytes())?;
txn.commit()?;
Ok(())
}
pub fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
let env = self.env.clone();
let db = self.db;
// Write transaction to acquire a lock on the database.
let txn = env.write_txn()?;
let mut entries = HashSet::new();
for entry in db.iter(&txn)? {
let (_, uuid) = entry?;
let uuid = Uuid::from_slice(uuid)?;
entries.insert(uuid);
}
// only perform snapshot if there are indexes
if !entries.is_empty() {
path.push(UUIDS_DB_PATH);
create_dir_all(&path).unwrap();
path.push("data.mdb");
env.copy_to_path(path, CompactionOption::Enabled)?;
}
Ok(entries)
}
pub fn get_size(&self) -> Result<u64> {
Ok(self.env.size())
}
pub fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let dump_path = path.join(UUIDS_DB_PATH);
create_dir_all(&dump_path)?;
let dump_file_path = dump_path.join("data.jsonl");
let mut dump_file = File::create(&dump_file_path)?;
let mut uuids = HashSet::new();
let txn = self.env.read_txn()?;
for entry in self.db.iter(&txn)? {
let (uid, uuid) = entry?;
let uid = uid.to_string();
let uuid = Uuid::from_slice(uuid)?;
let entry = DumpEntry { uuid, uid };
serde_json::to_writer(&mut dump_file, &entry)?;
dump_file.write_all(b"\n").unwrap();
uuids.insert(uuid);
}
Ok(uuids)
}
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH);
std::fs::create_dir_all(&uuid_resolver_path)?;
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
let indexes = File::open(&src_indexes)?;
let mut indexes = BufReader::new(indexes);
let mut line = String::new();
let db = Self::new(dst)?;
let mut txn = db.env.write_txn()?;
loop {
match indexes.read_line(&mut line) {
Ok(0) => break,
Ok(_) => {
let DumpEntry { uuid, uid } = serde_json::from_str(&line)?;
println!("importing {} {}", uid, uuid);
db.db.put(&mut txn, &uid, uuid.as_bytes())?;
}
Err(e) => return Err(e.into()),
}
line.clear();
}
txn.commit()?;
db.env.prepare_for_closing().wait();
Ok(())
}
}
#[async_trait::async_trait]
impl UuidStore for HeedUuidStore {
async fn get_uuid(&self, name: String) -> Result<(String, Option<Uuid>)> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.get_uuid(&name).map(|res| (name, res))).await?
}
async fn delete(&self, uid: String) -> Result<Option<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.delete(uid)).await?
}
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.list()).await?
}
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.insert(name, uuid)).await?
}
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.snapshot(path)).await?
}
async fn get_size(&self) -> Result<u64> {
self.get_size()
}
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.dump(path)).await?
}
}