462 lines
14 KiB
Rust
462 lines
14 KiB
Rust
#[allow(clippy::upper_case_acronyms)]
|
||
type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
|
||
|
||
const UID_TASK_IDS: &str = "uid_task_id";
|
||
const TASKS: &str = "tasks";
|
||
|
||
use std::borrow::Cow;
|
||
use std::collections::BinaryHeap;
|
||
use std::convert::TryInto;
|
||
use std::mem::size_of;
|
||
use std::ops::Range;
|
||
use std::result::Result as StdResult;
|
||
use std::sync::Arc;
|
||
|
||
use heed::types::{ByteSlice, OwnedType, SerdeJson, Unit};
|
||
use heed::{BytesDecode, BytesEncode, Database, Env, RoTxn, RwTxn};
|
||
|
||
use crate::tasks::task::{Task, TaskId};
|
||
|
||
use super::super::Result;
|
||
|
||
use super::{Pending, TaskFilter};
|
||
|
||
enum IndexUidTaskIdCodec {}
|
||
|
||
impl<'a> BytesEncode<'a> for IndexUidTaskIdCodec {
|
||
type EItem = (&'a str, TaskId);
|
||
|
||
fn bytes_encode((s, id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||
let size = s.len() + std::mem::size_of::<TaskId>() + 1;
|
||
if size > 512 {
|
||
return None;
|
||
}
|
||
let mut b = Vec::with_capacity(size);
|
||
b.extend_from_slice(s.as_bytes());
|
||
// null terminate the string
|
||
b.push(0);
|
||
b.extend_from_slice(&id.to_be_bytes());
|
||
Some(Cow::Owned(b))
|
||
}
|
||
}
|
||
|
||
impl<'a> BytesDecode<'a> for IndexUidTaskIdCodec {
|
||
type DItem = (&'a str, TaskId);
|
||
|
||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||
let len = bytes.len();
|
||
let s_end = len.checked_sub(size_of::<TaskId>())?.checked_sub(1)?;
|
||
let str_bytes = &bytes[..s_end];
|
||
let str = std::str::from_utf8(str_bytes).ok()?;
|
||
let id = TaskId::from_be_bytes(bytes[(len - size_of::<TaskId>())..].try_into().ok()?);
|
||
Some((str, id))
|
||
}
|
||
}
|
||
|
||
pub struct Store {
|
||
env: Arc<Env>,
|
||
uids_task_ids: Database<IndexUidTaskIdCodec, Unit>,
|
||
tasks: Database<OwnedType<BEU64>, SerdeJson<Task>>,
|
||
}
|
||
|
||
impl Drop for Store {
|
||
fn drop(&mut self) {
|
||
if Arc::strong_count(&self.env) == 1 {
|
||
self.env.as_ref().clone().prepare_for_closing();
|
||
}
|
||
}
|
||
}
|
||
|
||
impl Store {
|
||
/// Create a new store from the specified `Path`.
|
||
/// Be really cautious when calling this function, the returned `Store` may
|
||
/// be in an invalid state, with dangling processing tasks.
|
||
/// You want to patch all un-finished tasks and put them in your pending
|
||
/// queue with the `reset_and_return_unfinished_update` method.
|
||
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
|
||
let uids_task_ids = env.create_database(Some(UID_TASK_IDS))?;
|
||
let tasks = env.create_database(Some(TASKS))?;
|
||
|
||
Ok(Self {
|
||
env,
|
||
uids_task_ids,
|
||
tasks,
|
||
})
|
||
}
|
||
|
||
/// This function should be called *right after* creating the store.
|
||
/// It put back all unfinished update in the `Created` state. This
|
||
/// allow us to re-enqueue an update that didn't had the time to finish
|
||
/// when Meilisearch closed.
|
||
pub fn reset_and_return_unfinished_tasks(&mut self) -> Result<BinaryHeap<Pending<TaskId>>> {
|
||
let mut unfinished_tasks: BinaryHeap<Pending<TaskId>> = BinaryHeap::new();
|
||
|
||
let mut wtxn = self.wtxn()?;
|
||
let mut iter = self.tasks.rev_iter_mut(&mut wtxn)?;
|
||
|
||
while let Some(entry) = iter.next() {
|
||
let entry = entry?;
|
||
let (id, mut task): (BEU64, Task) = entry;
|
||
|
||
// Since all tasks are ordered, we can stop iterating when we encounter our first non-finished task.
|
||
if task.is_finished() {
|
||
break;
|
||
}
|
||
|
||
// we only keep the first state. It’s supposed to be a `Created` state.
|
||
task.events.drain(1..);
|
||
unfinished_tasks.push(Pending::Task(id.get()));
|
||
|
||
// Since we own the id and the task this is a safe operation.
|
||
unsafe {
|
||
iter.put_current(&id, &task)?;
|
||
}
|
||
}
|
||
|
||
drop(iter);
|
||
wtxn.commit()?;
|
||
|
||
Ok(unfinished_tasks)
|
||
}
|
||
|
||
pub fn wtxn(&self) -> Result<RwTxn> {
|
||
Ok(self.env.write_txn()?)
|
||
}
|
||
|
||
pub fn rtxn(&self) -> Result<RoTxn> {
|
||
Ok(self.env.read_txn()?)
|
||
}
|
||
|
||
/// Returns the id for the next task.
|
||
///
|
||
/// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit
|
||
/// the task to the store in the same transaction, no one else will hav this task id.
|
||
pub fn next_task_id(&self, txn: &mut RwTxn) -> Result<TaskId> {
|
||
let id = self
|
||
.tasks
|
||
.lazily_decode_data()
|
||
.last(txn)?
|
||
.map(|(id, _)| id.get() + 1)
|
||
.unwrap_or(0);
|
||
Ok(id)
|
||
}
|
||
|
||
pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> {
|
||
self.tasks.put(txn, &BEU64::new(task.id), task)?;
|
||
self.uids_task_ids
|
||
.put(txn, &(&task.index_uid, task.id), &())?;
|
||
|
||
Ok(())
|
||
}
|
||
|
||
pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result<Option<Task>> {
|
||
let task = self.tasks.get(txn, &BEU64::new(id))?;
|
||
Ok(task)
|
||
}
|
||
|
||
pub fn list_tasks<'a>(
|
||
&self,
|
||
txn: &'a RoTxn,
|
||
from: Option<TaskId>,
|
||
filter: Option<TaskFilter>,
|
||
limit: Option<usize>,
|
||
) -> Result<Vec<Task>> {
|
||
let from = from.unwrap_or_default();
|
||
let range = from..limit
|
||
.map(|limit| (limit as u64).saturating_add(from))
|
||
.unwrap_or(u64::MAX);
|
||
let iter: Box<dyn Iterator<Item = StdResult<_, heed::Error>>> = match filter {
|
||
Some(filter) => {
|
||
let iter = self
|
||
.compute_candidates(txn, filter, range)?
|
||
.into_iter()
|
||
.filter_map(|id| self.tasks.get(txn, &BEU64::new(id)).transpose());
|
||
|
||
Box::new(iter)
|
||
}
|
||
None => Box::new(
|
||
self.tasks
|
||
.rev_range(txn, &(BEU64::new(range.start)..BEU64::new(range.end)))?
|
||
.map(|r| r.map(|(_, t)| t)),
|
||
),
|
||
};
|
||
|
||
// Collect 'limit' task if it exists or all of them.
|
||
let tasks = iter
|
||
.take(limit.unwrap_or(usize::MAX))
|
||
.try_fold::<_, _, StdResult<_, heed::Error>>(Vec::new(), |mut v, task| {
|
||
v.push(task?);
|
||
Ok(v)
|
||
})?;
|
||
|
||
Ok(tasks)
|
||
}
|
||
|
||
fn compute_candidates(
|
||
&self,
|
||
txn: &heed::RoTxn,
|
||
filter: TaskFilter,
|
||
range: Range<TaskId>,
|
||
) -> Result<BinaryHeap<TaskId>> {
|
||
let mut candidates = BinaryHeap::new();
|
||
if let Some(indexes) = filter.indexes {
|
||
for index in indexes {
|
||
// We need to prefix search the null terminated string to make sure that we only
|
||
// get exact matches for the index, and not other uids that would share the same
|
||
// prefix, i.e test and test1.
|
||
let mut index_uid = index.as_bytes().to_vec();
|
||
index_uid.push(0);
|
||
|
||
self.uids_task_ids
|
||
.remap_key_type::<ByteSlice>()
|
||
.rev_prefix_iter(txn, &index_uid)?
|
||
.map(|entry| -> StdResult<_, heed::Error> {
|
||
let (key, _) = entry?;
|
||
let (_, id) =
|
||
IndexUidTaskIdCodec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
|
||
Ok(id)
|
||
})
|
||
.skip_while(|entry| {
|
||
entry
|
||
.as_ref()
|
||
.ok()
|
||
// we skip all elements till we enter in the range
|
||
.map(|key| !range.contains(key))
|
||
// if we encounter an error we returns true to collect it later
|
||
.unwrap_or(true)
|
||
})
|
||
.take_while(|entry| {
|
||
entry
|
||
.as_ref()
|
||
.ok()
|
||
// as soon as we are out of the range we exit
|
||
.map(|key| range.contains(key))
|
||
// if we encounter an error we returns true to collect it later
|
||
.unwrap_or(true)
|
||
})
|
||
.try_for_each::<_, StdResult<(), heed::Error>>(|id| {
|
||
candidates.push(id?);
|
||
Ok(())
|
||
})?;
|
||
}
|
||
}
|
||
|
||
Ok(candidates)
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
|
||
pub mod test {
|
||
use heed::EnvOpenOptions;
|
||
use itertools::Itertools;
|
||
use nelson::Mocker;
|
||
use proptest::collection::vec;
|
||
use proptest::prelude::*;
|
||
use tempfile::TempDir;
|
||
|
||
use crate::index_resolver::IndexUid;
|
||
use crate::tasks::task::TaskContent;
|
||
|
||
use super::*;
|
||
|
||
/// TODO: use this mock to test the task store properly.
|
||
#[allow(dead_code)]
|
||
pub enum MockStore {
|
||
Real(Store),
|
||
Fake(Mocker),
|
||
}
|
||
|
||
pub struct TmpEnv(TempDir, Arc<heed::Env>);
|
||
|
||
impl TmpEnv {
|
||
pub fn env(&self) -> Arc<heed::Env> {
|
||
self.1.clone()
|
||
}
|
||
}
|
||
|
||
pub fn tmp_env() -> TmpEnv {
|
||
let tmp = tempfile::tempdir().unwrap();
|
||
|
||
let mut options = EnvOpenOptions::new();
|
||
options.map_size(4096 * 100000);
|
||
options.max_dbs(1000);
|
||
let env = Arc::new(options.open(tmp.path()).unwrap());
|
||
|
||
TmpEnv(tmp, env)
|
||
}
|
||
|
||
impl MockStore {
|
||
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
|
||
Ok(Self::Real(Store::new(env)?))
|
||
}
|
||
|
||
pub fn reset_and_return_unfinished_tasks(&mut self) -> Result<BinaryHeap<Pending<TaskId>>> {
|
||
match self {
|
||
MockStore::Real(index) => index.reset_and_return_unfinished_tasks(),
|
||
MockStore::Fake(_) => todo!(),
|
||
}
|
||
}
|
||
|
||
pub fn wtxn(&self) -> Result<RwTxn> {
|
||
match self {
|
||
MockStore::Real(index) => index.wtxn(),
|
||
MockStore::Fake(_) => todo!(),
|
||
}
|
||
}
|
||
|
||
pub fn rtxn(&self) -> Result<RoTxn> {
|
||
match self {
|
||
MockStore::Real(index) => index.rtxn(),
|
||
MockStore::Fake(_) => todo!(),
|
||
}
|
||
}
|
||
|
||
pub fn next_task_id(&self, txn: &mut RwTxn) -> Result<TaskId> {
|
||
match self {
|
||
MockStore::Real(index) => index.next_task_id(txn),
|
||
MockStore::Fake(_) => todo!(),
|
||
}
|
||
}
|
||
|
||
pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> {
|
||
match self {
|
||
MockStore::Real(index) => index.put(txn, task),
|
||
MockStore::Fake(_) => todo!(),
|
||
}
|
||
}
|
||
|
||
pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result<Option<Task>> {
|
||
match self {
|
||
MockStore::Real(index) => index.get(txn, id),
|
||
MockStore::Fake(_) => todo!(),
|
||
}
|
||
}
|
||
|
||
pub fn list_tasks<'a>(
|
||
&self,
|
||
txn: &'a RoTxn,
|
||
from: Option<TaskId>,
|
||
filter: Option<TaskFilter>,
|
||
limit: Option<usize>,
|
||
) -> Result<Vec<Task>> {
|
||
match self {
|
||
MockStore::Real(index) => index.list_tasks(txn, from, filter, limit),
|
||
MockStore::Fake(_) => todo!(),
|
||
}
|
||
}
|
||
}
|
||
|
||
#[test]
|
||
fn test_ordered_filtered_updates() {
|
||
let tmp = tmp_env();
|
||
let store = Store::new(tmp.env()).unwrap();
|
||
|
||
let tasks = (0..100)
|
||
.map(|_| Task {
|
||
id: rand::random(),
|
||
index_uid: IndexUid::new_unchecked("test".to_string()),
|
||
content: TaskContent::IndexDeletion,
|
||
events: vec![],
|
||
})
|
||
.collect::<Vec<_>>();
|
||
|
||
let mut txn = store.env.write_txn().unwrap();
|
||
tasks
|
||
.iter()
|
||
.try_for_each(|t| store.put(&mut txn, t))
|
||
.unwrap();
|
||
|
||
let mut filter = TaskFilter::default();
|
||
filter.filter_index("test".into());
|
||
|
||
let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap();
|
||
|
||
assert!(tasks
|
||
.iter()
|
||
.map(|t| t.id)
|
||
.tuple_windows()
|
||
.all(|(a, b)| a > b));
|
||
}
|
||
|
||
#[test]
|
||
fn test_filter_same_index_prefix() {
|
||
let tmp = tmp_env();
|
||
let store = Store::new(tmp.env()).unwrap();
|
||
|
||
let task_1 = Task {
|
||
id: 1,
|
||
index_uid: IndexUid::new_unchecked("test".to_string()),
|
||
content: TaskContent::IndexDeletion,
|
||
events: vec![],
|
||
};
|
||
|
||
let task_2 = Task {
|
||
id: 0,
|
||
index_uid: IndexUid::new_unchecked("test1".to_string()),
|
||
content: TaskContent::IndexDeletion,
|
||
events: vec![],
|
||
};
|
||
|
||
let mut txn = store.wtxn().unwrap();
|
||
store.put(&mut txn, &task_1).unwrap();
|
||
store.put(&mut txn, &task_2).unwrap();
|
||
|
||
let mut filter = TaskFilter::default();
|
||
filter.filter_index("test".into());
|
||
|
||
let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap();
|
||
|
||
txn.abort().unwrap();
|
||
assert_eq!(tasks.len(), 1);
|
||
assert_eq!(&*tasks.first().unwrap().index_uid, "test");
|
||
|
||
// same thing but invert the ids
|
||
let task_1 = Task {
|
||
id: 0,
|
||
index_uid: IndexUid::new_unchecked("test".to_string()),
|
||
content: TaskContent::IndexDeletion,
|
||
events: vec![],
|
||
};
|
||
let task_2 = Task {
|
||
id: 1,
|
||
index_uid: IndexUid::new_unchecked("test1".to_string()),
|
||
content: TaskContent::IndexDeletion,
|
||
events: vec![],
|
||
};
|
||
|
||
let mut txn = store.wtxn().unwrap();
|
||
store.put(&mut txn, &task_1).unwrap();
|
||
store.put(&mut txn, &task_2).unwrap();
|
||
|
||
let mut filter = TaskFilter::default();
|
||
filter.filter_index("test".into());
|
||
|
||
let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap();
|
||
|
||
assert_eq!(tasks.len(), 1);
|
||
assert_eq!(&*tasks.first().unwrap().index_uid, "test");
|
||
}
|
||
|
||
proptest! {
|
||
#[test]
|
||
fn encode_decode_roundtrip(index_uid in any::<IndexUid>(), task_id in 0..TaskId::MAX) {
|
||
let value = (index_uid.as_ref(), task_id);
|
||
let bytes = IndexUidTaskIdCodec::bytes_encode(&value).unwrap();
|
||
let (index, id) = IndexUidTaskIdCodec::bytes_decode(bytes.as_ref()).unwrap();
|
||
assert_eq!(&*index_uid, index);
|
||
assert_eq!(task_id, id);
|
||
}
|
||
|
||
#[test]
|
||
fn encode_doesnt_crash(index_uid in "\\PC*", task_id in 0..TaskId::MAX) {
|
||
let value = (index_uid.as_ref(), task_id);
|
||
IndexUidTaskIdCodec::bytes_encode(&value);
|
||
}
|
||
|
||
#[test]
|
||
fn decode_doesnt_crash(bytes in vec(any::<u8>(), 0..1000)) {
|
||
IndexUidTaskIdCodec::bytes_decode(&bytes);
|
||
}
|
||
}
|
||
}
|