mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 05:00:06 +01:00
start implementing some logic to test the internal states of the scheduler
This commit is contained in:
parent
84cd5cef0b
commit
6f4dcc0c38
15
Cargo.lock
generated
15
Cargo.lock
generated
@ -889,6 +889,20 @@ dependencies = [
|
||||
"riscv",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-queue",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.6"
|
||||
@ -1826,6 +1840,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
"bincode",
|
||||
"crossbeam",
|
||||
"csv",
|
||||
"derive_builder",
|
||||
"document-formats",
|
||||
|
@ -25,6 +25,7 @@ synchronoise = "1.0.1"
|
||||
derive_builder = "0.11.2"
|
||||
|
||||
[dev-dependencies]
|
||||
crossbeam = "0.8.2"
|
||||
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
|
||||
insta = { version = "1.19.1", features = ["json", "redactions"] }
|
||||
big_s = "1.0.2"
|
||||
|
@ -5,6 +5,7 @@ use file_store::{File, FileStore};
|
||||
use index::Index;
|
||||
use milli::update::IndexerConfig;
|
||||
use synchronoise::SignalEvent;
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
@ -120,11 +121,33 @@ pub struct IndexScheduler {
|
||||
/// In charge of creating, opening, storing and returning indexes.
|
||||
pub(crate) index_mapper: IndexMapper,
|
||||
|
||||
// set to true when there is work to do.
|
||||
/// Get a signal when a batch needs to be processed.
|
||||
pub(crate) wake_up: Arc<SignalEvent>,
|
||||
|
||||
// ================= tests
|
||||
// The next entries are dedicated to the tests.
|
||||
// It helps us to stop the scheduler and check what it is doing efficiently
|
||||
/// Provide a way to break in multiple part of the scheduler.
|
||||
#[cfg(test)]
|
||||
test_breakpoint_rcv: crossbeam::channel::Receiver<Breakpoint>,
|
||||
#[cfg(test)]
|
||||
test_breakpoint_sdr: crossbeam::channel::Sender<Breakpoint>,
|
||||
|
||||
/// Hold a reference to its own tempdir to delete itself once dropped.
|
||||
#[cfg(test)]
|
||||
test_tempdir: Arc<TempDir>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
enum Breakpoint {
|
||||
Start,
|
||||
BatchCreated,
|
||||
BatchProcessed,
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
#[cfg(not(test))]
|
||||
pub fn new(
|
||||
tasks_path: PathBuf,
|
||||
update_file_path: PathBuf,
|
||||
@ -140,9 +163,6 @@ impl IndexScheduler {
|
||||
options.max_dbs(6);
|
||||
|
||||
let env = options.open(tasks_path)?;
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
let wake_up = SignalEvent::auto(true);
|
||||
|
||||
let processing_tasks = (OffsetDateTime::now_utc(), RoaringBitmap::new());
|
||||
let file_store = FileStore::new(&update_file_path)?;
|
||||
|
||||
@ -156,10 +176,52 @@ impl IndexScheduler {
|
||||
index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?,
|
||||
index_mapper: IndexMapper::new(&env, indexes_path, index_size, indexer_config)?,
|
||||
env,
|
||||
wake_up: Arc::new(wake_up),
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn test() -> Self {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let tasks_path = dir.path().join("db_path");
|
||||
let update_file_path = dir.path().join("file_store");
|
||||
let indexes_path = dir.path().join("indexes");
|
||||
let index_size = 1024 * 1024;
|
||||
let indexer_config = IndexerConfig::default();
|
||||
|
||||
std::fs::create_dir_all(&tasks_path).unwrap();
|
||||
std::fs::create_dir_all(&update_file_path).unwrap();
|
||||
std::fs::create_dir_all(&indexes_path).unwrap();
|
||||
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
options.max_dbs(6);
|
||||
|
||||
let env = options.open(tasks_path).unwrap();
|
||||
let processing_tasks = (OffsetDateTime::now_utc(), RoaringBitmap::new());
|
||||
let file_store = FileStore::new(&update_file_path).unwrap();
|
||||
|
||||
let (sender, receiver) = crossbeam::channel::bounded(0);
|
||||
|
||||
Self {
|
||||
// by default there is no processing tasks
|
||||
processing_tasks: Arc::new(RwLock::new(processing_tasks)),
|
||||
file_store,
|
||||
all_tasks: env.create_database(Some(db_name::ALL_TASKS)).unwrap(),
|
||||
status: env.create_database(Some(db_name::STATUS)).unwrap(),
|
||||
kind: env.create_database(Some(db_name::KIND)).unwrap(),
|
||||
index_tasks: env.create_database(Some(db_name::INDEX_TASKS)).unwrap(),
|
||||
index_mapper: IndexMapper::new(&env, indexes_path, index_size, indexer_config).unwrap(),
|
||||
env,
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||
|
||||
test_breakpoint_rcv: receiver,
|
||||
test_breakpoint_sdr: sender,
|
||||
test_tempdir: Arc::new(dir),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the index corresponding to the name. If it wasn't opened before
|
||||
/// it'll be opened. But if it doesn't exist on disk it'll throw an
|
||||
/// `IndexNotFound` error.
|
||||
@ -221,7 +283,7 @@ impl IndexScheduler {
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?
|
||||
.clone();
|
||||
|
||||
let mut ret = tasks.into_iter().map(|task| task.as_task_view());
|
||||
let ret = tasks.into_iter().map(|task| task.as_task_view());
|
||||
if processing.is_empty() {
|
||||
Ok(ret.collect())
|
||||
} else {
|
||||
@ -309,6 +371,10 @@ impl IndexScheduler {
|
||||
|
||||
/// Create and execute and store the result of one batch of registered tasks.
|
||||
fn tick(&self) -> Result<()> {
|
||||
// We notifiy we're starting a tick.
|
||||
#[cfg(test)]
|
||||
self.test_breakpoint_sdr.send(Breakpoint::Start);
|
||||
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let batch = match self.create_next_batch(&wtxn)? {
|
||||
Some(batch) => batch,
|
||||
@ -322,6 +388,10 @@ impl IndexScheduler {
|
||||
let started_at = OffsetDateTime::now_utc();
|
||||
*self.processing_tasks.write().unwrap() = (started_at, processing_tasks);
|
||||
|
||||
// We notifiy we've finished creating the tasks.
|
||||
#[cfg(test)]
|
||||
self.test_breakpoint_sdr.send(Breakpoint::BatchCreated);
|
||||
|
||||
// 2. process the tasks
|
||||
let res = self.process_batch(&mut wtxn, batch);
|
||||
|
||||
@ -358,6 +428,11 @@ impl IndexScheduler {
|
||||
|
||||
wtxn.commit()?;
|
||||
log::info!("A batch of tasks was successfully completed.");
|
||||
|
||||
// We notifiy we finished processing the tasks.
|
||||
#[cfg(test)]
|
||||
self.test_breakpoint_sdr.send(Breakpoint::BatchProcessed);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -365,6 +440,28 @@ impl IndexScheduler {
|
||||
pub fn notify(&self) {
|
||||
self.wake_up.signal()
|
||||
}
|
||||
|
||||
/// /!\ Used only for tests purposes.
|
||||
/// Wait until the provided breakpoint is reached.
|
||||
#[cfg(test)]
|
||||
fn test_wait_till(&self, breakpoint: Breakpoint) {
|
||||
self.test_breakpoint_rcv.iter().find(|b| *b == breakpoint);
|
||||
}
|
||||
|
||||
/// /!\ Used only for tests purposes.
|
||||
/// Wait until the provided breakpoint is reached.
|
||||
#[cfg(test)]
|
||||
fn test_next_breakpoint(&self) -> Breakpoint {
|
||||
self.test_breakpoint_rcv.recv().unwrap()
|
||||
}
|
||||
|
||||
/// /!\ Used only for tests purposes.
|
||||
/// The scheduler will not stop on breakpoints.
|
||||
#[cfg(test)]
|
||||
fn test_dont_block(&self) {
|
||||
// unroll and ignore all the state the scheduler is going to send us.
|
||||
self.test_breakpoint_rcv.iter().last();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@ -373,13 +470,13 @@ mod tests {
|
||||
use insta::*;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{assert_smol_debug_snapshot, tests::index_scheduler};
|
||||
use crate::assert_smol_debug_snapshot;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn register() {
|
||||
let (index_scheduler, _) = index_scheduler();
|
||||
let index_scheduler = IndexScheduler::test();
|
||||
let kinds = [
|
||||
KindWithContent::IndexCreation {
|
||||
index_uid: S("catto"),
|
||||
@ -453,7 +550,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn document_addition() {
|
||||
let (index_scheduler, _dir) = index_scheduler();
|
||||
let index_scheduler = IndexScheduler::test();
|
||||
|
||||
let content = r#"
|
||||
{
|
||||
@ -474,7 +571,47 @@ mod tests {
|
||||
.unwrap();
|
||||
file.persist().unwrap();
|
||||
|
||||
index_scheduler.tick().unwrap();
|
||||
// After registering the task we should see the update being enqueued
|
||||
let task = index_scheduler.get_tasks(Query::default()).unwrap();
|
||||
assert_json_snapshot!(task,
|
||||
{ "[].enqueuedAt" => "date", "[].startedAt" => "date", "[].finishedAt" => "date", "[].duration" => "duration" }
|
||||
,@r###"
|
||||
[
|
||||
{
|
||||
"uid": 0,
|
||||
"indexUid": "doggos",
|
||||
"status": "enqueued",
|
||||
"type": "documentAddition",
|
||||
"enqueuedAt": "date"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
|
||||
let t_index_scheduler = index_scheduler.clone();
|
||||
std::thread::spawn(move || t_index_scheduler.tick().unwrap());
|
||||
|
||||
index_scheduler.test_wait_till(Breakpoint::BatchCreated);
|
||||
|
||||
// Once the task has started being batched it should be marked as processing
|
||||
let task = index_scheduler.get_tasks(Query::default()).unwrap();
|
||||
assert_json_snapshot!(task,
|
||||
{ "[].enqueuedAt" => "date", "[].startedAt" => "date", "[].finishedAt" => "date", "[].duration" => "duration" }
|
||||
,@r###"
|
||||
[
|
||||
{
|
||||
"uid": 0,
|
||||
"indexUid": "doggos",
|
||||
"status": "processing",
|
||||
"type": "documentAddition",
|
||||
"enqueuedAt": "date",
|
||||
"startedAt": "date"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
assert_eq!(
|
||||
index_scheduler.test_next_breakpoint(),
|
||||
Breakpoint::BatchProcessed
|
||||
);
|
||||
|
||||
let task = index_scheduler.get_tasks(Query::default()).unwrap();
|
||||
assert_json_snapshot!(task,
|
||||
|
@ -15,11 +15,6 @@ pub use task::{Kind, KindWithContent, Status, TaskView};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use milli::update::IndexerConfig;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::IndexScheduler;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! assert_smol_debug_snapshot {
|
||||
($value:expr, @$snapshot:literal) => {{
|
||||
@ -36,24 +31,8 @@ mod tests {
|
||||
}};
|
||||
}
|
||||
|
||||
pub fn index_scheduler() -> (IndexScheduler, TempDir) {
|
||||
let dir = TempDir::new().unwrap();
|
||||
|
||||
(
|
||||
IndexScheduler::new(
|
||||
dir.path().join("db_path"),
|
||||
dir.path().join("file_store"),
|
||||
dir.path().join("indexes"),
|
||||
1024 * 1024,
|
||||
IndexerConfig::default(),
|
||||
)
|
||||
.unwrap(),
|
||||
dir,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_new() {
|
||||
index_scheduler();
|
||||
crate::IndexScheduler::test();
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user