mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00
feat(lib): auto-batching
This commit is contained in:
parent
622c15e825
commit
c9a236b0af
28 changed files with 1181 additions and 777 deletions
|
@ -10,7 +10,7 @@ use tokio::sync::{mpsc, oneshot, RwLock};
|
|||
|
||||
use super::error::{DumpActorError, Result};
|
||||
use super::{DumpInfo, DumpJob, DumpMsg, DumpStatus};
|
||||
use crate::tasks::TaskStore;
|
||||
use crate::tasks::Scheduler;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
pub const CONCURRENT_DUMP_MSG: usize = 10;
|
||||
|
@ -18,7 +18,7 @@ pub const CONCURRENT_DUMP_MSG: usize = 10;
|
|||
pub struct DumpActor {
|
||||
inbox: Option<mpsc::Receiver<DumpMsg>>,
|
||||
update_file_store: UpdateFileStore,
|
||||
task_store: TaskStore,
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
dump_path: PathBuf,
|
||||
analytics_path: PathBuf,
|
||||
lock: Arc<Mutex<()>>,
|
||||
|
@ -36,7 +36,7 @@ impl DumpActor {
|
|||
pub fn new(
|
||||
inbox: mpsc::Receiver<DumpMsg>,
|
||||
update_file_store: UpdateFileStore,
|
||||
task_store: TaskStore,
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
dump_path: impl AsRef<Path>,
|
||||
analytics_path: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
|
@ -46,7 +46,7 @@ impl DumpActor {
|
|||
let lock = Arc::new(Mutex::new(()));
|
||||
Self {
|
||||
inbox: Some(inbox),
|
||||
task_store,
|
||||
scheduler,
|
||||
update_file_store,
|
||||
dump_path: dump_path.as_ref().into(),
|
||||
analytics_path: analytics_path.as_ref().into(),
|
||||
|
@ -118,13 +118,13 @@ impl DumpActor {
|
|||
dump_path: self.dump_path.clone(),
|
||||
db_path: self.analytics_path.clone(),
|
||||
update_file_store: self.update_file_store.clone(),
|
||||
task_store: self.task_store.clone(),
|
||||
scheduler: self.scheduler.clone(),
|
||||
uid: uid.clone(),
|
||||
update_db_size: self.update_db_size,
|
||||
index_db_size: self.index_db_size,
|
||||
};
|
||||
|
||||
let task_result = tokio::task::spawn(task.run()).await;
|
||||
let task_result = tokio::task::spawn_local(task.run()).await;
|
||||
|
||||
let mut dump_infos = self.dump_infos.write().await;
|
||||
let dump_infos = dump_infos
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use std::fs::File;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::bail;
|
||||
use chrono::{DateTime, Utc};
|
||||
|
@ -12,7 +13,7 @@ use meilisearch_auth::AuthController;
|
|||
pub use message::DumpMsg;
|
||||
use tempfile::TempDir;
|
||||
use tokio::fs::create_dir_all;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio::sync::{oneshot, RwLock};
|
||||
|
||||
use crate::analytics;
|
||||
use crate::compression::{from_tar_gz, to_tar_gz};
|
||||
|
@ -20,7 +21,7 @@ use crate::index_controller::dump_actor::error::DumpActorError;
|
|||
use crate::index_controller::dump_actor::loaders::{v2, v3, v4};
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::task::Job;
|
||||
use crate::tasks::TaskStore;
|
||||
use crate::tasks::Scheduler;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
use error::Result;
|
||||
|
||||
|
@ -319,7 +320,7 @@ struct DumpJob {
|
|||
dump_path: PathBuf,
|
||||
db_path: PathBuf,
|
||||
update_file_store: UpdateFileStore,
|
||||
task_store: TaskStore,
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
uid: String,
|
||||
update_db_size: usize,
|
||||
index_db_size: usize,
|
||||
|
@ -344,21 +345,28 @@ impl DumpJob {
|
|||
|
||||
let (sender, receiver) = oneshot::channel();
|
||||
|
||||
self.task_store
|
||||
.register_job(Job::Dump {
|
||||
self.scheduler
|
||||
.write()
|
||||
.await
|
||||
.schedule_job(Job::Dump {
|
||||
ret: sender,
|
||||
path: temp_dump_path.clone(),
|
||||
})
|
||||
.await;
|
||||
receiver.await??;
|
||||
self.task_store
|
||||
.dump(&temp_dump_path, self.update_file_store.clone())
|
||||
.await?;
|
||||
|
||||
// wait until the job has started performing before finishing the dump process
|
||||
let sender = receiver.await??;
|
||||
|
||||
AuthController::dump(&self.db_path, &temp_dump_path)?;
|
||||
|
||||
//TODO(marin): this is not right, the scheduler should dump itself, not do it here...
|
||||
self.scheduler
|
||||
.read()
|
||||
.await
|
||||
.dump(&temp_dump_path, self.update_file_store.clone())
|
||||
.await?;
|
||||
|
||||
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
|
||||
let _ = &self;
|
||||
// for now we simply copy the updates/updates_files
|
||||
// FIXME: We may copy more files than necessary, if new files are added while we are
|
||||
// performing the dump. We need a way to filter them out.
|
||||
|
@ -374,6 +382,9 @@ impl DumpJob {
|
|||
})
|
||||
.await??;
|
||||
|
||||
// notify the update loop that we are finished performing the dump.
|
||||
let _ = sender.send(());
|
||||
|
||||
info!("Created dump in {:?}.", dump_path);
|
||||
|
||||
Ok(())
|
||||
|
@ -382,19 +393,15 @@ impl DumpJob {
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use futures::future::{err, ok};
|
||||
use nelson::Mocker;
|
||||
use once_cell::sync::Lazy;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::*;
|
||||
use crate::index::error::Result as IndexResult;
|
||||
use crate::index::Index;
|
||||
use crate::index_resolver::error::IndexResolverError;
|
||||
use crate::index_resolver::index_store::MockIndexStore;
|
||||
use crate::index_resolver::meta_store::MockIndexMetaStore;
|
||||
use crate::options::SchedulerConfig;
|
||||
use crate::tasks::error::Result as TaskResult;
|
||||
use crate::tasks::task::{Task, TaskId};
|
||||
use crate::tasks::{MockTaskPerformer, TaskFilter, TaskStore};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
fn setup() {
|
||||
|
@ -411,86 +418,91 @@ mod test {
|
|||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn test_dump_normal() {
|
||||
setup();
|
||||
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
|
||||
let uuids = std::iter::repeat_with(Uuid::new_v4)
|
||||
.take(4)
|
||||
.collect::<HashSet<_>>();
|
||||
let mut uuid_store = MockIndexMetaStore::new();
|
||||
uuid_store
|
||||
.expect_dump()
|
||||
.once()
|
||||
.returning(move |_| Box::pin(ok(())));
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
index_store.expect_get().times(4).returning(move |uuid| {
|
||||
let mocker = Mocker::default();
|
||||
let uuids_clone = uuids.clone();
|
||||
mocker.when::<(), Uuid>("uuid").once().then(move |_| {
|
||||
assert!(uuids_clone.contains(&uuid));
|
||||
uuid
|
||||
});
|
||||
mocker
|
||||
.when::<&Path, IndexResult<()>>("dump")
|
||||
.once()
|
||||
.then(move |_| Ok(()));
|
||||
Box::pin(ok(Some(Index::mock(mocker))))
|
||||
});
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let update_file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
//let update_sender =
|
||||
// create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap();
|
||||
|
||||
//TODO: fix dump tests
|
||||
let mut performer = MockTaskPerformer::new();
|
||||
performer
|
||||
.expect_process_job()
|
||||
.once()
|
||||
.returning(|j| match j {
|
||||
Job::Dump { ret, .. } => {
|
||||
let (sender, _receiver) = oneshot::channel();
|
||||
ret.send(Ok(sender)).unwrap();
|
||||
}
|
||||
_ => unreachable!(),
|
||||
});
|
||||
let performer = Arc::new(performer);
|
||||
let mocker = Mocker::default();
|
||||
let task_store = TaskStore::mock(mocker);
|
||||
mocker
|
||||
.when::<(&Path, UpdateFileStore), TaskResult<()>>("dump")
|
||||
.then(|_| Ok(()));
|
||||
mocker
|
||||
.when::<(Option<TaskId>, Option<TaskFilter>, Option<usize>), TaskResult<Vec<Task>>>(
|
||||
"list_tasks",
|
||||
)
|
||||
.then(|_| Ok(Vec::new()));
|
||||
let store = TaskStore::mock(mocker);
|
||||
let config = SchedulerConfig::default();
|
||||
|
||||
let scheduler = Scheduler::new(store, performer, config).unwrap();
|
||||
|
||||
let task = DumpJob {
|
||||
dump_path: tmp.path().into(),
|
||||
// this should do nothing
|
||||
update_file_store,
|
||||
db_path: tmp.path().into(),
|
||||
task_store,
|
||||
uid: String::from("test"),
|
||||
update_db_size: 4096 * 10,
|
||||
index_db_size: 4096 * 10,
|
||||
scheduler,
|
||||
};
|
||||
|
||||
task.run().await.unwrap();
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn error_performing_dump() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
|
||||
let mut uuid_store = MockIndexMetaStore::new();
|
||||
uuid_store
|
||||
.expect_dump()
|
||||
.once()
|
||||
.returning(move |_| Box::pin(err(IndexResolverError::ExistingPrimaryKey)));
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let mocker = Mocker::default();
|
||||
mocker
|
||||
.when::<(Option<TaskId>, Option<TaskFilter>, Option<usize>), TaskResult<Vec<Task>>>(
|
||||
"list_tasks",
|
||||
)
|
||||
.then(|_| Ok(Vec::new()));
|
||||
let task_store = TaskStore::mock(mocker);
|
||||
let mut performer = MockTaskPerformer::new();
|
||||
performer
|
||||
.expect_process_job()
|
||||
.once()
|
||||
.returning(|job| match job {
|
||||
Job::Dump { ret, .. } => drop(ret.send(Err(IndexResolverError::BadlyFormatted(
|
||||
"blabla".to_string(),
|
||||
)))),
|
||||
_ => unreachable!(),
|
||||
});
|
||||
let performer = Arc::new(performer);
|
||||
|
||||
let scheduler = Scheduler::new(task_store, performer, SchedulerConfig::default()).unwrap();
|
||||
|
||||
let task = DumpJob {
|
||||
dump_path: tmp.path().into(),
|
||||
// this should do nothing
|
||||
db_path: tmp.path().into(),
|
||||
update_file_store: file_store,
|
||||
task_store,
|
||||
uid: String::from("test"),
|
||||
update_db_size: 4096 * 10,
|
||||
index_db_size: 4096 * 10,
|
||||
scheduler,
|
||||
};
|
||||
|
||||
assert!(task.run().await.is_err());
|
||||
|
|
|
@ -13,7 +13,7 @@ use futures::Stream;
|
|||
use futures::StreamExt;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::sync::{mpsc, RwLock};
|
||||
use tokio::task::spawn_blocking;
|
||||
use tokio::time::sleep;
|
||||
use uuid::Uuid;
|
||||
|
@ -23,12 +23,11 @@ use crate::index::{
|
|||
Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
|
||||
};
|
||||
use crate::index_controller::dump_actor::{load_dump, DumpActor, DumpActorHandleImpl};
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::options::{IndexerOpts, SchedulerConfig};
|
||||
use crate::snapshot::{load_snapshot, SnapshotService};
|
||||
use crate::tasks::create_task_store;
|
||||
use crate::tasks::error::TaskError;
|
||||
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId};
|
||||
use crate::tasks::{TaskFilter, TaskStore};
|
||||
use crate::tasks::{Scheduler, TaskFilter, TaskStore};
|
||||
use error::Result;
|
||||
|
||||
use self::dump_actor::{DumpActorHandle, DumpInfo};
|
||||
|
@ -68,6 +67,7 @@ pub struct IndexSettings {
|
|||
|
||||
pub struct IndexController<U, I> {
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
task_store: TaskStore,
|
||||
dump_handle: dump_actor::DumpActorHandleImpl,
|
||||
update_file_store: UpdateFileStore,
|
||||
|
@ -78,9 +78,10 @@ impl<U, I> Clone for IndexController<U, I> {
|
|||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
index_resolver: self.index_resolver.clone(),
|
||||
task_store: self.task_store.clone(),
|
||||
scheduler: self.scheduler.clone(),
|
||||
dump_handle: self.dump_handle.clone(),
|
||||
update_file_store: self.update_file_store.clone(),
|
||||
task_store: self.task_store.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -160,6 +161,7 @@ impl IndexControllerBuilder {
|
|||
self,
|
||||
db_path: impl AsRef<Path>,
|
||||
indexer_options: IndexerOpts,
|
||||
scheduler_config: SchedulerConfig,
|
||||
) -> anyhow::Result<MeiliSearch> {
|
||||
let index_size = self
|
||||
.max_index_size
|
||||
|
@ -217,8 +219,9 @@ impl IndexControllerBuilder {
|
|||
update_file_store.clone(),
|
||||
)?);
|
||||
|
||||
let task_store =
|
||||
create_task_store(meta_env, index_resolver.clone()).map_err(|e| anyhow::anyhow!(e))?;
|
||||
let task_store = TaskStore::new(meta_env)?;
|
||||
let scheduler =
|
||||
Scheduler::new(task_store.clone(), index_resolver.clone(), scheduler_config)?;
|
||||
|
||||
let dump_path = self
|
||||
.dump_dst
|
||||
|
@ -229,14 +232,14 @@ impl IndexControllerBuilder {
|
|||
let actor = DumpActor::new(
|
||||
receiver,
|
||||
update_file_store.clone(),
|
||||
task_store.clone(),
|
||||
scheduler.clone(),
|
||||
dump_path,
|
||||
analytics_path,
|
||||
index_size,
|
||||
task_store_size,
|
||||
);
|
||||
|
||||
tokio::task::spawn(actor.run());
|
||||
tokio::task::spawn_local(actor.run());
|
||||
|
||||
DumpActorHandleImpl { sender }
|
||||
};
|
||||
|
@ -255,17 +258,18 @@ impl IndexControllerBuilder {
|
|||
snapshot_path,
|
||||
index_size,
|
||||
meta_env_size: task_store_size,
|
||||
task_store: task_store.clone(),
|
||||
scheduler: scheduler.clone(),
|
||||
};
|
||||
|
||||
tokio::task::spawn(snapshot_service.run());
|
||||
tokio::task::spawn_local(snapshot_service.run());
|
||||
}
|
||||
|
||||
Ok(IndexController {
|
||||
index_resolver,
|
||||
task_store,
|
||||
scheduler,
|
||||
dump_handle,
|
||||
update_file_store,
|
||||
task_store,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -415,12 +419,13 @@ where
|
|||
};
|
||||
|
||||
let task = self.task_store.register(uid, content).await?;
|
||||
self.scheduler.read().await.notify();
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
|
||||
let task = self.task_store.get_task(id, filter).await?;
|
||||
let task = self.scheduler.read().await.get_task(id, filter).await?;
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
|
@ -435,7 +440,12 @@ where
|
|||
|
||||
let mut filter = TaskFilter::default();
|
||||
filter.filter_index(index_uid);
|
||||
let task = self.task_store.get_task(task_id, Some(filter)).await?;
|
||||
let task = self
|
||||
.scheduler
|
||||
.read()
|
||||
.await
|
||||
.get_task(task_id, Some(filter))
|
||||
.await?;
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
@ -446,7 +456,12 @@ where
|
|||
limit: Option<usize>,
|
||||
offset: Option<TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
let tasks = self.task_store.list_tasks(offset, filter, limit).await?;
|
||||
let tasks = self
|
||||
.scheduler
|
||||
.read()
|
||||
.await
|
||||
.list_tasks(offset, filter, limit)
|
||||
.await?;
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
@ -466,7 +481,9 @@ where
|
|||
filter.filter_index(index_uid);
|
||||
|
||||
let tasks = self
|
||||
.task_store
|
||||
.scheduler
|
||||
.read()
|
||||
.await
|
||||
.list_tasks(
|
||||
Some(offset.unwrap_or_default() + task_id),
|
||||
Some(filter),
|
||||
|
@ -547,10 +564,11 @@ where
|
|||
}
|
||||
|
||||
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
|
||||
let last_task = self.task_store.get_processing_task().await?;
|
||||
let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?;
|
||||
// Check if the currently indexing update is from our index.
|
||||
let is_indexing = last_task
|
||||
.map(|task| task.index_uid.into_inner() == uid)
|
||||
let is_indexing = processing_tasks
|
||||
.first()
|
||||
.map(|task| task.index_uid.as_str() == uid)
|
||||
.unwrap_or_default();
|
||||
|
||||
let index = self.index_resolver.get_index(uid).await?;
|
||||
|
@ -564,7 +582,7 @@ where
|
|||
let mut last_task: Option<DateTime<_>> = None;
|
||||
let mut indexes = BTreeMap::new();
|
||||
let mut database_size = 0;
|
||||
let processing_task = self.task_store.get_processing_task().await?;
|
||||
let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?;
|
||||
|
||||
for (index_uid, index) in self.index_resolver.list().await? {
|
||||
if !search_rules.is_index_authorized(&index_uid) {
|
||||
|
@ -584,8 +602,8 @@ where
|
|||
});
|
||||
|
||||
// Check if the currently indexing update is from our index.
|
||||
stats.is_indexing = processing_task
|
||||
.as_ref()
|
||||
stats.is_indexing = processing_tasks
|
||||
.first()
|
||||
.map(|p| p.index_uid.as_str() == index_uid)
|
||||
.or(Some(false));
|
||||
|
||||
|
@ -637,16 +655,18 @@ mod test {
|
|||
|
||||
impl IndexController<MockIndexMetaStore, MockIndexStore> {
|
||||
pub fn mock(
|
||||
index_resolver: IndexResolver<MockIndexMetaStore, MockIndexStore>,
|
||||
index_resolver: Arc<IndexResolver<MockIndexMetaStore, MockIndexStore>>,
|
||||
task_store: TaskStore,
|
||||
update_file_store: UpdateFileStore,
|
||||
dump_handle: DumpActorHandleImpl,
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
) -> Self {
|
||||
IndexController {
|
||||
index_resolver: Arc::new(index_resolver),
|
||||
index_resolver,
|
||||
task_store,
|
||||
dump_handle,
|
||||
update_file_store,
|
||||
scheduler,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -719,13 +739,27 @@ mod test {
|
|||
let task_store_mocker = nelson::Mocker::default();
|
||||
let mocker = Mocker::default();
|
||||
let update_file_store = UpdateFileStore::mock(mocker);
|
||||
let index_resolver = IndexResolver::new(uuid_store, index_store, update_file_store.clone());
|
||||
let index_resolver = Arc::new(IndexResolver::new(
|
||||
uuid_store,
|
||||
index_store,
|
||||
update_file_store.clone(),
|
||||
));
|
||||
let task_store = TaskStore::mock(task_store_mocker);
|
||||
// let dump_actor = MockDumpActorHandle::new();
|
||||
let scheduler = Scheduler::new(
|
||||
task_store.clone(),
|
||||
index_resolver.clone(),
|
||||
SchedulerConfig::default(),
|
||||
)
|
||||
.unwrap();
|
||||
let (sender, _) = mpsc::channel(1);
|
||||
let dump_handle = DumpActorHandleImpl { sender };
|
||||
let index_controller =
|
||||
IndexController::mock(index_resolver, task_store, update_file_store, dump_handle);
|
||||
let index_controller = IndexController::mock(
|
||||
index_resolver,
|
||||
task_store,
|
||||
update_file_store,
|
||||
dump_handle,
|
||||
scheduler,
|
||||
);
|
||||
|
||||
let r = index_controller
|
||||
.search(index_uid.to_owned(), query.clone())
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue