feat(lib): auto-batching
commit c9a236b0af
parent 622c15e825
28 changed files with 1181 additions and 777 deletions
meilisearch-lib/src/index_resolver/index_store.rs

@@ -1,14 +1,15 @@
 use std::collections::HashMap;
+use std::convert::TryFrom;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 
+use milli::update::IndexerConfig;
 use tokio::fs;
 use tokio::sync::RwLock;
 use tokio::task::spawn_blocking;
 use uuid::Uuid;
 
 use super::error::{IndexResolverError, Result};
-use crate::index::update_handler::UpdateHandler;
 use crate::index::Index;
 use crate::options::IndexerOpts;
 
@@ -26,7 +27,7 @@ pub struct MapIndexStore {
     index_store: AsyncMap<Uuid, Index>,
     path: PathBuf,
     index_size: usize,
-    update_handler: Arc<UpdateHandler>,
+    indexer_config: Arc<IndexerConfig>,
 }
 
 impl MapIndexStore {
@@ -35,14 +36,14 @@ impl MapIndexStore {
         index_size: usize,
         indexer_opts: &IndexerOpts,
     ) -> anyhow::Result<Self> {
-        let update_handler = Arc::new(UpdateHandler::new(indexer_opts)?);
+        let indexer_config = Arc::new(IndexerConfig::try_from(indexer_opts)?);
         let path = path.as_ref().join("indexes/");
         let index_store = Arc::new(RwLock::new(HashMap::new()));
         Ok(Self {
             index_store,
             path,
             index_size,
-            update_handler,
+            indexer_config,
         })
     }
 }
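The constructor above now builds an Arc<IndexerConfig> via IndexerConfig::try_from(indexer_opts) instead of constructing an UpdateHandler. A minimal sketch of that TryFrom validation pattern, using hypothetical Opts/Config types rather than the real IndexerOpts/IndexerConfig fields:

use std::convert::TryFrom;

// Hypothetical stand-ins for IndexerOpts / IndexerConfig; the real fields differ.
struct Opts {
    max_indexing_memory: Option<usize>, // bytes; None = no limit
    max_indexing_threads: usize,
}

struct Config {
    memory_budget: usize,
    thread_count: usize,
}

impl TryFrom<&Opts> for Config {
    type Error = String;

    // Validate the user-facing options once, up front; the resolver can then
    // share a single Arc<Config> across every index it opens.
    fn try_from(opts: &Opts) -> Result<Self, Self::Error> {
        if opts.max_indexing_threads == 0 {
            return Err("at least one indexing thread is required".to_string());
        }
        Ok(Config {
            memory_budget: opts.max_indexing_memory.unwrap_or(usize::MAX),
            thread_count: opts.max_indexing_threads,
        })
    }
}

fn main() {
    let opts = Opts { max_indexing_memory: Some(2 << 30), max_indexing_threads: 8 };
    let config = Config::try_from(&opts).expect("invalid indexer options");
    assert_eq!(config.thread_count, 8);
    assert_eq!(config.memory_budget, 2 << 30);
}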
@@ -63,7 +64,7 @@ impl IndexStore for MapIndexStore {
         }
 
         let index_size = self.index_size;
-        let update_handler = self.update_handler.clone();
+        let update_handler = self.indexer_config.clone();
         let index = spawn_blocking(move || -> Result<Index> {
             let index = Index::open(path, index_size, uuid, update_handler)?;
             Ok(index)
@@ -88,7 +89,7 @@ impl IndexStore for MapIndexStore {
         }
 
         let index_size = self.index_size;
-        let update_handler = self.update_handler.clone();
+        let update_handler = self.indexer_config.clone();
         let index =
             spawn_blocking(move || Index::open(path, index_size, uuid, update_handler))
                 .await??;

meilisearch-lib/src/index_resolver/mod.rs
@@ -2,7 +2,7 @@ pub mod error;
 pub mod index_store;
 pub mod meta_store;
 
-use std::convert::TryInto;
+use std::convert::{TryFrom, TryInto};
 use std::path::Path;
 use std::sync::Arc;
 
@@ -12,16 +12,17 @@ use heed::Env;
 use index_store::{IndexStore, MapIndexStore};
 use meilisearch_error::ResponseError;
 use meta_store::{HeedMetaStore, IndexMetaStore};
-use milli::update::DocumentDeletionResult;
+use milli::update::{DocumentDeletionResult, IndexerConfig};
 use serde::{Deserialize, Serialize};
+use tokio::sync::oneshot;
 use tokio::task::spawn_blocking;
 use uuid::Uuid;
 
-use crate::index::{error::Result as IndexResult, update_handler::UpdateHandler, Index};
+use crate::index::{error::Result as IndexResult, Index};
 use crate::options::IndexerOpts;
 use crate::tasks::batch::Batch;
 use crate::tasks::task::{DocumentDeletion, Job, Task, TaskContent, TaskEvent, TaskId, TaskResult};
-use crate::tasks::{Pending, TaskPerformer};
+use crate::tasks::TaskPerformer;
 use crate::update_file_store::UpdateFileStore;
 
 use self::meta_store::IndexMeta;
@@ -96,14 +97,24 @@ where
     U: IndexMetaStore + Send + Sync + 'static,
     I: IndexStore + Send + Sync + 'static,
 {
-    type Error = ResponseError;
+    async fn process_batch(&self, mut batch: Batch) -> Batch {
+        // If a batch contains multiple tasks, then it must be a document addition batch
+        if let Some(Task {
+            content: TaskContent::DocumentAddition { .. },
+            ..
+        }) = batch.tasks.first()
+        {
+            debug_assert!(batch.tasks.iter().all(|t| matches!(
+                t,
+                Task {
+                    content: TaskContent::DocumentAddition { .. },
+                    ..
+                }
+            )));
 
-    async fn process(&self, mut batch: Batch) -> Batch {
-        // Until batching is implemented, all batch should contain only one update.
-        debug_assert_eq!(batch.len(), 1);
-
-        match batch.tasks.first_mut() {
-            Some(Pending::Task(task)) => {
+            self.process_document_addition_batch(batch).await
+        } else {
+            if let Some(task) = batch.tasks.first_mut() {
                 task.events.push(TaskEvent::Processing(Utc::now()));
 
                 match self.process_task(task).await {
@@ -119,15 +130,12 @@ where
                     }),
                 }
             }
-            Some(Pending::Job(job)) => {
-                let job = std::mem::take(job);
-                self.process_job(job).await;
-            }
-
-            None => (),
-        }
-
-        batch
-    }
+            batch
+        }
+    }
+
+    async fn process_job(&self, job: Job) {
+        self.process_job(job).await;
+    }
 
     async fn finish(&self, batch: &Batch) {
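Taken together, the two hunks above replace the old single-task process with a process_batch that dispatches on the batch's content: a batch of document additions is handled as one unit, anything else still goes through process_task one at a time, and jobs get their own process_job entry point. A condensed, self-contained sketch of that dispatch shape, with simplified Task/Batch types standing in for the real ones:

enum TaskContent {
    DocumentAddition { content_uuid: u128 },
    DocumentDeletion,
}

struct Task {
    content: TaskContent,
}

struct Batch {
    tasks: Vec<Task>,
}

fn process_batch(batch: Batch) -> Batch {
    // Auto-batching only ever groups document additions, so if the first
    // task is an addition, every task in the batch must be one too.
    if matches!(
        batch.tasks.first(),
        Some(Task { content: TaskContent::DocumentAddition { .. } })
    ) {
        debug_assert!(batch
            .tasks
            .iter()
            .all(|t| matches!(t.content, TaskContent::DocumentAddition { .. })));
        process_document_addition_batch(batch)
    } else {
        // Deletions, settings updates, etc. still arrive one per batch.
        debug_assert!(batch.tasks.len() <= 1);
        process_single_task(batch)
    }
}

fn process_document_addition_batch(batch: Batch) -> Batch {
    let uuids: Vec<u128> = batch
        .tasks
        .iter()
        .map(|t| match t.content {
            TaskContent::DocumentAddition { content_uuid } => content_uuid,
            _ => unreachable!("checked by the caller"),
        })
        .collect();
    println!("indexing payloads {uuids:?} in a single pass");
    batch
}

fn process_single_task(batch: Batch) -> Batch {
    println!("processing {} task(s) individually", batch.tasks.len());
    batch
}

fn main() {
    let additions = Batch {
        tasks: vec![
            Task { content: TaskContent::DocumentAddition { content_uuid: 1 } },
            Task { content: TaskContent::DocumentAddition { content_uuid: 2 } },
        ],
    };
    process_batch(additions);

    let other = Batch { tasks: vec![Task { content: TaskContent::DocumentDeletion }] };
    process_batch(other);
}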
@@ -158,9 +166,9 @@ impl IndexResolver<HeedMetaStore, MapIndexStore> {
         HeedMetaStore::load_dump(&src, env)?;
         let indexes_path = src.as_ref().join("indexes");
         let indexes = indexes_path.read_dir()?;
-        let update_handler = UpdateHandler::new(indexer_opts)?;
+        let indexer_config = IndexerConfig::try_from(indexer_opts)?;
         for index in indexes {
-            Index::load_dump(&index?.path(), &dst, index_db_size, &update_handler)?;
+            Index::load_dump(&index?.path(), &dst, index_db_size, &indexer_config)?;
         }
 
         Ok(())
@@ -180,33 +188,100 @@ where
         }
     }
 
-    async fn process_task(&self, task: &Task) -> Result<TaskResult> {
-        let index_uid = task.index_uid.clone();
-        match &task.content {
-            TaskContent::DocumentAddition {
-                content_uuid,
-                merge_strategy,
-                primary_key,
-                allow_index_creation,
+    async fn process_document_addition_batch(&self, mut batch: Batch) -> Batch {
+        fn get_content_uuid(task: &Task) -> Uuid {
+            match task {
+                Task {
+                    content: TaskContent::DocumentAddition { content_uuid, .. },
+                    ..
+                } => *content_uuid,
+                _ => panic!("unexpected task in the document addition batch"),
+            }
+        }
+
+        let content_uuids = batch.tasks.iter().map(get_content_uuid).collect::<Vec<_>>();
+
+        match batch.tasks.first() {
+            Some(Task {
+                index_uid,
+                id,
+                content:
+                    TaskContent::DocumentAddition {
+                        merge_strategy,
+                        primary_key,
+                        allow_index_creation,
+                        ..
+                    },
                 ..
-            } => {
+            }) => {
                 let primary_key = primary_key.clone();
-                let content_uuid = *content_uuid;
                 let method = *merge_strategy;
 
                 let index = if *allow_index_creation {
-                    self.get_or_create_index(index_uid, task.id).await?
+                    self.get_or_create_index(index_uid.clone(), *id).await
                 } else {
-                    self.get_index(index_uid.into_inner()).await?
+                    self.get_index(index_uid.as_str().to_string()).await
                 };
 
+                // If the index doesn't exist and we are not allowed to create it with the first
+                // task, we must fails the whole batch.
+                let now = Utc::now();
+                let index = match index {
+                    Ok(index) => index,
+                    Err(e) => {
+                        let error = ResponseError::from(e);
+                        for task in batch.tasks.iter_mut() {
+                            task.events.push(TaskEvent::Failed {
+                                error: error.clone(),
+                                timestamp: now,
+                            });
+                        }
+                        return batch;
+                    }
+                };
+
                 let file_store = self.file_store.clone();
                 let result = spawn_blocking(move || {
-                    index.update_documents(method, content_uuid, primary_key, file_store)
+                    index.update_documents(
+                        method,
+                        primary_key,
+                        file_store,
+                        content_uuids.into_iter(),
+                    )
                 })
-                .await??;
+                .await;
 
-                Ok(result.into())
+                let event = match result {
+                    Ok(Ok(result)) => TaskEvent::Succeded {
+                        timestamp: Utc::now(),
+                        result: TaskResult::DocumentAddition {
+                            indexed_documents: result.indexed_documents,
+                        },
+                    },
+                    Ok(Err(e)) => TaskEvent::Failed {
+                        timestamp: Utc::now(),
+                        error: e.into(),
+                    },
+                    Err(e) => TaskEvent::Failed {
+                        timestamp: Utc::now(),
+                        error: IndexResolverError::from(e).into(),
+                    },
+                };
+
+                for task in batch.tasks.iter_mut() {
+                    task.events.push(event.clone());
+                }
+
+                batch
+            }
+            _ => panic!("invalid batch!"),
+        }
+    }
+
+    async fn process_task(&self, task: &Task) -> Result<TaskResult> {
+        let index_uid = task.index_uid.clone();
+        match &task.content {
+            TaskContent::DocumentAddition { .. } => panic!("updates should be handled by batch"),
             TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids)) => {
                 let ids = ids.clone();
                 let index = self.get_index(index_uid.into_inner()).await?;
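The core of the auto-batching change is process_document_addition_batch above: it collects one content UUID per task, runs a single update_documents pass over all of them, and then pushes the same outcome event onto every task in the batch. A self-contained sketch of that collect-then-broadcast shape, with simplified stand-ins for the real Index and TaskEvent types:

#[derive(Clone, Debug, PartialEq)]
enum TaskEvent {
    Succeeded { indexed_documents: u64 },
    Failed { error: String },
}

struct Task {
    content_uuid: u64,
    events: Vec<TaskEvent>,
}

// Stand-in for Index::update_documents: consumes every payload in one pass.
fn update_documents(content_uuids: impl Iterator<Item = u64>) -> Result<u64, String> {
    // Pretend each payload contributed 100 documents.
    Ok(content_uuids.count() as u64 * 100)
}

fn process_document_addition_batch(mut tasks: Vec<Task>) -> Vec<Task> {
    // 1. Gather one payload id per task in the batch.
    let content_uuids: Vec<u64> = tasks.iter().map(|t| t.content_uuid).collect();

    // 2. Run a single indexing operation over all payloads.
    let event = match update_documents(content_uuids.into_iter()) {
        Ok(indexed_documents) => TaskEvent::Succeeded { indexed_documents },
        Err(error) => TaskEvent::Failed { error },
    };

    // 3. Broadcast the same outcome to every task: they share one result.
    for task in tasks.iter_mut() {
        task.events.push(event.clone());
    }
    tasks
}

fn main() {
    let tasks = vec![
        Task { content_uuid: 1, events: Vec::new() },
        Task { content_uuid: 2, events: Vec::new() },
    ];
    let tasks = process_document_addition_batch(tasks);
    assert_eq!(tasks[0].events, tasks[1].events);
    assert_eq!(tasks[0].events, vec![TaskEvent::Succeeded { indexed_documents: 200 }]);
}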
@@ -282,9 +357,13 @@ where
             Job::Dump { ret, path } => {
                 log::trace!("The Dump task is getting executed");
 
-                if ret.send(self.dump(path).await).is_err() {
+                let (sender, receiver) = oneshot::channel();
+                if ret.send(self.dump(path).await.map(|_| sender)).is_err() {
                     log::error!("The dump actor died.");
                 }
+
+                // wait until the dump has finished performing.
+                let _ = receiver.await;
             }
             Job::Empty => log::error!("Tried to process an empty task."),
             Job::Snapshot(job) => {
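The dump job now threads a oneshot channel through the reply: the receiver is awaited after the result is sent, so process_job does not return until the dump side has used or dropped the sender. A minimal tokio sketch of that completion handshake, independent of the Meilisearch actor types:

use tokio::sync::oneshot;

#[tokio::main]
async fn main() {
    let (sender, receiver) = oneshot::channel::<()>();

    // Stand-in for the dump actor: it owns the sender and drops it once
    // the dump work is finished.
    tokio::spawn(async move {
        // ... write the dump to disk here ...
        drop(sender); // dropping the sender resolves `receiver.await` with Err(RecvError)
    });

    // The task processor parks here until the dump side is done, whether
    // the sender was used or simply dropped.
    let _ = receiver.await;
    println!("dump finished; batch processing can resume");
}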
@@ -404,7 +483,7 @@ where
 
 #[cfg(test)]
 mod test {
-    use std::collections::BTreeMap;
+    use std::{collections::BTreeMap, vec::IntoIter};
 
     use super::*;
 
@@ -447,7 +526,7 @@ mod test {
                     mocker.when::<String, IndexResult<IndexMeta>>("update_primary_key")
                         .then(move |_| Ok(IndexMeta{ created_at: Utc::now(), updated_at: Utc::now(), primary_key: None }));
                 }
-                mocker.when::<(IndexDocumentsMethod, Uuid, Option<String>, UpdateFileStore), IndexResult<DocumentAdditionResult>>("update_documents")
+                mocker.when::<(IndexDocumentsMethod, Option<String>, UpdateFileStore, IntoIter<Uuid>), IndexResult<DocumentAdditionResult>>("update_documents")
                     .then(move |(_, _, _, _)| result());
             }
             TaskContent::SettingsUpdate{..} => {
@@ -462,13 +541,13 @@ mod test {
             }
             TaskContent::DocumentDeletion(DocumentDeletion::Ids(_ids)) => {
                 let result = move || if !index_op_fails {
-                    Ok(any_int as u64)
+                    Ok(DocumentDeletionResult { deleted_documents: any_int as u64, remaining_documents: any_int as u64 })
                 } else {
                     // return this error because it's easy to generate...
                     Err(IndexError::DocumentNotFound("a doc".into()))
                 };
 
-                mocker.when::<&[String], IndexResult<u64>>("delete_documents")
+                mocker.when::<&[String], IndexResult<DocumentDeletionResult>>("delete_documents")
                     .then(move |_| result());
             },
             TaskContent::DocumentDeletion(DocumentDeletion::Clear) => {
@@ -561,7 +640,8 @@ mod test {
             let update_file_store = UpdateFileStore::mock(mocker);
             let index_resolver = IndexResolver::new(uuid_store, index_store, update_file_store);
 
-            let result = index_resolver.process_task(&task).await;
+            let batch = Batch { id: 1, created_at: Utc::now(), tasks: vec![task.clone()] };
+            let result = index_resolver.process_batch(batch).await;
 
             // Test for some expected output scenarios:
             // Index creation and deletion cannot fail because of a failed index op, since they
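Since process_task is no longer the public entry point for additions, the test wraps its single task in a Batch and inspects the last TaskEvent instead of a Result. A sketch of that assertion style with simplified types (the real Batch also carries created_at, and the real event variants carry timestamps and payloads):

#[derive(Clone)]
enum TaskEvent {
    Succeeded,
    Failed,
}

struct Task {
    events: Vec<TaskEvent>,
}

struct Batch {
    id: u64,
    tasks: Vec<Task>,
}

// Stand-in for IndexResolver::process_batch: records an outcome on each task
// instead of returning a Result.
fn process_batch(mut batch: Batch, should_fail: bool) -> Batch {
    let event = if should_fail { TaskEvent::Failed } else { TaskEvent::Succeeded };
    for task in batch.tasks.iter_mut() {
        task.events.push(event.clone());
    }
    batch
}

fn main() {
    let task = Task { events: Vec::new() };
    let batch = Batch { id: 1, tasks: vec![task] };

    let result = process_batch(batch, false);
    assert_eq!(result.id, 1);

    // The outcome now lives in the task's event log, not in a Result,
    // mirroring the assertions in the updated test.
    assert!(matches!(
        result.tasks[0].events.last(),
        Some(TaskEvent::Succeeded)
    ));
}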
@@ -575,9 +655,9 @@ mod test {
                 | TaskContent::DocumentAddition { allow_index_creation: false, ..}
                 | TaskContent::IndexUpdate { .. } ))
             {
-                assert!(result.is_err(), "{:?}", result);
+                assert!(matches!(result.tasks[0].events.last().unwrap(), TaskEvent::Failed { .. }), "{:?}", result);
             } else {
-                assert!(result.is_ok(), "{:?}", result);
+                assert!(matches!(result.tasks[0].events.last().unwrap(), TaskEvent::Succeded { .. }), "{:?}", result);
             }
         });
     }