mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-24 04:07:30 +01:00
Merge pull request #23 from meilisearch/update-builder-thread-pool
Allow library users to specify the rayon ThreadPool for UpdateBuilder
This commit is contained in:
commit
58c07e7f8c
@ -15,6 +15,8 @@ use futures::{FutureExt, StreamExt};
|
||||
use grenad::CompressionType;
|
||||
use heed::EnvOpenOptions;
|
||||
use indexmap::IndexMap;
|
||||
use once_cell::sync::OnceCell;
|
||||
use rayon::ThreadPool;
|
||||
use serde::{Serialize, Deserialize, Deserializer};
|
||||
use structopt::StructOpt;
|
||||
use tokio::fs::File as TFile;
|
||||
@ -27,6 +29,8 @@ use crate::tokenizer::{simple_tokenizer, TokenType};
|
||||
use crate::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
|
||||
use crate::{Index, UpdateStore, SearchResult};
|
||||
|
||||
static GLOBAL_THREAD_POOL: OnceCell<ThreadPool> = OnceCell::new();
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
/// The HTTP main server of the milli project.
|
||||
pub struct Opt {
|
||||
@ -201,6 +205,11 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(opt.database_size);
|
||||
|
||||
// Setup the global thread pool
|
||||
let jobs = opt.indexer.indexing_jobs.unwrap_or(0);
|
||||
let pool = rayon::ThreadPoolBuilder::new().num_threads(jobs).build()?;
|
||||
GLOBAL_THREAD_POOL.set(pool).unwrap();
|
||||
|
||||
// Open the LMDB database.
|
||||
let index = Index::new(options, &opt.database)?;
|
||||
|
||||
@ -227,9 +236,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
if let Some(chunk_compression_level) = indexer_opt_cloned.chunk_compression_level {
|
||||
update_builder.chunk_compression_level(chunk_compression_level);
|
||||
}
|
||||
if let Some(indexing_jobs) = indexer_opt_cloned.indexing_jobs {
|
||||
update_builder.indexing_jobs(indexing_jobs);
|
||||
}
|
||||
update_builder.thread_pool(GLOBAL_THREAD_POOL.get().unwrap());
|
||||
update_builder.log_every_n(indexer_opt_cloned.log_every_n);
|
||||
update_builder.max_memory(indexer_opt_cloned.max_memory);
|
||||
update_builder.linked_hash_map_size(indexer_opt_cloned.linked_hash_map_size);
|
||||
|
@ -10,6 +10,7 @@ use grenad::{Writer, Sorter, Merger, Reader, FileFuse, CompressionType};
|
||||
use heed::types::ByteSlice;
|
||||
use log::{debug, info, error};
|
||||
use rayon::prelude::*;
|
||||
use rayon::ThreadPool;
|
||||
use crate::index::Index;
|
||||
use self::store::Store;
|
||||
use self::merge_function::{
|
||||
@ -191,7 +192,7 @@ pub enum UpdateFormat {
|
||||
JsonStream,
|
||||
}
|
||||
|
||||
pub struct IndexDocuments<'t, 'u, 'i> {
|
||||
pub struct IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
index: &'i Index,
|
||||
pub(crate) log_every_n: Option<usize>,
|
||||
@ -201,14 +202,14 @@ pub struct IndexDocuments<'t, 'u, 'i> {
|
||||
pub(crate) chunk_compression_type: CompressionType,
|
||||
pub(crate) chunk_compression_level: Option<u32>,
|
||||
pub(crate) chunk_fusing_shrink_size: Option<u64>,
|
||||
pub(crate) indexing_jobs: Option<usize>,
|
||||
pub(crate) thread_pool: Option<&'a ThreadPool>,
|
||||
update_method: IndexDocumentsMethod,
|
||||
update_format: UpdateFormat,
|
||||
autogenerate_docids: bool,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
|
||||
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> IndexDocuments<'t, 'u, 'i> {
|
||||
impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
IndexDocuments {
|
||||
wtxn,
|
||||
index,
|
||||
@ -219,7 +220,7 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
|
||||
chunk_compression_type: CompressionType::None,
|
||||
chunk_compression_level: None,
|
||||
chunk_fusing_shrink_size: None,
|
||||
indexing_jobs: None,
|
||||
thread_pool: None,
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
update_format: UpdateFormat::Json,
|
||||
autogenerate_docids: true,
|
||||
@ -288,7 +289,7 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
|
||||
chunk_compression_type: self.chunk_compression_type,
|
||||
chunk_compression_level: self.chunk_compression_level,
|
||||
chunk_fusing_shrink_size: self.chunk_fusing_shrink_size,
|
||||
indexing_jobs: self.indexing_jobs,
|
||||
thread_pool: self.thread_pool,
|
||||
};
|
||||
let mut deletion_builder = update_builder.delete_documents(self.wtxn, self.index)?;
|
||||
deletion_builder.delete_documents(&replaced_documents_ids);
|
||||
@ -323,8 +324,16 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
|
||||
let log_every_n = self.log_every_n;
|
||||
let chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
|
||||
|
||||
let jobs = self.indexing_jobs.unwrap_or(0);
|
||||
let pool = rayon::ThreadPoolBuilder::new().num_threads(jobs).build()?;
|
||||
let backup_pool;
|
||||
let pool = match self.thread_pool {
|
||||
Some(pool) => pool,
|
||||
None => {
|
||||
// We initialize a backup pool with the default
|
||||
// settings if none have already been set.
|
||||
backup_pool = rayon::ThreadPoolBuilder::new().build()?;
|
||||
&backup_pool
|
||||
},
|
||||
};
|
||||
|
||||
let (receiver, docid_word_positions_readers, documents_readers) = pool.install(|| {
|
||||
let num_threads = rayon::current_num_threads();
|
||||
|
@ -1,9 +1,10 @@
|
||||
use grenad::CompressionType;
|
||||
use rayon::ThreadPool;
|
||||
|
||||
use crate::Index;
|
||||
use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings};
|
||||
|
||||
pub struct UpdateBuilder {
|
||||
pub struct UpdateBuilder<'a> {
|
||||
pub(crate) log_every_n: Option<usize>,
|
||||
pub(crate) max_nb_chunks: Option<usize>,
|
||||
pub(crate) max_memory: Option<usize>,
|
||||
@ -11,11 +12,11 @@ pub struct UpdateBuilder {
|
||||
pub(crate) chunk_compression_type: CompressionType,
|
||||
pub(crate) chunk_compression_level: Option<u32>,
|
||||
pub(crate) chunk_fusing_shrink_size: Option<u64>,
|
||||
pub(crate) indexing_jobs: Option<usize>,
|
||||
pub(crate) thread_pool: Option<&'a ThreadPool>,
|
||||
}
|
||||
|
||||
impl UpdateBuilder {
|
||||
pub fn new() -> UpdateBuilder {
|
||||
impl<'a> UpdateBuilder<'a> {
|
||||
pub fn new() -> UpdateBuilder<'a> {
|
||||
UpdateBuilder {
|
||||
log_every_n: None,
|
||||
max_nb_chunks: None,
|
||||
@ -24,7 +25,7 @@ impl UpdateBuilder {
|
||||
chunk_compression_type: CompressionType::None,
|
||||
chunk_compression_level: None,
|
||||
chunk_fusing_shrink_size: None,
|
||||
indexing_jobs: None,
|
||||
thread_pool: None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -56,8 +57,8 @@ impl UpdateBuilder {
|
||||
self.chunk_fusing_shrink_size = Some(chunk_fusing_shrink_size);
|
||||
}
|
||||
|
||||
pub fn indexing_jobs(&mut self, indexing_jobs: usize) {
|
||||
self.indexing_jobs = Some(indexing_jobs);
|
||||
pub fn thread_pool(&mut self, thread_pool: &'a ThreadPool) {
|
||||
self.thread_pool = Some(thread_pool);
|
||||
}
|
||||
|
||||
pub fn clear_documents<'t, 'u, 'i>(
|
||||
@ -82,7 +83,7 @@ impl UpdateBuilder {
|
||||
self,
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
index: &'i Index,
|
||||
) -> IndexDocuments<'t, 'u, 'i>
|
||||
) -> IndexDocuments<'t, 'u, 'i, 'a>
|
||||
{
|
||||
let mut builder = IndexDocuments::new(wtxn, index);
|
||||
|
||||
@ -93,7 +94,7 @@ impl UpdateBuilder {
|
||||
builder.chunk_compression_type = self.chunk_compression_type;
|
||||
builder.chunk_compression_level = self.chunk_compression_level;
|
||||
builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
|
||||
builder.indexing_jobs = self.indexing_jobs;
|
||||
builder.thread_pool = self.thread_pool;
|
||||
|
||||
builder
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user