mirror of https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00

first mostly working version

This commit is contained in:
parent c051166bcc
commit d976e680c5
18 changed files with 403 additions and 57 deletions
@@ -13,14 +13,28 @@ pub mod metrics;
#[cfg(feature = "metrics")]
pub mod route_metrics;

use std::sync::{atomic::AtomicBool, Arc};
use std::{
    fs::File,
    io::{BufReader, BufWriter, Seek, SeekFrom},
    path::Path,
    sync::{atomic::AtomicBool, Arc},
};

use crate::error::MeilisearchHttpError;
use actix_web::error::JsonPayloadError;
use actix_web::web::Data;
use analytics::Analytics;
use anyhow::bail;
use error::PayloadError;
use http::header::CONTENT_TYPE;
use meilisearch_types::{
    milli::{
        self,
        documents::{DocumentsBatchBuilder, DocumentsBatchReader},
        update::{IndexDocumentsConfig, IndexDocumentsMethod},
    },
    settings::apply_settings_to_builder,
};
pub use option::Opt;

use actix_web::{web, HttpRequest};
@@ -31,19 +45,83 @@ use meilisearch_auth::AuthController;

pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false);

/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
/// We consider a database as non empty when it's a non empty directory.
fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
    let db_path = db_path.as_ref();

    if !db_path.exists() {
        true
    // if we encounter an error or if the db is a file we consider the db non empty
    } else if let Ok(dir) = db_path.read_dir() {
        dir.count() == 0
    } else {
        true
    }
}

// TODO: TAMO: Finish setting up things
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> {
    let meilisearch = IndexScheduler::new(
        opt.db_path.join("tasks"),
        opt.db_path.join("update_files"),
        opt.db_path.join("indexes"),
        opt.dumps_dir.clone(),
        opt.max_index_size.get_bytes() as usize,
        (&opt.indexer_options).try_into()?,
        true,
        #[cfg(test)]
        todo!("We'll see later"),
    )?;
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(IndexScheduler, AuthController)> {
    // we don't want to create anything in the data.ms yet, thus we
    // wrap our two builders in a closure that'll be executed later.
    let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key);

    let index_scheduler_builder = || {
        IndexScheduler::new(
            opt.db_path.join("tasks"),
            opt.db_path.join("update_files"),
            opt.db_path.join("indexes"),
            opt.dumps_dir.clone(),
            opt.max_index_size.get_bytes() as usize,
            (&opt.indexer_options).try_into()?,
            true,
            #[cfg(test)]
            todo!("We'll see later"),
        )
    };

    let (index_scheduler, auth_controller) = if let Some(ref _path) = opt.import_snapshot {
        // handle the snapshot with something akin to the dumps
        // + the snapshot interval / spawning a thread
        todo!();
    } else if let Some(ref path) = opt.import_dump {
        let empty_db = is_empty_db(&opt.db_path);
        let src_path_exists = path.exists();

        if empty_db && src_path_exists {
            let mut index_scheduler = index_scheduler_builder()?;
            let mut auth_controller = auth_controller_builder()?;
            import_dump(
                &opt.db_path,
                path,
                &mut index_scheduler,
                &mut auth_controller,
            )?;
            (index_scheduler, auth_controller)
        } else if !empty_db && !opt.ignore_dump_if_db_exists {
            bail!(
                "database already exists at {:?}, try to delete it or rename it",
                opt.db_path
                    .canonicalize()
                    .unwrap_or_else(|_| opt.db_path.to_owned())
            )
        } else if !src_path_exists && !opt.ignore_missing_dump {
            bail!("dump doesn't exist at {:?}", path)
        } else {
            let mut index_scheduler = index_scheduler_builder()?;
            let mut auth_controller = auth_controller_builder()?;
            import_dump(
                &opt.db_path,
                path,
                &mut index_scheduler,
                &mut auth_controller,
            )?;
            (index_scheduler, auth_controller)
        }
    } else {
        (index_scheduler_builder()?, auth_controller_builder()?)
    };

    /*
    TODO: We should start a thread to handle the snapshots.
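The builder closures above defer any creation under the db path until a branch has decided it actually needs a scheduler or an auth controller. A minimal, self-contained sketch of that deferred-construction pattern, with purely illustrative names (open_store is a hypothetical stand-in, not part of the codebase):

use std::path::Path;

use anyhow::bail;

// Hypothetical stand-in for IndexScheduler::new / AuthController::new:
// constructing it is the moment files actually appear on disk.
fn open_store(path: &Path) -> anyhow::Result<String> {
    std::fs::create_dir_all(path)?;
    Ok(format!("store at {}", path.display()))
}

fn setup(path: &Path, dump_is_missing: bool) -> anyhow::Result<String> {
    // Wrapping the constructor in a closure keeps the directory untouched
    // until the early-exit checks below have passed.
    let store_builder = || open_store(path);

    if dump_is_missing {
        bail!("dump doesn't exist, and nothing was created on disk");
    }
    store_builder()
}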
@@ -53,25 +131,125 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> {
        .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
        .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec))
        .set_snapshot_dir(opt.snapshot_dir.clone())
        // dump
        .set_ignore_missing_dump(opt.ignore_missing_dump)
        .set_ignore_dump_if_db_exists(opt.ignore_dump_if_db_exists)
        .set_dump_dst(opt.dumps_dir.clone());

    if let Some(ref path) = opt.import_snapshot {
        meilisearch.set_import_snapshot(path.clone());
    }

    if let Some(ref path) = opt.import_dump {
        meilisearch.set_dump_src(path.clone());
    }

    if opt.schedule_snapshot {
        meilisearch.set_schedule_snapshot();
    }
    */

    Ok(meilisearch)
    Ok((index_scheduler, auth_controller))
}

fn import_dump(
    db_path: &Path,
    dump_path: &Path,
    index_scheduler: &mut IndexScheduler,
    auth: &mut AuthController,
) -> Result<(), anyhow::Error> {
    let reader = File::open(dump_path)?;
    let mut dump_reader = dump::DumpReader::open(reader)?;

    if let Some(date) = dump_reader.date() {
        log::info!(
            "Importing a dump of meilisearch `{:?}` from the {}",
            dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
            date
        );
    } else {
        log::info!(
            "Importing a dump of meilisearch `{:?}`",
            dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
        );
    }

    let instance_uid = dump_reader.instance_uid()?;

    // 1. Import the instance-uid.
    if let Some(ref instance_uid) = instance_uid {
        // we don't want to panic if there is an error with the instance-uid.
        let _ = std::fs::write(
            db_path.join("instance-uid"),
            instance_uid.to_string().as_bytes(),
        );
    };

    // 2. Import the `Key`s.
    let mut keys = Vec::new();
    auth.raw_delete_all_keys()?;
    for key in dump_reader.keys() {
        let key = key?;
        auth.raw_insert_key(key.clone())?;
        keys.push(key);
    }

    // 3. Import the tasks.
    for ret in dump_reader.tasks() {
        let (task, file) = ret?;
        index_scheduler.register_dumpped_task(task, file, &keys, instance_uid)?;
    }

    let indexer_config = index_scheduler.indexer_config();

    // 4. Import the indexes.
    for index_reader in dump_reader.indexes()? {
        let mut index_reader = index_reader?;
        let metadata = index_reader.metadata();
        log::info!("Importing index `{}`.", metadata.uid);
        let index = index_scheduler.create_raw_index(&metadata.uid)?;

        let mut wtxn = index.write_txn()?;

        let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
        // 4.1 Import the primary key if there is one.
        if let Some(ref primary_key) = metadata.primary_key {
            builder.set_primary_key(primary_key.to_string());
        }

        // 4.2 Import the settings.
        log::info!("Importing the settings.");
        let settings = index_reader.settings()?;
        apply_settings_to_builder(&settings, &mut builder);
        builder.execute(|indexing_step| {
            log::debug!("update: {:?}", indexing_step);
        })?;

        // 4.3 Import the documents.
        // 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
        log::info!("Importing the documents.");
        let mut file = tempfile::tempfile()?;
        let mut builder = DocumentsBatchBuilder::new(BufWriter::new(&mut file));
        for document in index_reader.documents()? {
            builder.append_json_object(&document?)?;
        }
        builder.into_inner()?; // this actually flushes the content of the batch builder.

        // 4.3.2 We feed it to the milli index.
        file.seek(SeekFrom::Start(0))?;
        let reader = BufReader::new(file);
        let reader = DocumentsBatchReader::from_reader(reader)?;

        let builder = milli::update::IndexDocuments::new(
            &mut wtxn,
            &index,
            indexer_config,
            IndexDocumentsConfig {
                update_method: IndexDocumentsMethod::ReplaceDocuments,
                ..Default::default()
            },
            |indexing_step| log::debug!("update: {:?}", indexing_step),
        )?;

        let (builder, user_result) = builder.add_documents(reader)?;
        log::info!("{} documents found.", user_result?);
        builder.execute()?;
        wtxn.commit()?;
        log::info!("All documents successfully imported.");
    }
    Ok(())
}

pub fn configure_data(
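Step 4.3 above round-trips documents through a temp file: DocumentsBatchBuilder writes the batch, then DocumentsBatchReader reads it back in the form IndexDocuments expects. A small sketch of that round trip on its own, assuming the same milli API surface as the calls in the diff and using an in-memory cursor instead of a temp file:

use std::io::Cursor;

use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};

fn batch_roundtrip_sketch() -> anyhow::Result<()> {
    // Build a batch containing a single JSON object, as import_dump does per document.
    let mut builder = DocumentsBatchBuilder::new(Cursor::new(Vec::new()));
    let document: serde_json::Map<String, serde_json::Value> =
        serde_json::from_str(r#"{ "id": 1, "title": "example" }"#)?;
    builder.append_json_object(&document)?;

    // Flush the batch, rewind, and read it back; the reader is the value that
    // would be handed to IndexDocuments::add_documents in the diff above.
    let mut cursor = builder.into_inner()?;
    cursor.set_position(0);
    let _reader = DocumentsBatchReader::from_reader(cursor)?;
    Ok(())
}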
@@ -48,9 +48,13 @@ async fn main() -> anyhow::Result<()> {
        _ => unreachable!(),
    }

    let index_scheduler = setup_meilisearch(&opt)?;

    let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?;
    let (index_scheduler, auth_controller) = match setup_meilisearch(&opt) {
        Ok(ret) => ret,
        Err(e) => {
            std::fs::remove_dir_all(opt.db_path)?;
            return Err(e);
        }
    };

    #[cfg(all(not(debug_assertions), feature = "analytics"))]
    let analytics = if !opt.no_analytics {
@@ -242,7 +242,9 @@ async fn document_addition(

    let (uuid, mut update_file) = index_scheduler.create_update_file()?;

    // TODO: this can be slow, maybe we should spawn a thread? But the payload isn't Send+Sync :weary:
    // push the entire stream into a `Vec`.
    // If someone sends us a never ending stream we're going to block the thread.
    // TODO: Maybe we should write it to a file to reduce the RAM consumption
    // and then reread it to convert it to obkv?
    let mut buffer = Vec::new();
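The TODO above sketches the alternative: spill the payload to a file instead of growing a Vec in memory, then reread it for the obkv conversion. A rough sketch of that idea, written against a generic stream of byte chunks (futures_util, bytes, and tempfile are assumptions here, and this is not the actual actix-web Payload type; the blocking writes would also still want something like spawn_blocking, as the first TODO notes):

use std::io::{Seek, SeekFrom, Write};

use futures_util::{Stream, StreamExt};

// Hypothetical helper: drain a stream of byte chunks into a temporary file,
// then rewind it so the caller can reread and convert it to obkv.
async fn spill_payload_to_file<S, E>(mut payload: S) -> anyhow::Result<std::fs::File>
where
    S: Stream<Item = Result<bytes::Bytes, E>> + Unpin,
    E: std::error::Error + Send + Sync + 'static,
{
    let mut file = tempfile::tempfile()?;
    while let Some(chunk) = payload.next().await {
        // Each chunk goes straight to disk instead of accumulating in RAM.
        file.write_all(&chunk?)?;
    }
    file.seek(SeekFrom::Start(0))?;
    Ok(file)
}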