mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-28 22:28:05 +01:00
Send a basic progressing status to the updates front page
This commit is contained in:
parent
4eeeccb9cd
commit
f6eecb855e
@ -49,22 +49,16 @@ $(window).on('load', function () {
|
||||
prependChild(results, elem);
|
||||
}
|
||||
|
||||
if (status.type == "Processing") {
|
||||
const id = 'update-' + status.update_id;
|
||||
const content = $(`#${id} .updateStatus.content`);
|
||||
content.html('processing...');
|
||||
}
|
||||
|
||||
if (status.type == "Progressing") {
|
||||
const id = 'update-' + status.update_id;
|
||||
const content = $(`#${id} .updateStatus.content`);
|
||||
content.html('progressing...');
|
||||
content.html('progressing... ' + JSON.stringify(status.meta));
|
||||
}
|
||||
|
||||
if (status.type == "Processed") {
|
||||
const id = 'update-' + status.update_id;
|
||||
const content = $(`#${id} .updateStatus.content`);
|
||||
content.html('processed');
|
||||
content.html('processed ' + JSON.stringify(status.meta));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
@ -230,26 +230,30 @@ fn csv_bytes_readers<'a>(
|
||||
readers
|
||||
}
|
||||
|
||||
pub fn run<'a>(
|
||||
pub fn run<'a, F>(
|
||||
env: &heed::Env,
|
||||
index: &Index,
|
||||
opt: &IndexerOpt,
|
||||
content: &'a [u8],
|
||||
gzipped: bool,
|
||||
progress_callback: F,
|
||||
) -> anyhow::Result<()>
|
||||
where F: Fn(u32) + Sync + Send,
|
||||
{
|
||||
let jobs = opt.indexing_jobs.unwrap_or(0);
|
||||
let pool = rayon::ThreadPoolBuilder::new().num_threads(jobs).build()?;
|
||||
pool.install(|| run_intern(env, index, opt, content, gzipped))
|
||||
pool.install(|| run_intern(env, index, opt, content, gzipped, progress_callback))
|
||||
}
|
||||
|
||||
fn run_intern<'a>(
|
||||
fn run_intern<'a, F>(
|
||||
env: &heed::Env,
|
||||
index: &Index,
|
||||
opt: &IndexerOpt,
|
||||
content: &'a [u8],
|
||||
gzipped: bool,
|
||||
progress_callback: F,
|
||||
) -> anyhow::Result<()>
|
||||
where F: Fn(u32) + Sync + Send,
|
||||
{
|
||||
let before_indexing = Instant::now();
|
||||
let num_threads = rayon::current_num_threads();
|
||||
@ -283,7 +287,7 @@ fn run_intern<'a>(
|
||||
chunk_fusing_shrink_size,
|
||||
)?;
|
||||
let base_document_id = number_of_documents;
|
||||
store.index_csv(rdr, base_document_id, i, num_threads, log_every_n)
|
||||
store.index_csv(rdr, base_document_id, i, num_threads, log_every_n, &progress_callback)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
|
@ -301,14 +301,16 @@ impl Store {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn index_csv<'a>(
|
||||
pub fn index_csv<'a, F>(
|
||||
mut self,
|
||||
mut rdr: csv::Reader<Box<dyn Read + Send + 'a>>,
|
||||
base_document_id: usize,
|
||||
thread_index: usize,
|
||||
num_threads: usize,
|
||||
log_every_n: usize,
|
||||
mut progress_callback: F,
|
||||
) -> anyhow::Result<Readers>
|
||||
where F: FnMut(u32),
|
||||
{
|
||||
debug!("{:?}: Indexing in a Store...", thread_index);
|
||||
|
||||
@ -328,6 +330,7 @@ impl Store {
|
||||
if document_id % log_every_n == 0 {
|
||||
let count = format_count(document_id);
|
||||
info!("We have seen {} documents so far ({:.02?}).", count, before.elapsed());
|
||||
progress_callback((document_id - base_document_id) as u32);
|
||||
before = Instant::now();
|
||||
}
|
||||
|
||||
@ -349,6 +352,8 @@ impl Store {
|
||||
document_id = document_id + 1;
|
||||
}
|
||||
|
||||
progress_callback((document_id - base_document_id) as u32);
|
||||
|
||||
let readers = self.finish()?;
|
||||
debug!("{:?}: Store created!", thread_index);
|
||||
Ok(readers)
|
||||
|
@ -63,5 +63,5 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
let file = File::open(file_path)?;
|
||||
let content = unsafe { memmap::Mmap::map(&file)? };
|
||||
|
||||
indexing::run(&env, &index, &opt.indexer, &content, gzipped)
|
||||
indexing::run(&env, &index, &opt.indexer, &content, gzipped, |_docid| { })
|
||||
}
|
||||
|
@ -87,20 +87,39 @@ struct IndexTemplate {
|
||||
|
||||
#[derive(Template)]
|
||||
#[template(path = "updates.html")]
|
||||
struct UpdatesTemplate<M: Serialize + Send> {
|
||||
struct UpdatesTemplate<M: Serialize + Send, P: Serialize + Send, N: Serialize + Send> {
|
||||
db_name: String,
|
||||
db_size: usize,
|
||||
docs_count: usize,
|
||||
updates: Vec<UpdateStatus<M>>,
|
||||
updates: Vec<UpdateStatus<M, P, N>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(tag = "type")]
|
||||
enum UpdateStatus<M> {
|
||||
enum UpdateStatus<M, P, N> {
|
||||
Pending { update_id: u64, meta: M },
|
||||
Processing { update_id: u64, meta: M },
|
||||
Progressing { update_id: u64, meta: M },
|
||||
Processed { update_id: u64, meta: M },
|
||||
Progressing { update_id: u64, meta: P },
|
||||
Processed { update_id: u64, meta: N },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
enum UpdateMeta {
|
||||
DocumentsAddition {
|
||||
total_number_of_documents: Option<usize>,
|
||||
},
|
||||
DocumentsAdditionFromPath {
|
||||
path: PathBuf,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
enum UpdateMetaProgress {
|
||||
DocumentsAddition {
|
||||
processed_number_of_documents: usize,
|
||||
total_number_of_documents: Option<usize>,
|
||||
},
|
||||
}
|
||||
|
||||
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
@ -134,21 +153,62 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
let update_store = UpdateStore::open(
|
||||
update_store_options,
|
||||
update_store_path,
|
||||
move |update_id, meta: String, content| {
|
||||
let processing = UpdateStatus::Processing { update_id, meta: meta.clone() };
|
||||
let _ = update_status_sender_cloned.send(processing);
|
||||
move |update_id, meta, content| {
|
||||
let result = match meta {
|
||||
UpdateMeta::DocumentsAddition { total_number_of_documents } => {
|
||||
let gzipped = false;
|
||||
indexing::run(
|
||||
&env_cloned,
|
||||
&index_cloned,
|
||||
&indexer_opt_cloned,
|
||||
content,
|
||||
gzipped,
|
||||
|count| {
|
||||
// We send progress status...
|
||||
let meta = UpdateMetaProgress::DocumentsAddition {
|
||||
processed_number_of_documents: count as usize,
|
||||
total_number_of_documents,
|
||||
};
|
||||
let progress = UpdateStatus::Progressing { update_id, meta };
|
||||
let _ = update_status_sender_cloned.send(progress);
|
||||
},
|
||||
)
|
||||
},
|
||||
UpdateMeta::DocumentsAdditionFromPath { path } => {
|
||||
let file = match File::open(&path) {
|
||||
Ok(file) => file,
|
||||
Err(e) => {
|
||||
let meta = format!("documents addition file ({}) error: {}", path.display(), e);
|
||||
return Ok(meta);
|
||||
}
|
||||
};
|
||||
let content = match unsafe { memmap::Mmap::map(&file) } {
|
||||
Ok(mmap) => mmap,
|
||||
Err(e) => {
|
||||
let meta = format!("documents addition file ({}) mmap error: {}", path.display(), e);
|
||||
return Ok(meta);
|
||||
},
|
||||
};
|
||||
|
||||
let _progress = UpdateStatus::Progressing { update_id, meta: meta.clone() };
|
||||
// let _ = update_status_sender_cloned.send(progress);
|
||||
|
||||
let gzipped = false;
|
||||
let result = indexing::run(
|
||||
&env_cloned,
|
||||
&index_cloned,
|
||||
&indexer_opt_cloned,
|
||||
content,
|
||||
gzipped,
|
||||
);
|
||||
let gzipped = path.extension().map_or(false, |e| e == "gz" || e == "gzip");
|
||||
indexing::run(
|
||||
&env_cloned,
|
||||
&index_cloned,
|
||||
&indexer_opt_cloned,
|
||||
&content,
|
||||
gzipped,
|
||||
|count| {
|
||||
// We send progress status...
|
||||
let meta = UpdateMetaProgress::DocumentsAddition {
|
||||
processed_number_of_documents: count as usize,
|
||||
total_number_of_documents: None,
|
||||
};
|
||||
let progress = UpdateStatus::Progressing { update_id, meta };
|
||||
let _ = update_status_sender_cloned.send(progress);
|
||||
},
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let meta = match result {
|
||||
Ok(()) => format!("valid update content"),
|
||||
@ -201,7 +261,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
.map(move |header: String| {
|
||||
let update_store = update_store_cloned.clone();
|
||||
let mut updates = update_store.iter_metas(|processed, pending| {
|
||||
let mut updates = Vec::new();
|
||||
let mut updates = Vec::<UpdateStatus<_, UpdateMetaProgress, _>>::new();
|
||||
for result in processed {
|
||||
let (uid, meta) = result?;
|
||||
updates.push(UpdateStatus::Processed { update_id: uid.get(), meta });
|
||||
@ -359,8 +419,8 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
});
|
||||
|
||||
async fn buf_stream(
|
||||
update_store: Arc<UpdateStore<String, String>>,
|
||||
update_status_sender: broadcast::Sender<UpdateStatus<String>>,
|
||||
update_store: Arc<UpdateStore<UpdateMeta, String>>,
|
||||
update_status_sender: broadcast::Sender<UpdateStatus<UpdateMeta, UpdateMetaProgress, String>>,
|
||||
mut stream: impl futures::Stream<Item=Result<impl bytes::Buf, warp::Error>> + Unpin,
|
||||
) -> Result<impl warp::Reply, warp::Rejection>
|
||||
{
|
||||
@ -375,7 +435,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
let file = file.into_std().await;
|
||||
let mmap = unsafe { memmap::Mmap::map(&file).unwrap() };
|
||||
|
||||
let meta = String::from("I am the metadata");
|
||||
let meta = UpdateMeta::DocumentsAddition { total_number_of_documents: None };
|
||||
let update_id = update_store.register_update(&meta, &mmap[..]).unwrap();
|
||||
let _ = update_status_sender.send(UpdateStatus::Pending { update_id, meta });
|
||||
eprintln!("update {} registered", update_id);
|
||||
@ -385,13 +445,29 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
|
||||
let update_store_cloned = update_store.clone();
|
||||
let update_status_sender_cloned = update_status_sender.clone();
|
||||
let indexing_route = warp::filters::method::post()
|
||||
let indexing_route_csv = warp::filters::method::post()
|
||||
.and(warp::path!("documents"))
|
||||
.and(warp::header::exact_ignore_case("content-type", "text/csv"))
|
||||
.and(warp::body::stream())
|
||||
.and_then(move |stream| {
|
||||
buf_stream(update_store_cloned.clone(), update_status_sender_cloned.clone(), stream)
|
||||
});
|
||||
|
||||
let update_store_cloned = update_store.clone();
|
||||
let update_status_sender_cloned = update_status_sender.clone();
|
||||
let indexing_route_filepath = warp::filters::method::post()
|
||||
.and(warp::path!("documents"))
|
||||
.and(warp::header::exact_ignore_case("content-type", "text/x-filepath"))
|
||||
.and(warp::body::bytes())
|
||||
.map(move |bytes: bytes::Bytes| {
|
||||
let string = std::str::from_utf8(&bytes).unwrap().trim();
|
||||
let meta = UpdateMeta::DocumentsAdditionFromPath { path: PathBuf::from(string) };
|
||||
let update_id = update_store_cloned.register_update(&meta, &[]).unwrap();
|
||||
let _ = update_status_sender_cloned.send(UpdateStatus::Pending { update_id, meta });
|
||||
eprintln!("update {} registered", update_id);
|
||||
Ok(warp::reply())
|
||||
});
|
||||
|
||||
let update_ws_route = warp::ws()
|
||||
.and(warp::path!("updates" / "ws"))
|
||||
.map(move |ws: warp::ws::Ws| {
|
||||
@ -435,7 +511,8 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
.or(dash_logo_white_route)
|
||||
.or(dash_logo_black_route)
|
||||
.or(query_route)
|
||||
.or(indexing_route)
|
||||
.or(indexing_route_csv)
|
||||
.or(indexing_route_filepath)
|
||||
.or(update_ws_route);
|
||||
|
||||
let addr = SocketAddr::from_str(&opt.http_listen_addr)?;
|
||||
|
@ -2,7 +2,7 @@ use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crossbeam_channel::Sender;
|
||||
use heed::types::{OwnedType, DecodeIgnore, SerdeBincode, ByteSlice};
|
||||
use heed::types::{OwnedType, DecodeIgnore, SerdeJson, ByteSlice};
|
||||
use heed::{EnvOpenOptions, Env, Database};
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
@ -11,9 +11,9 @@ use crate::BEU64;
|
||||
#[derive(Clone)]
|
||||
pub struct UpdateStore<M, N> {
|
||||
env: Env,
|
||||
pending_meta: Database<OwnedType<BEU64>, SerdeBincode<M>>,
|
||||
pending_meta: Database<OwnedType<BEU64>, SerdeJson<M>>,
|
||||
pending: Database<OwnedType<BEU64>, ByteSlice>,
|
||||
processed_meta: Database<OwnedType<BEU64>, SerdeBincode<N>>,
|
||||
processed_meta: Database<OwnedType<BEU64>, SerdeJson<N>>,
|
||||
notification_sender: Sender<()>,
|
||||
}
|
||||
|
||||
@ -156,8 +156,8 @@ impl<M: 'static, N: 'static> UpdateStore<M, N> {
|
||||
M: for<'a> Deserialize<'a>,
|
||||
N: for<'a> Deserialize<'a>,
|
||||
F: for<'a> FnMut(
|
||||
heed::RoIter<'a, OwnedType<BEU64>, SerdeBincode<N>>,
|
||||
heed::RoIter<'a, OwnedType<BEU64>, SerdeBincode<M>>,
|
||||
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<N>>,
|
||||
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<M>>,
|
||||
) -> heed::Result<T>,
|
||||
{
|
||||
let rtxn = self.env.read_txn()?;
|
||||
|
@ -55,7 +55,7 @@
|
||||
|
||||
{% for update in updates %}
|
||||
{% match update %}
|
||||
{% when UpdateStatus::Pending with { update_id , meta } %}
|
||||
{% when UpdateStatus::Pending with { update_id, meta } %}
|
||||
<li id="update-{{ update_id }}" class="document">
|
||||
<ol>
|
||||
<li class="field">
|
||||
@ -64,7 +64,7 @@
|
||||
</li>
|
||||
</ol>
|
||||
</li>
|
||||
{% when UpdateStatus::Processed with { update_id , meta } %}
|
||||
{% when UpdateStatus::Processed with { update_id, meta } %}
|
||||
<li id="update-{{ update_id }}" class="document">
|
||||
<ol>
|
||||
<li class="field">
|
||||
|
Loading…
x
Reference in New Issue
Block a user