Fix the test and simplify types

This commit is contained in:
Mubelotix 2025-06-23 18:55:23 +02:00
parent 4925b30196
commit 2f82d94502
No known key found for this signature in database
GPG Key ID: 89F391DBCC8CE7F0
9 changed files with 87 additions and 51 deletions

View File

@ -329,7 +329,7 @@ pub(crate) mod test {
write_channel_congestion: None,
internal_database_sizes: Default::default(),
},
embedder_stats: None,
embedder_stats: Default::default(),
enqueued_at: Some(BatchEnqueuedAt {
earliest: datetime!(2022-11-11 0:00 UTC),
oldest: datetime!(2022-11-11 0:00 UTC),

View File

@ -174,7 +174,7 @@ impl BatchQueue {
pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> {
let old_batch = self.all_batches.get(wtxn, &batch.uid)?;
println!("Saving batch: {}", batch.embedder_stats.is_some());
println!("Saving batch: {:?}", batch.embedder_stats);
self.all_batches.put(
wtxn,
@ -184,7 +184,7 @@ impl BatchQueue {
progress: None,
details: batch.details,
stats: batch.stats,
embedder_stats: batch.embedder_stats.as_ref().map(|s| BatchEmbeddingStats::from(s.as_ref())),
embedder_stats: batch.embedder_stats.as_ref().into(),
started_at: batch.started_at,
finished_at: batch.finished_at,
enqueued_at: batch.enqueued_at,

View File

@ -437,8 +437,10 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;
println!("create next batch");
let batch_id = self.queue.batches.next_batch_id(rtxn)?;
let mut current_batch = ProcessingBatch::new(batch_id);
println!("over");
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
let count_total_enqueued = enqueued.len();
@ -454,6 +456,7 @@ impl IndexScheduler {
kind: Kind::TaskCancelation,
id: task_id,
});
println!("task cancelled");
return Ok(Some((Batch::TaskCancelation { task }, current_batch)));
}
@ -524,7 +527,7 @@ impl IndexScheduler {
}
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { println!("return"); return Ok(None) };
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
@ -602,6 +605,7 @@ impl IndexScheduler {
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
{
current_batch.reason(autobatch_stop_reason.unwrap_or(stop_reason));
println!("autobatch");
return Ok(self
.create_next_batch_index(
rtxn,
@ -615,6 +619,7 @@ impl IndexScheduler {
// If we found no tasks then we were notified for something that got autobatched
// somehow and there is nothing to do.
println!("nothing to do");
Ok(None)
}
}

View File

@ -164,7 +164,7 @@ impl IndexScheduler {
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
let (tasks, congestion) =
self.apply_index_operation(&mut index_wtxn, &index, op, &progress, current_batch.clone_embedder_stats())?;
self.apply_index_operation(&mut index_wtxn, &index, op, &progress, current_batch.embedder_stats.clone())?;
{
progress.update_progress(FinalizingIndexStep::Committing);
@ -240,20 +240,11 @@ impl IndexScheduler {
builder.set_primary_key(primary_key);
let must_stop_processing = self.scheduler.must_stop_processing.clone();
let embedder_stats = match current_batch.embedder_stats {
Some(ref stats) => stats.clone(),
None => {
let embedder_stats: Arc<EmbedderStats> = Default::default();
current_batch.embedder_stats = Some(embedder_stats.clone());
embedder_stats
},
};
builder
.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
embedder_stats,
current_batch.embedder_stats.clone(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
index_wtxn.commit()?;

View File

@ -29,7 +29,7 @@ pub struct ProcessingBatch {
pub uid: BatchId,
pub details: DetailsView,
pub stats: BatchStats,
pub embedder_stats: Option<Arc<EmbedderStats>>,
pub embedder_stats: Arc<EmbedderStats>,
pub statuses: HashSet<Status>,
pub kinds: HashSet<Kind>,
@ -47,11 +47,13 @@ impl ProcessingBatch {
let mut statuses = HashSet::default();
statuses.insert(Status::Processing);
println!("Processing batch created: {}", uid);
Self {
uid,
details: DetailsView::default(),
stats: BatchStats::default(),
embedder_stats: None,
embedder_stats: Default::default(),
statuses,
kinds: HashSet::default(),
@ -64,17 +66,6 @@ impl ProcessingBatch {
}
}
pub fn clone_embedder_stats(&mut self) -> Arc<EmbedderStats> {
match self.embedder_stats {
Some(ref stats) => stats.clone(),
None => {
let embedder_stats: Arc<EmbedderStats> = Default::default();
self.embedder_stats = Some(embedder_stats.clone());
embedder_stats
},
}
}
/// Update itself with the content of the task and update the batch id in the task.
pub fn processing<'a>(&mut self, tasks: impl IntoIterator<Item = &'a mut Task>) {
for task in tasks.into_iter() {
@ -113,11 +104,14 @@ impl ProcessingBatch {
}
/// Records why this batch stopped accepting more tasks.
pub fn reason(&mut self, reason: BatchStopReason) {
    // NOTE(review): dropped the leftover `println!` debug output — stray
    // stdout writes don't belong in the scheduler; use `tracing` if this
    // needs to be observable.
    self.reason = reason;
}
/// Must be called once the batch has finished processing.
pub fn finished(&mut self) {
println!("Batch finished: {}", self.uid);
self.details = DetailsView::default();
self.stats = BatchStats::default();
self.finished_at = Some(OffsetDateTime::now_utc());
@ -132,6 +126,8 @@ impl ProcessingBatch {
/// Update the timestamp of the tasks and the inner structure of this structure.
pub fn update(&mut self, task: &mut Task) {
println!("Updating task: {} in batch: {}", task.uid, self.uid);
// We must re-set this value in case we're dealing with a task that has been added between
// the `processing` and `finished` state
// We must re-set this value in case we're dealing with a task that has been added between
@ -156,13 +152,13 @@ impl ProcessingBatch {
}
pub fn to_batch(&self) -> Batch {
println!("Converting to batch: {:?}", self.embedder_stats);
println!("Converting to batch: {:?} {:?}", self.uid, self.embedder_stats);
Batch {
uid: self.uid,
progress: None,
details: self.details.clone(),
stats: self.stats.clone(),
embedder_stats: self.embedder_stats.as_ref().map(|s| BatchEmbeddingStats::from(s.as_ref())),
embedder_stats: self.embedder_stats.as_ref().into(),
started_at: self.started_at,
finished_at: self.finished_at,
enqueued_at: self.enqueued_at,

View File

@ -31,8 +31,8 @@ pub struct BatchView {
pub struct BatchStatsView {
#[serde(flatten)]
pub stats: BatchStats,
#[serde(skip_serializing_if = "BatchEmbeddingStats::skip_serializing")]
pub embedder: Option<BatchEmbeddingStats>,
#[serde(skip_serializing_if = "BatchEmbeddingStats::skip_serializing", default)]
pub embedder: BatchEmbeddingStats,
}
impl BatchView {

View File

@ -20,7 +20,8 @@ pub struct Batch {
pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStats,
pub embedder_stats: Option<BatchEmbeddingStats>,
#[serde(skip_serializing_if = "BatchEmbeddingStats::skip_serializing", default)]
pub embedder_stats: BatchEmbeddingStats,
#[serde(with = "time::serde::rfc3339")]
pub started_at: OffsetDateTime,
@ -110,10 +111,7 @@ impl From<&EmbedderStats> for BatchEmbeddingStats {
}
impl BatchEmbeddingStats {
pub fn skip_serializing(this: &Option<BatchEmbeddingStats>) -> bool {
match this {
Some(stats) => stats.total_count == 0 && stats.error_count == 0 && stats.last_error.is_none(),
None => true,
}
/// Serde helper: `true` when the stats carry no information worth
/// serializing — no requests counted, no errors counted, no last error.
pub fn skip_serializing(&self) -> bool {
    // Equivalent to "all fields are empty", phrased as the negation of
    // "anything was recorded".
    !(self.total_count > 0 || self.error_count > 0 || self.last_error.is_some())
}
}

View File

@ -1,10 +1,12 @@
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use meili_snap::{json_string, snapshot};
use reqwest::IntoUrl;
use tokio::spawn;
use tokio::sync::mpsc;
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, Request, ResponseTemplate};
use std::thread::sleep;
use tokio::time::sleep;
use std::time::Duration;
use crate::common::Value;
@ -307,7 +309,6 @@ async fn create_mock_raw() -> (MockServer, Value) {
Mock::given(method("POST"))
.and(path("/"))
.respond_with(move |req: &Request| {
println!("Sent!");
let req: String = match req.body_json() {
Ok(req) => req,
Err(error) => {
@ -337,6 +338,50 @@ async fn create_mock_raw() -> (MockServer, Value) {
(mock_server, embedder_settings)
}
/// A mock embedding server that answers every POST with a delayed 500 error
/// and notifies `sender` once per request received. The caller decides how
/// many notifications to await (e.g. five) before proceeding.
async fn create_faulty_mock_raw(sender: mpsc::Sender<()>) -> (MockServer, Value) {
    let mock_server = MockServer::start().await;

    Mock::given(method("POST"))
        .and(path("/"))
        .respond_with(move |req: &Request| {
            // Echo the request body back inside the error payload so tests
            // can inspect what was sent.
            let req: String = match req.body_json() {
                Ok(req) => req,
                Err(error) => {
                    return ResponseTemplate::new(400).set_body_json(json!({
                        "error": format!("Invalid request: {error}")
                    }));
                }
            };

            // Tell the test one more request has been seen. The send result
            // is deliberately discarded: the receiver may already be dropped
            // once the test has collected enough notifications.
            let sender = sender.clone();
            spawn(async move {
                let _ = sender.send(()).await;
            });

            ResponseTemplate::new(500)
                .set_delay(Duration::from_millis(500))
                .set_body_json(json!({
                    "error": "Service Unavailable",
                    "text": req
                }))
        })
        .mount(&mock_server)
        .await;

    let url = mock_server.uri();
    let embedder_settings = json!({
        "source": "rest",
        "url": url,
        "dimensions": 3,
        "request": "{{text}}",
        "response": "{{embedding}}",
        "documentTemplate": "{{doc.name}}"
    });

    (mock_server, embedder_settings)
}
/// POSTs `text` as a JSON string body to `url` and returns the raw response.
pub async fn post<T: IntoUrl>(url: T, text: &str) -> reqwest::Result<reqwest::Response> {
    let client = reqwest::Client::builder().build()?;
    let request = client.post(url).json(&json!(text));
    request.send().await
}
@ -2118,7 +2163,8 @@ async fn searchable_reindex() {
#[actix_rt::test]
async fn observability() {
let (_mock, setting) = create_mock_raw().await;
let (sender, mut receiver) = mpsc::channel(10);
let (_mock, setting) = create_faulty_mock_raw(sender).await;
let server = get_server_vector().await;
let index = server.index("doggo");
@ -2133,20 +2179,19 @@ async fn observability() {
let task = server.wait_task(response.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents = json!([
{"id": 0, "name": "kefir"},
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
{"id": 2, "name": "intel"},
{"id": 3, "name": "missing"}, // Stuff that doesn't exist
{"id": 4, "name": "invalid"},
{"id": 5, "name": "foobar"},
{"id": 0, "name": "will_return_500"}, // Stuff that doesn't exist
{"id": 1, "name": "will_error"},
{"id": 2, "name": "must_error"},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let batches = index.filtered_batches(&[], &[], &[]).await;
println!("Batches: {batches:?}");
// The task will eventually fail, so let's not wait for it.
// Let's just wait for 5 errors from the mock server.
for _errors in 0..5 {
receiver.recv().await;
}
let task = index.wait_task(value.uid()).await;
let batches = index.filtered_batches(&[], &[], &[]).await;
println!("Batches: {batches:?}");

View File

@ -316,6 +316,7 @@ where
if let Some(embedder_stats) = &embedder_stats {
embedder_stats.as_ref().total_requests.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
}
// TODO: also catch 403 errors
let response = request.clone().send_json(&body);
let result = check_response(response, data.configuration_source).and_then(|response| {
response_to_embedding(response, data, expected_count, expected_dimension)