mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Merge #2523
2523: Improve the tasks error reporting when processed in batches r=irevoire a=Kerollmops This fixes #2478 by changing the behavior of the task handler when there is an error in a batch of document addition or update. What changes is that when there is a user error in a task in a batch we now report this task as failed with the right error message but we continue to process the other tasks. A user error can be when a geo field is invalid, a document id is invalid, or missing. fixes #2582, #2478 Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
b5f91b91c3
23 changed files with 251 additions and 222 deletions
|
@ -4,7 +4,7 @@ use std::path::Path;
|
|||
|
||||
use anyhow::Context;
|
||||
use indexmap::IndexMap;
|
||||
use milli::documents::DocumentBatchReader;
|
||||
use milli::documents::DocumentsBatchReader;
|
||||
use milli::heed::{EnvOpenOptions, RoTxn};
|
||||
use milli::update::{IndexDocumentsConfig, IndexerConfig};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -135,19 +135,20 @@ impl Index {
|
|||
if !empty {
|
||||
tmp_doc_file.seek(SeekFrom::Start(0))?;
|
||||
|
||||
let documents_reader = DocumentBatchReader::from_reader(tmp_doc_file)?;
|
||||
let documents_reader = DocumentsBatchReader::from_reader(tmp_doc_file)?;
|
||||
|
||||
//If the document file is empty, we don't perform the document addition, to prevent
|
||||
//a primary key error to be thrown.
|
||||
let config = IndexDocumentsConfig::default();
|
||||
let mut builder = milli::update::IndexDocuments::new(
|
||||
let builder = milli::update::IndexDocuments::new(
|
||||
&mut txn,
|
||||
&index,
|
||||
indexer_config,
|
||||
config,
|
||||
|_| (),
|
||||
)?;
|
||||
builder.add_documents(documents_reader)?;
|
||||
let (builder, user_error) = builder.add_documents(documents_reader)?;
|
||||
user_error?;
|
||||
builder.execute()?;
|
||||
}
|
||||
|
||||
|
|
|
@ -40,6 +40,12 @@ impl ErrorCode for IndexError {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<milli::UserError> for IndexError {
|
||||
fn from(error: milli::UserError) -> IndexError {
|
||||
IndexError::Milli(error.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum FacetError {
|
||||
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
|
||||
|
|
|
@ -4,7 +4,6 @@ use std::marker::PhantomData;
|
|||
use std::ops::Deref;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use fst::IntoStreamer;
|
||||
use milli::heed::{CompactionOption, EnvOpenOptions, RoTxn};
|
||||
|
@ -14,6 +13,7 @@ use serde::{Deserialize, Serialize};
|
|||
use serde_json::{Map, Value};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::index::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
|
||||
|
@ -245,11 +245,8 @@ impl Index {
|
|||
let fields_ids_map = self.fields_ids_map(&txn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
let iter = self.all_documents(&txn)?.skip(offset).take(limit);
|
||||
|
||||
let mut documents = Vec::new();
|
||||
|
||||
for entry in iter {
|
||||
for entry in self.all_documents(&txn)?.skip(offset).take(limit) {
|
||||
let (_id, obkv) = entry?;
|
||||
let document = obkv_to_json(&all_fields, &fields_ids_map, obkv)?;
|
||||
let document = match &attributes_to_retrieve {
|
||||
|
@ -302,7 +299,7 @@ impl Index {
|
|||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
WalkDir::new(self.inner.path())
|
||||
WalkDir::new(self.path())
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
|
|
|
@ -24,12 +24,12 @@ pub use test::MockIndex as Index;
|
|||
/// code for unit testing, in places where an index would normally be used.
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use milli::update::IndexerConfig;
|
||||
use milli::update::{DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod};
|
||||
use milli::update::{
|
||||
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, IndexerConfig,
|
||||
};
|
||||
use nelson::Mocker;
|
||||
use uuid::Uuid;
|
||||
|
||||
|
@ -162,7 +162,7 @@ pub mod test {
|
|||
primary_key: Option<String>,
|
||||
file_store: UpdateFileStore,
|
||||
contents: impl Iterator<Item = Uuid>,
|
||||
) -> Result<DocumentAdditionResult> {
|
||||
) -> Result<Vec<Result<DocumentAdditionResult>>> {
|
||||
match self {
|
||||
MockIndex::Real(index) => {
|
||||
index.update_documents(method, primary_key, file_store, contents)
|
||||
|
|
|
@ -3,7 +3,7 @@ use std::marker::PhantomData;
|
|||
use std::num::NonZeroUsize;
|
||||
|
||||
use log::{debug, info, trace};
|
||||
use milli::documents::DocumentBatchReader;
|
||||
use milli::documents::DocumentsBatchReader;
|
||||
use milli::update::{
|
||||
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||
Setting,
|
||||
|
@ -11,7 +11,7 @@ use milli::update::{
|
|||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::Result;
|
||||
use super::error::{IndexError, Result};
|
||||
use super::index::{Index, IndexMeta};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
|
@ -299,7 +299,7 @@ impl Index {
|
|||
primary_key: Option<String>,
|
||||
file_store: UpdateFileStore,
|
||||
contents: impl IntoIterator<Item = Uuid>,
|
||||
) -> Result<DocumentAdditionResult> {
|
||||
) -> Result<Vec<Result<DocumentAdditionResult>>> {
|
||||
trace!("performing document addition");
|
||||
let mut txn = self.write_txn()?;
|
||||
|
||||
|
@ -323,19 +323,34 @@ impl Index {
|
|||
indexing_callback,
|
||||
)?;
|
||||
|
||||
let mut results = Vec::new();
|
||||
for content_uuid in contents.into_iter() {
|
||||
let content_file = file_store.get_update(content_uuid)?;
|
||||
let reader = DocumentBatchReader::from_reader(content_file)?;
|
||||
builder.add_documents(reader)?;
|
||||
let reader = DocumentsBatchReader::from_reader(content_file)?;
|
||||
let (new_builder, user_result) = builder.add_documents(reader)?;
|
||||
builder = new_builder;
|
||||
|
||||
let user_result = match user_result {
|
||||
Ok(count) => {
|
||||
let addition = DocumentAdditionResult {
|
||||
indexed_documents: count,
|
||||
number_of_documents: count,
|
||||
};
|
||||
info!("document addition done: {:?}", addition);
|
||||
Ok(addition)
|
||||
}
|
||||
Err(e) => Err(IndexError::from(e)),
|
||||
};
|
||||
|
||||
results.push(user_result);
|
||||
}
|
||||
|
||||
let addition = builder.execute()?;
|
||||
if results.iter().any(Result::is_ok) {
|
||||
let _addition = builder.execute()?;
|
||||
txn.commit()?;
|
||||
}
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
info!("document addition done: {:?}", addition);
|
||||
|
||||
Ok(addition)
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue