remove anyhow refs & implement missing errors

marin postma 2021-06-14 21:26:35 +02:00
parent c1b6f0e833
commit 58f9974be4
40 changed files with 707 additions and 668 deletions

src/index/dump.rs

@@ -3,7 +3,6 @@ use std::io::{BufRead, BufReader, Write};
use std::path::Path;
use std::sync::Arc;
use anyhow::{bail, Context};
use heed::RoTxn;
use indexmap::IndexMap;
use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream};
@@ -12,6 +11,7 @@ use serde::{Deserialize, Serialize};
use crate::option::IndexerOpts;
use super::{update_handler::UpdateHandler, Index, Settings, Unchecked};
use super::error::{IndexError, Result};
#[derive(Serialize, Deserialize)]
struct DumpMeta {
@@ -23,7 +23,7 @@ const META_FILE_NAME: &str = "meta.json";
const DATA_FILE_NAME: &str = "documents.jsonl";
impl Index {
pub fn dump(&self, path: impl AsRef<Path>) -> anyhow::Result<()> {
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
// acquire write txn make sure any ongoing write is finished before we start.
let txn = self.env.write_txn()?;
@@ -33,11 +33,12 @@ impl Index {
Ok(())
}
fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> anyhow::Result<()> {
fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
let document_file_path = path.as_ref().join(DATA_FILE_NAME);
let mut document_file = File::create(&document_file_path)?;
let documents = self.all_documents(txn)?;
let documents = self.all_documents(txn)
.map_err(|e| IndexError::Internal(e.into()))?;
let fields_ids_map = self.fields_ids_map(txn)?;
// dump documents
@@ -60,7 +61,7 @@ impl Index {
Ok(())
}
fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> anyhow::Result<()> {
fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
let meta_file_path = path.as_ref().join(META_FILE_NAME);
let mut meta_file = File::create(&meta_file_path)?;
@@ -81,11 +82,13 @@ impl Index {
dst: impl AsRef<Path>,
size: usize,
indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
) -> std::result::Result<(), Box<dyn std::error::Error>> {
let dir_name = src
.as_ref()
.file_name()
.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
// TODO: remove
//.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
.unwrap();
let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
create_dir_all(&dst_dir_path)?;
@@ -124,7 +127,7 @@ impl Index {
match Arc::try_unwrap(index.0) {
Ok(inner) => inner.prepare_for_closing().wait(),
Err(_) => bail!("Could not close index properly."),
Err(_) => todo!("Could not close index properly."),
}
Ok(())
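Note: the two conversion styles introduced in this file recur in every file below. Error types wired up in error.rs (next file) can be propagated with plain `?`, while errors without a `From` impl are boxed into the catch-all `IndexError::Internal`. A minimal sketch of both styles, with hypothetical helper names, not code from this commit:

use std::fs::File;
use std::path::Path;

use crate::index::error::{IndexError, Result};

// `?` is enough for io::Error: error.rs generates the From impl for it.
fn create_dump_file(path: &Path) -> Result<File> {
    Ok(File::create(path)?)
}

// Errors without a From impl are boxed by hand, mirroring the
// `.map_err(|e| IndexError::Internal(e.into()))` calls above.
fn boxed<E: std::error::Error + Send + Sync + 'static>(e: E) -> IndexError {
    IndexError::Internal(Box::new(e))
}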

src/index/error.rs (new file)

@@ -0,0 +1,60 @@
use std::error::Error;
use meilisearch_error::{Code, ErrorCode};
use serde_json::Value;
pub type Result<T> = std::result::Result<T, IndexError>;
#[derive(Debug, thiserror::Error)]
pub enum IndexError {
#[error("Internal error: {0}")]
Internal(Box<dyn Error + Send + Sync + 'static>),
#[error("Document with id {0} not found.")]
DocumentNotFound(String),
#[error("error with facet: {0}")]
Facet(#[from] FacetError),
}
macro_rules! internal_error {
($($other:path), *) => {
$(
impl From<$other> for IndexError {
fn from(other: $other) -> Self {
Self::Internal(Box::new(other))
}
}
)*
}
}
internal_error!(
std::io::Error,
heed::Error,
fst::Error,
serde_json::Error
);
impl ErrorCode for IndexError {
fn error_code(&self) -> Code {
match self {
IndexError::Internal(_) => Code::Internal,
IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
IndexError::Facet(e) => e.error_code(),
}
}
}
#[derive(Debug, thiserror::Error)]
pub enum FacetError {
#[error("Invalid facet expression, expected {}, found: {1}", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value)
}
impl ErrorCode for FacetError {
fn error_code(&self) -> Code {
match self {
FacetError::InvalidExpression(_, _) => Code::Facet,
}
}
}
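Note: the `internal_error!` macro above only expands to `From` impls, but that is what lets `?` forward the listed error types as `IndexError::Internal` without any explicit `map_err`. A small sketch assuming only the items defined in this file (the `read_meta` helper is hypothetical):

// Both `?`s compile because of the From impls generated by
// internal_error!(std::io::Error, heed::Error, fst::Error, serde_json::Error).
fn read_meta(path: &std::path::Path) -> Result<serde_json::Value> {
    let bytes = std::fs::read(path)?;           // std::io::Error -> IndexError::Internal
    let meta = serde_json::from_slice(&bytes)?; // serde_json::Error -> IndexError::Internal
    Ok(meta)
}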

src/index/mod.rs

@@ -5,19 +5,24 @@ use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;
use anyhow::{bail, Context};
use heed::{EnvOpenOptions, RoTxn};
use milli::obkv_to_json;
use serde::{de::Deserializer, Deserialize};
use serde_json::{Map, Value};
use crate::helpers::EnvSizer;
use error::Result;
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
use serde::{de::Deserializer, Deserialize};
pub use updates::{Checked, Facets, Settings, Unchecked};
use self::error::IndexError;
pub mod error;
pub mod update_handler;
mod dump;
mod search;
pub mod update_handler;
mod updates;
pub type Document = Map<String, Value>;
@@ -33,7 +38,7 @@ impl Deref for Index {
}
}
pub fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
pub fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result<Option<T>, D::Error>
where
T: Deserialize<'de>,
D: Deserializer<'de>,
@@ -42,20 +47,21 @@ where
}
impl Index {
pub fn open(path: impl AsRef<Path>, size: usize) -> anyhow::Result<Self> {
pub fn open(path: impl AsRef<Path>, size: usize) -> Result<Self> {
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(size);
let index = milli::Index::new(options, &path)?;
let index =
milli::Index::new(options, &path).map_err(|e| IndexError::Internal(e.into()))?;
Ok(Index(Arc::new(index)))
}
pub fn settings(&self) -> anyhow::Result<Settings<Checked>> {
pub fn settings(&self) -> Result<Settings<Checked>> {
let txn = self.read_txn()?;
self.settings_txn(&txn)
}
pub fn settings_txn(&self, txn: &RoTxn) -> anyhow::Result<Settings<Checked>> {
pub fn settings_txn(&self, txn: &RoTxn) -> Result<Settings<Checked>> {
let displayed_attributes = self
.displayed_fields(&txn)?
.map(|fields| fields.into_iter().map(String::from).collect());
@@ -65,7 +71,8 @@ impl Index {
.map(|fields| fields.into_iter().map(String::from).collect());
let faceted_attributes = self
.faceted_fields(&txn)?
.faceted_fields(&txn)
.map_err(|e| IndexError::Internal(Box::new(e)))?
.into_iter()
.collect();
@@ -76,8 +83,9 @@ impl Index {
.collect();
let stop_words = self
.stop_words(&txn)?
.map(|stop_words| -> anyhow::Result<BTreeSet<_>> {
.stop_words(&txn)
.map_err(|e| IndexError::Internal(e.into()))?
.map(|stop_words| -> Result<BTreeSet<_>> {
Ok(stop_words.stream().into_strs()?.into_iter().collect())
})
.transpose()?
@@ -114,12 +122,13 @@ impl Index {
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<S>>,
) -> anyhow::Result<Vec<Map<String, Value>>> {
) -> Result<Vec<Map<String, Value>>> {
let txn = self.read_txn()?;
let fields_ids_map = self.fields_ids_map(&txn)?;
let fields_to_display =
self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;
let fields_to_display = self
.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)
.map_err(|e| IndexError::Internal(e.into()))?;
let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);
@@ -127,7 +136,8 @@ impl Index {
for entry in iter {
let (_id, obkv) = entry?;
let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?;
let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)
.map_err(|e| IndexError::Internal(e.into()))?;
documents.push(object);
}
@@ -138,28 +148,35 @@ impl Index {
&self,
doc_id: String,
attributes_to_retrieve: Option<Vec<S>>,
) -> anyhow::Result<Map<String, Value>> {
) -> Result<Map<String, Value>> {
let txn = self.read_txn()?;
let fields_ids_map = self.fields_ids_map(&txn)?;
let fields_to_display =
self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;
let fields_to_display = self
.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)
.map_err(|e| IndexError::Internal(e.into()))?;
let internal_id = self
.external_documents_ids(&txn)?
.external_documents_ids(&txn)
.map_err(|e| IndexError::Internal(e.into()))?
.get(doc_id.as_bytes())
.with_context(|| format!("Document with id {} not found", doc_id))?;
.ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?;
let document = self
.documents(&txn, std::iter::once(internal_id))?
.documents(&txn, std::iter::once(internal_id))
.map_err(|e| IndexError::Internal(e.into()))?
.into_iter()
.next()
.map(|(_, d)| d);
match document {
Some(document) => Ok(obkv_to_json(&fields_to_display, &fields_ids_map, document)?),
None => bail!("Document with id {} not found", doc_id),
Some(document) => {
let document = obkv_to_json(&fields_to_display, &fields_ids_map, document)
.map_err(|e| IndexError::Internal(e.into()))?;
Ok(document)
}
None => Err(IndexError::DocumentNotFound(doc_id)),
}
}
@@ -172,8 +189,9 @@ impl Index {
txn: &heed::RoTxn,
attributes_to_retrieve: &Option<Vec<S>>,
fields_ids_map: &milli::FieldsIdsMap,
) -> anyhow::Result<Vec<u8>> {
let mut displayed_fields_ids = match self.displayed_fields_ids(&txn)? {
) -> Result<Vec<u8>> {
let mut displayed_fields_ids = match self.displayed_fields_ids(&txn)
.map_err(|e| IndexError::Internal(Box::new(e)))? {
Some(ids) => ids.into_iter().collect::<Vec<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
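Note: `use error::Result` shadows the prelude's two-parameter `Result` in this module, which is why `deserialize_some` (and `serialize_with_wildcard` in updates.rs) now spell out `std::result::Result`. A standalone illustration of the shadowing, not taken from the codebase:

use super::error::Result; // Result<T> = std::result::Result<T, IndexError>

// With the one-parameter alias in scope, a signature such as
// `Result<Option<T>, D::Error>` fails with "wrong number of type arguments",
// so functions with a different error type must use the full path.
fn parse(raw: &str) -> std::result::Result<serde_json::Value, serde_json::Error> {
    serde_json::from_str(raw)
}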

src/index/search.rs

@@ -2,7 +2,6 @@ use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::time::Instant;
use anyhow::bail;
use either::Either;
use heed::RoTxn;
use indexmap::IndexMap;
@@ -11,6 +10,9 @@ use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::index::error::FacetError;
use super::error::{IndexError, Result};
use super::Index;
pub type Document = IndexMap<String, Value>;
@@ -71,7 +73,7 @@ struct FormatOptions {
}
impl Index {
pub fn perform_search(&self, query: SearchQuery) -> anyhow::Result<SearchResult> {
pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
let before_search = Instant::now();
let rtxn = self.read_txn()?;
@@ -95,12 +97,14 @@ impl Index {
matching_words,
candidates,
..
} = search.execute()?;
let mut documents = Vec::new();
} = search
.execute()
.map_err(|e| IndexError::Internal(e.into()))?;
let fields_ids_map = self.fields_ids_map(&rtxn).unwrap();
let displayed_ids = self
.displayed_fields_ids(&rtxn)?
.map_err(|e| IndexError::Internal(Box::new(e)))?
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
@@ -158,6 +162,8 @@ impl Index {
let formatter =
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
let mut documents = Vec::new();
for (_id, obkv) in self.documents(&rtxn, documents_ids)? {
let document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?;
let formatted = format_fields(
@@ -167,6 +173,7 @@ impl Index {
&matching_words,
&formatted_options,
)?;
let hit = SearchHit {
document,
formatted,
@@ -182,7 +189,12 @@ impl Index {
if fields.iter().all(|f| f != "*") {
facet_distribution.facets(fields);
}
Some(facet_distribution.candidates(candidates).execute()?)
let distribution = facet_distribution
.candidates(candidates)
.execute()
.map_err(|e| IndexError::Internal(e.into()))?;
Some(distribution)
}
None => None,
};
@@ -326,7 +338,7 @@ fn make_document(
attributes_to_retrieve: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReader,
) -> anyhow::Result<Document> {
) -> Result<Document> {
let mut document = Document::new();
for attr in attributes_to_retrieve {
if let Some(value) = obkv.get(*attr) {
@@ -351,7 +363,7 @@ fn format_fields<A: AsRef<[u8]>>(
formatter: &Formatter<A>,
matching_words: &impl Matcher,
formatted_options: &BTreeMap<FieldId, FormatOptions>,
) -> anyhow::Result<Document> {
) -> Result<Document> {
let mut document = Document::new();
for (id, format) in formatted_options {
@@ -513,15 +525,15 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
}
}
fn parse_filter(
facets: &Value,
index: &Index,
txn: &RoTxn,
) -> anyhow::Result<Option<FilterCondition>> {
fn parse_filter(facets: &Value, index: &Index, txn: &RoTxn) -> Result<Option<FilterCondition>> {
match facets {
Value::String(expr) => Ok(Some(FilterCondition::from_str(txn, index, expr)?)),
Value::String(expr) => {
let condition = FilterCondition::from_str(txn, index, expr)
.map_err(|e| IndexError::Internal(e.into()))?;
Ok(Some(condition))
}
Value::Array(arr) => parse_filter_array(txn, index, arr),
v => bail!("Invalid facet expression, expected Array, found: {:?}", v),
v => return Err(FacetError::InvalidExpression(&["Array"], v.clone()).into()),
}
}
@@ -529,7 +541,7 @@ fn parse_filter_array(
txn: &RoTxn,
index: &Index,
arr: &[Value],
) -> anyhow::Result<Option<FilterCondition>> {
) -> Result<Option<FilterCondition>> {
let mut ands = Vec::new();
for value in arr {
match value {
@@ -539,19 +551,22 @@
for value in arr {
match value {
Value::String(s) => ors.push(s.clone()),
v => bail!("Invalid facet expression, expected String, found: {:?}", v),
v => {
return Err(FacetError::InvalidExpression(&["String"], v.clone()).into())
}
}
}
ands.push(Either::Left(ors));
}
v => bail!(
"Invalid facet expression, expected String or [String], found: {:?}",
v
),
v => {
return Err(
FacetError::InvalidExpression(&["String", "[String]"], v.clone()).into(),
)
}
}
}
Ok(FilterCondition::from_array(txn, &index.0, ands)?)
FilterCondition::from_array(txn, &index.0, ands).map_err(|e| IndexError::Internal(Box::new(e)))
}
#[cfg(test)]
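Note: `FacetError::InvalidExpression` carries both the accepted forms and the offending JSON value, so the user-facing message follows directly from the `#[error(...)]` attribute in error.rs. A hedged sketch of what it renders to, not output captured from a running instance:

use serde_json::json;

use crate::index::error::FacetError;

fn main() {
    // thiserror's generated Display prints:
    // Invalid facet expression, expected String, [String], found: 42
    let err = FacetError::InvalidExpression(&["String", "[String]"], json!(42));
    println!("{}", err);
}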

src/index/update_handler.rs

@@ -1,7 +1,6 @@
use std::fs::File;
use crate::index::Index;
use anyhow::Result;
use grenad::CompressionType;
use milli::update::UpdateBuilder;
use rayon::ThreadPool;
@@ -22,7 +21,7 @@ pub struct UpdateHandler {
}
impl UpdateHandler {
pub fn new(opt: &IndexerOpts) -> anyhow::Result<Self> {
pub fn new(opt: &IndexerOpts) -> std::result::Result<Self, Box<dyn std::error::Error>> {
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(opt.indexing_jobs.unwrap_or(0))
.build()?;

src/index/updates.rs

@@ -8,11 +8,16 @@ use log::info;
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use serde::{Deserialize, Serialize, Serializer};
use crate::index::error::IndexError;
use crate::index_controller::UpdateResult;
use super::error::Result;
use super::{deserialize_some, Index};
fn serialize_with_wildcard<S>(field: &Option<Option<Vec<String>>>, s: S) -> Result<S::Ok, S::Error>
fn serialize_with_wildcard<S>(
field: &Option<Option<Vec<String>>>,
s: S,
) -> std::result::Result<S::Ok, S::Error>
where
S: Serializer,
{
@@ -174,7 +179,7 @@ impl Index {
content: Option<impl io::Read>,
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> anyhow::Result<UpdateResult> {
) -> Result<UpdateResult> {
let mut txn = self.write_txn()?;
let result = self.update_documents_txn(
&mut txn,
@@ -196,7 +201,7 @@ impl Index {
content: Option<impl io::Read>,
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> anyhow::Result<UpdateResult> {
) -> Result<UpdateResult> {
info!("performing document addition");
// Set the primary key if not set already, ignore if already set.
@@ -204,7 +209,8 @@ impl Index {
let mut builder = UpdateBuilder::new(0)
.settings(txn, &self);
builder.set_primary_key(primary_key.to_string());
builder.execute(|_, _| ())?;
builder.execute(|_, _| ())
.map_err(|e| IndexError::Internal(Box::new(e)))?;
}
let mut builder = update_builder.index_documents(txn, self);
@@ -216,11 +222,15 @@ impl Index {
let gzipped = false;
let addition = match content {
Some(content) if gzipped => {
builder.execute(GzDecoder::new(content), indexing_callback)?
}
Some(content) => builder.execute(content, indexing_callback)?,
None => builder.execute(std::io::empty(), indexing_callback)?,
Some(content) if gzipped => builder
.execute(GzDecoder::new(content), indexing_callback)
.map_err(|e| IndexError::Internal(e.into()))?,
Some(content) => builder
.execute(content, indexing_callback)
.map_err(|e| IndexError::Internal(e.into()))?,
None => builder
.execute(std::io::empty(), indexing_callback)
.map_err(|e| IndexError::Internal(e.into()))?,
};
info!("document addition done: {:?}", addition);
@@ -228,7 +238,7 @@ impl Index {
Ok(UpdateResult::DocumentsAddition(addition))
}
pub fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result<UpdateResult> {
pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let builder = update_builder.clear_documents(&mut wtxn, self);
@@ -238,7 +248,7 @@ impl Index {
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e.into()),
Err(e) => Err(IndexError::Internal(Box::new(e))),
}
}
@@ -247,7 +257,7 @@ impl Index {
txn: &mut heed::RwTxn<'a, 'b>,
settings: &Settings<Checked>,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
) -> Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut builder = update_builder.settings(txn, self);
@@ -300,7 +310,8 @@ impl Index {
builder.execute(|indexing_step, update_id| {
info!("update {}: {:?}", update_id, indexing_step)
})?;
})
.map_err(|e| IndexError::Internal(e.into()))?;
Ok(UpdateResult::Other)
}
@@ -309,7 +320,7 @@ impl Index {
&self,
settings: &Settings<Checked>,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
) -> Result<UpdateResult> {
let mut txn = self.write_txn()?;
let result = self.update_settings_txn(&mut txn, settings, update_builder)?;
txn.commit()?;
@@ -320,9 +331,10 @@ impl Index {
&self,
document_ids: &[String],
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
) -> Result<UpdateResult> {
let mut txn = self.write_txn()?;
let mut builder = update_builder.delete_documents(&mut txn, self)?;
let mut builder = update_builder.delete_documents(&mut txn, self)
.map_err(|e| IndexError::Internal(e.into()))?;
// We ignore unexisting document ids
document_ids.iter().for_each(|id| {
@@ -334,7 +346,7 @@ impl Index {
.commit()
.and(Ok(UpdateResult::DocumentDeletion { deleted }))
.map_err(Into::into),
Err(e) => Err(e.into()),
Err(e) => Err(IndexError::Internal(Box::new(e))),
}
}
}
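Note: the `.map_err(Into::into)` on the `commit()` calls above compiles because `internal_error!` generated `From<heed::Error> for IndexError`, while the explicit `IndexError::Internal(Box::new(e))` arms do the same boxing by hand for error types the macro does not cover. A condensed, hypothetical illustration of the committing path (`commit_update` is an invented name, not the actual function):

use crate::index::error::Result;
use crate::index_controller::UpdateResult;

// Commit either succeeds and reports UpdateResult::Other, or surfaces the
// heed::Error as IndexError::Internal through the generated From impl.
fn commit_update(wtxn: heed::RwTxn<'_, '_>) -> Result<UpdateResult> {
    wtxn.commit()
        .and(Ok(UpdateResult::Other))
        .map_err(Into::into)
}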