improve the error handling in general and introduce the concept of reserved keywords

Tamo 2021-09-02 15:57:40 +02:00
parent e8c093c1d0
commit bd4c248292
6 changed files with 50 additions and 17 deletions

View File

@@ -3,7 +3,7 @@ use std::str::FromStr;
 use serde::{Deserialize, Serialize};
 
-use crate::error::{Error, UserError};
+use crate::error::{is_reserved_keyword, Error, UserError};
 
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
 pub enum Criterion {
@@ -50,18 +50,20 @@ impl FromStr for Criterion {
             "sort" => Ok(Criterion::Sort),
             "exactness" => Ok(Criterion::Exactness),
             text => match AscDesc::from_str(text) {
-                Ok(AscDesc::Asc(field)) => Ok(Criterion::Asc(field)),
-                Ok(AscDesc::Desc(field)) => Ok(Criterion::Desc(field)),
+                Ok(AscDesc::Asc(Member::Field(field))) if is_reserved_keyword(&field) => {
+                    Err(UserError::InvalidReservedRankingRuleName { name: text.to_string() })?
+                }
+                Ok(AscDesc::Asc(Member::Field(field))) => Ok(Criterion::Asc(field)),
+                Ok(AscDesc::Desc(Member::Field(field))) => Ok(Criterion::Desc(field)),
+                Ok(AscDesc::Asc(Member::Geo(_))) | Ok(AscDesc::Desc(Member::Geo(_))) => {
+                    Err(UserError::InvalidRankingRuleName { name: text.to_string() })?
+                }
                 Err(UserError::InvalidAscDescSyntax { name }) => {
                     Err(UserError::InvalidCriterionName { name }.into())
                 }
                 Err(error) => {
                     Err(UserError::InvalidCriterionName { name: error.to_string() }.into())
                 }
-                Ok(AscDesc::Asc(Member::Geo(_))) | Ok(AscDesc::Desc(Member::Geo(_))) => {
-                    Err(UserError::AttributeLimitReached)? // TODO: TAMO: use a real error
-                }
-                Err(error) => Err(error.into()),
             },
         }
     }
@@ -81,12 +83,12 @@ impl FromStr for Member {
             let point =
                 text.strip_prefix("_geoPoint(")
                     .and_then(|point| point.strip_suffix(")"))
-                    .ok_or_else(|| UserError::InvalidCriterionName { name: text.to_string() })?;
+                    .ok_or_else(|| UserError::InvalidRankingRuleName { name: text.to_string() })?;
             let point = point
                 .split(',')
                 .map(|el| el.trim().parse())
                 .collect::<Result<Vec<f64>, _>>()
-                .map_err(|_| UserError::InvalidCriterionName { name: text.to_string() })?;
+                .map_err(|_| UserError::InvalidRankingRuleName { name: text.to_string() })?;
             Ok(Member::Geo([point[0], point[1]]))
         } else {
             Ok(Member::Field(text.to_string()))
@@ -147,7 +149,7 @@ impl FromStr for AscDesc {
         match text.rsplit_once(':') {
             Some((left, "asc")) => Ok(AscDesc::Asc(left.parse()?)),
             Some((left, "desc")) => Ok(AscDesc::Desc(left.parse()?)),
-            _ => Err(UserError::InvalidCriterionName { name: text.to_string() }),
+            _ => Err(UserError::InvalidRankingRuleName { name: text.to_string() }),
         }
     }
 }
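
For context, a minimal illustration of the new parsing behavior. Only the `FromStr` logic shown above comes from the commit; the assertions below are an illustrative sketch, not part of the change:

    // Sketch only: exercising Criterion::from_str with the reserved-keyword guard.
    use std::str::FromStr;

    // A plain field still parses into a regular Asc ranking rule.
    assert!(matches!(Criterion::from_str("price:asc"), Ok(Criterion::Asc(_))));

    // "_geo" is a reserved keyword, so the guard now rejects it with
    // UserError::InvalidReservedRankingRuleName instead of accepting it.
    assert!(Criterion::from_str("_geo:asc").is_err());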

View File

@@ -12,6 +12,12 @@ use crate::{DocumentId, FieldId};
 
 pub type Object = Map<String, Value>;
 
+const RESERVED_KEYWORD: &[&'static str] = &["_geo", "_geoDistance"];
+
+pub fn is_reserved_keyword(keyword: &str) -> bool {
+    RESERVED_KEYWORD.contains(&keyword)
+}
+
 #[derive(Debug)]
 pub enum Error {
     InternalError(InternalError),
@@ -60,6 +66,9 @@ pub enum UserError {
     InvalidFilter(pest::error::Error<ParserRule>),
     InvalidFilterAttribute(pest::error::Error<ParserRule>),
     InvalidSortName { name: String },
+    InvalidGeoField { document_id: Value, object: Value },
+    InvalidRankingRuleName { name: String },
+    InvalidReservedRankingRuleName { name: String },
     InvalidSortableAttribute { field: String, valid_fields: HashSet<String> },
     SortRankingRuleMissing,
     InvalidStoreFile,
@@ -222,6 +231,15 @@ impl fmt::Display for UserError {
                 write!(f, "invalid asc/desc syntax for {}", name)
             }
             Self::InvalidCriterionName { name } => write!(f, "invalid criterion {}", name),
+            Self::InvalidGeoField { document_id, object } => write!(
+                f,
+                "the document with the id: {} contains an invalid _geo field: {}",
+                document_id, object
+            ),
+            Self::InvalidRankingRuleName { name } => write!(f, "invalid criterion {}", name),
+            Self::InvalidReservedRankingRuleName { name } => {
+                write!(f, "{} is a reserved keyword and thus can't be used as a ranking rule", name)
+            }
             Self::InvalidDocumentId { document_id } => {
                 let json = serde_json::to_string(document_id).unwrap();
                 write!(
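
A quick, illustrative look at what the new helper accepts, assuming only the two keywords added above. Note that the check is an exact match, so a `_geoPoint(...)` expression is not itself reserved; these asserts are a sketch, not part of the commit:

    // Sketch only: expected behavior of is_reserved_keyword.
    assert!(is_reserved_keyword("_geo"));
    assert!(is_reserved_keyword("_geoDistance"));
    assert!(!is_reserved_keyword("price"));
    assert!(!is_reserved_keyword("_geoPoint(48.8, 2.3)")); // exact match only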

View File

@@ -2,11 +2,10 @@ use std::fs::File;
 use std::io;
 
 use concat_arrays::concat_arrays;
-use log::warn;
 use serde_json::Value;
 
 use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
-use crate::{FieldId, InternalError, Result};
+use crate::{FieldId, InternalError, Result, UserError};
 
 /// Extracts the geographical coordinates contained in each document under the `_geo` field.
 ///
@@ -14,6 +13,7 @@ use crate::{FieldId, InternalError, Result};
 pub fn extract_geo_points<R: io::Read>(
     mut obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
+    primary_key_id: FieldId,
     geo_field_id: FieldId,
 ) -> Result<grenad::Reader<File>> {
     let mut writer = tempfile::tempfile().and_then(|file| {
@@ -33,9 +33,10 @@ pub fn extract_geo_points<R: io::Read>(
             let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
             writer.insert(docid_bytes, bytes)?;
         } else {
-            // TAMO: improve the warn
-            warn!("Malformed `_geo` field");
-            continue;
+            let primary_key = obkv.get(primary_key_id).unwrap(); // TODO: TAMO: is this valid?
+            let primary_key =
+                serde_json::from_slice(primary_key).map_err(InternalError::SerdeJson)?;
+            Err(UserError::InvalidGeoField { document_id: primary_key, object: point })?
        }
    }
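
With this change, a malformed `_geo` field surfaces a `UserError::InvalidGeoField` to the caller instead of only logging a warning. A rough sketch of how that error renders, based on the Display impl added in error.rs above; the document id and object below are placeholder values, not from the commit:

    // Sketch only: formatting the new error with placeholder values.
    let error = UserError::InvalidGeoField {
        document_id: serde_json::json!("42"),
        object: serde_json::json!({ "lat": 12.0 }),
    };
    // renders roughly as: the document with the id: "42" contains an invalid _geo field: {"lat":12.0}
    println!("{}", error);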

View File

@@ -39,6 +39,7 @@ pub(crate) fn data_from_obkv_documents(
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     searchable_fields: Option<HashSet<FieldId>>,
     faceted_fields: HashSet<FieldId>,
+    primary_key_id: FieldId,
     geo_field_id: Option<FieldId>,
     stop_words: Option<fst::Set<&[u8]>>,
 ) -> Result<()> {
@@ -51,6 +52,7 @@ pub(crate) fn data_from_obkv_documents(
                 lmdb_writer_sx.clone(),
                 &searchable_fields,
                 &faceted_fields,
+                primary_key_id,
                 geo_field_id,
                 &stop_words,
             )
@@ -172,6 +174,7 @@ fn extract_documents_data(
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     searchable_fields: &Option<HashSet<FieldId>>,
     faceted_fields: &HashSet<FieldId>,
+    primary_key_id: FieldId,
     geo_field_id: Option<FieldId>,
     stop_words: &Option<fst::Set<&[u8]>>,
 ) -> Result<(
@@ -186,7 +189,12 @@ fn extract_documents_data(
         let documents_chunk_cloned = documents_chunk.clone();
         let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
         rayon::spawn(move || {
-            let _ = match extract_geo_points(documents_chunk_cloned, indexer, geo_field_id) {
+            let _ = match extract_geo_points(
+                documents_chunk_cloned,
+                indexer,
+                primary_key_id,
+                geo_field_id,
+            ) {
                 Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
                 Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
             };

View File

@@ -228,6 +228,9 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
             Receiver<Result<TypedChunk>>,
         ) = crossbeam_channel::unbounded();
 
+        // get the primary key field id
+        let primary_key_id = fields_ids_map.id(&primary_key).unwrap(); // TODO: TAMO: is this unwrap 100% valid?
+
         // get searchable fields for word databases
         let searchable_fields =
             self.index.searchable_fields_ids(self.wtxn)?.map(HashSet::from_iter);
@@ -269,6 +272,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
                 lmdb_writer_sx.clone(),
                 searchable_fields,
                 faceted_fields,
+                primary_key_id,
                 geo_field_id,
                 stop_words,
             )

View File

@@ -180,7 +180,7 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::GeoPoints(mut geo_points) => {
-            // TODO: TAMO: we should create the rtree with the `RTree::bulk_load` function
+            // TODO: we should create the rtree with the `RTree::bulk_load` function
             let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
             let mut doc_ids = index.geo_faceted_documents_ids(wtxn)?;