improve the error handling in general and introduce the concept of reserved keywords

Tamo 2021-09-02 15:57:40 +02:00
parent e8c093c1d0
commit bd4c248292
6 changed files with 50 additions and 17 deletions

View file

@@ -2,11 +2,10 @@ use std::fs::File;
 use std::io;
 use concat_arrays::concat_arrays;
-use log::warn;
 use serde_json::Value;
 use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
-use crate::{FieldId, InternalError, Result};
+use crate::{FieldId, InternalError, Result, UserError};
 /// Extracts the geographical coordinates contained in each document under the `_geo` field.
 ///
@@ -14,6 +13,7 @@ use crate::{FieldId, InternalError, Result};
 pub fn extract_geo_points<R: io::Read>(
     mut obkv_documents: grenad::Reader<R>,
     indexer: GrenadParameters,
+    primary_key_id: FieldId,
     geo_field_id: FieldId,
 ) -> Result<grenad::Reader<File>> {
     let mut writer = tempfile::tempfile().and_then(|file| {
@@ -33,9 +33,10 @@ pub fn extract_geo_points<R: io::Read>(
             let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
             writer.insert(docid_bytes, bytes)?;
         } else {
-            // TAMO: improve the warn
-            warn!("Malformed `_geo` field");
-            continue;
+            let primary_key = obkv.get(primary_key_id).unwrap(); // TODO: TAMO: is this valid?
+            let primary_key =
+                serde_json::from_slice(primary_key).map_err(InternalError::SerdeJson)?;
+            Err(UserError::InvalidGeoField { document_id: primary_key, object: point })?
         }
     }
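Instead of logging and skipping a malformed `_geo` field, the extractor now reads the document's primary key and bubbles up a `UserError::InvalidGeoField` carrying both the offending document id and the raw `_geo` value. The variant's definition is not part of this diff; below is a minimal sketch of what it could look like, matching only the field names used in the hunk above (the `Display` message is an assumption, not milli's actual wording).

// Sketch only: the real `UserError` lives in milli's error module and has many
// more variants; only the field names are taken from the hunk above.
use serde_json::Value;

#[derive(Debug)]
pub enum UserError {
    InvalidGeoField {
        // primary key of the document carrying the malformed `_geo` field
        document_id: Value,
        // the `_geo` value exactly as it appeared in the document
        object: Value,
    },
}

impl std::fmt::Display for UserError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            UserError::InvalidGeoField { document_id, object } => write!(
                f,
                "the document with the id: {} contains an invalid `_geo` field: {}",
                document_id, object
            ),
        }
    }
}

impl std::error::Error for UserError {}

The `Err(...)?` in the hunk converts the value through `From`, so as long as the crate-wide error type implements `From<UserError>`, returning the error is just a matter of constructing the variant.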

View file

@@ -39,6 +39,7 @@ pub(crate) fn data_from_obkv_documents(
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     searchable_fields: Option<HashSet<FieldId>>,
     faceted_fields: HashSet<FieldId>,
+    primary_key_id: FieldId,
     geo_field_id: Option<FieldId>,
     stop_words: Option<fst::Set<&[u8]>>,
 ) -> Result<()> {
@@ -51,6 +52,7 @@
                 lmdb_writer_sx.clone(),
                 &searchable_fields,
                 &faceted_fields,
+                primary_key_id,
                 geo_field_id,
                 &stop_words,
             )
@@ -172,6 +174,7 @@ fn extract_documents_data(
     lmdb_writer_sx: Sender<Result<TypedChunk>>,
     searchable_fields: &Option<HashSet<FieldId>>,
     faceted_fields: &HashSet<FieldId>,
+    primary_key_id: FieldId,
     geo_field_id: Option<FieldId>,
     stop_words: &Option<fst::Set<&[u8]>>,
 ) -> Result<(
@@ -186,7 +189,12 @@ fn extract_documents_data(
         let documents_chunk_cloned = documents_chunk.clone();
         let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
         rayon::spawn(move || {
-            let _ = match extract_geo_points(documents_chunk_cloned, indexer, geo_field_id) {
+            let _ = match extract_geo_points(
+                documents_chunk_cloned,
+                indexer,
+                primary_key_id,
+                geo_field_id,
+            ) {
                 Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
                 Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
             };
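With the extra `primary_key_id` argument, the geo extraction still runs on its own rayon task and forwards either the extracted chunk or the error through the crossbeam channel, so an invalid `_geo` field now reaches the indexing caller instead of being dropped. Below is a stripped-down sketch of that forwarding pattern, with placeholder types standing in for milli's `TypedChunk` and error.

use crossbeam_channel::{unbounded, Sender};

// Placeholder stand-ins for milli's real chunk and error types.
#[derive(Debug)]
struct GeoChunk;
#[derive(Debug)]
struct Error(String);

fn extract(fail: bool) -> Result<GeoChunk, Error> {
    if fail { Err(Error("invalid `_geo` field".into())) } else { Ok(GeoChunk) }
}

fn spawn_extraction(sender: Sender<Result<GeoChunk, Error>>, fail: bool) {
    // Same shape as the hunk above: run the extraction on the thread pool and
    // push the Ok/Err outcome into the channel; the send result itself is ignored.
    rayon::spawn(move || {
        let _ = match extract(fail) {
            Ok(chunk) => sender.send(Ok(chunk)),
            Err(error) => sender.send(Err(error)),
        };
    });
}

fn main() {
    let (sender, receiver) = unbounded();
    spawn_extraction(sender, true);
    // The receiving side sees the error as a regular message and can abort indexing.
    println!("{:?}", receiver.recv().unwrap());
}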

View file

@@ -228,6 +228,9 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
             Receiver<Result<TypedChunk>>,
         ) = crossbeam_channel::unbounded();
+        // get the primary key field id
+        let primary_key_id = fields_ids_map.id(&primary_key).unwrap(); // TODO: TAMO: is this unwrap 100% valid?
+
         // get searchable fields for word databases
         let searchable_fields =
             self.index.searchable_fields_ids(self.wtxn)?.map(HashSet::from_iter);
@@ -269,6 +272,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
                 lmdb_writer_sx.clone(),
                 searchable_fields,
                 faceted_fields,
+                primary_key_id,
                 geo_field_id,
                 stop_words,
             )
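The `unwrap()` on the primary-key lookup is what the TODO questions: the primary key should already be registered in `fields_ids_map` by the time extraction starts, but a missing entry would panic the whole indexer. Below is a hedged sketch of a non-panicking alternative, using a hypothetical map and error variant rather than milli's actual `FieldsIdsMap` API.

use std::collections::HashMap;

type FieldId = u16;

// Hypothetical error variant for illustration; milli's internal error enum differs.
#[derive(Debug)]
enum InternalError {
    FieldIdMapMissingEntry { field_name: String },
}

// Resolve the primary key's field id, turning a missing entry into a
// recoverable internal error instead of panicking.
fn primary_key_id(
    fields_ids_map: &HashMap<String, FieldId>,
    primary_key: &str,
) -> Result<FieldId, InternalError> {
    fields_ids_map.get(primary_key).copied().ok_or_else(|| {
        InternalError::FieldIdMapMissingEntry { field_name: primary_key.to_string() }
    })
}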

View file

@@ -180,7 +180,7 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::GeoPoints(mut geo_points) => {
-            // TODO: TAMO: we should create the rtree with the `RTree::bulk_load` function
+            // TODO: we should create the rtree with the `RTree::bulk_load` function
             let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
             let mut doc_ids = index.geo_faceted_documents_ids(wtxn)?;
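The remaining TODO refers to `rstar`'s `RTree::bulk_load`, which builds the tree in a single pass instead of inserting each point into an existing tree. A minimal illustration using plain `[f64; 2]` points rather than milli's own geo point type:

use rstar::RTree;

fn main() {
    // Collect the points first, then build the tree in one bulk-load pass;
    // this is usually faster than repeated `insert` calls on an empty tree.
    let points: Vec<[f64; 2]> = vec![[48.86, 2.35], [45.76, 4.83], [43.30, 5.37]];
    let rtree = RTree::bulk_load(points);

    // Queries work the same regardless of how the tree was built.
    let nearest = rtree.nearest_neighbor(&[48.0, 2.0]);
    println!("{:?}", nearest);
}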