From 2828b5fa191ebf2091c3ed2ce1c5c4877d4551c2 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 19 May 2020 14:37:13 +0200 Subject: [PATCH] Move the helper function to their own module --- .../src/update/documents_addition.rs | 126 +----------------- meilisearch-core/src/update/helpers.rs | 122 +++++++++++++++++ meilisearch-core/src/update/mod.rs | 16 +-- meilisearch-http/src/routes/document.rs | 22 ++- 4 files changed, 151 insertions(+), 135 deletions(-) create mode 100644 meilisearch-core/src/update/helpers.rs diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index ec99858e2..7f3ae178b 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -1,25 +1,20 @@ use std::collections::HashMap; -use std::fmt::Write as _; -use std::hash::{Hash, Hasher}; use fst::{set::OpBuilder, SetBuilder}; use indexmap::IndexMap; use sdset::{duo::Union, SetOperation}; use serde::Deserialize; use serde_json::Value; -use siphasher::sip::SipHasher; - -use meilisearch_types::DocumentId; -use meilisearch_schema::IndexedPos; use crate::database::{MainT, UpdateT}; use crate::database::{UpdateEvent, UpdateEventsEmitter}; use crate::facets; use crate::raw_indexer::RawIndexer; -use crate::serde::{Deserializer, SerializerError}; +use crate::serde::Deserializer; use crate::store; +use crate::update::helpers::{index_value, value_to_number, extract_document_id}; use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update}; -use crate::{Error, Number, MResult, RankedMap}; +use crate::{Error, MResult, RankedMap}; pub struct DocumentsAddition { updates_store: store::Updates, @@ -111,121 +106,6 @@ pub fn push_documents_addition( Ok(last_update_id) } -// TODO move this helper functions elsewhere -/// Returns the number of words indexed or `None` if the type -fn index_value( - indexer: &mut RawIndexer, - document_id: DocumentId, - indexed_pos: IndexedPos, - value: &Value, -) -> Option -{ - match value { - Value::Null => None, - Value::Bool(boolean) => { - let text = boolean.to_string(); - let number_of_words = indexer.index_text(document_id, indexed_pos, &text); - Some(number_of_words) - }, - Value::Number(number) => { - let text = number.to_string(); - let number_of_words = indexer.index_text(document_id, indexed_pos, &text); - Some(number_of_words) - }, - Value::String(string) => { - let number_of_words = indexer.index_text(document_id, indexed_pos, &string); - Some(number_of_words) - }, - Value::Array(_) => { - let text = value_to_string(value); - let number_of_words = indexer.index_text(document_id, indexed_pos, &text); - Some(number_of_words) - }, - Value::Object(_) => { - let text = value_to_string(value); - let number_of_words = indexer.index_text(document_id, indexed_pos, &text); - Some(number_of_words) - }, - } -} - -// TODO move this helper functions elsewhere -pub fn value_to_string(value: &Value) -> String { - fn internal_value_to_string(string: &mut String, value: &Value) { - match value { - Value::Null => (), - Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); }, - Value::Number(number) => { let _ = write!(string, "{}", &number); }, - Value::String(text) => string.push_str(&text), - Value::Array(array) => { - for value in array { - internal_value_to_string(string, value); - let _ = string.write_str(". "); - } - }, - Value::Object(object) => { - for (key, value) in object { - string.push_str(key); - let _ = string.write_str(". "); - internal_value_to_string(string, value); - let _ = string.write_str(". "); - } - }, - } - } - - let mut string = String::new(); - internal_value_to_string(&mut string, value); - string -} - -// TODO move this helper functions elsewhere -fn value_to_number(value: &Value) -> Option { - use std::str::FromStr; - - match value { - Value::Null => None, - Value::Bool(boolean) => Some(Number::Unsigned(*boolean as u64)), - Value::Number(number) => Number::from_str(&number.to_string()).ok(), // TODO improve that - Value::String(string) => Number::from_str(string).ok(), - Value::Array(_array) => None, - Value::Object(_object) => None, - } -} - -// TODO move this helper functions elsewhere -pub fn compute_document_id(t: H) -> DocumentId { - let mut s = SipHasher::new(); - t.hash(&mut s); - let hash = s.finish(); - DocumentId(hash) -} - -// TODO move this helper functions elsewhere -pub fn extract_document_id(primary_key: &str, document: &IndexMap) -> Result { - - fn validate_document_id(string: &str) -> bool { - string.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') - } - - match document.get(primary_key) { - Some(value) => { - let string = match value { - Value::Number(number) => number.to_string(), - Value::String(string) => string.clone(), - _ => return Err(SerializerError::InvalidDocumentIdFormat), - }; - - if validate_document_id(&string) { - Ok(compute_document_id(string)) - } else { - Err(SerializerError::InvalidDocumentIdFormat) - } - } - None => Err(SerializerError::DocumentIdNotFound), - } -} - pub fn apply_addition<'a, 'b>( writer: &'a mut heed::RwTxn<'b, MainT>, index: &store::Index, diff --git a/meilisearch-core/src/update/helpers.rs b/meilisearch-core/src/update/helpers.rs new file mode 100644 index 000000000..a3be7bb22 --- /dev/null +++ b/meilisearch-core/src/update/helpers.rs @@ -0,0 +1,122 @@ +use std::fmt::Write as _; +use std::hash::{Hash, Hasher}; + +use indexmap::IndexMap; +use meilisearch_schema::IndexedPos; +use meilisearch_types::DocumentId; +use serde_json::Value; +use siphasher::sip::SipHasher; + +use crate::raw_indexer::RawIndexer; +use crate::serde::SerializerError; +use crate::Number; + +/// Returns the number of words indexed or `None` if the type is unindexable. +pub fn index_value( + indexer: &mut RawIndexer, + document_id: DocumentId, + indexed_pos: IndexedPos, + value: &Value, +) -> Option +{ + match value { + Value::Null => None, + Value::Bool(boolean) => { + let text = boolean.to_string(); + let number_of_words = indexer.index_text(document_id, indexed_pos, &text); + Some(number_of_words) + }, + Value::Number(number) => { + let text = number.to_string(); + Some(indexer.index_text(document_id, indexed_pos, &text)) + }, + Value::String(string) => { + Some(indexer.index_text(document_id, indexed_pos, &string)) + }, + Value::Array(_) => { + let text = value_to_string(value); + Some(indexer.index_text(document_id, indexed_pos, &text)) + }, + Value::Object(_) => { + let text = value_to_string(value); + Some(indexer.index_text(document_id, indexed_pos, &text)) + }, + } +} + +/// Transforms the JSON Value type into a String. +pub fn value_to_string(value: &Value) -> String { + fn internal_value_to_string(string: &mut String, value: &Value) { + match value { + Value::Null => (), + Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); }, + Value::Number(number) => { let _ = write!(string, "{}", &number); }, + Value::String(text) => string.push_str(&text), + Value::Array(array) => { + for value in array { + internal_value_to_string(string, value); + let _ = string.write_str(". "); + } + }, + Value::Object(object) => { + for (key, value) in object { + string.push_str(key); + let _ = string.write_str(". "); + internal_value_to_string(string, value); + let _ = string.write_str(". "); + } + }, + } + } + + let mut string = String::new(); + internal_value_to_string(&mut string, value); + string +} + +/// Transforms the JSON Value type into a Number. +pub fn value_to_number(value: &Value) -> Option { + use std::str::FromStr; + + match value { + Value::Null => None, + Value::Bool(boolean) => Some(Number::Unsigned(*boolean as u64)), + Value::Number(number) => Number::from_str(&number.to_string()).ok(), // TODO improve that + Value::String(string) => Number::from_str(string).ok(), + Value::Array(_array) => None, + Value::Object(_object) => None, + } +} + +/// Compute the hash of the given type, this is the way we produce documents ids. +pub fn compute_document_id(t: H) -> DocumentId { + let mut s = SipHasher::new(); + t.hash(&mut s); + let hash = s.finish(); + DocumentId(hash) +} + +/// Validates a string representation to be a correct document id. +pub fn validate_document_id(string: &str) -> bool { + string.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') +} + +/// Extracts and validates the document id of a document. +pub fn extract_document_id(primary_key: &str, document: &IndexMap) -> Result { + match document.get(primary_key) { + Some(value) => { + let string = match value { + Value::Number(number) => number.to_string(), + Value::String(string) => string.clone(), + _ => return Err(SerializerError::InvalidDocumentIdFormat), + }; + + if validate_document_id(&string) { + Ok(compute_document_id(string)) + } else { + Err(SerializerError::InvalidDocumentIdFormat) + } + } + None => Err(SerializerError::DocumentIdNotFound), + } +} diff --git a/meilisearch-core/src/update/mod.rs b/meilisearch-core/src/update/mod.rs index 6599c3f99..124e6450a 100644 --- a/meilisearch-core/src/update/mod.rs +++ b/meilisearch-core/src/update/mod.rs @@ -3,14 +3,13 @@ mod customs_update; mod documents_addition; mod documents_deletion; mod settings_update; +mod helpers; pub use self::clear_all::{apply_clear_all, push_clear_all}; pub use self::customs_update::{apply_customs_update, push_customs_update}; -pub use self::documents_addition::{ - apply_documents_addition, apply_documents_partial_addition, DocumentsAddition, - value_to_string, compute_document_id, extract_document_id, -}; +pub use self::documents_addition::{apply_documents_addition, apply_documents_partial_addition, DocumentsAddition}; pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion}; +pub use self::helpers::{index_value, value_to_string, value_to_number, compute_document_id, extract_document_id, validate_document_id}; pub use self::settings_update::{apply_settings_update, push_settings_update}; use std::cmp; @@ -23,6 +22,7 @@ use indexmap::IndexMap; use log::debug; use sdset::Set; use serde::{Deserialize, Serialize}; +use serde_json::Value; use crate::{store, DocumentId, MResult}; use crate::database::{MainT, UpdateT}; @@ -49,14 +49,14 @@ impl Update { } } - fn documents_addition(data: Vec>) -> Update { + fn documents_addition(data: Vec>) -> Update { Update { data: UpdateData::DocumentsAddition(data), enqueued_at: Utc::now(), } } - fn documents_partial(data: Vec>) -> Update { + fn documents_partial(data: Vec>) -> Update { Update { data: UpdateData::DocumentsPartial(data), enqueued_at: Utc::now(), @@ -82,8 +82,8 @@ impl Update { pub enum UpdateData { ClearAll, Customs(Vec), - DocumentsAddition(Vec>), - DocumentsPartial(Vec>), + DocumentsAddition(Vec>), + DocumentsPartial(Vec>), DocumentsDeletion(Vec), Settings(SettingsUpdate) } diff --git a/meilisearch-http/src/routes/document.rs b/meilisearch-http/src/routes/document.rs index 4ef4027dc..22586ecb6 100644 --- a/meilisearch-http/src/routes/document.rs +++ b/meilisearch-http/src/routes/document.rs @@ -6,6 +6,9 @@ use indexmap::IndexMap; use serde::Deserialize; use serde_json::Value; +use meilisearch_core::{Error, serde::SerializerError}; +use meilisearch_core::update; + use crate::error::ResponseError; use crate::helpers::Authentication; use crate::routes::{IndexParam, IndexUpdateResponse}; @@ -42,8 +45,11 @@ async fn get_document( .open_index(&path.index_uid) .ok_or(ResponseError::index_not_found(&path.index_uid))?; - let document_id = meilisearch_core::update::compute_document_id(&path.document_id); + if !update::validate_document_id(&path.document_id) { + return Err(Error::Serializer(SerializerError::InvalidDocumentIdFormat).into()) + } + let document_id = update::compute_document_id(&path.document_id); let reader = data.db.main_read_txn()?; let response: Document = index @@ -65,7 +71,12 @@ async fn delete_document( .db .open_index(&path.index_uid) .ok_or(ResponseError::index_not_found(&path.index_uid))?; - let document_id = meilisearch_core::update::compute_document_id(&path.document_id); + + if !update::validate_document_id(&path.document_id) { + return Err(Error::Serializer(SerializerError::InvalidDocumentIdFormat).into()) + } + + let document_id = update::compute_document_id(&path.document_id); let mut update_writer = data.db.update_write_txn()?; @@ -237,8 +248,11 @@ async fn delete_documents( let mut documents_deletion = index.documents_deletion(); for document_id in body.into_inner() { - let document_id_string = meilisearch_core::update::value_to_string(&document_id); - let document_id = meilisearch_core::update::compute_document_id(document_id_string); + let document_id_string = update::value_to_string(&document_id); + if !update::validate_document_id(&document_id_string) { + return Err(Error::Serializer(SerializerError::InvalidDocumentIdFormat).into()) + } + let document_id = update::compute_document_id(document_id_string); documents_deletion.delete_document_by_id(document_id); }