mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
Move the helper functions to their own module
This commit is contained in:
parent
25b3c9a057
commit
2828b5fa19
@ -1,25 +1,20 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fmt::Write as _;
|
|
||||||
use std::hash::{Hash, Hasher};
|
|
||||||
|
|
||||||
use fst::{set::OpBuilder, SetBuilder};
|
use fst::{set::OpBuilder, SetBuilder};
|
||||||
use indexmap::IndexMap;
|
use indexmap::IndexMap;
|
||||||
use sdset::{duo::Union, SetOperation};
|
use sdset::{duo::Union, SetOperation};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use siphasher::sip::SipHasher;
|
|
||||||
|
|
||||||
use meilisearch_types::DocumentId;
|
|
||||||
use meilisearch_schema::IndexedPos;
|
|
||||||
|
|
||||||
use crate::database::{MainT, UpdateT};
|
use crate::database::{MainT, UpdateT};
|
||||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||||
use crate::facets;
|
use crate::facets;
|
||||||
use crate::raw_indexer::RawIndexer;
|
use crate::raw_indexer::RawIndexer;
|
||||||
use crate::serde::{Deserializer, SerializerError};
|
use crate::serde::Deserializer;
|
||||||
use crate::store;
|
use crate::store;
|
||||||
|
use crate::update::helpers::{index_value, value_to_number, extract_document_id};
|
||||||
use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
|
use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
|
||||||
use crate::{Error, Number, MResult, RankedMap};
|
use crate::{Error, MResult, RankedMap};
|
||||||
|
|
||||||
pub struct DocumentsAddition<D> {
|
pub struct DocumentsAddition<D> {
|
||||||
updates_store: store::Updates,
|
updates_store: store::Updates,
|
||||||
@ -111,121 +106,6 @@ pub fn push_documents_addition<D: serde::Serialize>(
|
|||||||
Ok(last_update_id)
|
Ok(last_update_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO move these helper functions elsewhere
|
|
||||||
/// Returns the number of words indexed or `None` if the type is unindexable.
|
|
||||||
fn index_value(
|
|
||||||
indexer: &mut RawIndexer,
|
|
||||||
document_id: DocumentId,
|
|
||||||
indexed_pos: IndexedPos,
|
|
||||||
value: &Value,
|
|
||||||
) -> Option<usize>
|
|
||||||
{
|
|
||||||
match value {
|
|
||||||
Value::Null => None,
|
|
||||||
Value::Bool(boolean) => {
|
|
||||||
let text = boolean.to_string();
|
|
||||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
|
||||||
Some(number_of_words)
|
|
||||||
},
|
|
||||||
Value::Number(number) => {
|
|
||||||
let text = number.to_string();
|
|
||||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
|
||||||
Some(number_of_words)
|
|
||||||
},
|
|
||||||
Value::String(string) => {
|
|
||||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &string);
|
|
||||||
Some(number_of_words)
|
|
||||||
},
|
|
||||||
Value::Array(_) => {
|
|
||||||
let text = value_to_string(value);
|
|
||||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
|
||||||
Some(number_of_words)
|
|
||||||
},
|
|
||||||
Value::Object(_) => {
|
|
||||||
let text = value_to_string(value);
|
|
||||||
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
|
||||||
Some(number_of_words)
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO move these helper functions elsewhere
|
|
||||||
pub fn value_to_string(value: &Value) -> String {
|
|
||||||
fn internal_value_to_string(string: &mut String, value: &Value) {
|
|
||||||
match value {
|
|
||||||
Value::Null => (),
|
|
||||||
Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); },
|
|
||||||
Value::Number(number) => { let _ = write!(string, "{}", &number); },
|
|
||||||
Value::String(text) => string.push_str(&text),
|
|
||||||
Value::Array(array) => {
|
|
||||||
for value in array {
|
|
||||||
internal_value_to_string(string, value);
|
|
||||||
let _ = string.write_str(". ");
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Value::Object(object) => {
|
|
||||||
for (key, value) in object {
|
|
||||||
string.push_str(key);
|
|
||||||
let _ = string.write_str(". ");
|
|
||||||
internal_value_to_string(string, value);
|
|
||||||
let _ = string.write_str(". ");
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut string = String::new();
|
|
||||||
internal_value_to_string(&mut string, value);
|
|
||||||
string
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO move these helper functions elsewhere
|
|
||||||
fn value_to_number(value: &Value) -> Option<Number> {
|
|
||||||
use std::str::FromStr;
|
|
||||||
|
|
||||||
match value {
|
|
||||||
Value::Null => None,
|
|
||||||
Value::Bool(boolean) => Some(Number::Unsigned(*boolean as u64)),
|
|
||||||
Value::Number(number) => Number::from_str(&number.to_string()).ok(), // TODO improve that
|
|
||||||
Value::String(string) => Number::from_str(string).ok(),
|
|
||||||
Value::Array(_array) => None,
|
|
||||||
Value::Object(_object) => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO move these helper functions elsewhere
|
|
||||||
pub fn compute_document_id<H: Hash>(t: H) -> DocumentId {
|
|
||||||
let mut s = SipHasher::new();
|
|
||||||
t.hash(&mut s);
|
|
||||||
let hash = s.finish();
|
|
||||||
DocumentId(hash)
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO move these helper functions elsewhere
|
|
||||||
pub fn extract_document_id(primary_key: &str, document: &IndexMap<String, Value>) -> Result<DocumentId, SerializerError> {
|
|
||||||
|
|
||||||
fn validate_document_id(string: &str) -> bool {
|
|
||||||
string.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
|
|
||||||
}
|
|
||||||
|
|
||||||
match document.get(primary_key) {
|
|
||||||
Some(value) => {
|
|
||||||
let string = match value {
|
|
||||||
Value::Number(number) => number.to_string(),
|
|
||||||
Value::String(string) => string.clone(),
|
|
||||||
_ => return Err(SerializerError::InvalidDocumentIdFormat),
|
|
||||||
};
|
|
||||||
|
|
||||||
if validate_document_id(&string) {
|
|
||||||
Ok(compute_document_id(string))
|
|
||||||
} else {
|
|
||||||
Err(SerializerError::InvalidDocumentIdFormat)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => Err(SerializerError::DocumentIdNotFound),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn apply_addition<'a, 'b>(
|
pub fn apply_addition<'a, 'b>(
|
||||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||||
index: &store::Index,
|
index: &store::Index,
|
||||||
|
122
meilisearch-core/src/update/helpers.rs
Normal file
122
meilisearch-core/src/update/helpers.rs
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
use std::fmt::Write as _;
|
||||||
|
use std::hash::{Hash, Hasher};
|
||||||
|
|
||||||
|
use indexmap::IndexMap;
|
||||||
|
use meilisearch_schema::IndexedPos;
|
||||||
|
use meilisearch_types::DocumentId;
|
||||||
|
use serde_json::Value;
|
||||||
|
use siphasher::sip::SipHasher;
|
||||||
|
|
||||||
|
use crate::raw_indexer::RawIndexer;
|
||||||
|
use crate::serde::SerializerError;
|
||||||
|
use crate::Number;
|
||||||
|
|
||||||
|
/// Returns the number of words indexed or `None` if the type is unindexable.
|
||||||
|
pub fn index_value(
|
||||||
|
indexer: &mut RawIndexer,
|
||||||
|
document_id: DocumentId,
|
||||||
|
indexed_pos: IndexedPos,
|
||||||
|
value: &Value,
|
||||||
|
) -> Option<usize>
|
||||||
|
{
|
||||||
|
match value {
|
||||||
|
Value::Null => None,
|
||||||
|
Value::Bool(boolean) => {
|
||||||
|
let text = boolean.to_string();
|
||||||
|
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
|
||||||
|
Some(number_of_words)
|
||||||
|
},
|
||||||
|
Value::Number(number) => {
|
||||||
|
let text = number.to_string();
|
||||||
|
Some(indexer.index_text(document_id, indexed_pos, &text))
|
||||||
|
},
|
||||||
|
Value::String(string) => {
|
||||||
|
Some(indexer.index_text(document_id, indexed_pos, &string))
|
||||||
|
},
|
||||||
|
Value::Array(_) => {
|
||||||
|
let text = value_to_string(value);
|
||||||
|
Some(indexer.index_text(document_id, indexed_pos, &text))
|
||||||
|
},
|
||||||
|
Value::Object(_) => {
|
||||||
|
let text = value_to_string(value);
|
||||||
|
Some(indexer.index_text(document_id, indexed_pos, &text))
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Transforms the JSON Value type into a String.
|
||||||
|
pub fn value_to_string(value: &Value) -> String {
|
||||||
|
fn internal_value_to_string(string: &mut String, value: &Value) {
|
||||||
|
match value {
|
||||||
|
Value::Null => (),
|
||||||
|
Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); },
|
||||||
|
Value::Number(number) => { let _ = write!(string, "{}", &number); },
|
||||||
|
Value::String(text) => string.push_str(&text),
|
||||||
|
Value::Array(array) => {
|
||||||
|
for value in array {
|
||||||
|
internal_value_to_string(string, value);
|
||||||
|
let _ = string.write_str(". ");
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Value::Object(object) => {
|
||||||
|
for (key, value) in object {
|
||||||
|
string.push_str(key);
|
||||||
|
let _ = string.write_str(". ");
|
||||||
|
internal_value_to_string(string, value);
|
||||||
|
let _ = string.write_str(". ");
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut string = String::new();
|
||||||
|
internal_value_to_string(&mut string, value);
|
||||||
|
string
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Transforms the JSON Value type into a Number.
|
||||||
|
pub fn value_to_number(value: &Value) -> Option<Number> {
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
match value {
|
||||||
|
Value::Null => None,
|
||||||
|
Value::Bool(boolean) => Some(Number::Unsigned(*boolean as u64)),
|
||||||
|
Value::Number(number) => Number::from_str(&number.to_string()).ok(), // TODO improve that
|
||||||
|
Value::String(string) => Number::from_str(string).ok(),
|
||||||
|
Value::Array(_array) => None,
|
||||||
|
Value::Object(_object) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute the hash of the given type, this is the way we produce documents ids.
|
||||||
|
pub fn compute_document_id<H: Hash>(t: H) -> DocumentId {
|
||||||
|
let mut s = SipHasher::new();
|
||||||
|
t.hash(&mut s);
|
||||||
|
let hash = s.finish();
|
||||||
|
DocumentId(hash)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Validates a string representation to be a correct document id.
///
/// A valid id is non-empty and made only of ASCII alphanumeric characters,
/// hyphens (`-`) and underscores (`_`).
///
/// The emptiness check is explicit because `Iterator::all` returns `true` on
/// an empty iterator, which would otherwise accept `""` as a valid id.
pub fn validate_document_id(string: &str) -> bool {
    !string.is_empty()
        && string.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
}
|
||||||
|
|
||||||
|
/// Extracts and validates the document id of a document.
|
||||||
|
pub fn extract_document_id(primary_key: &str, document: &IndexMap<String, Value>) -> Result<DocumentId, SerializerError> {
|
||||||
|
match document.get(primary_key) {
|
||||||
|
Some(value) => {
|
||||||
|
let string = match value {
|
||||||
|
Value::Number(number) => number.to_string(),
|
||||||
|
Value::String(string) => string.clone(),
|
||||||
|
_ => return Err(SerializerError::InvalidDocumentIdFormat),
|
||||||
|
};
|
||||||
|
|
||||||
|
if validate_document_id(&string) {
|
||||||
|
Ok(compute_document_id(string))
|
||||||
|
} else {
|
||||||
|
Err(SerializerError::InvalidDocumentIdFormat)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => Err(SerializerError::DocumentIdNotFound),
|
||||||
|
}
|
||||||
|
}
|
@ -3,14 +3,13 @@ mod customs_update;
|
|||||||
mod documents_addition;
|
mod documents_addition;
|
||||||
mod documents_deletion;
|
mod documents_deletion;
|
||||||
mod settings_update;
|
mod settings_update;
|
||||||
|
mod helpers;
|
||||||
|
|
||||||
pub use self::clear_all::{apply_clear_all, push_clear_all};
|
pub use self::clear_all::{apply_clear_all, push_clear_all};
|
||||||
pub use self::customs_update::{apply_customs_update, push_customs_update};
|
pub use self::customs_update::{apply_customs_update, push_customs_update};
|
||||||
pub use self::documents_addition::{
|
pub use self::documents_addition::{apply_documents_addition, apply_documents_partial_addition, DocumentsAddition};
|
||||||
apply_documents_addition, apply_documents_partial_addition, DocumentsAddition,
|
|
||||||
value_to_string, compute_document_id, extract_document_id,
|
|
||||||
};
|
|
||||||
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
||||||
|
pub use self::helpers::{index_value, value_to_string, value_to_number, compute_document_id, extract_document_id, validate_document_id};
|
||||||
pub use self::settings_update::{apply_settings_update, push_settings_update};
|
pub use self::settings_update::{apply_settings_update, push_settings_update};
|
||||||
|
|
||||||
use std::cmp;
|
use std::cmp;
|
||||||
@ -23,6 +22,7 @@ use indexmap::IndexMap;
|
|||||||
use log::debug;
|
use log::debug;
|
||||||
use sdset::Set;
|
use sdset::Set;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json::Value;
|
||||||
|
|
||||||
use crate::{store, DocumentId, MResult};
|
use crate::{store, DocumentId, MResult};
|
||||||
use crate::database::{MainT, UpdateT};
|
use crate::database::{MainT, UpdateT};
|
||||||
@ -49,14 +49,14 @@ impl Update {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn documents_addition(data: Vec<IndexMap<String, serde_json::Value>>) -> Update {
|
fn documents_addition(data: Vec<IndexMap<String, Value>>) -> Update {
|
||||||
Update {
|
Update {
|
||||||
data: UpdateData::DocumentsAddition(data),
|
data: UpdateData::DocumentsAddition(data),
|
||||||
enqueued_at: Utc::now(),
|
enqueued_at: Utc::now(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn documents_partial(data: Vec<IndexMap<String, serde_json::Value>>) -> Update {
|
fn documents_partial(data: Vec<IndexMap<String, Value>>) -> Update {
|
||||||
Update {
|
Update {
|
||||||
data: UpdateData::DocumentsPartial(data),
|
data: UpdateData::DocumentsPartial(data),
|
||||||
enqueued_at: Utc::now(),
|
enqueued_at: Utc::now(),
|
||||||
@ -82,8 +82,8 @@ impl Update {
|
|||||||
pub enum UpdateData {
|
pub enum UpdateData {
|
||||||
ClearAll,
|
ClearAll,
|
||||||
Customs(Vec<u8>),
|
Customs(Vec<u8>),
|
||||||
DocumentsAddition(Vec<IndexMap<String, serde_json::Value>>),
|
DocumentsAddition(Vec<IndexMap<String, Value>>),
|
||||||
DocumentsPartial(Vec<IndexMap<String, serde_json::Value>>),
|
DocumentsPartial(Vec<IndexMap<String, Value>>),
|
||||||
DocumentsDeletion(Vec<DocumentId>),
|
DocumentsDeletion(Vec<DocumentId>),
|
||||||
Settings(SettingsUpdate)
|
Settings(SettingsUpdate)
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,9 @@ use indexmap::IndexMap;
|
|||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
|
use meilisearch_core::{Error, serde::SerializerError};
|
||||||
|
use meilisearch_core::update;
|
||||||
|
|
||||||
use crate::error::ResponseError;
|
use crate::error::ResponseError;
|
||||||
use crate::helpers::Authentication;
|
use crate::helpers::Authentication;
|
||||||
use crate::routes::{IndexParam, IndexUpdateResponse};
|
use crate::routes::{IndexParam, IndexUpdateResponse};
|
||||||
@ -42,8 +45,11 @@ async fn get_document(
|
|||||||
.open_index(&path.index_uid)
|
.open_index(&path.index_uid)
|
||||||
.ok_or(ResponseError::index_not_found(&path.index_uid))?;
|
.ok_or(ResponseError::index_not_found(&path.index_uid))?;
|
||||||
|
|
||||||
let document_id = meilisearch_core::update::compute_document_id(&path.document_id);
|
if !update::validate_document_id(&path.document_id) {
|
||||||
|
return Err(Error::Serializer(SerializerError::InvalidDocumentIdFormat).into())
|
||||||
|
}
|
||||||
|
|
||||||
|
let document_id = update::compute_document_id(&path.document_id);
|
||||||
let reader = data.db.main_read_txn()?;
|
let reader = data.db.main_read_txn()?;
|
||||||
|
|
||||||
let response: Document = index
|
let response: Document = index
|
||||||
@ -65,7 +71,12 @@ async fn delete_document(
|
|||||||
.db
|
.db
|
||||||
.open_index(&path.index_uid)
|
.open_index(&path.index_uid)
|
||||||
.ok_or(ResponseError::index_not_found(&path.index_uid))?;
|
.ok_or(ResponseError::index_not_found(&path.index_uid))?;
|
||||||
let document_id = meilisearch_core::update::compute_document_id(&path.document_id);
|
|
||||||
|
if !update::validate_document_id(&path.document_id) {
|
||||||
|
return Err(Error::Serializer(SerializerError::InvalidDocumentIdFormat).into())
|
||||||
|
}
|
||||||
|
|
||||||
|
let document_id = update::compute_document_id(&path.document_id);
|
||||||
|
|
||||||
let mut update_writer = data.db.update_write_txn()?;
|
let mut update_writer = data.db.update_write_txn()?;
|
||||||
|
|
||||||
@ -237,8 +248,11 @@ async fn delete_documents(
|
|||||||
let mut documents_deletion = index.documents_deletion();
|
let mut documents_deletion = index.documents_deletion();
|
||||||
|
|
||||||
for document_id in body.into_inner() {
|
for document_id in body.into_inner() {
|
||||||
let document_id_string = meilisearch_core::update::value_to_string(&document_id);
|
let document_id_string = update::value_to_string(&document_id);
|
||||||
let document_id = meilisearch_core::update::compute_document_id(document_id_string);
|
if !update::validate_document_id(&document_id_string) {
|
||||||
|
return Err(Error::Serializer(SerializerError::InvalidDocumentIdFormat).into())
|
||||||
|
}
|
||||||
|
let document_id = update::compute_document_id(document_id_string);
|
||||||
documents_deletion.delete_document_by_id(document_id);
|
documents_deletion.delete_document_by_id(document_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user