// MeiliSearch/meilisearch-core/src/update/helpers.rs

use std::fmt::Write as _;
use indexmap::IndexMap;
use meilisearch_schema::IndexedPos;
use meilisearch_types::DocumentId;
use ordered_float::OrderedFloat;
use serde_json::Value;
use crate::Number;
use crate::raw_indexer::RawIndexer;
use crate::serde::SerializerError;
use crate::store::DiscoverIds;

/// Returns the number of words indexed or `None` if the type is unindexable.
2020-05-22 15:00:50 +02:00
pub fn index_value<A>(
indexer: &mut RawIndexer<A>,
document_id: DocumentId,
indexed_pos: IndexedPos,
value: &Value,
) -> Option<usize>
2020-05-22 15:00:50 +02:00
where A: AsRef<[u8]>,
{
match value {
Value::Null => None,
Value::Bool(boolean) => {
let text = boolean.to_string();
let number_of_words = indexer.index_text(document_id, indexed_pos, &text);
Some(number_of_words)
},
Value::Number(number) => {
let text = number.to_string();
Some(indexer.index_text(document_id, indexed_pos, &text))
},
Value::String(string) => {
Some(indexer.index_text(document_id, indexed_pos, &string))
},
Value::Array(_) => {
let text = value_to_string(value);
Some(indexer.index_text(document_id, indexed_pos, &text))
},
Value::Object(_) => {
let text = value_to_string(value);
Some(indexer.index_text(document_id, indexed_pos, &text))
},
}
}
/// Flattens a JSON value into a single string.
///
/// Scalars are written with their `Display` form and `null` contributes
/// nothing. Every array element is followed by `". "`; every object entry is
/// emitted as its key and then its value, each followed by `". "`.
pub fn value_to_string(value: &Value) -> String {
    /// Recursively appends the textual form of `node` to `out`.
    fn append_value(out: &mut String, node: &Value) {
        match node {
            Value::Null => {}
            Value::Bool(flag) => out.push_str(if *flag { "true" } else { "false" }),
            Value::Number(number) => {
                // Writing into a `String` cannot fail, so the result is ignored.
                let _ = write!(out, "{}", number);
            }
            Value::String(text) => out.push_str(text),
            Value::Array(items) => {
                for item in items {
                    append_value(out, item);
                    out.push_str(". ");
                }
            }
            Value::Object(entries) => {
                for (key, item) in entries {
                    out.push_str(key);
                    out.push_str(". ");
                    append_value(out, item);
                    out.push_str(". ");
                }
            }
        }
    }

    let mut flattened = String::new();
    append_value(&mut flattened, value);
    flattened
}
/// Converts a JSON value into a [`Number`] when possible.
///
/// * Booleans become `Number::Unsigned` (`false` is 0, `true` is 1).
/// * JSON numbers are tried as `i64` first, then `u64`, then `f64`.
/// * Strings are parsed with `Number::from_str`; a parse failure gives `None`.
/// * `null`, arrays and objects give `None`.
pub fn value_to_number(value: &Value) -> Option<Number> {
    use std::str::FromStr;

    match value {
        Value::Bool(flag) => Some(Number::Unsigned(u64::from(*flag))),
        Value::String(text) => Number::from_str(text).ok(),
        Value::Number(number) => {
            // Keep the priority order: signed, then unsigned, then float.
            if let Some(signed) = number.as_i64() {
                Some(Number::Signed(signed))
            } else if let Some(unsigned) = number.as_u64() {
                Some(Number::Unsigned(unsigned))
            } else {
                number.as_f64().map(|float| Number::Float(OrderedFloat(float)))
            }
        }
        Value::Null | Value::Array(_) | Value::Object(_) => None,
    }
}
2020-05-19 11:45:46 +02:00
/// Validates a string representation to be a correct document id and returns
/// the corresponding id or generate a new one, this is the way we produce documents ids.
2020-07-07 14:52:49 +02:00
pub fn discover_document_id<F>(
2020-05-20 15:21:08 +02:00
docid: &str,
2020-07-07 14:52:49 +02:00
external_docids_get: F,
2020-05-20 15:21:08 +02:00
available_docids: &mut DiscoverIds<'_>,
2020-05-19 11:45:46 +02:00
) -> Result<DocumentId, SerializerError>
2020-07-07 14:52:49 +02:00
where
F: FnOnce(&str) -> Option<u32>
2020-05-19 11:45:46 +02:00
{
2020-05-20 15:21:08 +02:00
if docid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
2020-07-07 14:52:49 +02:00
match external_docids_get(docid) {
Some(id) => Ok(DocumentId(id)),
2020-05-19 11:45:46 +02:00
None => {
2020-05-20 15:21:08 +02:00
let internal_id = available_docids.next().expect("no more ids available");
2020-05-19 11:45:46 +02:00
Ok(internal_id)
},
}
} else {
Err(SerializerError::InvalidDocumentIdFormat)
}
}
/// Extracts and validates the document id of a document.
2020-07-07 14:52:49 +02:00
pub fn extract_document_id<F>(
2020-05-19 11:45:46 +02:00
primary_key: &str,
document: &IndexMap<String, Value>,
2020-07-07 14:52:49 +02:00
external_docids_get: F,
2020-05-20 15:21:08 +02:00
available_docids: &mut DiscoverIds<'_>,
2020-05-19 11:45:46 +02:00
) -> Result<(DocumentId, String), SerializerError>
2020-07-07 14:52:49 +02:00
where
F: FnOnce(&str) -> Option<u32>
2020-05-19 11:45:46 +02:00
{
match document.get(primary_key) {
Some(value) => {
2020-05-20 15:21:08 +02:00
let docid = match value {
Value::Number(number) => number.to_string(),
Value::String(string) => string.clone(),
_ => return Err(SerializerError::InvalidDocumentIdFormat),
};
2020-07-07 14:52:49 +02:00
discover_document_id(&docid, external_docids_get, available_docids).map(|id| (id, docid))
}
None => Err(SerializerError::DocumentIdNotFound),
}
}