mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00
fix for review
This commit is contained in:
parent
14b5fc4d6c
commit
a5b0e468ee
48 changed files with 558 additions and 1216 deletions
|
@ -161,11 +161,13 @@ where
|
|||
debug!("criterion loop took {:.02?}", before_criterion_loop.elapsed());
|
||||
debug!("proximity evaluation called {} times", proximity_count.load(Ordering::Relaxed));
|
||||
|
||||
let schema = main_store.schema(reader)?.ok_or(Error::SchemaMissing)?;
|
||||
let iter = raw_documents.into_iter().skip(range.start).take(range.len());
|
||||
let iter = iter.map(|rd| Document::from_raw(rd, &queries_kinds, &arena, searchable_attrs.as_ref()));
|
||||
let iter = iter.map(|rd| Document::from_raw(rd, &automatons, &arena, searchable_attrs.as_ref(), &schema));
|
||||
let documents = iter.collect();
|
||||
|
||||
debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed());
|
||||
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
|
@ -330,7 +332,7 @@ where
|
|||
// once we classified the documents related to the current
|
||||
// automatons we save that as the next valid result
|
||||
let mut seen = BufferedDistinctMap::new(&mut distinct_map);
|
||||
let schema = main_store.schema(reader)?.unwrap();
|
||||
let schema = main_store.schema(reader)?.ok_or(Error::SchemaMissing)?;
|
||||
|
||||
let mut documents = Vec::with_capacity(range.len());
|
||||
for raw_document in raw_documents.into_iter().skip(distinct_raw_offset) {
|
||||
|
|
|
@ -68,12 +68,12 @@ impl<'a> SortByAttr<'a> {
|
|||
attr_name: &str,
|
||||
reversed: bool,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError> {
|
||||
let field_id = match schema.get_id(attr_name) {
|
||||
let field_id = match schema.id(attr_name) {
|
||||
Some(field_id) => field_id,
|
||||
None => return Err(SortByAttrError::AttributeNotFound),
|
||||
};
|
||||
|
||||
if !schema.id_is_ranked(field_id) {
|
||||
if !schema.is_ranked(field_id) {
|
||||
return Err(SortByAttrError::AttributeNotRegisteredForRanking);
|
||||
}
|
||||
|
||||
|
|
|
@ -353,7 +353,6 @@ impl Database {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
use crate::criterion::{self, CriteriaBuilder};
|
||||
|
@ -381,13 +380,13 @@ mod tests {
|
|||
let settings = {
|
||||
let data = r#"
|
||||
{
|
||||
"attributeIdentifier": "id",
|
||||
"attributesSearchable": ["name", "description"],
|
||||
"attributesDisplayed": ["name", "description"]
|
||||
"identifier": "id",
|
||||
"searchableAttributes": ["name", "description"],
|
||||
"displayedAttributes": ["name", "description"]
|
||||
}
|
||||
"#;
|
||||
let settings: Settings = serde_json::from_str(data).unwrap();
|
||||
settings.into()
|
||||
settings.into_update().unwrap()
|
||||
};
|
||||
|
||||
let mut update_writer = db.update_write_txn().unwrap();
|
||||
|
@ -441,13 +440,13 @@ mod tests {
|
|||
let settings = {
|
||||
let data = r#"
|
||||
{
|
||||
"attributeIdentifier": "id",
|
||||
"attributesSearchable": ["name", "description"],
|
||||
"attributesDisplayed": ["name", "description"]
|
||||
"identifier": "id",
|
||||
"searchableAttributes": ["name", "description"],
|
||||
"displayedAttributes": ["name", "description"]
|
||||
}
|
||||
"#;
|
||||
let settings: Settings = serde_json::from_str(data).unwrap();
|
||||
settings.into()
|
||||
settings.into_update().unwrap()
|
||||
};
|
||||
|
||||
let mut update_writer = db.update_write_txn().unwrap();
|
||||
|
@ -500,13 +499,13 @@ mod tests {
|
|||
let settings = {
|
||||
let data = r#"
|
||||
{
|
||||
"attributeIdentifier": "id",
|
||||
"attributesSearchable": ["name"],
|
||||
"attributesDisplayed": ["name"]
|
||||
"identifier": "id",
|
||||
"searchableAttributes": ["name"],
|
||||
"displayedAttributes": ["name"]
|
||||
}
|
||||
"#;
|
||||
let settings: Settings = serde_json::from_str(data).unwrap();
|
||||
settings.into()
|
||||
settings.into_update().unwrap()
|
||||
};
|
||||
|
||||
let mut update_writer = db.update_write_txn().unwrap();
|
||||
|
@ -552,13 +551,13 @@ mod tests {
|
|||
let settings = {
|
||||
let data = r#"
|
||||
{
|
||||
"attributeIdentifier": "id",
|
||||
"attributesSearchable": ["name", "description"],
|
||||
"attributesDisplayed": ["name", "description"]
|
||||
"identifier": "id",
|
||||
"searchableAttributes": ["name", "description"],
|
||||
"displayedAttributes": ["name", "description"]
|
||||
}
|
||||
"#;
|
||||
let settings: Settings = serde_json::from_str(data).unwrap();
|
||||
settings.into()
|
||||
settings.into_update().unwrap()
|
||||
};
|
||||
|
||||
let mut update_writer = db.update_write_txn().unwrap();
|
||||
|
@ -586,17 +585,16 @@ mod tests {
|
|||
let _update_id = additions.finalize(&mut update_writer).unwrap();
|
||||
update_writer.commit().unwrap();
|
||||
|
||||
|
||||
let settings = {
|
||||
let data = r#"
|
||||
{
|
||||
"attributeIdentifier": "id",
|
||||
"attributesSearchable": ["name", "description", "age", "sex"],
|
||||
"attributesDisplayed": ["name", "description", "age", "sex"]
|
||||
"identifier": "id",
|
||||
"searchableAttributes": ["name", "description", "age", "sex"],
|
||||
"displayedAttributes": ["name", "description", "age", "sex"]
|
||||
}
|
||||
"#;
|
||||
let settings: Settings = serde_json::from_str(data).unwrap();
|
||||
settings.into()
|
||||
settings.into_update().unwrap()
|
||||
};
|
||||
|
||||
let mut writer = db.update_write_txn().unwrap();
|
||||
|
@ -657,13 +655,13 @@ mod tests {
|
|||
let settings = {
|
||||
let data = r#"
|
||||
{
|
||||
"attributeIdentifier": "id",
|
||||
"attributesSearchable": ["name", "description", "city", "age", "sex"],
|
||||
"attributesDisplayed": ["name", "description", "city", "age", "sex"]
|
||||
"identifier": "id",
|
||||
"searchableAttributes": ["name", "description", "city", "age", "sex"],
|
||||
"displayedAttributes": ["name", "description", "city", "age", "sex"]
|
||||
}
|
||||
"#;
|
||||
let settings: Settings = serde_json::from_str(data).unwrap();
|
||||
settings.into()
|
||||
settings.into_update().unwrap()
|
||||
};
|
||||
|
||||
let mut writer = db.update_write_txn().unwrap();
|
||||
|
@ -696,13 +694,13 @@ mod tests {
|
|||
let settings = {
|
||||
let data = r#"
|
||||
{
|
||||
"attributeIdentifier": "id",
|
||||
"attributesSearchable": ["name", "description"],
|
||||
"attributesDisplayed": ["name", "description"]
|
||||
"identifier": "id",
|
||||
"searchableAttributes": ["name", "description"],
|
||||
"displayedAttributes": ["name", "description"]
|
||||
}
|
||||
"#;
|
||||
let settings: Settings = serde_json::from_str(data).unwrap();
|
||||
settings.into()
|
||||
settings.into_update().unwrap()
|
||||
};
|
||||
|
||||
let mut writer = db.update_write_txn().unwrap();
|
||||
|
@ -773,13 +771,13 @@ mod tests {
|
|||
let settings = {
|
||||
let data = r#"
|
||||
{
|
||||
"attributeIdentifier": "id",
|
||||
"attributesSearchable": ["name", "description"],
|
||||
"attributesDisplayed": ["name", "description", "id"]
|
||||
"identifier": "id",
|
||||
"searchableAttributes": ["name", "description"],
|
||||
"displayedAttributes": ["name", "description", "id"]
|
||||
}
|
||||
"#;
|
||||
let settings: Settings = serde_json::from_str(data).unwrap();
|
||||
settings.into()
|
||||
settings.into_update().unwrap()
|
||||
};
|
||||
|
||||
let mut writer = db.update_write_txn().unwrap();
|
||||
|
@ -909,13 +907,13 @@ mod tests {
|
|||
let settings = {
|
||||
let data = r#"
|
||||
{
|
||||
"attributeIdentifier": "id",
|
||||
"attributesSearchable": ["name", "description"],
|
||||
"attributesDisplayed": ["name", "description"]
|
||||
"identifier": "id",
|
||||
"searchableAttributes": ["name", "description"],
|
||||
"displayedAttributes": ["name", "description"]
|
||||
}
|
||||
"#;
|
||||
let settings: Settings = serde_json::from_str(data).unwrap();
|
||||
settings.into()
|
||||
settings.into_update().unwrap()
|
||||
};
|
||||
|
||||
let mut writer = db.update_write_txn().unwrap();
|
||||
|
@ -982,13 +980,13 @@ mod tests {
|
|||
"_exact",
|
||||
"dsc(release_date)"
|
||||
],
|
||||
"attributeIdentifier": "id",
|
||||
"attributesSearchable": ["name", "release_date"],
|
||||
"attributesDisplayed": ["name", "release_date"]
|
||||
"identifier": "id",
|
||||
"searchableAttributes": ["name", "release_date"],
|
||||
"displayedAttributes": ["name", "release_date"]
|
||||
}
|
||||
"#;
|
||||
let settings: Settings = serde_json::from_str(data).unwrap();
|
||||
settings.into()
|
||||
settings.into_update().unwrap()
|
||||
};
|
||||
|
||||
let mut writer = db.update_write_txn().unwrap();
|
||||
|
|
|
@ -8,7 +8,7 @@ pub type MResult<T> = Result<T, Error>;
|
|||
pub enum Error {
|
||||
Io(io::Error),
|
||||
IndexAlreadyExists,
|
||||
MissingSchemaIdentifier,
|
||||
MissingIdentifier,
|
||||
SchemaMissing,
|
||||
WordIndexMissing,
|
||||
MissingDocumentId,
|
||||
|
@ -83,7 +83,7 @@ impl fmt::Display for Error {
|
|||
match self {
|
||||
Io(e) => write!(f, "{}", e),
|
||||
IndexAlreadyExists => write!(f, "index already exists"),
|
||||
MissingSchemaIdentifier => write!(f, "schema cannot be build without identifier"),
|
||||
MissingIdentifier => write!(f, "schema cannot be build without identifier"),
|
||||
SchemaMissing => write!(f, "this index does not have a schema"),
|
||||
WordIndexMissing => write!(f, "this index does not have a word index"),
|
||||
MissingDocumentId => write!(f, "document id is missing"),
|
||||
|
|
|
@ -1,94 +0,0 @@
|
|||
use std::io::{Read, Write};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::{MResult, Error};
|
||||
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct FieldsMap {
|
||||
name_map: HashMap<String, u16>,
|
||||
id_map: HashMap<u16, String>,
|
||||
next_id: u16
|
||||
}
|
||||
|
||||
impl FieldsMap {
|
||||
pub fn len(&self) -> usize {
|
||||
self.name_map.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.name_map.is_empty()
|
||||
}
|
||||
|
||||
pub fn insert<T: ToString>(&mut self, name: T) -> MResult<u16> {
|
||||
let name = name.to_string();
|
||||
if let Some(id) = self.name_map.get(&name) {
|
||||
return Ok(*id)
|
||||
}
|
||||
let id = self.next_id;
|
||||
if self.next_id.checked_add(1).is_none() {
|
||||
return Err(Error::MaxFieldsLimitExceeded)
|
||||
} else {
|
||||
self.next_id += 1;
|
||||
}
|
||||
self.name_map.insert(name.clone(), id);
|
||||
self.id_map.insert(id, name);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
pub fn remove<T: ToString>(&mut self, name: T) {
|
||||
let name = name.to_string();
|
||||
if let Some(id) = self.name_map.get(&name) {
|
||||
self.id_map.remove(&id);
|
||||
}
|
||||
self.name_map.remove(&name);
|
||||
}
|
||||
|
||||
pub fn get_id<T: ToString>(&self, name: T) -> Option<&u16> {
|
||||
let name = name.to_string();
|
||||
self.name_map.get(&name)
|
||||
}
|
||||
|
||||
pub fn get_name(&self, id: u16) -> Option<&String> {
|
||||
self.id_map.get(&id)
|
||||
}
|
||||
|
||||
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<FieldsMap> {
|
||||
bincode::deserialize_from(reader)
|
||||
}
|
||||
|
||||
pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
|
||||
bincode::serialize_into(writer, &self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn fields_map() {
|
||||
let mut fields_map = FieldsMap::default();
|
||||
|
||||
assert_eq!(fields_map.insert("id").unwrap(), 0);
|
||||
assert_eq!(fields_map.insert("title").unwrap(), 1);
|
||||
assert_eq!(fields_map.insert("descritpion").unwrap(), 2);
|
||||
assert_eq!(fields_map.insert("id").unwrap(), 0);
|
||||
assert_eq!(fields_map.insert("title").unwrap(), 1);
|
||||
assert_eq!(fields_map.insert("descritpion").unwrap(), 2);
|
||||
assert_eq!(fields_map.get_id("id"), Some(&0));
|
||||
assert_eq!(fields_map.get_id("title"), Some(&1));
|
||||
assert_eq!(fields_map.get_id("descritpion"), Some(&2));
|
||||
assert_eq!(fields_map.get_id("date"), None);
|
||||
assert_eq!(fields_map.len(), 3);
|
||||
assert_eq!(fields_map.get_name(0), Some(&"id".to_owned()));
|
||||
assert_eq!(fields_map.get_name(1), Some(&"title".to_owned()));
|
||||
assert_eq!(fields_map.get_name(2), Some(&"descritpion".to_owned()));
|
||||
assert_eq!(fields_map.get_name(4), None);
|
||||
fields_map.remove("title");
|
||||
assert_eq!(fields_map.get_id("title"), None);
|
||||
assert_eq!(fields_map.insert("title").unwrap(), 3);
|
||||
assert_eq!(fields_map.len(), 3);
|
||||
}
|
||||
}
|
|
@ -86,7 +86,7 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>(
|
|||
Some(field_id) => field_id.0,
|
||||
None => {
|
||||
error!("Cannot convert indexed_pos {} to field_id", attribute);
|
||||
trace!("Schema is compronized; {:?}", schema);
|
||||
trace!("Schema is compromized; {:?}", schema);
|
||||
continue
|
||||
}
|
||||
};
|
||||
|
@ -164,7 +164,7 @@ impl Document {
|
|||
Some(field_id) => field_id.0,
|
||||
None => {
|
||||
error!("Cannot convert indexed_pos {} to field_id", attribute);
|
||||
trace!("Schema is compronized; {:?}", schema);
|
||||
trace!("Schema is compromized; {:?}", schema);
|
||||
continue
|
||||
}
|
||||
};
|
||||
|
|
|
@ -19,16 +19,16 @@ impl RankedMap {
|
|||
self.0.is_empty()
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, document: DocumentId, attribute: FieldId, number: Number) {
|
||||
self.0.insert((document, attribute), number);
|
||||
pub fn insert(&mut self, document: DocumentId, field: FieldId, number: Number) {
|
||||
self.0.insert((document, field), number);
|
||||
}
|
||||
|
||||
pub fn remove(&mut self, document: DocumentId, attribute: FieldId) {
|
||||
self.0.remove(&(document, attribute));
|
||||
pub fn remove(&mut self, document: DocumentId, field: FieldId) {
|
||||
self.0.remove(&(document, field));
|
||||
}
|
||||
|
||||
pub fn get(&self, document: DocumentId, attribute: FieldId) -> Option<Number> {
|
||||
self.0.get(&(document, attribute)).cloned()
|
||||
pub fn get(&self, document: DocumentId, field: FieldId) -> Option<Number> {
|
||||
self.0.get(&(document, field)).cloned()
|
||||
}
|
||||
|
||||
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<RankedMap> {
|
||||
|
|
|
@ -178,7 +178,6 @@ fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> O
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ pub struct Deserializer<'a> {
|
|||
pub reader: &'a heed::RoTxn<MainT>,
|
||||
pub documents_fields: DocumentsFields,
|
||||
pub schema: &'a Schema,
|
||||
pub attributes: Option<&'a HashSet<FieldId>>,
|
||||
pub fields: Option<&'a HashSet<FieldId>>,
|
||||
}
|
||||
|
||||
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
||||
|
@ -92,9 +92,9 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
|||
}
|
||||
};
|
||||
|
||||
let is_displayed = self.schema.id_is_displayed(attr);
|
||||
if is_displayed && self.attributes.map_or(true, |f| f.contains(&attr)) {
|
||||
if let Some(attribute_name) = self.schema.get_name(attr) {
|
||||
let is_displayed = self.schema.is_displayed(attr);
|
||||
if is_displayed && self.fields.map_or(true, |f| f.contains(&attr)) {
|
||||
if let Some(attribute_name) = self.schema.name(attr) {
|
||||
let cursor = Cursor::new(value.to_owned());
|
||||
let ioread = SerdeJsonIoRead::new(cursor);
|
||||
let value = Value(SerdeJsonDeserializer::new(ioread));
|
||||
|
|
|
@ -2,7 +2,7 @@ use std::hash::{Hash, Hasher};
|
|||
|
||||
use crate::DocumentId;
|
||||
use serde::{ser, Serialize};
|
||||
use serde_json::Value;
|
||||
use serde_json::{Value, Number};
|
||||
use siphasher::sip::SipHasher;
|
||||
|
||||
use super::{ConvertToString, SerializerError};
|
||||
|
@ -18,18 +18,27 @@ where
|
|||
document.serialize(serializer)
|
||||
}
|
||||
|
||||
fn validate_number(value: &Number) -> Option<String> {
|
||||
if value.is_f64() {
|
||||
return None
|
||||
}
|
||||
return Some(value.to_string())
|
||||
}
|
||||
|
||||
fn validate_string(value: &String) -> Option<String> {
|
||||
if value.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
||||
Some(value.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn value_to_string(value: &Value) -> Option<String> {
|
||||
match value {
|
||||
Value::Null => None,
|
||||
Value::Bool(_) => None,
|
||||
Value::Number(value) => Some(value.to_string()),
|
||||
Value::String(value) => {
|
||||
if value.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
|
||||
Some(value.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
Value::Number(value) => validate_number(value),
|
||||
Value::String(value) => validate_string(value),
|
||||
Value::Array(_) => None,
|
||||
Value::Object(_) => None,
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use meilisearch_schema::{IndexedPos};
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use serde::ser;
|
||||
use serde::Serialize;
|
||||
|
||||
|
|
|
@ -57,7 +57,7 @@ impl fmt::Display for SerializerError {
|
|||
f.write_str("serialized document does not have an id according to the schema")
|
||||
}
|
||||
SerializerError::InvalidDocumentIdType => {
|
||||
f.write_str("document identifier can only be of type number or string (A-Z, a-z, 0-9, -_)")
|
||||
f.write_str("documents identifiers can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")
|
||||
}
|
||||
SerializerError::Zlmdb(e) => write!(f, "heed related error: {}", e),
|
||||
SerializerError::SerdeJson(e) => write!(f, "serde json error: {}", e),
|
||||
|
|
|
@ -305,7 +305,7 @@ pub fn serialize_value<'a, T: ?Sized>(
|
|||
where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
let field_id = schema.get_or_create(attribute.clone())?;
|
||||
let field_id = schema.get_or_create(&attribute)?;
|
||||
|
||||
serialize_value_with_id(
|
||||
txn,
|
||||
|
@ -337,7 +337,7 @@ where
|
|||
let serialized = serde_json::to_vec(value)?;
|
||||
document_store.put_document_field(txn, document_id, field_id, &serialized)?;
|
||||
|
||||
if let Some(indexed_pos) = schema.id_is_indexed(field_id) {
|
||||
if let Some(indexed_pos) = schema.is_indexed(field_id) {
|
||||
let indexer = Indexer {
|
||||
pos: *indexed_pos,
|
||||
indexer,
|
||||
|
@ -353,7 +353,7 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
if schema.id_is_ranked(field_id) {
|
||||
if schema.is_ranked(field_id) {
|
||||
let number = value.serialize(ConvertToNumber)?;
|
||||
ranked_map.insert(document_id, field_id, number);
|
||||
}
|
||||
|
|
|
@ -1,16 +1,14 @@
|
|||
use std::sync::Mutex;
|
||||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
static RANKING_RULE_REGEX: Lazy<Mutex<regex::Regex>> = Lazy::new(|| {
|
||||
static RANKING_RULE_REGEX: Lazy<regex::Regex> = Lazy::new(|| {
|
||||
let regex = regex::Regex::new(r"(asc|dsc)\(([a-zA-Z0-9-_]*)\)").unwrap();
|
||||
Mutex::new(regex)
|
||||
regex
|
||||
});
|
||||
|
||||
|
||||
#[derive(Default, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct Settings {
|
||||
|
@ -19,11 +17,11 @@ pub struct Settings {
|
|||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub ranking_distinct: Option<Option<String>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub attribute_identifier: Option<Option<String>>,
|
||||
pub identifier: Option<Option<String>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub attributes_searchable: Option<Option<Vec<String>>>,
|
||||
pub searchable_attributes: Option<Option<Vec<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub attributes_displayed: Option<Option<HashSet<String>>>,
|
||||
pub displayed_attributes: Option<Option<HashSet<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub stop_words: Option<Option<BTreeSet<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
|
@ -40,34 +38,32 @@ fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
|
|||
Deserialize::deserialize(deserializer).map(Some)
|
||||
}
|
||||
|
||||
impl Into<SettingsUpdate> for Settings {
|
||||
fn into(self) -> SettingsUpdate {
|
||||
impl Settings {
|
||||
pub fn into_update(&self) -> Result<SettingsUpdate, RankingRuleConversionError> {
|
||||
let settings = self.clone();
|
||||
|
||||
let ranking_rules = match settings.ranking_rules {
|
||||
Some(Some(rules)) => UpdateState::Update(RankingRule::from_vec(rules)),
|
||||
Some(Some(rules)) => UpdateState::Update(RankingRule::from_vec(rules.iter().map(|m| m.as_ref()).collect())?),
|
||||
Some(None) => UpdateState::Clear,
|
||||
None => UpdateState::Nothing,
|
||||
};
|
||||
|
||||
SettingsUpdate {
|
||||
Ok(SettingsUpdate {
|
||||
ranking_rules: ranking_rules,
|
||||
ranking_distinct: settings.ranking_distinct.into(),
|
||||
attribute_identifier: settings.attribute_identifier.into(),
|
||||
attributes_searchable: settings.attributes_searchable.into(),
|
||||
attributes_displayed: settings.attributes_displayed.into(),
|
||||
identifier: settings.identifier.into(),
|
||||
searchable_attributes: settings.searchable_attributes.into(),
|
||||
displayed_attributes: settings.displayed_attributes.into(),
|
||||
stop_words: settings.stop_words.into(),
|
||||
synonyms: settings.synonyms.into(),
|
||||
index_new_fields: settings.index_new_fields.into(),
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateState<T> {
|
||||
Update(T),
|
||||
Add(T),
|
||||
Delete(T),
|
||||
Clear,
|
||||
Nothing,
|
||||
}
|
||||
|
@ -82,15 +78,6 @@ impl <T> From<Option<Option<T>>> for UpdateState<T> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T> UpdateState<T> {
|
||||
pub fn is_changed(&self) -> bool {
|
||||
match self {
|
||||
UpdateState::Nothing => false,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RankingRuleConversionError;
|
||||
|
||||
|
@ -139,10 +126,10 @@ impl FromStr for RankingRule {
|
|||
"_words_position" => RankingRule::WordsPosition,
|
||||
"_exact" => RankingRule::Exact,
|
||||
_ => {
|
||||
let captures = RANKING_RULE_REGEX.lock().unwrap().captures(s).unwrap();
|
||||
match captures[1].as_ref() {
|
||||
"asc" => RankingRule::Asc(captures[2].to_string()),
|
||||
"dsc" => RankingRule::Dsc(captures[2].to_string()),
|
||||
let captures = RANKING_RULE_REGEX.captures(s).ok_or(RankingRuleConversionError)?;
|
||||
match (captures.get(1).map(|m| m.as_str()), captures.get(2)) {
|
||||
(Some("asc"), Some(field)) => RankingRule::Asc(field.as_str().to_string()),
|
||||
(Some("dsc"), Some(field)) => RankingRule::Dsc(field.as_str().to_string()),
|
||||
_ => return Err(RankingRuleConversionError)
|
||||
}
|
||||
}
|
||||
|
@ -152,17 +139,16 @@ impl FromStr for RankingRule {
|
|||
}
|
||||
|
||||
impl RankingRule {
|
||||
pub fn get_field(&self) -> Option<String> {
|
||||
pub fn get_field(&self) -> Option<&str> {
|
||||
match self {
|
||||
RankingRule::Asc(field) | RankingRule::Dsc(field) => Some((*field).clone()),
|
||||
RankingRule::Asc(field) | RankingRule::Dsc(field) => Some(field),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_vec(rules: Vec<String>) -> Vec<RankingRule> {
|
||||
pub fn from_vec(rules: Vec<&str>) -> Result<Vec<RankingRule>, RankingRuleConversionError> {
|
||||
rules.iter()
|
||||
.map(|s| RankingRule::from_str(s.as_str()))
|
||||
.filter_map(Result::ok)
|
||||
.map(|s| RankingRule::from_str(s))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
@ -171,9 +157,9 @@ impl RankingRule {
|
|||
pub struct SettingsUpdate {
|
||||
pub ranking_rules: UpdateState<Vec<RankingRule>>,
|
||||
pub ranking_distinct: UpdateState<String>,
|
||||
pub attribute_identifier: UpdateState<String>,
|
||||
pub attributes_searchable: UpdateState<Vec<String>>,
|
||||
pub attributes_displayed: UpdateState<HashSet<String>>,
|
||||
pub identifier: UpdateState<String>,
|
||||
pub searchable_attributes: UpdateState<Vec<String>>,
|
||||
pub displayed_attributes: UpdateState<HashSet<String>>,
|
||||
pub stop_words: UpdateState<BTreeSet<String>>,
|
||||
pub synonyms: UpdateState<BTreeMap<String, Vec<String>>>,
|
||||
pub index_new_fields: UpdateState<bool>,
|
||||
|
@ -184,9 +170,9 @@ impl Default for SettingsUpdate {
|
|||
Self {
|
||||
ranking_rules: UpdateState::Nothing,
|
||||
ranking_distinct: UpdateState::Nothing,
|
||||
attribute_identifier: UpdateState::Nothing,
|
||||
attributes_searchable: UpdateState::Nothing,
|
||||
attributes_displayed: UpdateState::Nothing,
|
||||
identifier: UpdateState::Nothing,
|
||||
searchable_attributes: UpdateState::Nothing,
|
||||
displayed_attributes: UpdateState::Nothing,
|
||||
stop_words: UpdateState::Nothing,
|
||||
synonyms: UpdateState::Nothing,
|
||||
index_new_fields: UpdateState::Nothing,
|
||||
|
|
|
@ -16,10 +16,10 @@ impl DocumentsFields {
|
|||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: FieldId,
|
||||
field: FieldId,
|
||||
value: &[u8],
|
||||
) -> ZResult<()> {
|
||||
let key = DocumentFieldStoredKey::new(document_id, attribute);
|
||||
let key = DocumentFieldStoredKey::new(document_id, field);
|
||||
self.documents_fields.put(writer, &key, value)
|
||||
}
|
||||
|
||||
|
@ -41,9 +41,9 @@ impl DocumentsFields {
|
|||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: FieldId,
|
||||
field: FieldId,
|
||||
) -> ZResult<Option<&'txn [u8]>> {
|
||||
let key = DocumentFieldStoredKey::new(document_id, attribute);
|
||||
let key = DocumentFieldStoredKey::new(document_id, field);
|
||||
self.documents_fields.get(reader, &key)
|
||||
}
|
||||
|
||||
|
|
|
@ -11,11 +11,11 @@ use crate::RankedMap;
|
|||
use crate::settings::RankingRule;
|
||||
|
||||
const CREATED_AT_KEY: &str = "created-at";
|
||||
const RANKING_RULES_KEY: &str = "ranking-rules-key";
|
||||
const RANKING_DISTINCT_KEY: &str = "ranking-distinct-key";
|
||||
const STOP_WORDS_KEY: &str = "stop-words-key";
|
||||
const SYNONYMS_KEY: &str = "synonyms-key";
|
||||
const CUSTOMS_KEY: &str = "customs-key";
|
||||
const RANKING_RULES_KEY: &str = "ranking-rules";
|
||||
const RANKING_DISTINCT_KEY: &str = "ranking-distinct";
|
||||
const STOP_WORDS_KEY: &str = "stop-words";
|
||||
const SYNONYMS_KEY: &str = "synonyms";
|
||||
const CUSTOMS_KEY: &str = "customs";
|
||||
const FIELDS_FREQUENCY_KEY: &str = "fields-frequency";
|
||||
const NAME_KEY: &str = "name";
|
||||
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
|
||||
|
@ -188,7 +188,7 @@ impl Main {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn ranking_rules<'txn>(&self, reader: &'txn heed::RoTxn<MainT>) -> ZResult<Option<Vec<RankingRule>>> {
|
||||
pub fn ranking_rules(&self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<Vec<RankingRule>>> {
|
||||
self.main.get::<_, Str, SerdeBincode<Vec<RankingRule>>>(reader, RANKING_RULES_KEY)
|
||||
}
|
||||
|
||||
|
@ -200,7 +200,7 @@ impl Main {
|
|||
self.main.delete::<_, Str>(writer, RANKING_RULES_KEY)
|
||||
}
|
||||
|
||||
pub fn ranking_distinct<'txn>(&self, reader: &'txn heed::RoTxn<MainT>) -> ZResult<Option<String>> {
|
||||
pub fn ranking_distinct(&self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<String>> {
|
||||
self.main.get::<_, Str, SerdeBincode<String>>(reader, RANKING_DISTINCT_KEY)
|
||||
}
|
||||
|
||||
|
|
|
@ -223,7 +223,7 @@ impl Index {
|
|||
let schema = schema.ok_or(Error::SchemaMissing)?;
|
||||
|
||||
let attributes = match attributes {
|
||||
Some(attributes) => Some(attributes.iter().filter_map(|name| schema.get_id(*name)).collect()),
|
||||
Some(attributes) => Some(attributes.iter().filter_map(|name| schema.id(*name)).collect()),
|
||||
None => None,
|
||||
};
|
||||
|
||||
|
@ -232,7 +232,7 @@ impl Index {
|
|||
reader,
|
||||
documents_fields: self.documents_fields,
|
||||
schema: &schema,
|
||||
attributes: attributes.as_ref(),
|
||||
fields: attributes.as_ref(),
|
||||
};
|
||||
|
||||
Ok(Option::<T>::deserialize(&mut deserializer)?)
|
||||
|
|
|
@ -158,8 +158,6 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||
document.serialize(serializer)?;
|
||||
}
|
||||
|
||||
|
||||
|
||||
write_documents_addition_index(
|
||||
writer,
|
||||
index,
|
||||
|
@ -199,7 +197,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
|||
reader: writer,
|
||||
documents_fields: index.documents_fields,
|
||||
schema: &schema,
|
||||
attributes: None,
|
||||
fields: None,
|
||||
};
|
||||
|
||||
// retrieve the old document and
|
||||
|
@ -246,8 +244,6 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
|||
document.serialize(serializer)?;
|
||||
}
|
||||
|
||||
|
||||
|
||||
write_documents_addition_index(
|
||||
writer,
|
||||
index,
|
||||
|
|
|
@ -101,12 +101,12 @@ pub fn apply_documents_deletion(
|
|||
};
|
||||
|
||||
// collect the ranked attributes according to the schema
|
||||
let ranked_attrs = schema.get_ranked();
|
||||
let ranked_fields = schema.ranked();
|
||||
|
||||
let mut words_document_ids = HashMap::new();
|
||||
for id in idset {
|
||||
// remove all the ranked attributes from the ranked_map
|
||||
for ranked_attr in &ranked_attrs {
|
||||
for ranked_attr in &ranked_fields {
|
||||
ranked_map.remove(id, *ranked_attr);
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,6 @@ mod documents_addition;
|
|||
mod documents_deletion;
|
||||
mod settings_update;
|
||||
|
||||
|
||||
pub use self::clear_all::{apply_clear_all, push_clear_all};
|
||||
pub use self::customs_update::{apply_customs_update, push_customs_update};
|
||||
pub use self::documents_addition::{
|
||||
|
|
|
@ -35,27 +35,27 @@ pub fn apply_settings_update(
|
|||
let mut schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => {
|
||||
match settings.attribute_identifier.clone() {
|
||||
UpdateState::Update(id) => Schema::with_identifier(id),
|
||||
_ => return Err(Error::MissingSchemaIdentifier)
|
||||
match settings.identifier.clone() {
|
||||
UpdateState::Update(id) => Schema::with_identifier(&id),
|
||||
_ => return Err(Error::MissingIdentifier)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match settings.ranking_rules {
|
||||
UpdateState::Update(v) => {
|
||||
let ranked_field: Vec<String> = v.iter().filter_map(RankingRule::get_field).collect();
|
||||
let ranked_field: Vec<&str> = v.iter().filter_map(RankingRule::get_field).collect();
|
||||
schema.update_ranked(ranked_field)?;
|
||||
index.main.put_ranking_rules(writer, v)?;
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
let clear: Vec<String> = Vec::new();
|
||||
let clear: Vec<&str> = Vec::new();
|
||||
schema.update_ranked(clear)?;
|
||||
index.main.delete_ranking_rules(writer)?;
|
||||
must_reindex = true;
|
||||
},
|
||||
_ => (),
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
|
||||
match settings.ranking_distinct {
|
||||
|
@ -65,65 +65,43 @@ pub fn apply_settings_update(
|
|||
UpdateState::Clear => {
|
||||
index.main.delete_ranking_distinct(writer)?;
|
||||
},
|
||||
_ => (),
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
|
||||
match settings.index_new_fields {
|
||||
UpdateState::Update(v) => {
|
||||
schema.set_must_index_new_fields(v);
|
||||
schema.set_index_new_fields(v);
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
schema.set_must_index_new_fields(true);
|
||||
schema.set_index_new_fields(true);
|
||||
},
|
||||
_ => (),
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
|
||||
match settings.attributes_searchable.clone() {
|
||||
match settings.searchable_attributes.clone() {
|
||||
UpdateState::Update(v) => {
|
||||
schema.update_indexed(v)?;
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
let clear: Vec<String> = Vec::new();
|
||||
let clear: Vec<&str> = Vec::new();
|
||||
schema.update_indexed(clear)?;
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
UpdateState::Add(attrs) => {
|
||||
for attr in attrs {
|
||||
schema.set_indexed(attr)?;
|
||||
}
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Delete(attrs) => {
|
||||
for attr in attrs {
|
||||
schema.remove_indexed(attr);
|
||||
}
|
||||
must_reindex = true;
|
||||
}
|
||||
};
|
||||
match settings.attributes_displayed.clone() {
|
||||
match settings.displayed_attributes.clone() {
|
||||
UpdateState::Update(v) => schema.update_displayed(v)?,
|
||||
UpdateState::Clear => {
|
||||
let clear: Vec<String> = Vec::new();
|
||||
let clear: Vec<&str> = Vec::new();
|
||||
schema.update_displayed(clear)?;
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
UpdateState::Add(attrs) => {
|
||||
for attr in attrs {
|
||||
schema.set_displayed(attr)?;
|
||||
}
|
||||
},
|
||||
UpdateState::Delete(attrs) => {
|
||||
for attr in attrs {
|
||||
schema.remove_displayed(attr);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match settings.attribute_identifier.clone() {
|
||||
match settings.identifier.clone() {
|
||||
UpdateState::Update(v) => {
|
||||
schema.set_identifier(v)?;
|
||||
schema.set_identifier(v.as_ref())?;
|
||||
index.main.put_schema(writer, &schema)?;
|
||||
must_reindex = true;
|
||||
},
|
||||
|
@ -168,7 +146,7 @@ pub fn apply_settings_update(
|
|||
docs_words_store,
|
||||
)?;
|
||||
}
|
||||
if let UpdateState::Clear = settings.attribute_identifier {
|
||||
if let UpdateState::Clear = settings.identifier {
|
||||
index.main.delete_schema(writer)?;
|
||||
}
|
||||
Ok(())
|
||||
|
@ -189,8 +167,8 @@ pub fn apply_stop_words_update(
|
|||
.stream()
|
||||
.into_strs().unwrap().into_iter().collect();
|
||||
|
||||
let deletion: BTreeSet<String> = old_stop_words.clone().difference(&stop_words).cloned().collect();
|
||||
let addition: BTreeSet<String> = stop_words.clone().difference(&old_stop_words).cloned().collect();
|
||||
let deletion: BTreeSet<String> = old_stop_words.difference(&stop_words).cloned().collect();
|
||||
let addition: BTreeSet<String> = stop_words.difference(&old_stop_words).cloned().collect();
|
||||
|
||||
if !addition.is_empty() {
|
||||
apply_stop_words_addition(
|
||||
|
@ -201,11 +179,12 @@ pub fn apply_stop_words_update(
|
|||
}
|
||||
|
||||
if !deletion.is_empty() {
|
||||
must_reindex = apply_stop_words_deletion(
|
||||
apply_stop_words_deletion(
|
||||
writer,
|
||||
index,
|
||||
deletion
|
||||
)?;
|
||||
must_reindex = true;
|
||||
}
|
||||
|
||||
Ok(must_reindex)
|
||||
|
@ -275,7 +254,7 @@ fn apply_stop_words_deletion(
|
|||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<bool> {
|
||||
) -> MResult<()> {
|
||||
|
||||
let main_store = index.main;
|
||||
|
||||
|
@ -306,17 +285,7 @@ fn apply_stop_words_deletion(
|
|||
.and_then(fst::Set::from_bytes)
|
||||
.unwrap();
|
||||
|
||||
main_store.put_stop_words_fst(writer, &stop_words_fst)?;
|
||||
|
||||
// now that we have setup the stop words
|
||||
// lets reindex everything...
|
||||
if let Ok(number) = main_store.number_of_documents(writer) {
|
||||
if number > 0 {
|
||||
return Ok(true)
|
||||
}
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
Ok(main_store.put_stop_words_fst(writer, &stop_words_fst)?)
|
||||
}
|
||||
|
||||
pub fn apply_synonyms_update(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue