1128: Settings consistency r=MarinPostma a=MarinPostma

- close #1124, fix #761 
- fix some clippy warnings
- make the dump process reentrant

Co-authored-by: mpostma <postma.marin@protonmail.com>
Co-authored-by: marin <postma.marin@protonmail.com>
This commit is contained in:
bors[bot] 2020-12-16 14:12:09 +00:00 committed by GitHub
commit 446b2e7058
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 530 additions and 411 deletions

View File

@ -350,7 +350,7 @@ impl Database {
index.main.put_name(&mut writer, name)?; index.main.put_name(&mut writer, name)?;
index.main.put_created_at(&mut writer)?; index.main.put_created_at(&mut writer)?;
index.main.put_updated_at(&mut writer)?; index.main.put_updated_at(&mut writer)?;
index.main.put_schema(&mut writer, &Schema::new())?; index.main.put_schema(&mut writer, &Schema::default())?;
let env_clone = self.env.clone(); let env_clone = self.env.clone();
let update_env_clone = self.update_env.clone(); let update_env_clone = self.update_env.clone();

View File

@ -245,8 +245,8 @@ mod test {
#[test] #[test]
fn test_facet_key() { fn test_facet_key() {
let mut schema = Schema::new(); let mut schema = Schema::default();
let id = schema.insert_and_index("hello").unwrap(); let id = schema.insert_with_position("hello").unwrap().0;
let facet_list = [schema.id("hello").unwrap()]; let facet_list = [schema.id("hello").unwrap()];
assert_eq!( assert_eq!(
FacetKey::from_str("hello:12", &schema, &facet_list).unwrap(), FacetKey::from_str("hello:12", &schema, &facet_list).unwrap(),
@ -286,8 +286,8 @@ mod test {
#[test] #[test]
fn test_parse_facet_array() { fn test_parse_facet_array() {
use either::Either::{Left, Right}; use either::Either::{Left, Right};
let mut schema = Schema::new(); let mut schema = Schema::default();
let _id = schema.insert_and_index("hello").unwrap(); let _id = schema.insert_with_position("hello").unwrap();
let facet_list = [schema.id("hello").unwrap()]; let facet_list = [schema.id("hello").unwrap()];
assert_eq!( assert_eq!(
FacetFilter::from_str("[[\"hello:12\"]]", &schema, &facet_list).unwrap(), FacetFilter::from_str("[[\"hello:12\"]]", &schema, &facet_list).unwrap(),

View File

@ -415,8 +415,7 @@ mod tests {
let mut final_indexes = Vec::new(); let mut final_indexes = Vec::new();
for index in indexes { for index in indexes {
let name = index.attribute.to_string(); let name = index.attribute.to_string();
schema.insert(&name).unwrap(); let indexed_pos = schema.insert_with_position(&name).unwrap().1;
let indexed_pos = schema.set_indexed(&name).unwrap().1;
let index = DocIndex { let index = DocIndex {
attribute: indexed_pos.0, attribute: indexed_pos.0,
..*index ..*index

View File

@ -13,7 +13,7 @@ static RANKING_RULE_REGEX: Lazy<regex::Regex> = Lazy::new(|| {
regex::Regex::new(r"(asc|desc)\(([a-zA-Z0-9-_]*)\)").unwrap() regex::Regex::new(r"(asc|desc)\(([a-zA-Z0-9-_]*)\)").unwrap()
}); });
#[derive(Default, Clone, Serialize, Deserialize)] #[derive(Default, Clone, Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)] #[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct Settings { pub struct Settings {
#[serde(default, deserialize_with = "deserialize_some")] #[serde(default, deserialize_with = "deserialize_some")]

View File

@ -126,13 +126,13 @@ where A: AsRef<[u8]>,
let serialized = serde_json::to_vec(value)?; let serialized = serde_json::to_vec(value)?;
documents_fields.put_document_field(writer, document_id, field_id, &serialized)?; documents_fields.put_document_field(writer, document_id, field_id, &serialized)?;
if let Some(indexed_pos) = schema.is_indexed(field_id) { if let Some(indexed_pos) = schema.is_searchable(field_id) {
let number_of_words = index_value(indexer, document_id, *indexed_pos, value); let number_of_words = index_value(indexer, document_id, indexed_pos, value);
if let Some(number_of_words) = number_of_words { if let Some(number_of_words) = number_of_words {
documents_fields_counts.put_document_field_count( documents_fields_counts.put_document_field_count(
writer, writer,
document_id, document_id,
*indexed_pos, indexed_pos,
number_of_words as u16, number_of_words as u16,
)?; )?;
} }
@ -146,8 +146,8 @@ where A: AsRef<[u8]>,
Ok(()) Ok(())
} }
pub fn apply_addition<'a, 'b, 'c>( pub fn apply_addition(
writer: &'a mut heed::RwTxn<'b, 'c, MainT>, writer: &mut heed::RwTxn<MainT>,
index: &store::Index, index: &store::Index,
new_documents: Vec<IndexMap<String, Value>>, new_documents: Vec<IndexMap<String, Value>>,
partial: bool partial: bool
@ -228,7 +228,7 @@ pub fn apply_addition<'a, 'b, 'c>(
for (document_id, document) in &documents_additions { for (document_id, document) in &documents_additions {
// For each key-value pair in the document. // For each key-value pair in the document.
for (attribute, value) in document { for (attribute, value) in document {
let field_id = schema.insert_and_index(&attribute)?; let (field_id, _) = schema.insert_with_position(&attribute)?;
index_document( index_document(
writer, writer,
index.documents_fields, index.documents_fields,
@ -272,16 +272,16 @@ pub fn apply_addition<'a, 'b, 'c>(
Ok(()) Ok(())
} }
pub fn apply_documents_partial_addition<'a, 'b, 'c>( pub fn apply_documents_partial_addition(
writer: &'a mut heed::RwTxn<'b, 'c, MainT>, writer: &mut heed::RwTxn<MainT>,
index: &store::Index, index: &store::Index,
new_documents: Vec<IndexMap<String, Value>>, new_documents: Vec<IndexMap<String, Value>>,
) -> MResult<()> { ) -> MResult<()> {
apply_addition(writer, index, new_documents, true) apply_addition(writer, index, new_documents, true)
} }
pub fn apply_documents_addition<'a, 'b, 'c>( pub fn apply_documents_addition(
writer: &'a mut heed::RwTxn<'b, 'c, MainT>, writer: &mut heed::RwTxn<MainT>,
index: &store::Index, index: &store::Index,
new_documents: Vec<IndexMap<String, Value>>, new_documents: Vec<IndexMap<String, Value>>,
) -> MResult<()> { ) -> MResult<()> {

View File

@ -212,8 +212,8 @@ pub fn next_update_id(
Ok(new_update_id) Ok(new_update_id)
} }
pub fn update_task<'a, 'b, 'c>( pub fn update_task(
writer: &'a mut heed::RwTxn<'b, 'c, MainT>, writer: &mut heed::RwTxn<MainT>,
index: &store::Index, index: &store::Index,
update_id: u64, update_id: u64,
update: Update, update: Update,

View File

@ -71,14 +71,14 @@ pub fn apply_settings_update(
match settings.searchable_attributes.clone() { match settings.searchable_attributes.clone() {
UpdateState::Update(v) => { UpdateState::Update(v) => {
if v.iter().any(|e| e == "*") || v.is_empty() { if v.iter().any(|e| e == "*") || v.is_empty() {
schema.set_all_fields_as_indexed(); schema.set_all_searchable();
} else { } else {
schema.update_indexed(v)?; schema.update_searchable(v)?;
} }
must_reindex = true; must_reindex = true;
}, },
UpdateState::Clear => { UpdateState::Clear => {
schema.set_all_fields_as_indexed(); schema.set_all_searchable();
must_reindex = true; must_reindex = true;
}, },
UpdateState::Nothing => (), UpdateState::Nothing => (),
@ -86,13 +86,13 @@ pub fn apply_settings_update(
match settings.displayed_attributes.clone() { match settings.displayed_attributes.clone() {
UpdateState::Update(v) => { UpdateState::Update(v) => {
if v.contains("*") || v.is_empty() { if v.contains("*") || v.is_empty() {
schema.set_all_fields_as_displayed(); schema.set_all_displayed();
} else { } else {
schema.update_displayed(v)? schema.update_displayed(v)?
} }
}, },
UpdateState::Clear => { UpdateState::Clear => {
schema.set_all_fields_as_displayed(); schema.set_all_displayed();
}, },
UpdateState::Nothing => (), UpdateState::Nothing => (),
} }

View File

@ -1,7 +1,7 @@
use std::error::Error; use std::error::Error;
use std::ops::Deref; use std::ops::Deref;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc; use std::sync::{Arc, Mutex};
use meilisearch_core::{Database, DatabaseOptions, Index}; use meilisearch_core::{Database, DatabaseOptions, Index};
use sha2::Digest; use sha2::Digest;
@ -9,6 +9,7 @@ use sha2::Digest;
use crate::error::{Error as MSError, ResponseError}; use crate::error::{Error as MSError, ResponseError};
use crate::index_update_callback; use crate::index_update_callback;
use crate::option::Opt; use crate::option::Opt;
use crate::dump::DumpInfo;
#[derive(Clone)] #[derive(Clone)]
pub struct Data { pub struct Data {
@ -32,6 +33,7 @@ pub struct DataInner {
pub api_keys: ApiKeys, pub api_keys: ApiKeys,
pub server_pid: u32, pub server_pid: u32,
pub http_payload_size_limit: usize, pub http_payload_size_limit: usize,
pub current_dump: Arc<Mutex<Option<DumpInfo>>>,
} }
#[derive(Clone)] #[derive(Clone)]
@ -82,6 +84,8 @@ impl Data {
api_keys.generate_missing_api_keys(); api_keys.generate_missing_api_keys();
let current_dump = Arc::new(Mutex::new(None));
let inner_data = DataInner { let inner_data = DataInner {
db: db.clone(), db: db.clone(),
db_path, db_path,
@ -90,6 +94,7 @@ impl Data {
api_keys, api_keys,
server_pid, server_pid,
http_payload_size_limit, http_payload_size_limit,
current_dump,
}; };
let data = Data { let data = Data {
@ -135,6 +140,14 @@ impl Data {
Ok(created_index) Ok(created_index)
} }
pub fn get_current_dump_info(&self) -> Option<DumpInfo> {
self.current_dump.lock().unwrap().clone()
}
pub fn set_current_dump_info(&self, dump_info: DumpInfo) {
self.current_dump.lock().unwrap().replace(dump_info);
}
pub fn get_or_create_index<F, R>(&self, uid: &str, f: F) -> Result<R, ResponseError> pub fn get_or_create_index<F, R>(&self, uid: &str, f: F) -> Result<R, ResponseError>
where where
F: FnOnce(&Index) -> Result<R, ResponseError>, F: FnOnce(&Index) -> Result<R, ResponseError>,

View File

@ -1,7 +1,6 @@
use std::fs::{create_dir_all, File}; use std::fs::{create_dir_all, File};
use std::io::prelude::*; use std::io::prelude::*;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Mutex;
use std::thread; use std::thread;
use actix_web::web; use actix_web::web;
@ -11,7 +10,6 @@ use log::{error, info};
use meilisearch_core::{MainWriter, MainReader, UpdateReader}; use meilisearch_core::{MainWriter, MainReader, UpdateReader};
use meilisearch_core::settings::Settings; use meilisearch_core::settings::Settings;
use meilisearch_core::update::{apply_settings_update, apply_documents_addition}; use meilisearch_core::update::{apply_settings_update, apply_documents_addition};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::json; use serde_json::json;
use tempfile::TempDir; use tempfile::TempDir;
@ -22,9 +20,6 @@ use crate::helpers::compression;
use crate::routes::index; use crate::routes::index;
use crate::routes::index::IndexResponse; use crate::routes::index::IndexResponse;
// Mutex to share dump progress.
static DUMP_INFO: Lazy<Mutex<Option<DumpInfo>>> = Lazy::new(Mutex::default);
#[derive(Debug, Serialize, Deserialize, Copy, Clone)] #[derive(Debug, Serialize, Deserialize, Copy, Clone)]
enum DumpVersion { enum DumpVersion {
V1, V1,
@ -211,6 +206,7 @@ pub struct DumpInfo {
pub status: DumpStatus, pub status: DumpStatus,
#[serde(skip_serializing_if = "Option::is_none", flatten)] #[serde(skip_serializing_if = "Option::is_none", flatten)]
pub error: Option<serde_json::Value>, pub error: Option<serde_json::Value>,
} }
impl DumpInfo { impl DumpInfo {
@ -228,14 +224,6 @@ impl DumpInfo {
pub fn dump_already_in_progress(&self) -> bool { pub fn dump_already_in_progress(&self) -> bool {
self.status == DumpStatus::InProgress self.status == DumpStatus::InProgress
} }
pub fn get_current() -> Option<Self> {
DUMP_INFO.lock().unwrap().clone()
}
pub fn set_current(&self) {
*DUMP_INFO.lock().unwrap() = Some(self.clone());
}
} }
/// Generate uid from creation date /// Generate uid from creation date
@ -299,11 +287,11 @@ fn dump_index_documents(data: &web::Data<Data>, reader: &MainReader, dir_path: &
} }
/// Write error with a context. /// Write error with a context.
fn fail_dump_process<E: std::error::Error>(dump_info: DumpInfo, context: &str, error: E) { fn fail_dump_process<E: std::error::Error>(data: &web::Data<Data>, dump_info: DumpInfo, context: &str, error: E) {
let error_message = format!("{}; {}", context, error); let error_message = format!("{}; {}", context, error);
error!("Something went wrong during dump process: {}", &error_message); error!("Something went wrong during dump process: {}", &error_message);
dump_info.with_error(Error::dump_failed(error_message).into()).set_current(); data.set_current_dump_info(dump_info.with_error(Error::dump_failed(error_message).into()))
} }
/// Main function of dump. /// Main function of dump.
@ -312,7 +300,7 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
let update_reader = match data.db.update_read_txn() { let update_reader = match data.db.update_read_txn() {
Ok(r) => r, Ok(r) => r,
Err(e) => { Err(e) => {
fail_dump_process(dump_info, "creating RO transaction on updates", e); fail_dump_process(&data, dump_info, "creating RO transaction on updates", e);
return ; return ;
} }
}; };
@ -321,7 +309,7 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
let main_reader = match data.db.main_read_txn() { let main_reader = match data.db.main_read_txn() {
Ok(r) => r, Ok(r) => r,
Err(e) => { Err(e) => {
fail_dump_process(dump_info, "creating RO transaction on main", e); fail_dump_process(&data, dump_info, "creating RO transaction on main", e);
return ; return ;
} }
}; };
@ -330,7 +318,7 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
let tmp_dir = match TempDir::new() { let tmp_dir = match TempDir::new() {
Ok(tmp_dir) => tmp_dir, Ok(tmp_dir) => tmp_dir,
Err(e) => { Err(e) => {
fail_dump_process(dump_info, "creating temporary directory", e); fail_dump_process(&data, dump_info, "creating temporary directory", e);
return ; return ;
} }
}; };
@ -340,14 +328,14 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) { let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) {
Ok(indexes) => indexes, Ok(indexes) => indexes,
Err(e) => { Err(e) => {
fail_dump_process(dump_info, "listing indexes", e); fail_dump_process(&data, dump_info, "listing indexes", e);
return ; return ;
} }
}; };
// create metadata // create metadata
if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) { if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) {
fail_dump_process(dump_info, "generating metadata", e); fail_dump_process(&data, dump_info, "generating metadata", e);
return ; return ;
} }
@ -357,32 +345,32 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
// create index sub-directory // create index sub-directory
if let Err(e) = create_dir_all(&index_path) { if let Err(e) = create_dir_all(&index_path) {
fail_dump_process(dump_info, &format!("creating directory for index {}", &index.uid), e); fail_dump_process(&data, dump_info, &format!("creating directory for index {}", &index.uid), e);
return ; return ;
} }
// export settings // export settings
if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) { if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) {
fail_dump_process(dump_info, &format!("generating settings for index {}", &index.uid), e); fail_dump_process(&data, dump_info, &format!("generating settings for index {}", &index.uid), e);
return ; return ;
} }
// export documents // export documents
if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) { if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) {
fail_dump_process(dump_info, &format!("generating documents for index {}", &index.uid), e); fail_dump_process(&data, dump_info, &format!("generating documents for index {}", &index.uid), e);
return ; return ;
} }
// export updates // export updates
if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) { if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) {
fail_dump_process(dump_info, &format!("generating updates for index {}", &index.uid), e); fail_dump_process(&data, dump_info, &format!("generating updates for index {}", &index.uid), e);
return ; return ;
} }
} }
// compress dump in a file named `{dump_uid}.dump` in `dumps_dir` // compress dump in a file named `{dump_uid}.dump` in `dumps_dir`
if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) { if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) {
fail_dump_process(dump_info, "compressing dump", e); fail_dump_process(&data, dump_info, "compressing dump", e);
return ; return ;
} }
@ -392,14 +380,14 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
DumpStatus::Done DumpStatus::Done
); );
resume.set_current(); data.set_current_dump_info(resume);
} }
pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<DumpInfo, Error> { pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<DumpInfo, Error> {
create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating temporary directory {}", e)))?; create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating temporary directory {}", e)))?;
// check if a dump is already in progress // check if a dump is already in progress
if let Some(resume) = DumpInfo::get_current() { if let Some(resume) = data.get_current_dump_info() {
if resume.dump_already_in_progress() { if resume.dump_already_in_progress() {
return Err(Error::dump_conflict()) return Err(Error::dump_conflict())
} }
@ -411,7 +399,7 @@ pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<Dum
DumpStatus::InProgress DumpStatus::InProgress
); );
info.set_current(); data.set_current_dump_info(info.clone());
let data = data.clone(); let data = data.clone();
let dumps_dir = dumps_dir.to_path_buf(); let dumps_dir = dumps_dir.to_path_buf();

View File

@ -178,7 +178,7 @@ impl<'a> SearchBuilder<'a> {
all_attributes.extend(&all_formatted); all_attributes.extend(&all_formatted);
}, },
None => { None => {
all_attributes.extend(schema.displayed_name()); all_attributes.extend(schema.displayed_names());
// If we specified at least one attribute to highlight or crop then // If we specified at least one attribute to highlight or crop then
// all available attributes will be returned in the _formatted field. // all available attributes will be returned in the _formatted field.
if self.attributes_to_highlight.is_some() || self.attributes_to_crop.is_some() { if self.attributes_to_highlight.is_some() || self.attributes_to_crop.is_some() {
@ -445,7 +445,7 @@ fn calculate_matches(
continue; continue;
} }
} }
if !schema.displayed_name().contains(attribute) { if !schema.displayed_names().contains(&attribute) {
continue; continue;
} }
if let Some(pos) = matches_result.get_mut(attribute) { if let Some(pos) = matches_result.get_mut(attribute) {

View File

@ -45,7 +45,7 @@ async fn get_dump_status(
let dumps_dir = Path::new(&data.dumps_dir); let dumps_dir = Path::new(&data.dumps_dir);
let dump_uid = &path.dump_uid; let dump_uid = &path.dump_uid;
if let Some(resume) = DumpInfo::get_current() { if let Some(resume) = data.get_current_dump_info() {
if &resume.uid == dump_uid { if &resume.uid == dump_uid {
return Ok(HttpResponse::Ok().json(resume)); return Ok(HttpResponse::Ok().json(resume));
} }

View File

@ -1,4 +1,4 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet, BTreeSet};
use actix_web::{get, post, web, HttpResponse}; use actix_web::{get, post, web, HttpResponse};
use log::warn; use log::warn;
@ -120,8 +120,8 @@ impl SearchQuery {
search_builder.limit(limit); search_builder.limit(limit);
} }
let available_attributes = schema.displayed_name(); let available_attributes = schema.displayed_names();
let mut restricted_attributes: HashSet<&str>; let mut restricted_attributes: BTreeSet<&str>;
match &self.attributes_to_retrieve { match &self.attributes_to_retrieve {
Some(attributes_to_retrieve) => { Some(attributes_to_retrieve) => {
let attributes_to_retrieve: HashSet<&str> = let attributes_to_retrieve: HashSet<&str> =
@ -129,7 +129,7 @@ impl SearchQuery {
if attributes_to_retrieve.contains("*") { if attributes_to_retrieve.contains("*") {
restricted_attributes = available_attributes.clone(); restricted_attributes = available_attributes.clone();
} else { } else {
restricted_attributes = HashSet::new(); restricted_attributes = BTreeSet::new();
search_builder.attributes_to_retrieve(HashSet::new()); search_builder.attributes_to_retrieve(HashSet::new());
for attr in attributes_to_retrieve { for attr in attributes_to_retrieve {
if available_attributes.contains(attr) { if available_attributes.contains(attr) {

View File

@ -523,11 +523,11 @@ async fn delete_attributes_for_faceting(
} }
fn get_indexed_attributes(schema: &Schema) -> Vec<String> { fn get_indexed_attributes(schema: &Schema) -> Vec<String> {
if schema.is_indexed_all() { if schema.is_searchable_all() {
["*"].iter().map(|s| s.to_string()).collect() vec!["*".to_string()]
} else { } else {
schema schema
.indexed_name() .searchable_names()
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect() .collect()
@ -539,7 +539,7 @@ fn get_displayed_attributes(schema: &Schema) -> BTreeSet<String> {
["*"].iter().map(|s| s.to_string()).collect() ["*"].iter().map(|s| s.to_string()).collect()
} else { } else {
schema schema
.displayed_name() .displayed_names()
.iter() .iter()
.map(|s| s.to_string()) .map(|s| s.to_string())
.collect() .collect()

View File

@ -1,2 +1,3 @@
{"status": "processed","updateId": 0,"type": {"name":"Settings","settings":{"ranking_rules":{"Update":["Typo","Words","Proximity","Attribute","WordsPosition","Exactness"]},"distinct_attribute":"Nothing","primary_key":"Nothing","searchable_attributes":{"Update":["balance","picture","age","color","name","gender","email","phone","address","about","registered","latitude","longitude","tags"]},"displayed_attributes":{"Update":["about","address","age","balance","color","email","gender","id","isActive","latitude","longitude","name","phone","picture","registered","tags"]},"stop_words":"Nothing","synonyms":"Nothing","attributes_for_faceting":"Nothing"}}} {"status":"processed","updateId":0,"type":{"name":"Settings","settings":{"ranking_rules":{"Update":["Typo","Words","Proximity","Attribute","WordsPosition","Exactness"]},"distinct_attribute":"Nothing","primary_key":"Nothing","searchable_attributes":"Nothing","displayed_attributes":"Nothing","stop_words":"Nothing","synonyms":"Nothing","attributes_for_faceting":"Nothing"}}}
{"status": "processed", "updateId": 1, "type": { "name": "DocumentsAddition"}} {"status":"processed","updateId":1,"type":{"name":"DocumentsAddition","number":77}}

View File

@ -88,40 +88,6 @@ impl Server {
"wordsPosition", "wordsPosition",
"exactness", "exactness",
], ],
"searchableAttributes": [
"balance",
"picture",
"age",
"color",
"name",
"gender",
"email",
"phone",
"address",
"about",
"registered",
"latitude",
"longitude",
"tags",
],
"displayedAttributes": [
"id",
"isActive",
"balance",
"picture",
"age",
"color",
"name",
"gender",
"email",
"phone",
"address",
"about",
"registered",
"latitude",
"longitude",
"tags",
],
}); });
server.update_all_settings(body).await; server.update_all_settings(body).await;

View File

@ -367,20 +367,10 @@ async fn dump_index_updates_should_be_valid() {
compression::from_tar_gz(&dumps_dir.join(&format!("{}.dump", uid)), tmp_dir_path).unwrap(); compression::from_tar_gz(&dumps_dir.join(&format!("{}.dump", uid)), tmp_dir_path).unwrap();
let file = File::open(tmp_dir_path.join("test").join("updates.jsonl")).unwrap(); let file = File::open(tmp_dir_path.join("test").join("updates.jsonl")).unwrap();
let mut updates = read_all_jsonline(file); let updates = read_all_jsonline(file);
eprintln!("{}\n", updates);
// hotfix until #943 is fixed (https://github.com/meilisearch/MeiliSearch/issues/943) eprintln!("{}", expected);
updates.as_array_mut().unwrap()
.get_mut(0).unwrap()
.get_mut("type").unwrap()
.get_mut("settings").unwrap()
.get_mut("displayed_attributes").unwrap()
.get_mut("Update").unwrap()
.as_array_mut().unwrap().sort_by(|a, b| a.as_str().cmp(&b.as_str()));
eprintln!("{}\n", updates.to_string());
eprintln!("{}", expected.to_string());
assert_json_include!(expected: expected, actual: updates); assert_json_include!(expected: expected, actual: updates);
} }

View File

@ -779,30 +779,32 @@ async fn update_existing_primary_key_is_error() {
} }
#[actix_rt::test] #[actix_rt::test]
async fn test_facets_distribution_attribute() { async fn test_field_distribution_attribute() {
let mut server = common::Server::test_server().await; let mut server = common::Server::test_server().await;
let (response, _status_code) = server.get_index_stats().await; let (response, _status_code) = server.get_index_stats().await;
let expected = json!({ let expected = json!({
"isIndexing": false, "fieldsDistribution": {
"numberOfDocuments":77, "about": 77,
"fieldsDistribution":{ "address": 77,
"age":77, "age": 77,
"gender":77, "balance": 77,
"phone":77, "color": 77,
"name":77, "email": 77,
"registered":77, "gender": 77,
"latitude":77, "id": 77,
"email":77, "isActive": 77,
"tags":77, "latitude": 77,
"longitude":77, "longitude": 77,
"color":77, "name": 77,
"address":77, "phone": 77,
"balance":77, "picture": 77,
"about":77, "registered": 77,
"picture":77, "tags": 77
}, },
"isIndexing": false,
"numberOfDocuments": 77
}); });
assert_json_eq!(expected, response, ordered: true); assert_json_eq!(expected, response, ordered: true);

View File

@ -1789,8 +1789,6 @@ async fn update_documents_with_facet_distribution() {
server.create_index(body).await; server.create_index(body).await;
let settings = json!({ let settings = json!({
"attributesForFaceting": ["genre"], "attributesForFaceting": ["genre"],
"displayedAttributes": ["genre"],
"searchableAttributes": ["genre"]
}); });
server.update_all_settings(settings).await; server.update_all_settings(settings).await;
let update1 = json!([ let update1 = json!([

View File

@ -6,22 +6,14 @@ use serde::{Deserialize, Serialize};
use crate::{SResult, FieldId}; use crate::{SResult, FieldId};
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FieldsMap { pub(crate) struct FieldsMap {
name_map: HashMap<String, FieldId>, name_map: HashMap<String, FieldId>,
id_map: HashMap<FieldId, String>, id_map: HashMap<FieldId, String>,
next_id: FieldId next_id: FieldId
} }
impl FieldsMap { impl FieldsMap {
pub fn len(&self) -> usize { pub(crate) fn insert(&mut self, name: &str) -> SResult<FieldId> {
self.name_map.len()
}
pub fn is_empty(&self) -> bool {
self.name_map.is_empty()
}
pub fn insert(&mut self, name: &str) -> SResult<FieldId> {
if let Some(id) = self.name_map.get(name) { if let Some(id) = self.name_map.get(name) {
return Ok(*id) return Ok(*id)
} }
@ -32,22 +24,15 @@ impl FieldsMap {
Ok(id) Ok(id)
} }
pub fn remove(&mut self, name: &str) { pub(crate) fn id(&self, name: &str) -> Option<FieldId> {
if let Some(id) = self.name_map.get(name) {
self.id_map.remove(&id);
}
self.name_map.remove(name);
}
pub fn id(&self, name: &str) -> Option<FieldId> {
self.name_map.get(name).copied() self.name_map.get(name).copied()
} }
pub fn name<I: Into<FieldId>>(&self, id: I) -> Option<&str> { pub(crate) fn name<I: Into<FieldId>>(&self, id: I) -> Option<&str> {
self.id_map.get(&id.into()).map(|s| s.as_str()) self.id_map.get(&id.into()).map(|s| s.as_str())
} }
pub fn iter(&self) -> Iter<'_, String, FieldId> { pub(crate) fn iter(&self) -> Iter<'_, String, FieldId> {
self.name_map.iter() self.name_map.iter()
} }
} }
@ -69,14 +54,10 @@ mod tests {
assert_eq!(fields_map.id("title"), Some(1.into())); assert_eq!(fields_map.id("title"), Some(1.into()));
assert_eq!(fields_map.id("descritpion"), Some(2.into())); assert_eq!(fields_map.id("descritpion"), Some(2.into()));
assert_eq!(fields_map.id("date"), None); assert_eq!(fields_map.id("date"), None);
assert_eq!(fields_map.len(), 3);
assert_eq!(fields_map.name(0), Some("id")); assert_eq!(fields_map.name(0), Some("id"));
assert_eq!(fields_map.name(1), Some("title")); assert_eq!(fields_map.name(1), Some("title"));
assert_eq!(fields_map.name(2), Some("descritpion")); assert_eq!(fields_map.name(2), Some("descritpion"));
assert_eq!(fields_map.name(4), None); assert_eq!(fields_map.name(4), None);
fields_map.remove("title"); assert_eq!(fields_map.insert("title").unwrap(), 1.into());
assert_eq!(fields_map.id("title"), None);
assert_eq!(fields_map.insert("title").unwrap(), 3.into());
assert_eq!(fields_map.len(), 3);
} }
} }

View File

@ -1,9 +1,10 @@
mod error; mod error;
mod fields_map; mod fields_map;
mod schema; mod schema;
mod position_map;
pub use error::{Error, SResult}; pub use error::{Error, SResult};
pub use fields_map::FieldsMap; use fields_map::FieldsMap;
pub use schema::Schema; pub use schema::Schema;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use zerocopy::{AsBytes, FromBytes}; use zerocopy::{AsBytes, FromBytes};

View File

@ -0,0 +1,161 @@
use std::collections::BTreeMap;
use crate::{FieldId, IndexedPos};
use serde::{Deserialize, Serialize};
/// Two-way mapping between field ids and their indexed positions.
///
/// Both directions describe the same ordering; `insert` updates them together so they
/// stay in sync.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PositionMap {
/// Field ids ordered by position: the element at index `p` is the field at `IndexedPos(p)`.
pos_to_field: Vec<FieldId>,
/// Reverse lookup from a field id to the position it currently occupies.
field_to_pos: BTreeMap<FieldId, IndexedPos>,
}
impl PositionMap {
    /// Places `id` at `position` and returns the position it actually ends up at.
    ///
    /// If `id` is already in the map it is first taken out of its current slot, so the call
    /// moves the field instead of duplicating it. A `position` that falls inside the current
    /// range inserts the field there and shifts every later field one slot further; a
    /// `position` at or past the end appends the field, so the returned position can be
    /// smaller than the requested one.
    pub fn insert(&mut self, id: FieldId, position: IndexedPos) -> IndexedPos {
        // Take the field out of its previous slot (if any) so it is never listed twice.
        let relocated = match self.field_to_pos.get(&id) {
            Some(previous) => {
                self.pos_to_field.remove(previous.0 as usize);
                true
            }
            None => false,
        };

        let requested = position.0 as usize;
        let slot_count = self.pos_to_field.len();
        let inserted_inside = requested < slot_count;
        let landed = if inserted_inside {
            self.pos_to_field.insert(requested, id);
            requested
        } else {
            // Requests at or beyond the end simply append.
            self.pos_to_field.push(id);
            slot_count
        };

        if relocated || inserted_inside {
            // Some fields changed slot, so every recorded position may be stale:
            // recompute the reverse lookup from the ordered list.
            self.field_to_pos.clear();
            for (slot, field) in self.pos_to_field.iter().enumerate() {
                self.field_to_pos.insert(*field, IndexedPos(slot as u16));
            }
        } else {
            // Plain append of a new field: nothing else moved, record just this entry.
            self.field_to_pos.insert(id, IndexedPos(landed as u16));
        }

        IndexedPos(landed as u16)
    }

    /// Puts `id` in the last position and returns that position.
    pub fn push(&mut self, id: FieldId) -> IndexedPos {
        let end = IndexedPos(self.len() as u16);
        self.insert(id, end)
    }

    /// Number of fields that currently have a position.
    pub fn len(&self) -> usize {
        self.pos_to_field.len()
    }

    /// Position of field `id`, or `None` if the field has no position.
    pub fn field_to_pos(&self, id: FieldId) -> Option<IndexedPos> {
        self.field_to_pos.get(&id).copied()
    }

    /// Field stored at `pos`, or `None` if `pos` is past the last position.
    pub fn pos_to_field(&self, pos: IndexedPos) -> Option<FieldId> {
        self.pos_to_field.get(pos.0 as usize).copied()
    }

    /// Iterates over `(field, position)` pairs in increasing position order.
    pub fn field_pos(&self) -> impl Iterator<Item = (FieldId, IndexedPos)> + '_ {
        self.pos_to_field
            .iter()
            .copied()
            .enumerate()
            .map(|(slot, field)| (field, IndexedPos(slot as u16)))
    }
}
// Unit tests for `PositionMap`. Several assertions compare against the exact `Debug`
// rendering of the map, so they check both internal collections at once.
#[cfg(test)]
mod test {
use super::*;
// A default map has no positions in either direction.
#[test]
fn test_default() {
assert_eq!(
format!("{:?}", PositionMap::default()),
r##"PositionMap { pos_to_field: [], field_to_pos: {} }"##
);
}
// `insert` appends, moves an already-present field, and shifts later fields.
#[test]
fn test_insert() {
let mut map = PositionMap::default();
// changing position removes from old position
map.insert(0.into(), 0.into());
map.insert(1.into(), 1.into());
assert_eq!(
format!("{:?}", map),
r##"PositionMap { pos_to_field: [FieldId(0), FieldId(1)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(1): IndexedPos(1)} }"##
);
// field 0 is already present: it is moved to position 1 and field 1 takes position 0
map.insert(0.into(), 1.into());
assert_eq!(
format!("{:?}", map),
r##"PositionMap { pos_to_field: [FieldId(1), FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(1), FieldId(1): IndexedPos(0)} }"##
);
// a new field inserted at position 1 pushes field 0 back to position 2
map.insert(2.into(), 1.into());
assert_eq!(
format!("{:?}", map),
r##"PositionMap { pos_to_field: [FieldId(1), FieldId(2), FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(2), FieldId(1): IndexedPos(0), FieldId(2): IndexedPos(1)} }"##
);
}
// `push` hands out consecutive positions in call order and `len` counts the fields.
#[test]
fn test_push() {
let mut map = PositionMap::default();
map.push(0.into());
map.push(2.into());
assert_eq!(map.len(), 2);
assert_eq!(
format!("{:?}", map),
r##"PositionMap { pos_to_field: [FieldId(0), FieldId(2)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(2): IndexedPos(1)} }"##
);
}
// Field -> position lookup, including a field that was never added.
#[test]
fn test_field_to_pos() {
let mut map = PositionMap::default();
map.push(0.into());
map.push(2.into());
assert_eq!(map.field_to_pos(2.into()), Some(1.into()));
assert_eq!(map.field_to_pos(0.into()), Some(0.into()));
assert_eq!(map.field_to_pos(4.into()), None);
}
// Position -> field lookup, including a position past the end.
#[test]
fn test_pos_to_field() {
let mut map = PositionMap::default();
map.push(0.into());
map.push(2.into());
map.push(3.into());
map.push(4.into());
assert_eq!(
format!("{:?}", map),
r##"PositionMap { pos_to_field: [FieldId(0), FieldId(2), FieldId(3), FieldId(4)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(2): IndexedPos(1), FieldId(3): IndexedPos(2), FieldId(4): IndexedPos(3)} }"##
);
assert_eq!(map.pos_to_field(0.into()), Some(0.into()));
assert_eq!(map.pos_to_field(1.into()), Some(2.into()));
assert_eq!(map.pos_to_field(2.into()), Some(3.into()));
assert_eq!(map.pos_to_field(3.into()), Some(4.into()));
assert_eq!(map.pos_to_field(4.into()), None);
}
// `field_pos` yields (field, position) pairs in position order, then ends.
#[test]
fn test_field_pos() {
let mut map = PositionMap::default();
map.push(0.into());
map.push(2.into());
let mut iter = map.field_pos();
assert_eq!(iter.next(), Some((0.into(), 0.into())));
assert_eq!(iter.next(), Some((2.into(), 1.into())));
assert_eq!(iter.next(), None);
}
}

View File

@ -1,39 +1,10 @@
use crate::{FieldsMap, FieldId, SResult, Error, IndexedPos};
use serde::{Serialize, Deserialize};
use std::collections::{HashMap, HashSet};
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::{BTreeSet, HashSet};
#[derive(Clone, Debug, Serialize, Deserialize)] use serde::{Deserialize, Serialize};
enum OptionAll<T> {
All,
Some(T),
None,
}
impl<T> OptionAll<T> { use crate::position_map::PositionMap;
// replace the value with None and return the previous value use crate::{Error, FieldId, FieldsMap, IndexedPos, SResult};
fn take(&mut self) -> OptionAll<T> {
std::mem::replace(self, OptionAll::None)
}
fn map<U, F: FnOnce(T) -> U>(self, f: F) -> OptionAll<U> {
match self {
OptionAll::Some(x) => OptionAll::Some(f(x)),
OptionAll::All => OptionAll::All,
OptionAll::None => OptionAll::None,
}
}
pub fn is_all(&self) -> bool {
matches!(self, OptionAll::All)
}
}
impl<T> Default for OptionAll<T> {
fn default() -> OptionAll<T> {
OptionAll::All
}
}
#[derive(Clone, Debug, Serialize, Deserialize, Default)] #[derive(Clone, Debug, Serialize, Deserialize, Default)]
pub struct Schema { pub struct Schema {
@ -41,34 +12,26 @@ pub struct Schema {
primary_key: Option<FieldId>, primary_key: Option<FieldId>,
ranked: HashSet<FieldId>, ranked: HashSet<FieldId>,
displayed: OptionAll<HashSet<FieldId>>, displayed: Option<BTreeSet<FieldId>>,
indexed: OptionAll<Vec<FieldId>>, searchable: Option<Vec<FieldId>>,
indexed_map: HashMap<FieldId, IndexedPos>, pub indexed_position: PositionMap,
} }
impl Schema { impl Schema {
pub fn new() -> Schema {
Schema::default()
}
pub fn with_primary_key(name: &str) -> Schema { pub fn with_primary_key(name: &str) -> Schema {
let mut fields_map = FieldsMap::default(); let mut fields_map = FieldsMap::default();
let field_id = fields_map.insert(name).unwrap(); let field_id = fields_map.insert(name).unwrap();
let mut indexed_position = PositionMap::default();
let mut displayed = HashSet::new(); indexed_position.push(field_id);
let mut indexed_map = HashMap::new();
displayed.insert(field_id);
indexed_map.insert(field_id, 0.into());
Schema { Schema {
fields_map, fields_map,
primary_key: Some(field_id), primary_key: Some(field_id),
ranked: HashSet::new(), ranked: HashSet::new(),
displayed: OptionAll::All, displayed: None,
indexed: OptionAll::All, searchable: None,
indexed_map, indexed_position,
} }
} }
@ -78,13 +41,11 @@ impl Schema {
pub fn set_primary_key(&mut self, name: &str) -> SResult<FieldId> { pub fn set_primary_key(&mut self, name: &str) -> SResult<FieldId> {
if self.primary_key.is_some() { if self.primary_key.is_some() {
return Err(Error::PrimaryKeyAlreadyPresent) return Err(Error::PrimaryKeyAlreadyPresent);
} }
let id = self.insert(name)?; let id = self.insert(name)?;
self.primary_key = Some(id); self.primary_key = Some(id);
self.set_indexed(name)?;
self.set_displayed(name)?;
Ok(id) Ok(id)
} }
@ -101,202 +62,98 @@ impl Schema {
self.fields_map.iter().map(|(k, _)| k.as_ref()) self.fields_map.iter().map(|(k, _)| k.as_ref())
} }
pub fn contains(&self, name: &str) -> bool { /// add `name` to the list of known fields
self.fields_map.id(name).is_some()
}
pub fn insert(&mut self, name: &str) -> SResult<FieldId> { pub fn insert(&mut self, name: &str) -> SResult<FieldId> {
self.fields_map.insert(name) self.fields_map.insert(name)
} }
pub fn insert_and_index(&mut self, name: &str) -> SResult<FieldId> { /// Adds `name` to the list of known fields, and in the last position of the indexed_position map. This
match self.fields_map.id(name) { /// field is taken into account when `searchableAttribute` or `displayedAttributes` is set to `"*"`
Some(id) => { pub fn insert_with_position(&mut self, name: &str) -> SResult<(FieldId, IndexedPos)> {
Ok(id) let field_id = self.fields_map.insert(name)?;
} let position = self
None => { .is_searchable(field_id)
self.set_indexed(name)?; .unwrap_or_else(|| self.indexed_position.push(field_id));
self.set_displayed(name) Ok((field_id, position))
}
}
} }
pub fn ranked(&self) -> &HashSet<FieldId> { pub fn ranked(&self) -> &HashSet<FieldId> {
&self.ranked &self.ranked
} }
pub fn ranked_name(&self) -> HashSet<&str> { fn displayed(&self) -> Cow<BTreeSet<FieldId>> {
self.ranked.iter().filter_map(|a| self.name(*a)).collect() match &self.displayed {
} Some(displayed) => Cow::Borrowed(displayed),
None => Cow::Owned(self.indexed_position.field_pos().map(|(f, _)| f).collect()),
pub fn displayed(&self) -> Cow<HashSet<FieldId>> {
match self.displayed {
OptionAll::Some(ref v) => Cow::Borrowed(v),
OptionAll::All => {
let fields = self
.fields_map
.iter()
.map(|(_, &v)| v)
.collect::<HashSet<_>>();
Cow::Owned(fields)
}
OptionAll::None => Cow::Owned(HashSet::new())
} }
} }
pub fn is_displayed_all(&self) -> bool { pub fn is_displayed_all(&self) -> bool {
self.displayed.is_all() self.displayed.is_none()
} }
pub fn displayed_name(&self) -> HashSet<&str> { pub fn displayed_names(&self) -> BTreeSet<&str> {
match self.displayed { self.displayed()
OptionAll::All => self.fields_map.iter().filter_map(|(_, &v)| self.name(v)).collect(),
OptionAll::Some(ref v) => v.iter().filter_map(|a| self.name(*a)).collect(),
OptionAll::None => HashSet::new(),
}
}
pub fn indexed(&self) -> Cow<[FieldId]> {
match self.indexed {
OptionAll::Some(ref v) => Cow::Borrowed(v),
OptionAll::All => {
let fields = self
.fields_map
.iter() .iter()
.map(|(_, &f)| f) .filter_map(|&f| self.name(f))
.collect(); .collect()
Cow::Owned(fields) }
},
OptionAll::None => Cow::Owned(Vec::new()) fn searchable(&self) -> Cow<[FieldId]> {
match &self.searchable {
Some(searchable) => Cow::Borrowed(&searchable),
None => Cow::Owned(self.indexed_position.field_pos().map(|(f, _)| f).collect()),
} }
} }
pub fn indexed_name(&self) -> Vec<&str> { pub fn searchable_names(&self) -> Vec<&str> {
self.indexed().iter().filter_map(|a| self.name(*a)).collect() self.searchable()
.iter()
.filter_map(|a| self.name(*a))
.collect()
} }
pub fn set_ranked(&mut self, name: &str) -> SResult<FieldId> { pub(crate) fn set_ranked(&mut self, name: &str) -> SResult<FieldId> {
let id = self.fields_map.insert(name)?; let id = self.fields_map.insert(name)?;
self.ranked.insert(id); self.ranked.insert(id);
Ok(id) Ok(id)
} }
pub fn set_displayed(&mut self, name: &str) -> SResult<FieldId> {
let id = self.fields_map.insert(name)?;
self.displayed = match self.displayed.take() {
OptionAll::All => OptionAll::All,
OptionAll::None => {
let mut displayed = HashSet::new();
displayed.insert(id);
OptionAll::Some(displayed)
},
OptionAll::Some(mut v) => {
v.insert(id);
OptionAll::Some(v)
}
};
Ok(id)
}
pub fn set_indexed(&mut self, name: &str) -> SResult<(FieldId, IndexedPos)> {
let id = self.fields_map.insert(name)?;
if let Some(indexed_pos) = self.indexed_map.get(&id) {
return Ok((id, *indexed_pos))
};
let pos = self.indexed_map.len() as u16;
self.indexed_map.insert(id, pos.into());
self.indexed = self.indexed.take().map(|mut v| {
v.push(id);
v
});
Ok((id, pos.into()))
}
pub fn clear_ranked(&mut self) { pub fn clear_ranked(&mut self) {
self.ranked.clear(); self.ranked.clear();
} }
pub fn remove_ranked(&mut self, name: &str) {
if let Some(id) = self.fields_map.id(name) {
self.ranked.remove(&id);
}
}
/// remove field from displayed attributes. If diplayed attributes is OptionAll::All,
/// dipslayed attributes is turned into OptionAll::Some(v) where v is all displayed attributes
/// except name.
pub fn remove_displayed(&mut self, name: &str) {
if let Some(id) = self.fields_map.id(name) {
self.displayed = match self.displayed.take() {
OptionAll::Some(mut v) => {
v.remove(&id);
OptionAll::Some(v)
}
OptionAll::All => {
let displayed = self.fields_map
.iter()
.filter_map(|(key, &value)| {
if key != name {
Some(value)
} else {
None
}
})
.collect::<HashSet<_>>();
OptionAll::Some(displayed)
}
OptionAll::None => OptionAll::None,
};
}
}
pub fn remove_indexed(&mut self, name: &str) {
if let Some(id) = self.fields_map.id(name) {
self.indexed_map.remove(&id);
self.indexed = match self.indexed.take() {
// valid because indexed is All and indexed() return the content of
// indexed_map that is already updated
OptionAll::All => OptionAll::Some(self.indexed().into_owned()),
OptionAll::Some(mut v) => {
v.retain(|x| *x != id);
OptionAll::Some(v)
}
OptionAll::None => OptionAll::None,
}
}
}
pub fn is_ranked(&self, id: FieldId) -> bool { pub fn is_ranked(&self, id: FieldId) -> bool {
self.ranked.get(&id).is_some() self.ranked.get(&id).is_some()
} }
pub fn is_displayed(&self, id: FieldId) -> bool { pub fn is_displayed(&self, id: FieldId) -> bool {
match self.displayed { match &self.displayed {
OptionAll::Some(ref v) => v.contains(&id), Some(displayed) => displayed.contains(&id),
OptionAll::All => true, None => true,
OptionAll::None => false,
} }
} }
pub fn is_indexed(&self, id: FieldId) -> Option<&IndexedPos> { pub fn is_searchable(&self, id: FieldId) -> Option<IndexedPos> {
self.indexed_map.get(&id) match &self.searchable {
Some(searchable) if searchable.contains(&id) => self.indexed_position.field_to_pos(id),
None => self.indexed_position.field_to_pos(id),
_ => None,
}
} }
pub fn is_indexed_all(&self) -> bool { pub fn is_searchable_all(&self) -> bool {
self.indexed.is_all() self.searchable.is_none()
} }
pub fn indexed_pos_to_field_id<I: Into<IndexedPos>>(&self, pos: I) -> Option<FieldId> { pub fn indexed_pos_to_field_id<I: Into<IndexedPos>>(&self, pos: I) -> Option<FieldId> {
let indexed_pos = pos.into().0; self.indexed_position.pos_to_field(pos.into())
self
.indexed_map
.iter()
.find(|(_, &v)| v.0 == indexed_pos)
.map(|(&k, _)| k)
} }
pub fn update_ranked<S: AsRef<str>>(&mut self, data: impl IntoIterator<Item = S>) -> SResult<()> { pub fn update_ranked<S: AsRef<str>>(
&mut self,
data: impl IntoIterator<Item = S>,
) -> SResult<()> {
self.ranked.clear(); self.ranked.clear();
for name in data { for name in data {
self.set_ranked(name.as_ref())?; self.set_ranked(name.as_ref())?;
@ -304,46 +161,208 @@ impl Schema {
Ok(()) Ok(())
} }
pub fn update_displayed<S: AsRef<str>>(&mut self, data: impl IntoIterator<Item = S>) -> SResult<()> { pub fn update_displayed<S: AsRef<str>>(
self.displayed = match self.displayed.take() { &mut self,
OptionAll::Some(mut v) => { data: impl IntoIterator<Item = S>,
v.clear(); ) -> SResult<()> {
OptionAll::Some(v) let mut displayed = BTreeSet::new();
}
_ => OptionAll::Some(HashSet::new())
};
for name in data { for name in data {
self.set_displayed(name.as_ref())?; let id = self.fields_map.insert(name.as_ref())?;
displayed.insert(id);
} }
self.displayed.replace(displayed);
Ok(()) Ok(())
} }
pub fn update_indexed<S: AsRef<str>>(&mut self, data: Vec<S>) -> SResult<()> { pub fn update_searchable<S: AsRef<str>>(&mut self, data: Vec<S>) -> SResult<()> {
self.indexed = match self.indexed.take() { let mut searchable = Vec::with_capacity(data.len());
OptionAll::Some(mut v) => { for (pos, name) in data.iter().enumerate() {
v.clear(); let id = self.insert(name.as_ref())?;
OptionAll::Some(v) self.indexed_position.insert(id, IndexedPos(pos as u16));
}, searchable.push(id);
_ => OptionAll::Some(Vec::new()),
};
self.indexed_map.clear();
for name in data {
self.set_indexed(name.as_ref())?;
} }
self.searchable.replace(searchable);
Ok(()) Ok(())
} }
pub fn set_all_fields_as_indexed(&mut self) { pub fn set_all_searchable(&mut self) {
self.indexed = OptionAll::All; self.searchable.take();
self.indexed_map.clear();
for (_name, id) in self.fields_map.iter() {
let pos = self.indexed_map.len() as u16;
self.indexed_map.insert(*id, pos.into());
}
} }
pub fn set_all_fields_as_displayed(&mut self) { pub fn set_all_displayed(&mut self) {
self.displayed = OptionAll::All self.displayed.take();
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_with_primary_key() {
let schema = Schema::with_primary_key("test");
assert_eq!(
format!("{:?}", schema),
r##"Schema { fields_map: FieldsMap { name_map: {"test": FieldId(0)}, id_map: {FieldId(0): "test"}, next_id: FieldId(1) }, primary_key: Some(FieldId(0)), ranked: {}, displayed: None, searchable: None, indexed_position: PositionMap { pos_to_field: [FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(0)} } }"##
);
}
#[test]
fn primary_key() {
let schema = Schema::with_primary_key("test");
assert_eq!(schema.primary_key(), Some("test"));
}
#[test]
fn test_insert_with_position_base() {
let mut schema = Schema::default();
let (id, position) = schema.insert_with_position("foo").unwrap();
assert!(schema.searchable.is_none());
assert!(schema.displayed.is_none());
assert_eq!(id, 0.into());
assert_eq!(position, 0.into());
let (id, position) = schema.insert_with_position("bar").unwrap();
assert_eq!(id, 1.into());
assert_eq!(position, 1.into());
}
#[test]
fn test_insert_with_position_primary_key() {
let mut schema = Schema::with_primary_key("test");
let (id, position) = schema.insert_with_position("foo").unwrap();
assert!(schema.searchable.is_none());
assert!(schema.displayed.is_none());
assert_eq!(id, 1.into());
assert_eq!(position, 1.into());
let (id, position) = schema.insert_with_position("test").unwrap();
assert_eq!(id, 0.into());
assert_eq!(position, 0.into());
}
#[test]
fn test_insert() {
let mut schema = Schema::default();
let field_id = schema.insert("foo").unwrap();
assert!(schema.fields_map.name(field_id).is_some());
assert!(schema.searchable.is_none());
assert!(schema.displayed.is_none());
}
#[test]
fn test_update_searchable() {
let mut schema = Schema::default();
schema.update_searchable(vec!["foo", "bar"]).unwrap();
assert_eq!(
format!("{:?}", schema.indexed_position),
r##"PositionMap { pos_to_field: [FieldId(0), FieldId(1)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(1): IndexedPos(1)} }"##
);
assert_eq!(
format!("{:?}", schema.searchable),
r##"Some([FieldId(0), FieldId(1)])"##
);
schema.update_searchable(vec!["bar"]).unwrap();
assert_eq!(
format!("{:?}", schema.searchable),
r##"Some([FieldId(1)])"##
);
assert_eq!(
format!("{:?}", schema.indexed_position),
r##"PositionMap { pos_to_field: [FieldId(1), FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(1), FieldId(1): IndexedPos(0)} }"##
);
}
#[test]
fn test_update_displayed() {
let mut schema = Schema::default();
schema.update_displayed(vec!["foobar"]).unwrap();
assert_eq!(
format!("{:?}", schema.displayed),
r##"Some({FieldId(0)})"##
);
assert_eq!(
format!("{:?}", schema.indexed_position),
r##"PositionMap { pos_to_field: [], field_to_pos: {} }"##
);
}
#[test]
fn test_is_searchable_all() {
let mut schema = Schema::default();
assert!(schema.is_searchable_all());
schema.update_searchable(vec!["foo"]).unwrap();
assert!(!schema.is_searchable_all());
}
#[test]
fn test_is_displayed_all() {
let mut schema = Schema::default();
assert!(schema.is_displayed_all());
schema.update_displayed(vec!["foo"]).unwrap();
assert!(!schema.is_displayed_all());
}
#[test]
fn test_searchable_names() {
let mut schema = Schema::default();
assert_eq!(format!("{:?}", schema.searchable_names()), r##"[]"##);
schema.insert_with_position("foo").unwrap();
schema.insert_with_position("bar").unwrap();
assert_eq!(
format!("{:?}", schema.searchable_names()),
r##"["foo", "bar"]"##
);
schema.update_searchable(vec!["hello", "world"]).unwrap();
assert_eq!(
format!("{:?}", schema.searchable_names()),
r##"["hello", "world"]"##
);
schema.set_all_searchable();
assert_eq!(
format!("{:?}", schema.searchable_names()),
r##"["hello", "world", "foo", "bar"]"##
);
}
#[test]
fn test_displayed_names() {
let mut schema = Schema::default();
assert_eq!(format!("{:?}", schema.displayed_names()), r##"{}"##);
schema.insert_with_position("foo").unwrap();
schema.insert_with_position("bar").unwrap();
assert_eq!(
format!("{:?}", schema.displayed_names()),
r##"{"bar", "foo"}"##
);
schema.update_displayed(vec!["hello", "world"]).unwrap();
assert_eq!(
format!("{:?}", schema.displayed_names()),
r##"{"hello", "world"}"##
);
schema.set_all_displayed();
assert_eq!(
format!("{:?}", schema.displayed_names()),
r##"{"bar", "foo"}"##
);
}
#[test]
fn test_set_all_searchable() {
let mut schema = Schema::default();
assert!(schema.is_searchable_all());
schema.update_searchable(vec!["foobar"]).unwrap();
assert!(!schema.is_searchable_all());
schema.set_all_searchable();
assert!(schema.is_searchable_all());
}
#[test]
fn test_set_all_displayed() {
let mut schema = Schema::default();
assert!(schema.is_displayed_all());
schema.update_displayed(vec!["foobar"]).unwrap();
assert!(!schema.is_displayed_all());
schema.set_all_displayed();
assert!(schema.is_displayed_all());
} }
} }

View File

@ -4,22 +4,22 @@ use slice_group_by::StrGroupBy;
use std::iter::Peekable; use std::iter::Peekable;
pub fn is_cjk(c: char) -> bool { pub fn is_cjk(c: char) -> bool {
(c >= '\u{1100}' && c <= '\u{11ff}') // Hangul Jamo ('\u{1100}'..='\u{11ff}').contains(&c)
|| (c >= '\u{2e80}' && c <= '\u{2eff}') // CJK Radicals Supplement || ('\u{2e80}'..='\u{2eff}').contains(&c) // CJK Radicals Supplement
|| (c >= '\u{2f00}' && c <= '\u{2fdf}') // Kangxi radical || ('\u{2f00}'..='\u{2fdf}').contains(&c) // Kangxi radical
|| (c >= '\u{3000}' && c <= '\u{303f}') // Japanese-style punctuation || ('\u{3000}'..='\u{303f}').contains(&c) // Japanese-style punctuation
|| (c >= '\u{3040}' && c <= '\u{309f}') // Japanese Hiragana || ('\u{3040}'..='\u{309f}').contains(&c) // Japanese Hiragana
|| (c >= '\u{30a0}' && c <= '\u{30ff}') // Japanese Katakana || ('\u{30a0}'..='\u{30ff}').contains(&c) // Japanese Katakana
|| (c >= '\u{3100}' && c <= '\u{312f}') || ('\u{3100}'..='\u{312f}').contains(&c)
|| (c >= '\u{3130}' && c <= '\u{318F}') // Hangul Compatibility Jamo || ('\u{3130}'..='\u{318F}').contains(&c) // Hangul Compatibility Jamo
|| (c >= '\u{3200}' && c <= '\u{32ff}') // Enclosed CJK Letters and Months || ('\u{3200}'..='\u{32ff}').contains(&c) // Enclosed CJK Letters and Months
|| (c >= '\u{3400}' && c <= '\u{4dbf}') // CJK Unified Ideographs Extension A || ('\u{3400}'..='\u{4dbf}').contains(&c) // CJK Unified Ideographs Extension A
|| (c >= '\u{4e00}' && c <= '\u{9fff}') // CJK Unified Ideographs || ('\u{4e00}'..='\u{9fff}').contains(&c) // CJK Unified Ideographs
|| (c >= '\u{a960}' && c <= '\u{a97f}') // Hangul Jamo Extended-A || ('\u{a960}'..='\u{a97f}').contains(&c) // Hangul Jamo Extended-A
|| (c >= '\u{ac00}' && c <= '\u{d7a3}') // Hangul Syllables || ('\u{ac00}'..='\u{d7a3}').contains(&c) // Hangul Syllables
|| (c >= '\u{d7b0}' && c <= '\u{d7ff}') // Hangul Jamo Extended-B || ('\u{d7b0}'..='\u{d7ff}').contains(&c) // Hangul Jamo Extended-B
|| (c >= '\u{f900}' && c <= '\u{faff}') // CJK Compatibility Ideographs || ('\u{f900}'..='\u{faff}').contains(&c) // CJK Compatibility Ideographs
|| (c >= '\u{ff00}' && c <= '\u{ffef}') // Full-width roman characters and half-width katakana || ('\u{ff00}'..='\u{ffef}').contains(&c) // Full-width roman characters and half-width katakana
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]