diff --git a/meilisearch-core/src/database.rs b/meilisearch-core/src/database.rs index d93c81acb..94563a5a9 100644 --- a/meilisearch-core/src/database.rs +++ b/meilisearch-core/src/database.rs @@ -350,7 +350,7 @@ impl Database { index.main.put_name(&mut writer, name)?; index.main.put_created_at(&mut writer)?; index.main.put_updated_at(&mut writer)?; - index.main.put_schema(&mut writer, &Schema::new())?; + index.main.put_schema(&mut writer, &Schema::default())?; let env_clone = self.env.clone(); let update_env_clone = self.update_env.clone(); diff --git a/meilisearch-core/src/facets.rs b/meilisearch-core/src/facets.rs index 11135c179..c4689ee87 100644 --- a/meilisearch-core/src/facets.rs +++ b/meilisearch-core/src/facets.rs @@ -245,8 +245,8 @@ mod test { #[test] fn test_facet_key() { - let mut schema = Schema::new(); - let id = schema.insert_and_index("hello").unwrap(); + let mut schema = Schema::default(); + let id = schema.insert_with_position("hello").unwrap().0; let facet_list = [schema.id("hello").unwrap()]; assert_eq!( FacetKey::from_str("hello:12", &schema, &facet_list).unwrap(), @@ -286,8 +286,8 @@ mod test { #[test] fn test_parse_facet_array() { use either::Either::{Left, Right}; - let mut schema = Schema::new(); - let _id = schema.insert_and_index("hello").unwrap(); + let mut schema = Schema::default(); + let _id = schema.insert_with_position("hello").unwrap(); let facet_list = [schema.id("hello").unwrap()]; assert_eq!( FacetFilter::from_str("[[\"hello:12\"]]", &schema, &facet_list).unwrap(), diff --git a/meilisearch-core/src/query_builder.rs b/meilisearch-core/src/query_builder.rs index 6f4485342..21a15cc9c 100644 --- a/meilisearch-core/src/query_builder.rs +++ b/meilisearch-core/src/query_builder.rs @@ -415,8 +415,7 @@ mod tests { let mut final_indexes = Vec::new(); for index in indexes { let name = index.attribute.to_string(); - schema.insert(&name).unwrap(); - let indexed_pos = schema.set_indexed(&name).unwrap().1; + let indexed_pos = 
schema.insert_with_position(&name).unwrap().1; let index = DocIndex { attribute: indexed_pos.0, ..*index @@ -447,7 +446,7 @@ mod tests { .postings_lists .put_postings_list(&mut writer, &word, &postings_list) .unwrap(); - } + } for ((docid, attr, _), count) in fields_counts { let prev = index @@ -461,7 +460,7 @@ mod tests { .documents_fields_counts .put_document_field_count(&mut writer, docid, IndexedPos(attr), prev + count) .unwrap(); - } + } writer.commit().unwrap(); diff --git a/meilisearch-core/src/settings.rs b/meilisearch-core/src/settings.rs index 50ac9c92c..f26865dd7 100644 --- a/meilisearch-core/src/settings.rs +++ b/meilisearch-core/src/settings.rs @@ -13,7 +13,7 @@ static RANKING_RULE_REGEX: Lazy = Lazy::new(|| { regex::Regex::new(r"(asc|desc)\(([a-zA-Z0-9-_]*)\)").unwrap() }); -#[derive(Default, Clone, Serialize, Deserialize)] +#[derive(Default, Clone, Serialize, Deserialize, Debug)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct Settings { #[serde(default, deserialize_with = "deserialize_some")] diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index d46aeb7fe..b783ae978 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -126,13 +126,13 @@ where A: AsRef<[u8]>, let serialized = serde_json::to_vec(value)?; documents_fields.put_document_field(writer, document_id, field_id, &serialized)?; - if let Some(indexed_pos) = schema.is_indexed(field_id) { - let number_of_words = index_value(indexer, document_id, *indexed_pos, value); + if let Some(indexed_pos) = schema.is_searchable(field_id) { + let number_of_words = index_value(indexer, document_id, indexed_pos, value); if let Some(number_of_words) = number_of_words { documents_fields_counts.put_document_field_count( writer, document_id, - *indexed_pos, + indexed_pos, number_of_words as u16, )?; } @@ -146,8 +146,8 @@ where A: AsRef<[u8]>, Ok(()) } -pub fn 
apply_addition<'a, 'b, 'c>( - writer: &'a mut heed::RwTxn<'b, 'c, MainT>, +pub fn apply_addition( + writer: &mut heed::RwTxn, index: &store::Index, new_documents: Vec>, partial: bool @@ -228,7 +228,7 @@ pub fn apply_addition<'a, 'b, 'c>( for (document_id, document) in &documents_additions { // For each key-value pair in the document. for (attribute, value) in document { - let field_id = schema.insert_and_index(&attribute)?; + let (field_id, _) = schema.insert_with_position(&attribute)?; index_document( writer, index.documents_fields, @@ -272,16 +272,16 @@ pub fn apply_addition<'a, 'b, 'c>( Ok(()) } -pub fn apply_documents_partial_addition<'a, 'b, 'c>( - writer: &'a mut heed::RwTxn<'b, 'c, MainT>, +pub fn apply_documents_partial_addition( + writer: &mut heed::RwTxn, index: &store::Index, new_documents: Vec>, ) -> MResult<()> { apply_addition(writer, index, new_documents, true) } -pub fn apply_documents_addition<'a, 'b, 'c>( - writer: &'a mut heed::RwTxn<'b, 'c, MainT>, +pub fn apply_documents_addition( + writer: &mut heed::RwTxn, index: &store::Index, new_documents: Vec>, ) -> MResult<()> { diff --git a/meilisearch-core/src/update/mod.rs b/meilisearch-core/src/update/mod.rs index d1ea83108..d10f484a4 100644 --- a/meilisearch-core/src/update/mod.rs +++ b/meilisearch-core/src/update/mod.rs @@ -212,8 +212,8 @@ pub fn next_update_id( Ok(new_update_id) } -pub fn update_task<'a, 'b, 'c>( - writer: &'a mut heed::RwTxn<'b, 'c, MainT>, +pub fn update_task( + writer: &mut heed::RwTxn, index: &store::Index, update_id: u64, update: Update, diff --git a/meilisearch-core/src/update/settings_update.rs b/meilisearch-core/src/update/settings_update.rs index 94e337265..7b82c1c6e 100644 --- a/meilisearch-core/src/update/settings_update.rs +++ b/meilisearch-core/src/update/settings_update.rs @@ -71,14 +71,14 @@ pub fn apply_settings_update( match settings.searchable_attributes.clone() { UpdateState::Update(v) => { if v.iter().any(|e| e == "*") || v.is_empty() { - 
schema.set_all_fields_as_indexed(); + schema.set_all_searchable(); } else { - schema.update_indexed(v)?; + schema.update_searchable(v)?; } must_reindex = true; }, UpdateState::Clear => { - schema.set_all_fields_as_indexed(); + schema.set_all_searchable(); must_reindex = true; }, UpdateState::Nothing => (), @@ -86,13 +86,13 @@ pub fn apply_settings_update( match settings.displayed_attributes.clone() { UpdateState::Update(v) => { if v.contains("*") || v.is_empty() { - schema.set_all_fields_as_displayed(); + schema.set_all_displayed(); } else { schema.update_displayed(v)? } }, UpdateState::Clear => { - schema.set_all_fields_as_displayed(); + schema.set_all_displayed(); }, UpdateState::Nothing => (), } diff --git a/meilisearch-http/src/data.rs b/meilisearch-http/src/data.rs index 783c81fd8..2deeab693 100644 --- a/meilisearch-http/src/data.rs +++ b/meilisearch-http/src/data.rs @@ -1,7 +1,7 @@ use std::error::Error; use std::ops::Deref; use std::path::PathBuf; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use meilisearch_core::{Database, DatabaseOptions, Index}; use sha2::Digest; @@ -9,6 +9,7 @@ use sha2::Digest; use crate::error::{Error as MSError, ResponseError}; use crate::index_update_callback; use crate::option::Opt; +use crate::dump::DumpInfo; #[derive(Clone)] pub struct Data { @@ -32,6 +33,7 @@ pub struct DataInner { pub api_keys: ApiKeys, pub server_pid: u32, pub http_payload_size_limit: usize, + pub current_dump: Arc>>, } #[derive(Clone)] @@ -82,6 +84,8 @@ impl Data { api_keys.generate_missing_api_keys(); + let current_dump = Arc::new(Mutex::new(None)); + let inner_data = DataInner { db: db.clone(), db_path, @@ -90,6 +94,7 @@ impl Data { api_keys, server_pid, http_payload_size_limit, + current_dump, }; let data = Data { @@ -135,6 +140,14 @@ impl Data { Ok(created_index) } + pub fn get_current_dump_info(&self) -> Option { + self.current_dump.lock().unwrap().clone() + } + + pub fn set_current_dump_info(&self, dump_info: DumpInfo) { + 
self.current_dump.lock().unwrap().replace(dump_info); + } + pub fn get_or_create_index(&self, uid: &str, f: F) -> Result where F: FnOnce(&Index) -> Result, diff --git a/meilisearch-http/src/dump.rs b/meilisearch-http/src/dump.rs index 468dbf640..c4513af6f 100644 --- a/meilisearch-http/src/dump.rs +++ b/meilisearch-http/src/dump.rs @@ -1,7 +1,6 @@ use std::fs::{create_dir_all, File}; use std::io::prelude::*; use std::path::{Path, PathBuf}; -use std::sync::Mutex; use std::thread; use actix_web::web; @@ -11,7 +10,6 @@ use log::{error, info}; use meilisearch_core::{MainWriter, MainReader, UpdateReader}; use meilisearch_core::settings::Settings; use meilisearch_core::update::{apply_settings_update, apply_documents_addition}; -use once_cell::sync::Lazy; use serde::{Deserialize, Serialize}; use serde_json::json; use tempfile::TempDir; @@ -22,9 +20,6 @@ use crate::helpers::compression; use crate::routes::index; use crate::routes::index::IndexResponse; -// Mutex to share dump progress. -static DUMP_INFO: Lazy>> = Lazy::new(Mutex::default); - #[derive(Debug, Serialize, Deserialize, Copy, Clone)] enum DumpVersion { V1, @@ -211,6 +206,7 @@ pub struct DumpInfo { pub status: DumpStatus, #[serde(skip_serializing_if = "Option::is_none", flatten)] pub error: Option, + } impl DumpInfo { @@ -228,14 +224,6 @@ impl DumpInfo { pub fn dump_already_in_progress(&self) -> bool { self.status == DumpStatus::InProgress } - - pub fn get_current() -> Option { - DUMP_INFO.lock().unwrap().clone() - } - - pub fn set_current(&self) { - *DUMP_INFO.lock().unwrap() = Some(self.clone()); - } } /// Generate uid from creation date @@ -299,11 +287,11 @@ fn dump_index_documents(data: &web::Data, reader: &MainReader, dir_path: & } /// Write error with a context. 
-fn fail_dump_process(dump_info: DumpInfo, context: &str, error: E) { +fn fail_dump_process(data: &web::Data, dump_info: DumpInfo, context: &str, error: E) { let error_message = format!("{}; {}", context, error); error!("Something went wrong during dump process: {}", &error_message); - dump_info.with_error(Error::dump_failed(error_message).into()).set_current(); + data.set_current_dump_info(dump_info.with_error(Error::dump_failed(error_message).into())) } /// Main function of dump. @@ -312,7 +300,7 @@ fn dump_process(data: web::Data, dumps_dir: PathBuf, dump_info: DumpInfo) let update_reader = match data.db.update_read_txn() { Ok(r) => r, Err(e) => { - fail_dump_process(dump_info, "creating RO transaction on updates", e); + fail_dump_process(&data, dump_info, "creating RO transaction on updates", e); return ; } }; @@ -321,7 +309,7 @@ fn dump_process(data: web::Data, dumps_dir: PathBuf, dump_info: DumpInfo) let main_reader = match data.db.main_read_txn() { Ok(r) => r, Err(e) => { - fail_dump_process(dump_info, "creating RO transaction on main", e); + fail_dump_process(&data, dump_info, "creating RO transaction on main", e); return ; } }; @@ -330,7 +318,7 @@ fn dump_process(data: web::Data, dumps_dir: PathBuf, dump_info: DumpInfo) let tmp_dir = match TempDir::new() { Ok(tmp_dir) => tmp_dir, Err(e) => { - fail_dump_process(dump_info, "creating temporary directory", e); + fail_dump_process(&data, dump_info, "creating temporary directory", e); return ; } }; @@ -340,14 +328,14 @@ fn dump_process(data: web::Data, dumps_dir: PathBuf, dump_info: DumpInfo) let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) { Ok(indexes) => indexes, Err(e) => { - fail_dump_process(dump_info, "listing indexes", e); + fail_dump_process(&data, dump_info, "listing indexes", e); return ; } }; // create metadata if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) { - fail_dump_process(dump_info, "generating metadata", e); + fail_dump_process(&data, 
dump_info, "generating metadata", e); return ; } @@ -357,32 +345,32 @@ fn dump_process(data: web::Data, dumps_dir: PathBuf, dump_info: DumpInfo) // create index sub-dircetory if let Err(e) = create_dir_all(&index_path) { - fail_dump_process(dump_info, &format!("creating directory for index {}", &index.uid), e); + fail_dump_process(&data, dump_info, &format!("creating directory for index {}", &index.uid), e); return ; } // export settings if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) { - fail_dump_process(dump_info, &format!("generating settings for index {}", &index.uid), e); + fail_dump_process(&data, dump_info, &format!("generating settings for index {}", &index.uid), e); return ; } // export documents if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) { - fail_dump_process(dump_info, &format!("generating documents for index {}", &index.uid), e); + fail_dump_process(&data, dump_info, &format!("generating documents for index {}", &index.uid), e); return ; } // export updates if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) { - fail_dump_process(dump_info, &format!("generating updates for index {}", &index.uid), e); + fail_dump_process(&data, dump_info, &format!("generating updates for index {}", &index.uid), e); return ; } } // compress dump in a file named `{dump_uid}.dump` in `dumps_dir` if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) { - fail_dump_process(dump_info, "compressing dump", e); + fail_dump_process(&data, dump_info, "compressing dump", e); return ; } @@ -392,14 +380,14 @@ fn dump_process(data: web::Data, dumps_dir: PathBuf, dump_info: DumpInfo) DumpStatus::Done ); - resume.set_current(); + data.set_current_dump_info(resume); } pub fn init_dump_process(data: &web::Data, dumps_dir: &Path) -> Result { create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating 
temporary directory {}", e)))?; // check if a dump is already in progress - if let Some(resume) = DumpInfo::get_current() { + if let Some(resume) = data.get_current_dump_info() { if resume.dump_already_in_progress() { return Err(Error::dump_conflict()) } @@ -411,7 +399,7 @@ pub fn init_dump_process(data: &web::Data, dumps_dir: &Path) -> Result SearchBuilder<'a> { all_attributes.extend(&all_formatted); }, None => { - all_attributes.extend(schema.displayed_name()); + all_attributes.extend(schema.displayed_names()); // If we specified at least one attribute to highlight or crop then // all available attributes will be returned in the _formatted field. if self.attributes_to_highlight.is_some() || self.attributes_to_crop.is_some() { @@ -445,7 +445,7 @@ fn calculate_matches( continue; } } - if !schema.displayed_name().contains(attribute) { + if !schema.displayed_names().contains(&attribute) { continue; } if let Some(pos) = matches_result.get_mut(attribute) { diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index 97fafdfa8..3c6d0e060 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -45,7 +45,7 @@ async fn get_dump_status( let dumps_dir = Path::new(&data.dumps_dir); let dump_uid = &path.dump_uid; - if let Some(resume) = DumpInfo::get_current() { + if let Some(resume) = data.get_current_dump_info() { if &resume.uid == dump_uid { return Ok(HttpResponse::Ok().json(resume)); } diff --git a/meilisearch-http/src/routes/search.rs b/meilisearch-http/src/routes/search.rs index 52bbc31f0..0f86cafc8 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap, HashSet, BTreeSet}; use actix_web::{get, post, web, HttpResponse}; use log::warn; @@ -120,8 +120,8 @@ impl SearchQuery { search_builder.limit(limit); } - let available_attributes = schema.displayed_name(); - let mut 
restricted_attributes: HashSet<&str>; + let available_attributes = schema.displayed_names(); + let mut restricted_attributes: BTreeSet<&str>; match &self.attributes_to_retrieve { Some(attributes_to_retrieve) => { let attributes_to_retrieve: HashSet<&str> = @@ -129,7 +129,7 @@ impl SearchQuery { if attributes_to_retrieve.contains("*") { restricted_attributes = available_attributes.clone(); } else { - restricted_attributes = HashSet::new(); + restricted_attributes = BTreeSet::new(); search_builder.attributes_to_retrieve(HashSet::new()); for attr in attributes_to_retrieve { if available_attributes.contains(attr) { diff --git a/meilisearch-http/src/routes/setting.rs b/meilisearch-http/src/routes/setting.rs index 00562eed0..f7fae0a6c 100644 --- a/meilisearch-http/src/routes/setting.rs +++ b/meilisearch-http/src/routes/setting.rs @@ -523,11 +523,11 @@ async fn delete_attributes_for_faceting( } fn get_indexed_attributes(schema: &Schema) -> Vec { - if schema.is_indexed_all() { - ["*"].iter().map(|s| s.to_string()).collect() + if schema.is_searchable_all() { + vec!["*".to_string()] } else { schema - .indexed_name() + .searchable_names() .iter() .map(|s| s.to_string()) .collect() @@ -539,7 +539,7 @@ fn get_displayed_attributes(schema: &Schema) -> BTreeSet { ["*"].iter().map(|s| s.to_string()).collect() } else { schema - .displayed_name() + .displayed_names() .iter() .map(|s| s.to_string()) .collect() diff --git a/meilisearch-http/tests/assets/dumps/v1/test/updates.jsonl b/meilisearch-http/tests/assets/dumps/v1/test/updates.jsonl index 0dcffdce0..5bba3b9f0 100644 --- a/meilisearch-http/tests/assets/dumps/v1/test/updates.jsonl +++ b/meilisearch-http/tests/assets/dumps/v1/test/updates.jsonl @@ -1,2 +1,3 @@ -{"status": "processed","updateId": 0,"type": 
{"name":"Settings","settings":{"ranking_rules":{"Update":["Typo","Words","Proximity","Attribute","WordsPosition","Exactness"]},"distinct_attribute":"Nothing","primary_key":"Nothing","searchable_attributes":{"Update":["balance","picture","age","color","name","gender","email","phone","address","about","registered","latitude","longitude","tags"]},"displayed_attributes":{"Update":["about","address","age","balance","color","email","gender","id","isActive","latitude","longitude","name","phone","picture","registered","tags"]},"stop_words":"Nothing","synonyms":"Nothing","attributes_for_faceting":"Nothing"}}} -{"status": "processed", "updateId": 1, "type": { "name": "DocumentsAddition"}} \ No newline at end of file +{"status":"processed","updateId":0,"type":{"name":"Settings","settings":{"ranking_rules":{"Update":["Typo","Words","Proximity","Attribute","WordsPosition","Exactness"]},"distinct_attribute":"Nothing","primary_key":"Nothing","searchable_attributes":"Nothing","displayed_attributes":"Nothing","stop_words":"Nothing","synonyms":"Nothing","attributes_for_faceting":"Nothing"}}} +{"status":"processed","updateId":1,"type":{"name":"DocumentsAddition","number":77}} + diff --git a/meilisearch-http/tests/common.rs b/meilisearch-http/tests/common.rs index 43cea1447..9111dc9f1 100644 --- a/meilisearch-http/tests/common.rs +++ b/meilisearch-http/tests/common.rs @@ -88,40 +88,6 @@ impl Server { "wordsPosition", "exactness", ], - "searchableAttributes": [ - "balance", - "picture", - "age", - "color", - "name", - "gender", - "email", - "phone", - "address", - "about", - "registered", - "latitude", - "longitude", - "tags", - ], - "displayedAttributes": [ - "id", - "isActive", - "balance", - "picture", - "age", - "color", - "name", - "gender", - "email", - "phone", - "address", - "about", - "registered", - "latitude", - "longitude", - "tags", - ], }); server.update_all_settings(body).await; diff --git a/meilisearch-http/tests/dump.rs b/meilisearch-http/tests/dump.rs index 
701b754aa..9972a6e17 100644 --- a/meilisearch-http/tests/dump.rs +++ b/meilisearch-http/tests/dump.rs @@ -367,20 +367,10 @@ async fn dump_index_updates_should_be_valid() { compression::from_tar_gz(&dumps_dir.join(&format!("{}.dump", uid)), tmp_dir_path).unwrap(); let file = File::open(tmp_dir_path.join("test").join("updates.jsonl")).unwrap(); - let mut updates = read_all_jsonline(file); + let updates = read_all_jsonline(file); - - // hotfix until #943 is fixed (https://github.com/meilisearch/MeiliSearch/issues/943) - updates.as_array_mut().unwrap() - .get_mut(0).unwrap() - .get_mut("type").unwrap() - .get_mut("settings").unwrap() - .get_mut("displayed_attributes").unwrap() - .get_mut("Update").unwrap() - .as_array_mut().unwrap().sort_by(|a, b| a.as_str().cmp(&b.as_str())); - - eprintln!("{}\n", updates.to_string()); - eprintln!("{}", expected.to_string()); + eprintln!("{}\n", updates); + eprintln!("{}", expected); assert_json_include!(expected: expected, actual: updates); } diff --git a/meilisearch-http/tests/index.rs b/meilisearch-http/tests/index.rs index 271507e03..050ffe813 100644 --- a/meilisearch-http/tests/index.rs +++ b/meilisearch-http/tests/index.rs @@ -779,30 +779,32 @@ async fn update_existing_primary_key_is_error() { } #[actix_rt::test] -async fn test_facets_distribution_attribute() { +async fn test_field_distribution_attribute() { let mut server = common::Server::test_server().await; let (response, _status_code) = server.get_index_stats().await; let expected = json!({ - "isIndexing": false, - "numberOfDocuments":77, - "fieldsDistribution":{ - "age":77, - "gender":77, - "phone":77, - "name":77, - "registered":77, - "latitude":77, - "email":77, - "tags":77, - "longitude":77, - "color":77, - "address":77, - "balance":77, - "about":77, - "picture":77, + "fieldsDistribution": { + "about": 77, + "address": 77, + "age": 77, + "balance": 77, + "color": 77, + "email": 77, + "gender": 77, + "id": 77, + "isActive": 77, + "latitude": 77, + "longitude": 77, + 
"name": 77, + "phone": 77, + "picture": 77, + "registered": 77, + "tags": 77 }, + "isIndexing": false, + "numberOfDocuments": 77 }); assert_json_eq!(expected, response, ordered: true); diff --git a/meilisearch-http/tests/search.rs b/meilisearch-http/tests/search.rs index 1d1a60678..6a496809e 100644 --- a/meilisearch-http/tests/search.rs +++ b/meilisearch-http/tests/search.rs @@ -1789,8 +1789,6 @@ async fn update_documents_with_facet_distribution() { server.create_index(body).await; let settings = json!({ "attributesForFaceting": ["genre"], - "displayedAttributes": ["genre"], - "searchableAttributes": ["genre"] }); server.update_all_settings(settings).await; let update1 = json!([ diff --git a/meilisearch-schema/src/fields_map.rs b/meilisearch-schema/src/fields_map.rs index 76d98f4da..b182c9c25 100644 --- a/meilisearch-schema/src/fields_map.rs +++ b/meilisearch-schema/src/fields_map.rs @@ -6,22 +6,14 @@ use serde::{Deserialize, Serialize}; use crate::{SResult, FieldId}; #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct FieldsMap { +pub(crate) struct FieldsMap { name_map: HashMap, id_map: HashMap, next_id: FieldId } impl FieldsMap { - pub fn len(&self) -> usize { - self.name_map.len() - } - - pub fn is_empty(&self) -> bool { - self.name_map.is_empty() - } - - pub fn insert(&mut self, name: &str) -> SResult { + pub(crate) fn insert(&mut self, name: &str) -> SResult { if let Some(id) = self.name_map.get(name) { return Ok(*id) } @@ -32,22 +24,15 @@ impl FieldsMap { Ok(id) } - pub fn remove(&mut self, name: &str) { - if let Some(id) = self.name_map.get(name) { - self.id_map.remove(&id); - } - self.name_map.remove(name); - } - - pub fn id(&self, name: &str) -> Option { + pub(crate) fn id(&self, name: &str) -> Option { self.name_map.get(name).copied() } - pub fn name>(&self, id: I) -> Option<&str> { + pub(crate) fn name>(&self, id: I) -> Option<&str> { self.id_map.get(&id.into()).map(|s| s.as_str()) } - pub fn iter(&self) -> Iter<'_, 
String, FieldId> { + pub(crate) fn iter(&self) -> Iter<'_, String, FieldId> { self.name_map.iter() } } @@ -69,14 +54,10 @@ mod tests { assert_eq!(fields_map.id("title"), Some(1.into())); assert_eq!(fields_map.id("descritpion"), Some(2.into())); assert_eq!(fields_map.id("date"), None); - assert_eq!(fields_map.len(), 3); assert_eq!(fields_map.name(0), Some("id")); assert_eq!(fields_map.name(1), Some("title")); assert_eq!(fields_map.name(2), Some("descritpion")); assert_eq!(fields_map.name(4), None); - fields_map.remove("title"); - assert_eq!(fields_map.id("title"), None); - assert_eq!(fields_map.insert("title").unwrap(), 3.into()); - assert_eq!(fields_map.len(), 3); + assert_eq!(fields_map.insert("title").unwrap(), 1.into()); } } diff --git a/meilisearch-schema/src/lib.rs b/meilisearch-schema/src/lib.rs index a35c30c03..dd2e7c2fb 100644 --- a/meilisearch-schema/src/lib.rs +++ b/meilisearch-schema/src/lib.rs @@ -1,9 +1,10 @@ mod error; mod fields_map; mod schema; +mod position_map; pub use error::{Error, SResult}; -pub use fields_map::FieldsMap; +use fields_map::FieldsMap; pub use schema::Schema; use serde::{Deserialize, Serialize}; use zerocopy::{AsBytes, FromBytes}; diff --git a/meilisearch-schema/src/position_map.rs b/meilisearch-schema/src/position_map.rs new file mode 100644 index 000000000..8d5c675b7 --- /dev/null +++ b/meilisearch-schema/src/position_map.rs @@ -0,0 +1,161 @@ +use std::collections::BTreeMap; + +use crate::{FieldId, IndexedPos}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct PositionMap { + pos_to_field: Vec, + field_to_pos: BTreeMap, +} + +impl PositionMap { + /// insert `id` at the specified `position` updating the other position if a shift is caused by + /// the operation. If `id` is already present in the position map, it is moved to the requested + /// `position`, potentially causing shifts. 
+ pub fn insert(&mut self, id: FieldId, position: IndexedPos) -> IndexedPos { + let mut upos = position.0 as usize; + let mut must_rebuild_map = false; + + if let Some(old_pos) = self.field_to_pos.get(&id) { + let uold_pos = old_pos.0 as usize; + self.pos_to_field.remove(uold_pos); + must_rebuild_map = true; + } + + if upos < self.pos_to_field.len() { + self.pos_to_field.insert(upos, id); + must_rebuild_map = true; + } else { + upos = self.pos_to_field.len(); + self.pos_to_field.push(id); + } + + // we only need to update all the positions if there has been a shift at some point. In + // most cases we only did a push, so we don't need to rebuild the `field_to_pos` map. + if must_rebuild_map { + self.field_to_pos.clear(); + self.field_to_pos.extend( + self.pos_to_field + .iter() + .enumerate() + .map(|(p, f)| (*f, IndexedPos(p as u16))), + ); + } else { + self.field_to_pos.insert(id, IndexedPos(upos as u16)); + } + IndexedPos(upos as u16) + } + + /// Pushes `id` in last position + pub fn push(&mut self, id: FieldId) -> IndexedPos { + let pos = self.len(); + self.insert(id, IndexedPos(pos as u16)) + } + + pub fn len(&self) -> usize { + self.pos_to_field.len() + } + + pub fn field_to_pos(&self, id: FieldId) -> Option { + self.field_to_pos.get(&id).cloned() + } + + pub fn pos_to_field(&self, pos: IndexedPos) -> Option { + let pos = pos.0 as usize; + self.pos_to_field.get(pos).cloned() + } + + pub fn field_pos(&self) -> impl Iterator + '_ { + self.pos_to_field + .iter() + .enumerate() + .map(|(i, f)| (*f, IndexedPos(i as u16))) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_default() { + assert_eq!( + format!("{:?}", PositionMap::default()), + r##"PositionMap { pos_to_field: [], field_to_pos: {} }"## + ); + } + + #[test] + fn test_insert() { + let mut map = PositionMap::default(); + // changing position removes from old position + map.insert(0.into(), 0.into()); + map.insert(1.into(), 1.into()); + assert_eq!( + format!("{:?}", map), 
r##"PositionMap { pos_to_field: [FieldId(0), FieldId(1)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(1): IndexedPos(1)} }"## + ); + map.insert(0.into(), 1.into()); + assert_eq!( + format!("{:?}", map), + r##"PositionMap { pos_to_field: [FieldId(1), FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(1), FieldId(1): IndexedPos(0)} }"## + ); + map.insert(2.into(), 1.into()); + assert_eq!( + format!("{:?}", map), + r##"PositionMap { pos_to_field: [FieldId(1), FieldId(2), FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(2), FieldId(1): IndexedPos(0), FieldId(2): IndexedPos(1)} }"## + ); + } + + #[test] + fn test_push() { + let mut map = PositionMap::default(); + map.push(0.into()); + map.push(2.into()); + assert_eq!(map.len(), 2); + assert_eq!( + format!("{:?}", map), + r##"PositionMap { pos_to_field: [FieldId(0), FieldId(2)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(2): IndexedPos(1)} }"## + ); + } + + #[test] + fn test_field_to_pos() { + let mut map = PositionMap::default(); + map.push(0.into()); + map.push(2.into()); + assert_eq!(map.field_to_pos(2.into()), Some(1.into())); + assert_eq!(map.field_to_pos(0.into()), Some(0.into())); + assert_eq!(map.field_to_pos(4.into()), None); + } + + #[test] + fn test_pos_to_field() { + let mut map = PositionMap::default(); + map.push(0.into()); + map.push(2.into()); + map.push(3.into()); + map.push(4.into()); + assert_eq!( + format!("{:?}", map), + r##"PositionMap { pos_to_field: [FieldId(0), FieldId(2), FieldId(3), FieldId(4)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(2): IndexedPos(1), FieldId(3): IndexedPos(2), FieldId(4): IndexedPos(3)} }"## + ); + assert_eq!(map.pos_to_field(0.into()), Some(0.into())); + assert_eq!(map.pos_to_field(1.into()), Some(2.into())); + assert_eq!(map.pos_to_field(2.into()), Some(3.into())); + assert_eq!(map.pos_to_field(3.into()), Some(4.into())); + assert_eq!(map.pos_to_field(4.into()), None); + } + + #[test] + fn test_field_pos() { + let mut map = PositionMap::default(); 
+ map.push(0.into()); + map.push(2.into()); + let mut iter = map.field_pos(); + assert_eq!(iter.next(), Some((0.into(), 0.into()))); + assert_eq!(iter.next(), Some((2.into(), 1.into()))); + assert_eq!(iter.next(), None); + } +} diff --git a/meilisearch-schema/src/schema.rs b/meilisearch-schema/src/schema.rs index a1992080a..17377cedd 100644 --- a/meilisearch-schema/src/schema.rs +++ b/meilisearch-schema/src/schema.rs @@ -1,39 +1,10 @@ -use crate::{FieldsMap, FieldId, SResult, Error, IndexedPos}; -use serde::{Serialize, Deserialize}; -use std::collections::{HashMap, HashSet}; use std::borrow::Cow; +use std::collections::{BTreeSet, HashSet}; -#[derive(Clone, Debug, Serialize, Deserialize)] -enum OptionAll { - All, - Some(T), - None, -} +use serde::{Deserialize, Serialize}; -impl OptionAll { - // replace the value with None and return the previous value - fn take(&mut self) -> OptionAll { - std::mem::replace(self, OptionAll::None) - } - - fn map U>(self, f: F) -> OptionAll { - match self { - OptionAll::Some(x) => OptionAll::Some(f(x)), - OptionAll::All => OptionAll::All, - OptionAll::None => OptionAll::None, - } - } - - pub fn is_all(&self) -> bool { - matches!(self, OptionAll::All) - } -} - -impl Default for OptionAll { - fn default() -> OptionAll { - OptionAll::All - } -} +use crate::position_map::PositionMap; +use crate::{Error, FieldId, FieldsMap, IndexedPos, SResult}; #[derive(Clone, Debug, Serialize, Deserialize, Default)] pub struct Schema { @@ -41,34 +12,26 @@ pub struct Schema { primary_key: Option, ranked: HashSet, - displayed: OptionAll>, + displayed: Option>, - indexed: OptionAll>, - indexed_map: HashMap, + searchable: Option>, + pub indexed_position: PositionMap, } impl Schema { - pub fn new() -> Schema { - Schema::default() - } - pub fn with_primary_key(name: &str) -> Schema { let mut fields_map = FieldsMap::default(); let field_id = fields_map.insert(name).unwrap(); - - let mut displayed = HashSet::new(); - let mut indexed_map = HashMap::new(); - - 
displayed.insert(field_id); - indexed_map.insert(field_id, 0.into()); + let mut indexed_position = PositionMap::default(); + indexed_position.push(field_id); Schema { fields_map, primary_key: Some(field_id), ranked: HashSet::new(), - displayed: OptionAll::All, - indexed: OptionAll::All, - indexed_map, + displayed: None, + searchable: None, + indexed_position, } } @@ -78,13 +41,11 @@ impl Schema { pub fn set_primary_key(&mut self, name: &str) -> SResult { if self.primary_key.is_some() { - return Err(Error::PrimaryKeyAlreadyPresent) + return Err(Error::PrimaryKeyAlreadyPresent); } let id = self.insert(name)?; self.primary_key = Some(id); - self.set_indexed(name)?; - self.set_displayed(name)?; Ok(id) } @@ -101,202 +62,98 @@ impl Schema { self.fields_map.iter().map(|(k, _)| k.as_ref()) } - pub fn contains(&self, name: &str) -> bool { - self.fields_map.id(name).is_some() - } - + /// add `name` to the list of known fields pub fn insert(&mut self, name: &str) -> SResult { self.fields_map.insert(name) } - pub fn insert_and_index(&mut self, name: &str) -> SResult { - match self.fields_map.id(name) { - Some(id) => { - Ok(id) - } - None => { - self.set_indexed(name)?; - self.set_displayed(name) - } - } + /// Adds `name` to the list of known fields, and in the last position of the indexed_position map. 
This + /// field is taken into account when `searchableAttributes` or `displayedAttributes` is set to `"*"` + pub fn insert_with_position(&mut self, name: &str) -> SResult<(FieldId, IndexedPos)> { + let field_id = self.fields_map.insert(name)?; + let position = self + .is_searchable(field_id) + .unwrap_or_else(|| self.indexed_position.push(field_id)); + Ok((field_id, position)) } pub fn ranked(&self) -> &HashSet { &self.ranked } - pub fn ranked_name(&self) -> HashSet<&str> { - self.ranked.iter().filter_map(|a| self.name(*a)).collect() - } - - pub fn displayed(&self) -> Cow> { - match self.displayed { - OptionAll::Some(ref v) => Cow::Borrowed(v), - OptionAll::All => { - let fields = self - .fields_map - .iter() - .map(|(_, &v)| v) - .collect::>(); - Cow::Owned(fields) - } - OptionAll::None => Cow::Owned(HashSet::new()) + fn displayed(&self) -> Cow> { + match &self.displayed { + Some(displayed) => Cow::Borrowed(displayed), + None => Cow::Owned(self.indexed_position.field_pos().map(|(f, _)| f).collect()), } } pub fn is_displayed_all(&self) -> bool { - self.displayed.is_all() + self.displayed.is_none() } - pub fn displayed_name(&self) -> HashSet<&str> { - match self.displayed { - OptionAll::All => self.fields_map.iter().filter_map(|(_, &v)| self.name(v)).collect(), - OptionAll::Some(ref v) => v.iter().filter_map(|a| self.name(*a)).collect(), - OptionAll::None => HashSet::new(), + pub fn displayed_names(&self) -> BTreeSet<&str> { + self.displayed() + .iter() + .filter_map(|&f| self.name(f)) + .collect() + } + + fn searchable(&self) -> Cow<[FieldId]> { + match &self.searchable { + Some(searchable) => Cow::Borrowed(&searchable), + None => Cow::Owned(self.indexed_position.field_pos().map(|(f, _)| f).collect()), } } - pub fn indexed(&self) -> Cow<[FieldId]> { - match self.indexed { - OptionAll::Some(ref v) => Cow::Borrowed(v), - OptionAll::All => { - let fields = self - .fields_map - .iter() - .map(|(_, &f)| f) - .collect(); - Cow::Owned(fields) - }, - OptionAll::None => 
Cow::Owned(Vec::new()) - } + pub fn searchable_names(&self) -> Vec<&str> { + self.searchable() + .iter() + .filter_map(|a| self.name(*a)) + .collect() } - pub fn indexed_name(&self) -> Vec<&str> { - self.indexed().iter().filter_map(|a| self.name(*a)).collect() - } - - pub fn set_ranked(&mut self, name: &str) -> SResult { + pub(crate) fn set_ranked(&mut self, name: &str) -> SResult { let id = self.fields_map.insert(name)?; self.ranked.insert(id); Ok(id) } - pub fn set_displayed(&mut self, name: &str) -> SResult { - let id = self.fields_map.insert(name)?; - self.displayed = match self.displayed.take() { - OptionAll::All => OptionAll::All, - OptionAll::None => { - let mut displayed = HashSet::new(); - displayed.insert(id); - OptionAll::Some(displayed) - }, - OptionAll::Some(mut v) => { - v.insert(id); - OptionAll::Some(v) - } - }; - Ok(id) - } - - pub fn set_indexed(&mut self, name: &str) -> SResult<(FieldId, IndexedPos)> { - let id = self.fields_map.insert(name)?; - - if let Some(indexed_pos) = self.indexed_map.get(&id) { - return Ok((id, *indexed_pos)) - }; - let pos = self.indexed_map.len() as u16; - self.indexed_map.insert(id, pos.into()); - self.indexed = self.indexed.take().map(|mut v| { - v.push(id); - v - }); - Ok((id, pos.into())) - } - pub fn clear_ranked(&mut self) { self.ranked.clear(); } - pub fn remove_ranked(&mut self, name: &str) { - if let Some(id) = self.fields_map.id(name) { - self.ranked.remove(&id); - } - } - - /// remove field from displayed attributes. If diplayed attributes is OptionAll::All, - /// dipslayed attributes is turned into OptionAll::Some(v) where v is all displayed attributes - /// except name. 
- pub fn remove_displayed(&mut self, name: &str) { - if let Some(id) = self.fields_map.id(name) { - self.displayed = match self.displayed.take() { - OptionAll::Some(mut v) => { - v.remove(&id); - OptionAll::Some(v) - } - OptionAll::All => { - let displayed = self.fields_map - .iter() - .filter_map(|(key, &value)| { - if key != name { - Some(value) - } else { - None - } - }) - .collect::>(); - OptionAll::Some(displayed) - } - OptionAll::None => OptionAll::None, - }; - } - } - - pub fn remove_indexed(&mut self, name: &str) { - if let Some(id) = self.fields_map.id(name) { - self.indexed_map.remove(&id); - self.indexed = match self.indexed.take() { - // valid because indexed is All and indexed() return the content of - // indexed_map that is already updated - OptionAll::All => OptionAll::Some(self.indexed().into_owned()), - OptionAll::Some(mut v) => { - v.retain(|x| *x != id); - OptionAll::Some(v) - } - OptionAll::None => OptionAll::None, - } - } - } - pub fn is_ranked(&self, id: FieldId) -> bool { self.ranked.get(&id).is_some() } pub fn is_displayed(&self, id: FieldId) -> bool { - match self.displayed { - OptionAll::Some(ref v) => v.contains(&id), - OptionAll::All => true, - OptionAll::None => false, + match &self.displayed { + Some(displayed) => displayed.contains(&id), + None => true, } } - pub fn is_indexed(&self, id: FieldId) -> Option<&IndexedPos> { - self.indexed_map.get(&id) + pub fn is_searchable(&self, id: FieldId) -> Option { + match &self.searchable { + Some(searchable) if searchable.contains(&id) => self.indexed_position.field_to_pos(id), + None => self.indexed_position.field_to_pos(id), + _ => None, + } } - pub fn is_indexed_all(&self) -> bool { - self.indexed.is_all() + pub fn is_searchable_all(&self) -> bool { + self.searchable.is_none() } pub fn indexed_pos_to_field_id>(&self, pos: I) -> Option { - let indexed_pos = pos.into().0; - self - .indexed_map - .iter() - .find(|(_, &v)| v.0 == indexed_pos) - .map(|(&k, _)| k) + 
self.indexed_position.pos_to_field(pos.into()) } - pub fn update_ranked>(&mut self, data: impl IntoIterator) -> SResult<()> { + pub fn update_ranked>( + &mut self, + data: impl IntoIterator, + ) -> SResult<()> { self.ranked.clear(); for name in data { self.set_ranked(name.as_ref())?; @@ -304,46 +161,208 @@ impl Schema { Ok(()) } - pub fn update_displayed>(&mut self, data: impl IntoIterator) -> SResult<()> { - self.displayed = match self.displayed.take() { - OptionAll::Some(mut v) => { - v.clear(); - OptionAll::Some(v) - } - _ => OptionAll::Some(HashSet::new()) - }; + pub fn update_displayed>( + &mut self, + data: impl IntoIterator, + ) -> SResult<()> { + let mut displayed = BTreeSet::new(); for name in data { - self.set_displayed(name.as_ref())?; + let id = self.fields_map.insert(name.as_ref())?; + displayed.insert(id); } + self.displayed.replace(displayed); Ok(()) } - pub fn update_indexed>(&mut self, data: Vec) -> SResult<()> { - self.indexed = match self.indexed.take() { - OptionAll::Some(mut v) => { - v.clear(); - OptionAll::Some(v) - }, - _ => OptionAll::Some(Vec::new()), - }; - self.indexed_map.clear(); - for name in data { - self.set_indexed(name.as_ref())?; + pub fn update_searchable>(&mut self, data: Vec) -> SResult<()> { + let mut searchable = Vec::with_capacity(data.len()); + for (pos, name) in data.iter().enumerate() { + let id = self.insert(name.as_ref())?; + self.indexed_position.insert(id, IndexedPos(pos as u16)); + searchable.push(id); } + self.searchable.replace(searchable); Ok(()) } - pub fn set_all_fields_as_indexed(&mut self) { - self.indexed = OptionAll::All; - self.indexed_map.clear(); - - for (_name, id) in self.fields_map.iter() { - let pos = self.indexed_map.len() as u16; - self.indexed_map.insert(*id, pos.into()); - } + pub fn set_all_searchable(&mut self) { + self.searchable.take(); } - pub fn set_all_fields_as_displayed(&mut self) { - self.displayed = OptionAll::All + pub fn set_all_displayed(&mut self) { + self.displayed.take(); + } +} 
+ +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_with_primary_key() { + let schema = Schema::with_primary_key("test"); + assert_eq!( + format!("{:?}", schema), + r##"Schema { fields_map: FieldsMap { name_map: {"test": FieldId(0)}, id_map: {FieldId(0): "test"}, next_id: FieldId(1) }, primary_key: Some(FieldId(0)), ranked: {}, displayed: None, searchable: None, indexed_position: PositionMap { pos_to_field: [FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(0)} } }"## + ); + } + + #[test] + fn primary_key() { + let schema = Schema::with_primary_key("test"); + assert_eq!(schema.primary_key(), Some("test")); + } + + #[test] + fn test_insert_with_position_base() { + let mut schema = Schema::default(); + let (id, position) = schema.insert_with_position("foo").unwrap(); + assert!(schema.searchable.is_none()); + assert!(schema.displayed.is_none()); + assert_eq!(id, 0.into()); + assert_eq!(position, 0.into()); + let (id, position) = schema.insert_with_position("bar").unwrap(); + assert_eq!(id, 1.into()); + assert_eq!(position, 1.into()); + } + + #[test] + fn test_insert_with_position_primary_key() { + let mut schema = Schema::with_primary_key("test"); + let (id, position) = schema.insert_with_position("foo").unwrap(); + assert!(schema.searchable.is_none()); + assert!(schema.displayed.is_none()); + assert_eq!(id, 1.into()); + assert_eq!(position, 1.into()); + let (id, position) = schema.insert_with_position("test").unwrap(); + assert_eq!(id, 0.into()); + assert_eq!(position, 0.into()); + } + + #[test] + fn test_insert() { + let mut schema = Schema::default(); + let field_id = schema.insert("foo").unwrap(); + assert!(schema.fields_map.name(field_id).is_some()); + assert!(schema.searchable.is_none()); + assert!(schema.displayed.is_none()); + } + + #[test] + fn test_update_searchable() { + let mut schema = Schema::default(); + + schema.update_searchable(vec!["foo", "bar"]).unwrap(); + assert_eq!( + format!("{:?}", schema.indexed_position), + r##"PositionMap { 
pos_to_field: [FieldId(0), FieldId(1)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(1): IndexedPos(1)} }"## + ); + assert_eq!( + format!("{:?}", schema.searchable), + r##"Some([FieldId(0), FieldId(1)])"## + ); + schema.update_searchable(vec!["bar"]).unwrap(); + assert_eq!( + format!("{:?}", schema.searchable), + r##"Some([FieldId(1)])"## + ); + assert_eq!( + format!("{:?}", schema.indexed_position), + r##"PositionMap { pos_to_field: [FieldId(1), FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(1), FieldId(1): IndexedPos(0)} }"## + ); + } + + #[test] + fn test_update_displayed() { + let mut schema = Schema::default(); + schema.update_displayed(vec!["foobar"]).unwrap(); + assert_eq!( + format!("{:?}", schema.displayed), + r##"Some({FieldId(0)})"## + ); + assert_eq!( + format!("{:?}", schema.indexed_position), + r##"PositionMap { pos_to_field: [], field_to_pos: {} }"## + ); + } + + #[test] + fn test_is_searchable_all() { + let mut schema = Schema::default(); + assert!(schema.is_searchable_all()); + schema.update_searchable(vec!["foo"]).unwrap(); + assert!(!schema.is_searchable_all()); + } + + #[test] + fn test_is_displayed_all() { + let mut schema = Schema::default(); + assert!(schema.is_displayed_all()); + schema.update_displayed(vec!["foo"]).unwrap(); + assert!(!schema.is_displayed_all()); + } + + #[test] + fn test_searchable_names() { + let mut schema = Schema::default(); + assert_eq!(format!("{:?}", schema.searchable_names()), r##"[]"##); + schema.insert_with_position("foo").unwrap(); + schema.insert_with_position("bar").unwrap(); + assert_eq!( + format!("{:?}", schema.searchable_names()), + r##"["foo", "bar"]"## + ); + schema.update_searchable(vec!["hello", "world"]).unwrap(); + assert_eq!( + format!("{:?}", schema.searchable_names()), + r##"["hello", "world"]"## + ); + schema.set_all_searchable(); + assert_eq!( + format!("{:?}", schema.searchable_names()), + r##"["hello", "world", "foo", "bar"]"## + ); + } + + #[test] + fn test_displayed_names() { + let 
mut schema = Schema::default(); + assert_eq!(format!("{:?}", schema.displayed_names()), r##"{}"##); + schema.insert_with_position("foo").unwrap(); + schema.insert_with_position("bar").unwrap(); + assert_eq!( + format!("{:?}", schema.displayed_names()), + r##"{"bar", "foo"}"## + ); + schema.update_displayed(vec!["hello", "world"]).unwrap(); + assert_eq!( + format!("{:?}", schema.displayed_names()), + r##"{"hello", "world"}"## + ); + schema.set_all_displayed(); + assert_eq!( + format!("{:?}", schema.displayed_names()), + r##"{"bar", "foo"}"## + ); + } + + #[test] + fn test_set_all_searchable() { + let mut schema = Schema::default(); + assert!(schema.is_searchable_all()); + schema.update_searchable(vec!["foobar"]).unwrap(); + assert!(!schema.is_searchable_all()); + schema.set_all_searchable(); + assert!(schema.is_searchable_all()); + } + + #[test] + fn test_set_all_displayed() { + let mut schema = Schema::default(); + assert!(schema.is_displayed_all()); + schema.update_displayed(vec!["foobar"]).unwrap(); + assert!(!schema.is_displayed_all()); + schema.set_all_displayed(); + assert!(schema.is_displayed_all()); } } diff --git a/meilisearch-tokenizer/src/lib.rs b/meilisearch-tokenizer/src/lib.rs index 0015ce047..13874498b 100644 --- a/meilisearch-tokenizer/src/lib.rs +++ b/meilisearch-tokenizer/src/lib.rs @@ -4,22 +4,22 @@ use slice_group_by::StrGroupBy; use std::iter::Peekable; pub fn is_cjk(c: char) -> bool { - (c >= '\u{1100}' && c <= '\u{11ff}') // Hangul Jamo - || (c >= '\u{2e80}' && c <= '\u{2eff}') // CJK Radicals Supplement - || (c >= '\u{2f00}' && c <= '\u{2fdf}') // Kangxi radical - || (c >= '\u{3000}' && c <= '\u{303f}') // Japanese-style punctuation - || (c >= '\u{3040}' && c <= '\u{309f}') // Japanese Hiragana - || (c >= '\u{30a0}' && c <= '\u{30ff}') // Japanese Katakana - || (c >= '\u{3100}' && c <= '\u{312f}') - || (c >= '\u{3130}' && c <= '\u{318F}') // Hangul Compatibility Jamo - || (c >= '\u{3200}' && c <= '\u{32ff}') // Enclosed CJK Letters and Months 
- || (c >= '\u{3400}' && c <= '\u{4dbf}') // CJK Unified Ideographs Extension A - || (c >= '\u{4e00}' && c <= '\u{9fff}') // CJK Unified Ideographs - || (c >= '\u{a960}' && c <= '\u{a97f}') // Hangul Jamo Extended-A - || (c >= '\u{ac00}' && c <= '\u{d7a3}') // Hangul Syllables - || (c >= '\u{d7b0}' && c <= '\u{d7ff}') // Hangul Jamo Extended-B - || (c >= '\u{f900}' && c <= '\u{faff}') // CJK Compatibility Ideographs - || (c >= '\u{ff00}' && c <= '\u{ffef}') // Full-width roman characters and half-width katakana + ('\u{1100}'..='\u{11ff}').contains(&c) + || ('\u{2e80}'..='\u{2eff}').contains(&c) // CJK Radicals Supplement + || ('\u{2f00}'..='\u{2fdf}').contains(&c) // Kangxi radical + || ('\u{3000}'..='\u{303f}').contains(&c) // Japanese-style punctuation + || ('\u{3040}'..='\u{309f}').contains(&c) // Japanese Hiragana + || ('\u{30a0}'..='\u{30ff}').contains(&c) // Japanese Katakana + || ('\u{3100}'..='\u{312f}').contains(&c) + || ('\u{3130}'..='\u{318F}').contains(&c) // Hangul Compatibility Jamo + || ('\u{3200}'..='\u{32ff}').contains(&c) // Enclosed CJK Letters and Months + || ('\u{3400}'..='\u{4dbf}').contains(&c) // CJK Unified Ideographs Extension A + || ('\u{4e00}'..='\u{9fff}').contains(&c) // CJK Unified Ideographs + || ('\u{a960}'..='\u{a97f}').contains(&c) // Hangul Jamo Extended-A + || ('\u{ac00}'..='\u{d7a3}').contains(&c) // Hangul Syllables + || ('\u{d7b0}'..='\u{d7ff}').contains(&c) // Hangul Jamo Extended-B + || ('\u{f900}'..='\u{faff}').contains(&c) // CJK Compatibility Ideographs + || ('\u{ff00}'..='\u{ffef}').contains(&c) // Full-width roman characters and half-width katakana } #[derive(Debug, Copy, Clone, PartialEq, Eq)]