diff --git a/Cargo.lock b/Cargo.lock index 94d2d8ba1..7fd30f444 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "actix-codec" version = "0.3.0" @@ -1845,7 +1847,7 @@ dependencies = [ "log", "main_error", "meilisearch-error", - "meilisearch-tokenizer", + "meilisearch-tokenizer 0.1.1 (git+https://github.com/meilisearch/Tokenizer.git?branch=main)", "memmap", "milli", "mime", @@ -1875,6 +1877,22 @@ dependencies = [ "vergen", ] +[[package]] +name = "meilisearch-tokenizer" +version = "0.1.1" +source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.0#833c48b2ee39071f8b4f51abd15122afdb3c8c06" +dependencies = [ + "character_converter", + "cow-utils", + "deunicode", + "fst", + "jieba-rs", + "once_cell", + "slice-group-by", + "unicode-segmentation", + "whatlang", +] + [[package]] name = "meilisearch-tokenizer" version = "0.1.1" @@ -1919,7 +1937,7 @@ dependencies = [ [[package]] name = "milli" version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?rev=b7b23cd#b7b23cd4a8e62932c66c2ebedf9d89ddf089e299" +source = "git+https://github.com/meilisearch/milli.git?rev=2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c#2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c" dependencies = [ "anyhow", "bstr", @@ -1939,7 +1957,7 @@ dependencies = [ "linked-hash-map", "log", "logging_timer", - "meilisearch-tokenizer", + "meilisearch-tokenizer 0.1.1 (git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.0)", "memmap", "num-traits", "obkv", @@ -2234,8 +2252,7 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "pest" version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67" dependencies = [ "ucd-trie", ] @@ -2243,7 +2260,8 @@ dependencies = [ [[package]] name = "pest" version = "2.1.3" -source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" dependencies = [ "ucd-trie", ] diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index a7564f4d9..03ae35729 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -42,7 +42,7 @@ main_error = "0.1.0" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", rev = "b7b23cd" } +milli = { git = "https://github.com/meilisearch/milli.git", rev = "2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c" } mime = "0.3.16" once_cell = "1.5.2" parking_lot = "0.11.1" diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index 188afd522..dfd2ebdc4 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -1,7 +1,7 @@ mod search; mod updates; -use std::collections::HashSet; +use std::collections::{BTreeSet, HashSet}; use std::ops::Deref; use std::sync::Arc; @@ -51,11 +51,24 @@ impl Index { .map(|c| c.to_string()) .collect(); + let stop_words = self + .stop_words(&txn)? + .map(|stop_words| -> anyhow::Result> { + Ok(stop_words + .stream() + .into_strs()? + .into_iter() + .collect()) + }) + .transpose()? + .unwrap_or_else(BTreeSet::new); + Ok(Settings { displayed_attributes: Some(Some(displayed_attributes)), searchable_attributes: Some(Some(searchable_attributes)), attributes_for_faceting: Some(Some(faceted_attributes)), ranking_rules: Some(Some(criteria)), + stop_words: Some(Some(stop_words)), }) } diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 79558dd92..085115af6 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{BTreeSet, HashMap}; use std::io; use std::num::NonZeroUsize; @@ -44,8 +44,12 @@ pub struct Settings { )] pub ranking_rules: Option>>, - // TODO we are missing the stopWords, synonyms and distinctAttribute for the GET settings - // request + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub stop_words: Option>>, } impl Settings { @@ -55,6 +59,7 @@ impl Settings { searchable_attributes: Some(None), attributes_for_faceting: Some(None), ranking_rules: Some(None), + stop_words: Some(None), } } } @@ -170,6 +175,14 @@ impl Index { } } + // We transpose the settings JSON struct into a real setting update. + if let Some(ref stop_words) = settings.stop_words { + match stop_words { + Some(stop_words) => builder.set_stop_words(stop_words.clone()), + _ => builder.reset_stop_words(), + } + } + let result = builder .execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step)); diff --git a/meilisearch-http/src/routes/settings/attributes_for_faceting.rs b/meilisearch-http/src/routes/settings/attributes_for_faceting.rs deleted file mode 100644 index 6c881cff3..000000000 --- a/meilisearch-http/src/routes/settings/attributes_for_faceting.rs +++ /dev/null @@ -1,43 +0,0 @@ -use actix_web::{web, HttpResponse, get}; - -use crate::error::{Error, ResponseError}; -use crate::helpers::Authentication; -use crate::make_update_delete_routes; -use crate::Data; - -#[get( - "/indexes/{index_uid}/settings/attributes-for-faceting", - wrap = "Authentication::Private" -)] -async fn get( - data: web::Data, - index_uid: web::Path, -) -> Result { - let index = data - .db - .load() - .open_index(&index_uid.as_ref()) - .ok_or(Error::index_not_found(&index_uid.as_ref()))?; - - let attributes_for_faceting = data.db.load().main_read::<_, _, ResponseError>(|reader| { - let schema = index.main.schema(reader)?; - let attrs = index.main.attributes_for_faceting(reader)?; - let attr_names = match (&schema, &attrs) { - (Some(schema), Some(attrs)) => attrs - .iter() - .filter_map(|&id| schema.name(id)) - .map(str::to_string) - .collect(), - _ => vec![], - }; - Ok(attr_names) - })?; - - Ok(HttpResponse::Ok().json(attributes_for_faceting)) -} - -make_update_delete_routes!( - "/indexes/{index_uid}/settings/attributes-for-faceting", - Vec, - attributes_for_faceting -); diff --git a/meilisearch-http/src/routes/settings/displayed_attributes.rs b/meilisearch-http/src/routes/settings/displayed_attributes.rs deleted file mode 100644 index b9f36f718..000000000 --- a/meilisearch-http/src/routes/settings/displayed_attributes.rs +++ /dev/null @@ -1,25 +0,0 @@ -use std::collections::HashSet; - -use actix_web::{web, HttpResponse, get}; - -use crate::error::{Error, ResponseError}; -use crate::helpers::Authentication; -use crate::make_update_delete_routes; -use crate::Data; - -#[get( - "/indexes/{index_uid}/settings/displayed-attributes", - wrap = "Authentication::Private" -)] -async fn get( - data: web::Data, - index_uid: web::Path, -) -> Result { - todo!() -} - -make_update_delete_routes!( - "/indexes/{index_uid}/settings/displayed-attributes", - HashSet, - displayed_attributes -); diff --git a/meilisearch-http/src/routes/settings/mod.rs b/meilisearch-http/src/routes/settings/mod.rs index 8c6e04b84..732888ec2 100644 --- a/meilisearch-http/src/routes/settings/mod.rs +++ b/meilisearch-http/src/routes/settings/mod.rs @@ -91,6 +91,12 @@ make_setting_route!( searchable_attributes ); +make_setting_route!( + "/indexes/{index_uid}/settings/stop-words", + std::collections::BTreeSet, + stop_words +); + //make_setting_route!( //"/indexes/{index_uid}/settings/distinct-attribute", //String, @@ -122,7 +128,8 @@ macro_rules! create_services { create_services!( attributes_for_faceting, displayed_attributes, - searchable_attributes + searchable_attributes, + stop_words ); #[post("/indexes/{index_uid}/settings", wrap = "Authentication::Private")] diff --git a/meilisearch-http/src/routes/settings/searchable_attributes.rs b/meilisearch-http/src/routes/settings/searchable_attributes.rs deleted file mode 100644 index a337b0435..000000000 --- a/meilisearch-http/src/routes/settings/searchable_attributes.rs +++ /dev/null @@ -1,34 +0,0 @@ -use actix_web::{web, HttpResponse, get}; - -use crate::data::get_indexed_attributes; -use crate::error::{Error, ResponseError}; -use crate::helpers::Authentication; -use crate::make_update_delete_routes; -use crate::Data; - -#[get( - "/indexes/{index_uid}/settings/searchable-attributes", - wrap = "Authentication::Private" -)] -async fn get( - data: web::Data, - index_uid: web::Path, -) -> Result { - let index = data - .db - .load() - .open_index(&index_uid.as_ref()) - - .ok_or(Error::index_not_found(&index_uid.as_ref()))?; - let reader = data.db.load().main_read_txn()?; - let schema = index.main.schema(&reader)?; - let searchable_attributes: Option> = schema.as_ref().map(get_indexed_attributes); - - Ok(HttpResponse::Ok().json(searchable_attributes)) -} - -make_update_delete_routes!( - "/indexes/{index_uid}/settings/searchable-attributes", - Vec, - searchable_attributes -); diff --git a/meilisearch-http/src/routes/settings/stop_words.rs b/meilisearch-http/src/routes/settings/stop_words.rs deleted file mode 100644 index 05a753f46..000000000 --- a/meilisearch-http/src/routes/settings/stop_words.rs +++ /dev/null @@ -1,33 +0,0 @@ -use std::collections::BTreeSet; - -use crate::make_update_delete_routes; -use actix_web::{web, HttpResponse, get}; - -use crate::error::{Error, ResponseError}; -use crate::helpers::Authentication; -use crate::Data; - -#[get( - "/indexes/{index_uid}/settings/stop-words", - wrap = "Authentication::Private" -)] -async fn get( - data: web::Data, - index_uid: web::Path, -) -> Result { - let index = data - .db - .load() - .open_index(&index_uid.as_ref()) - .ok_or(Error::index_not_found(&index_uid.as_ref()))?; - let reader = data.db.load().main_read_txn()?; - let stop_words = index.main.stop_words(&reader)?; - - Ok(HttpResponse::Ok().json(stop_words)) -} - -make_update_delete_routes!( - "/indexes/{index_uid}/settings/stop-words", - BTreeSet, - stop_words -); diff --git a/meilisearch-http/tests/settings/get_settings.rs b/meilisearch-http/tests/settings/get_settings.rs index d234cbb2b..be09a5090 100644 --- a/meilisearch-http/tests/settings/get_settings.rs +++ b/meilisearch-http/tests/settings/get_settings.rs @@ -16,21 +16,21 @@ async fn get_settings() { let (response, code) = index.settings().await; assert_eq!(code, 200); let settings = response.as_object().unwrap(); - assert_eq!(settings.keys().len(), 4); + assert_eq!(settings.keys().len(), 5); assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"])); assert_eq!(settings["attributesForFaceting"], json!({})); assert_eq!( settings["rankingRules"], json!([ - "typo", "words", + "typo", "proximity", "attribute", - "wordsPosition", "exactness" ]) ); + assert_eq!(settings["stopWords"], json!([])); } #[actix_rt::test] @@ -78,13 +78,14 @@ async fn reset_all_settings() { let server = Server::new().await; let index = server.index("test"); index - .update_settings(json!({"displayedAttributes": ["foo"], "searchableAttributes": ["bar"]})) + .update_settings(json!({"displayedAttributes": ["foo"], "searchableAttributes": ["bar"], "stopWords": ["the"] })) .await; index.wait_update_id(0).await; let (response, code) = index.settings().await; assert_eq!(code, 200); assert_eq!(response["displayedAttributes"], json!(["foo"])); assert_eq!(response["searchableAttributes"], json!(["bar"])); + assert_eq!(response["stopWords"], json!(["the"])); index.delete_settings().await; index.wait_update_id(1).await; @@ -93,6 +94,7 @@ async fn reset_all_settings() { assert_eq!(code, 200); assert_eq!(response["displayedAttributes"], json!(["*"])); assert_eq!(response["searchableAttributes"], json!(["*"])); + assert_eq!(response["stopWords"], json!([])); } #[actix_rt::test] @@ -166,5 +168,6 @@ macro_rules! test_setting_routes { test_setting_routes!( attributes_for_faceting, displayed_attributes, - searchable_attributes + searchable_attributes, + stop_words );