mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
Merge #135
135: Add stop words r=curquiza a=irevoire closes #21 Co-authored-by: tamo <tamo@meilisearch.com>
This commit is contained in:
commit
f881e8691e
30
Cargo.lock
generated
30
Cargo.lock
generated
@ -1,5 +1,7 @@
|
|||||||
# This file is automatically @generated by Cargo.
|
# This file is automatically @generated by Cargo.
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "actix-codec"
|
name = "actix-codec"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
@ -1845,7 +1847,7 @@ dependencies = [
|
|||||||
"log",
|
"log",
|
||||||
"main_error",
|
"main_error",
|
||||||
"meilisearch-error",
|
"meilisearch-error",
|
||||||
"meilisearch-tokenizer",
|
"meilisearch-tokenizer 0.1.1 (git+https://github.com/meilisearch/Tokenizer.git?branch=main)",
|
||||||
"memmap",
|
"memmap",
|
||||||
"milli",
|
"milli",
|
||||||
"mime",
|
"mime",
|
||||||
@ -1875,6 +1877,22 @@ dependencies = [
|
|||||||
"vergen",
|
"vergen",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "meilisearch-tokenizer"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.0#833c48b2ee39071f8b4f51abd15122afdb3c8c06"
|
||||||
|
dependencies = [
|
||||||
|
"character_converter",
|
||||||
|
"cow-utils",
|
||||||
|
"deunicode",
|
||||||
|
"fst",
|
||||||
|
"jieba-rs",
|
||||||
|
"once_cell",
|
||||||
|
"slice-group-by",
|
||||||
|
"unicode-segmentation",
|
||||||
|
"whatlang",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilisearch-tokenizer"
|
name = "meilisearch-tokenizer"
|
||||||
version = "0.1.1"
|
version = "0.1.1"
|
||||||
@ -1919,7 +1937,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "milli"
|
name = "milli"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = "git+https://github.com/meilisearch/milli.git?rev=b7b23cd#b7b23cd4a8e62932c66c2ebedf9d89ddf089e299"
|
source = "git+https://github.com/meilisearch/milli.git?rev=2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c#2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"bstr",
|
"bstr",
|
||||||
@ -1939,7 +1957,7 @@ dependencies = [
|
|||||||
"linked-hash-map",
|
"linked-hash-map",
|
||||||
"log",
|
"log",
|
||||||
"logging_timer",
|
"logging_timer",
|
||||||
"meilisearch-tokenizer",
|
"meilisearch-tokenizer 0.1.1 (git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.0)",
|
||||||
"memmap",
|
"memmap",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"obkv",
|
"obkv",
|
||||||
@ -2234,8 +2252,7 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "pest"
|
name = "pest"
|
||||||
version = "2.1.3"
|
version = "2.1.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67"
|
||||||
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ucd-trie",
|
"ucd-trie",
|
||||||
]
|
]
|
||||||
@ -2243,7 +2260,8 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "pest"
|
name = "pest"
|
||||||
version = "2.1.3"
|
version = "2.1.3"
|
||||||
source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ucd-trie",
|
"ucd-trie",
|
||||||
]
|
]
|
||||||
|
@ -42,7 +42,7 @@ main_error = "0.1.0"
|
|||||||
meilisearch-error = { path = "../meilisearch-error" }
|
meilisearch-error = { path = "../meilisearch-error" }
|
||||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
|
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
|
||||||
memmap = "0.7.0"
|
memmap = "0.7.0"
|
||||||
milli = { git = "https://github.com/meilisearch/milli.git", rev = "b7b23cd" }
|
milli = { git = "https://github.com/meilisearch/milli.git", rev = "2bcdd8844c4ec9f6f8a34617ea0e4321fa633c0c" }
|
||||||
mime = "0.3.16"
|
mime = "0.3.16"
|
||||||
once_cell = "1.5.2"
|
once_cell = "1.5.2"
|
||||||
parking_lot = "0.11.1"
|
parking_lot = "0.11.1"
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
mod search;
|
mod search;
|
||||||
mod updates;
|
mod updates;
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::{BTreeSet, HashSet};
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
@ -51,11 +51,24 @@ impl Index {
|
|||||||
.map(|c| c.to_string())
|
.map(|c| c.to_string())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
let stop_words = self
|
||||||
|
.stop_words(&txn)?
|
||||||
|
.map(|stop_words| -> anyhow::Result<BTreeSet<_>> {
|
||||||
|
Ok(stop_words
|
||||||
|
.stream()
|
||||||
|
.into_strs()?
|
||||||
|
.into_iter()
|
||||||
|
.collect())
|
||||||
|
})
|
||||||
|
.transpose()?
|
||||||
|
.unwrap_or_else(BTreeSet::new);
|
||||||
|
|
||||||
Ok(Settings {
|
Ok(Settings {
|
||||||
displayed_attributes: Some(Some(displayed_attributes)),
|
displayed_attributes: Some(Some(displayed_attributes)),
|
||||||
searchable_attributes: Some(Some(searchable_attributes)),
|
searchable_attributes: Some(Some(searchable_attributes)),
|
||||||
attributes_for_faceting: Some(Some(faceted_attributes)),
|
attributes_for_faceting: Some(Some(faceted_attributes)),
|
||||||
ranking_rules: Some(Some(criteria)),
|
ranking_rules: Some(Some(criteria)),
|
||||||
|
stop_words: Some(Some(stop_words)),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::{BTreeSet, HashMap};
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
|
|
||||||
@ -44,8 +44,12 @@ pub struct Settings {
|
|||||||
)]
|
)]
|
||||||
pub ranking_rules: Option<Option<Vec<String>>>,
|
pub ranking_rules: Option<Option<Vec<String>>>,
|
||||||
|
|
||||||
// TODO we are missing the stopWords, synonyms and distinctAttribute for the GET settings
|
#[serde(
|
||||||
// request
|
default,
|
||||||
|
deserialize_with = "deserialize_some",
|
||||||
|
skip_serializing_if = "Option::is_none"
|
||||||
|
)]
|
||||||
|
pub stop_words: Option<Option<BTreeSet<String>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Settings {
|
impl Settings {
|
||||||
@ -55,6 +59,7 @@ impl Settings {
|
|||||||
searchable_attributes: Some(None),
|
searchable_attributes: Some(None),
|
||||||
attributes_for_faceting: Some(None),
|
attributes_for_faceting: Some(None),
|
||||||
ranking_rules: Some(None),
|
ranking_rules: Some(None),
|
||||||
|
stop_words: Some(None),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -170,6 +175,14 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We transpose the settings JSON struct into a real setting update.
|
||||||
|
if let Some(ref stop_words) = settings.stop_words {
|
||||||
|
match stop_words {
|
||||||
|
Some(stop_words) => builder.set_stop_words(stop_words.clone()),
|
||||||
|
_ => builder.reset_stop_words(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let result = builder
|
let result = builder
|
||||||
.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
|
.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
|
||||||
|
|
||||||
|
@ -1,43 +0,0 @@
|
|||||||
use actix_web::{web, HttpResponse, get};
|
|
||||||
|
|
||||||
use crate::error::{Error, ResponseError};
|
|
||||||
use crate::helpers::Authentication;
|
|
||||||
use crate::make_update_delete_routes;
|
|
||||||
use crate::Data;
|
|
||||||
|
|
||||||
#[get(
|
|
||||||
"/indexes/{index_uid}/settings/attributes-for-faceting",
|
|
||||||
wrap = "Authentication::Private"
|
|
||||||
)]
|
|
||||||
async fn get(
|
|
||||||
data: web::Data<Data>,
|
|
||||||
index_uid: web::Path<String>,
|
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
|
||||||
let index = data
|
|
||||||
.db
|
|
||||||
.load()
|
|
||||||
.open_index(&index_uid.as_ref())
|
|
||||||
.ok_or(Error::index_not_found(&index_uid.as_ref()))?;
|
|
||||||
|
|
||||||
let attributes_for_faceting = data.db.load().main_read::<_, _, ResponseError>(|reader| {
|
|
||||||
let schema = index.main.schema(reader)?;
|
|
||||||
let attrs = index.main.attributes_for_faceting(reader)?;
|
|
||||||
let attr_names = match (&schema, &attrs) {
|
|
||||||
(Some(schema), Some(attrs)) => attrs
|
|
||||||
.iter()
|
|
||||||
.filter_map(|&id| schema.name(id))
|
|
||||||
.map(str::to_string)
|
|
||||||
.collect(),
|
|
||||||
_ => vec![],
|
|
||||||
};
|
|
||||||
Ok(attr_names)
|
|
||||||
})?;
|
|
||||||
|
|
||||||
Ok(HttpResponse::Ok().json(attributes_for_faceting))
|
|
||||||
}
|
|
||||||
|
|
||||||
make_update_delete_routes!(
|
|
||||||
"/indexes/{index_uid}/settings/attributes-for-faceting",
|
|
||||||
Vec<String>,
|
|
||||||
attributes_for_faceting
|
|
||||||
);
|
|
@ -1,25 +0,0 @@
|
|||||||
use std::collections::HashSet;
|
|
||||||
|
|
||||||
use actix_web::{web, HttpResponse, get};
|
|
||||||
|
|
||||||
use crate::error::{Error, ResponseError};
|
|
||||||
use crate::helpers::Authentication;
|
|
||||||
use crate::make_update_delete_routes;
|
|
||||||
use crate::Data;
|
|
||||||
|
|
||||||
#[get(
|
|
||||||
"/indexes/{index_uid}/settings/displayed-attributes",
|
|
||||||
wrap = "Authentication::Private"
|
|
||||||
)]
|
|
||||||
async fn get(
|
|
||||||
data: web::Data<Data>,
|
|
||||||
index_uid: web::Path<String>,
|
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
make_update_delete_routes!(
|
|
||||||
"/indexes/{index_uid}/settings/displayed-attributes",
|
|
||||||
HashSet<String>,
|
|
||||||
displayed_attributes
|
|
||||||
);
|
|
@ -91,6 +91,12 @@ make_setting_route!(
|
|||||||
searchable_attributes
|
searchable_attributes
|
||||||
);
|
);
|
||||||
|
|
||||||
|
make_setting_route!(
|
||||||
|
"/indexes/{index_uid}/settings/stop-words",
|
||||||
|
std::collections::BTreeSet<String>,
|
||||||
|
stop_words
|
||||||
|
);
|
||||||
|
|
||||||
//make_setting_route!(
|
//make_setting_route!(
|
||||||
//"/indexes/{index_uid}/settings/distinct-attribute",
|
//"/indexes/{index_uid}/settings/distinct-attribute",
|
||||||
//String,
|
//String,
|
||||||
@ -122,7 +128,8 @@ macro_rules! create_services {
|
|||||||
create_services!(
|
create_services!(
|
||||||
attributes_for_faceting,
|
attributes_for_faceting,
|
||||||
displayed_attributes,
|
displayed_attributes,
|
||||||
searchable_attributes
|
searchable_attributes,
|
||||||
|
stop_words
|
||||||
);
|
);
|
||||||
|
|
||||||
#[post("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
|
#[post("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
|
||||||
|
@ -1,34 +0,0 @@
|
|||||||
use actix_web::{web, HttpResponse, get};
|
|
||||||
|
|
||||||
use crate::data::get_indexed_attributes;
|
|
||||||
use crate::error::{Error, ResponseError};
|
|
||||||
use crate::helpers::Authentication;
|
|
||||||
use crate::make_update_delete_routes;
|
|
||||||
use crate::Data;
|
|
||||||
|
|
||||||
#[get(
|
|
||||||
"/indexes/{index_uid}/settings/searchable-attributes",
|
|
||||||
wrap = "Authentication::Private"
|
|
||||||
)]
|
|
||||||
async fn get(
|
|
||||||
data: web::Data<Data>,
|
|
||||||
index_uid: web::Path<String>,
|
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
|
||||||
let index = data
|
|
||||||
.db
|
|
||||||
.load()
|
|
||||||
.open_index(&index_uid.as_ref())
|
|
||||||
|
|
||||||
.ok_or(Error::index_not_found(&index_uid.as_ref()))?;
|
|
||||||
let reader = data.db.load().main_read_txn()?;
|
|
||||||
let schema = index.main.schema(&reader)?;
|
|
||||||
let searchable_attributes: Option<Vec<String>> = schema.as_ref().map(get_indexed_attributes);
|
|
||||||
|
|
||||||
Ok(HttpResponse::Ok().json(searchable_attributes))
|
|
||||||
}
|
|
||||||
|
|
||||||
make_update_delete_routes!(
|
|
||||||
"/indexes/{index_uid}/settings/searchable-attributes",
|
|
||||||
Vec<String>,
|
|
||||||
searchable_attributes
|
|
||||||
);
|
|
@ -1,33 +0,0 @@
|
|||||||
use std::collections::BTreeSet;
|
|
||||||
|
|
||||||
use crate::make_update_delete_routes;
|
|
||||||
use actix_web::{web, HttpResponse, get};
|
|
||||||
|
|
||||||
use crate::error::{Error, ResponseError};
|
|
||||||
use crate::helpers::Authentication;
|
|
||||||
use crate::Data;
|
|
||||||
|
|
||||||
#[get(
|
|
||||||
"/indexes/{index_uid}/settings/stop-words",
|
|
||||||
wrap = "Authentication::Private"
|
|
||||||
)]
|
|
||||||
async fn get(
|
|
||||||
data: web::Data<Data>,
|
|
||||||
index_uid: web::Path<String>,
|
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
|
||||||
let index = data
|
|
||||||
.db
|
|
||||||
.load()
|
|
||||||
.open_index(&index_uid.as_ref())
|
|
||||||
.ok_or(Error::index_not_found(&index_uid.as_ref()))?;
|
|
||||||
let reader = data.db.load().main_read_txn()?;
|
|
||||||
let stop_words = index.main.stop_words(&reader)?;
|
|
||||||
|
|
||||||
Ok(HttpResponse::Ok().json(stop_words))
|
|
||||||
}
|
|
||||||
|
|
||||||
make_update_delete_routes!(
|
|
||||||
"/indexes/{index_uid}/settings/stop-words",
|
|
||||||
BTreeSet<String>,
|
|
||||||
stop_words
|
|
||||||
);
|
|
@ -16,21 +16,21 @@ async fn get_settings() {
|
|||||||
let (response, code) = index.settings().await;
|
let (response, code) = index.settings().await;
|
||||||
assert_eq!(code, 200);
|
assert_eq!(code, 200);
|
||||||
let settings = response.as_object().unwrap();
|
let settings = response.as_object().unwrap();
|
||||||
assert_eq!(settings.keys().len(), 4);
|
assert_eq!(settings.keys().len(), 5);
|
||||||
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
||||||
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
||||||
assert_eq!(settings["attributesForFaceting"], json!({}));
|
assert_eq!(settings["attributesForFaceting"], json!({}));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
settings["rankingRules"],
|
settings["rankingRules"],
|
||||||
json!([
|
json!([
|
||||||
"typo",
|
|
||||||
"words",
|
"words",
|
||||||
|
"typo",
|
||||||
"proximity",
|
"proximity",
|
||||||
"attribute",
|
"attribute",
|
||||||
"wordsPosition",
|
|
||||||
"exactness"
|
"exactness"
|
||||||
])
|
])
|
||||||
);
|
);
|
||||||
|
assert_eq!(settings["stopWords"], json!([]));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
@ -78,13 +78,14 @@ async fn reset_all_settings() {
|
|||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
let index = server.index("test");
|
let index = server.index("test");
|
||||||
index
|
index
|
||||||
.update_settings(json!({"displayedAttributes": ["foo"], "searchableAttributes": ["bar"]}))
|
.update_settings(json!({"displayedAttributes": ["foo"], "searchableAttributes": ["bar"], "stopWords": ["the"] }))
|
||||||
.await;
|
.await;
|
||||||
index.wait_update_id(0).await;
|
index.wait_update_id(0).await;
|
||||||
let (response, code) = index.settings().await;
|
let (response, code) = index.settings().await;
|
||||||
assert_eq!(code, 200);
|
assert_eq!(code, 200);
|
||||||
assert_eq!(response["displayedAttributes"], json!(["foo"]));
|
assert_eq!(response["displayedAttributes"], json!(["foo"]));
|
||||||
assert_eq!(response["searchableAttributes"], json!(["bar"]));
|
assert_eq!(response["searchableAttributes"], json!(["bar"]));
|
||||||
|
assert_eq!(response["stopWords"], json!(["the"]));
|
||||||
|
|
||||||
index.delete_settings().await;
|
index.delete_settings().await;
|
||||||
index.wait_update_id(1).await;
|
index.wait_update_id(1).await;
|
||||||
@ -93,6 +94,7 @@ async fn reset_all_settings() {
|
|||||||
assert_eq!(code, 200);
|
assert_eq!(code, 200);
|
||||||
assert_eq!(response["displayedAttributes"], json!(["*"]));
|
assert_eq!(response["displayedAttributes"], json!(["*"]));
|
||||||
assert_eq!(response["searchableAttributes"], json!(["*"]));
|
assert_eq!(response["searchableAttributes"], json!(["*"]));
|
||||||
|
assert_eq!(response["stopWords"], json!([]));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
@ -166,5 +168,6 @@ macro_rules! test_setting_routes {
|
|||||||
test_setting_routes!(
|
test_setting_routes!(
|
||||||
attributes_for_faceting,
|
attributes_for_faceting,
|
||||||
displayed_attributes,
|
displayed_attributes,
|
||||||
searchable_attributes
|
searchable_attributes,
|
||||||
|
stop_words
|
||||||
);
|
);
|
||||||
|
Loading…
Reference in New Issue
Block a user