mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 21:44:34 +01:00
update tokenizer version
This commit is contained in:
parent
0447594e02
commit
2852349e68
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -1699,7 +1699,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "meilisearch-tokenizer"
|
||||
version = "0.1.1"
|
||||
source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.1.1#dedea5df4b52d94216a65091f237ac64673bab09"
|
||||
source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.1.2#8d91cd52f30aa4b651a085c15056938f7b599646"
|
||||
dependencies = [
|
||||
"character_converter",
|
||||
"cow-utils",
|
||||
|
@ -26,7 +26,7 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
log = "0.4.11"
|
||||
meilisearch-error = { path = "../meilisearch-error", version = "0.17.0" }
|
||||
meilisearch-schema = { path = "../meilisearch-schema", version = "0.17.0" }
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.1" }
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.2" }
|
||||
meilisearch-types = { path = "../meilisearch-types", version = "0.17.0" }
|
||||
once_cell = "1.5.2"
|
||||
ordered-float = { version = "2.0.1", features = ["serde"] }
|
||||
|
@ -140,7 +140,7 @@ fn process_tokens<'a>(tokens: impl Iterator<Item = Token<'a>>) -> impl Iterator<
|
||||
tokens
|
||||
.scan((0, None), |(offset, prev_kind), token| {
|
||||
match token.kind {
|
||||
TokenKind::Word | TokenKind::StopWord | TokenKind::Any => {
|
||||
TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
|
||||
*offset += match *prev_kind {
|
||||
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
|
||||
Some(_) => 1,
|
||||
@ -227,7 +227,7 @@ mod tests {
|
||||
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stopwords));
|
||||
let analyzer = analyzer.analyze(text);
|
||||
let tokens: Vec<_> = process_tokens(analyzer.tokens()).map(|(_, t)| t.text().to_string()).collect();
|
||||
assert_eq!(tokens, ["为", "一", "包含", "一千多万", "目", "词", "的", "带", "标记", "平衡", "语料库"]);
|
||||
assert_eq!(tokens, ["为", "一", "包含", "一千多万", "目词", "的", "带", "标记", "平衡", "语料库"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -401,7 +401,7 @@ async fn search_with_attribute_to_highlight_wildcard_chinese() {
|
||||
"email": "SunTzu@chorizon.com",
|
||||
"phone": "+1 (810) 407-3258",
|
||||
"address": "吴国",
|
||||
"about": "<em>孫武</em>(前544年-前470年或前496年),字長卿,春秋時期齊國人,著名軍事家、政治家,兵家代表人物。兵書《<em>孙子</em>兵法》的作者,後人尊稱為<em>孫子</em>、兵聖、東方兵聖,山東、蘇州等地尚有祀奉<em>孫武</em>的廟宇兵聖廟。其族人为乐安<em>孙</em>氏始祖,次<em>子孙</em>明为富春<em>孙</em>氏始祖。\r\n",
|
||||
"about": "<em>孫武</em>(前544年-前470年或前496年),字長卿,春秋時期齊國人,著名軍事家、政治家,兵家代表人物。兵書《<em>孙子</em>兵法》的作者,後人尊稱為<em>孫子</em>、兵聖、東方兵聖,山東、蘇州等地尚有祀奉<em>孫武</em>的廟宇兵聖廟。其族人为乐安<em>孙氏</em>始祖,次<em>子孙</em>明为富春孙氏始祖。\r\n",
|
||||
"registered": "2014-10-20T10:13:32 -02:00",
|
||||
"latitude": 17.11935,
|
||||
"longitude": 65.38197,
|
||||
|
Loading…
x
Reference in New Issue
Block a user