mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
Merge #4033
4033: Fix thai synonyms r=Kerollmops a=Kerollmops Fixes #4031 Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
287cf25d39
@ -226,9 +226,9 @@ fn process_tokens<'a>(
|
|||||||
) -> impl Iterator<Item = (usize, Token<'a>)> {
|
) -> impl Iterator<Item = (usize, Token<'a>)> {
|
||||||
tokens
|
tokens
|
||||||
.skip_while(|token| token.is_separator())
|
.skip_while(|token| token.is_separator())
|
||||||
.scan((0, None), |(offset, prev_kind), token| {
|
.scan((0, None), |(offset, prev_kind), mut token| {
|
||||||
match token.kind {
|
match token.kind {
|
||||||
TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
|
TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
|
||||||
*offset += match *prev_kind {
|
*offset += match *prev_kind {
|
||||||
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
|
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
|
||||||
Some(_) => 1,
|
Some(_) => 1,
|
||||||
@ -244,7 +244,7 @@ fn process_tokens<'a>(
|
|||||||
{
|
{
|
||||||
*prev_kind = Some(token.kind);
|
*prev_kind = Some(token.kind);
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => token.kind = TokenKind::Unknown,
|
||||||
}
|
}
|
||||||
Some((*offset, token))
|
Some((*offset, token))
|
||||||
})
|
})
|
||||||
|
@ -573,7 +573,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
tokenizer
|
tokenizer
|
||||||
.tokenize(text)
|
.tokenize(text)
|
||||||
.filter_map(|token| {
|
.filter_map(|token| {
|
||||||
if token.is_word() {
|
if token.is_word() && !token.lemma().is_empty() {
|
||||||
Some(token.lemma().to_string())
|
Some(token.lemma().to_string())
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
@ -1422,6 +1422,43 @@ mod tests {
|
|||||||
assert!(result.documents_ids.is_empty());
|
assert!(result.documents_ids.is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn thai_synonyms() {
|
||||||
|
let mut index = TempIndex::new();
|
||||||
|
index.index_documents_config.autogenerate_docids = true;
|
||||||
|
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
// Send 2 documents; their ids are autogenerated.
|
||||||
|
index
|
||||||
|
.add_documents_using_wtxn(
|
||||||
|
&mut wtxn,
|
||||||
|
documents!([
|
||||||
|
{ "name": "ยี่ปุ่น" },
|
||||||
|
{ "name": "ญี่ปุ่น" },
|
||||||
|
]),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// In the same transaction provide some synonyms
|
||||||
|
index
|
||||||
|
.update_settings_using_wtxn(&mut wtxn, |settings| {
|
||||||
|
settings.set_synonyms(btreemap! {
|
||||||
|
"japanese".to_string() => vec![S("ญี่ปุ่น"), S("ยี่ปุ่น")],
|
||||||
|
});
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
// Ensure the synonyms are actually stored
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let synonyms = index.synonyms(&rtxn).unwrap();
|
||||||
|
assert!(!synonyms.is_empty()); // at this point the index should return something
|
||||||
|
|
||||||
|
// Check that we can use synonyms
|
||||||
|
let result = index.search(&rtxn).query("japanese").execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn setting_searchable_recomputes_other_settings() {
|
fn setting_searchable_recomputes_other_settings() {
|
||||||
let index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user