mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-03-29 11:00:39 +01:00
Make _matchesPosition length byte based instead of char based
This commit is contained in:
parent
cf31a65a88
commit
39aca661dd
@ -74,7 +74,7 @@ async fn formatted_contain_wildcard() {
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
@r#"
|
||||
{
|
||||
"_formatted": {
|
||||
"id": "852",
|
||||
@ -84,12 +84,12 @@ async fn formatted_contain_wildcard() {
|
||||
"cattos": [
|
||||
{
|
||||
"start": 0,
|
||||
"length": 5
|
||||
"length": 6
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
"###);
|
||||
"#);
|
||||
}
|
||||
}
|
||||
)
|
||||
@ -119,7 +119,7 @@ async fn formatted_contain_wildcard() {
|
||||
allow_duplicates! {
|
||||
assert_json_snapshot!(response["hits"][0],
|
||||
{ "._rankingScore" => "[score]" },
|
||||
@r###"
|
||||
@r#"
|
||||
{
|
||||
"id": 852,
|
||||
"cattos": "pésti",
|
||||
@ -131,12 +131,12 @@ async fn formatted_contain_wildcard() {
|
||||
"cattos": [
|
||||
{
|
||||
"start": 0,
|
||||
"length": 5
|
||||
"length": 6
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
"###)
|
||||
"#)
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
@ -229,8 +229,12 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
||||
.iter()
|
||||
.map(|m| MatchBounds {
|
||||
start: tokens[m.get_first_token_pos()].byte_start,
|
||||
// TODO: Why is this in chars, while start is in bytes?
|
||||
length: m.char_count,
|
||||
length: (m.get_first_token_pos()..m.get_last_token_pos() + 1)
|
||||
.map(|i| tokens[i].clone())
|
||||
.flat_map(|token| token.char_map.clone().unwrap_or(vec![(1, 1); token.char_end - token.char_start] /* Some tokens don't have a char map; here we treat their chars as single-byte chars. */))
|
||||
.map(|(original, _)| original as usize)
|
||||
.take(m.char_count)
|
||||
.sum(),
|
||||
indices: if array_indices.is_empty() {
|
||||
None
|
||||
} else {
|
||||
|
Loading…
x
Reference in New Issue
Block a user