mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00
Fix search highlight for non-unicode chars
The `matching_bytes` function now takes a `&Token` and:
- gets the number of bytes to highlight (unchanged);
- uses `Token.num_graphemes_from_bytes` to get the number of grapheme clusters to highlight.

In essence, the `matching_bytes` function now returns the number of matching grapheme clusters instead of bytes. Should this function be renamed then?

Added proper highlighting in the HTTP UI:
- requires a dependency on `unicode-segmentation` to extract grapheme clusters from tokens;
- the `<mark>` tag is put around only the matched part;
- before this change, the entire word was highlighted even if only a part of it matched.
This commit is contained in:
parent
559e019de1
commit
30247d70cd
3 changed files with 26 additions and 12 deletions
|
@ -3,6 +3,7 @@ use std::collections::{BTreeMap, HashSet};
|
|||
use std::ops::{Index, IndexMut};
|
||||
|
||||
use levenshtein_automata::{Distance, DFA};
|
||||
use meilisearch_tokenizer::Token;
|
||||
|
||||
use super::build_dfa;
|
||||
use crate::search::query_tree::{Operation, Query};
|
||||
|
@ -33,15 +34,18 @@ impl MatchingWords {
|
|||
}
|
||||
|
||||
/// Returns the number of matching bytes if the word matches one of the query words.
|
||||
pub fn matching_bytes(&self, word_to_highlight: &str) -> Option<usize> {
|
||||
pub fn matching_bytes(&self, word_to_highlight: &Token) -> Option<usize> {
|
||||
self.dfas.iter().find_map(|(dfa, query_word, typo, is_prefix)| {
|
||||
match dfa.eval(word_to_highlight) {
|
||||
match dfa.eval(word_to_highlight.text()) {
|
||||
Distance::Exact(t) if t <= *typo => {
|
||||
if *is_prefix {
|
||||
let len = bytes_to_highlight(word_to_highlight, query_word);
|
||||
Some(len)
|
||||
let len = bytes_to_highlight(word_to_highlight.text(), query_word);
|
||||
Some(word_to_highlight.num_graphemes_from_bytes(len))
|
||||
} else {
|
||||
Some(word_to_highlight.len())
|
||||
Some(
|
||||
word_to_highlight
|
||||
.num_graphemes_from_bytes(word_to_highlight.text().len()),
|
||||
)
|
||||
}
|
||||
}
|
||||
_otherwise => None,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue