mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 22:14:32 +01:00
Use chars for highlight instead of graphemes
Tokenizer v0.2.7 uses chars instead of graphemes for matching bytes, so the `unicode-segmentation` dependency isn't needed anymore. Also, oxidised the highlight code :)
Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
parent
2d7607734e
commit
c0313f3026
@ -17,7 +17,6 @@ once_cell = "1.5.2"
|
|||||||
rayon = "1.5.0"
|
rayon = "1.5.0"
|
||||||
structopt = { version = "0.3.21", default-features = false, features = ["wrap_help"] }
|
structopt = { version = "0.3.21", default-features = false, features = ["wrap_help"] }
|
||||||
tempfile = "3.2.0"
|
tempfile = "3.2.0"
|
||||||
unicode-segmentation = "1.6.0"
|
|
||||||
|
|
||||||
# http server
|
# http server
|
||||||
askama = "0.10.5"
|
askama = "0.10.5"
|
||||||
|
@ -34,7 +34,6 @@ use structopt::StructOpt;
|
|||||||
use tokio::fs::File as TFile;
|
use tokio::fs::File as TFile;
|
||||||
use tokio::io::AsyncWriteExt;
|
use tokio::io::AsyncWriteExt;
|
||||||
use tokio::sync::broadcast;
|
use tokio::sync::broadcast;
|
||||||
use unicode_segmentation::UnicodeSegmentation;
|
|
||||||
use warp::filters::ws::Message;
|
use warp::filters::ws::Message;
|
||||||
use warp::http::Response;
|
use warp::http::Response;
|
||||||
use warp::Filter;
|
use warp::Filter;
|
||||||
@ -161,21 +160,19 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
|
|||||||
let analyzed = self.analyzer.analyze(&old_string);
|
let analyzed = self.analyzer.analyze(&old_string);
|
||||||
for (word, token) in analyzed.reconstruct() {
|
for (word, token) in analyzed.reconstruct() {
|
||||||
if token.is_word() {
|
if token.is_word() {
|
||||||
let chars_to_highlight = matching_words.matching_bytes(&token).unwrap_or(0);
|
match matching_words.matching_bytes(&token) {
|
||||||
if chars_to_highlight > 0 {
|
Some(chars_to_highlight) => {
|
||||||
let graphemes = word.graphemes(true);
|
let mut chars = word.chars();
|
||||||
let chars = graphemes.clone().into_iter();
|
|
||||||
|
|
||||||
string.push_str("<mark>");
|
string.push_str("<mark>");
|
||||||
string.push_str(
|
// push the part to highlight
|
||||||
chars.take(chars_to_highlight).collect::<String>().as_str(),
|
string.extend(chars.by_ref().take(chars_to_highlight));
|
||||||
);
|
string.push_str("</mark>");
|
||||||
string.push_str("</mark>");
|
// push the suffix after highlight
|
||||||
|
string.extend(chars);
|
||||||
let chars = graphemes.into_iter().skip(chars_to_highlight);
|
}
|
||||||
string.push_str(chars.collect::<String>().as_str());
|
// no highlight
|
||||||
} else {
|
None => string.push_str(word),
|
||||||
string.push_str(word);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
string.push_str(word);
|
string.push_str(word);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user