Use chars for highlight instead of graphemes

Tokenizer v0.2.7 uses chars instead of graphemes for matching bytes.
The `unicode-segmentation` dependency isn't needed anymore.

Also, oxidised the highlight code :)

Co-authored-by: many <maxime@meilisearch.com>
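For context, a minimal sketch (not part of this commit) of the char-vs-grapheme distinction behind the change: `str::chars()` yields Unicode scalar values, while grapheme segmentation groups combining sequences into a single cluster, so a length counted in chars can be larger than one counted in graphemes.

```rust
// Sketch only: why char counts and grapheme counts can differ.
fn main() {
    // "étude" written with a decomposed accent: 'e' followed by U+0301 (combining acute).
    let word = "e\u{0301}tude";
    // 6 Unicode scalar values: 'e', '\u{0301}', 't', 'u', 'd', 'e'
    println!("chars: {}", word.chars().count());
    // A grapheme-based count (e.g. unicode-segmentation's graphemes(true)) would
    // report 5, because 'e' plus the combining accent form one grapheme cluster.
}
```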
Samyak S Sarnayak 2022-01-17 13:10:44 +05:30
parent 2d7607734e
commit c0313f3026
2 changed files with 12 additions and 16 deletions


@@ -17,7 +17,6 @@ once_cell = "1.5.2"
 rayon = "1.5.0"
 structopt = { version = "0.3.21", default-features = false, features = ["wrap_help"] }
 tempfile = "3.2.0"
-unicode-segmentation = "1.6.0"
 # http server
 askama = "0.10.5"


@@ -34,7 +34,6 @@ use structopt::StructOpt;
 use tokio::fs::File as TFile;
 use tokio::io::AsyncWriteExt;
 use tokio::sync::broadcast;
-use unicode_segmentation::UnicodeSegmentation;
 use warp::filters::ws::Message;
 use warp::http::Response;
 use warp::Filter;
@@ -161,21 +160,19 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
         let analyzed = self.analyzer.analyze(&old_string);
         for (word, token) in analyzed.reconstruct() {
             if token.is_word() {
-                let chars_to_highlight = matching_words.matching_bytes(&token).unwrap_or(0);
-                if chars_to_highlight > 0 {
-                    let graphemes = word.graphemes(true);
-                    let chars = graphemes.clone().into_iter();
-
-                    string.push_str("<mark>");
-                    string.push_str(
-                        chars.take(chars_to_highlight).collect::<String>().as_str(),
-                    );
-                    string.push_str("</mark>");
-                    let chars = graphemes.into_iter().skip(chars_to_highlight);
-                    string.push_str(chars.collect::<String>().as_str());
-                } else {
-                    string.push_str(word);
+                match matching_words.matching_bytes(&token) {
+                    Some(chars_to_highlight) => {
+                        let mut chars = word.chars();
+                        string.push_str("<mark>");
+                        // push the part to highlight
+                        string.extend(chars.by_ref().take(chars_to_highlight));
+                        string.push_str("</mark>");
+                        // push the suffix after highlight
+                        string.extend(chars);
+                    }
+                    // no highlight
+                    None => string.push_str(word),
                 }
             } else {
                 string.push_str(word);
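
For reference, a standalone sketch (not from the diff) of the new match-based flow, with `matching_len` as a hypothetical stand-in for `matching_words.matching_bytes(&token)`:

```rust
// Sketch of the new highlighting flow; `matching_len` is a hypothetical
// stand-in for `matching_words.matching_bytes(&token)`.
fn highlight(word: &str, matching_len: Option<usize>) -> String {
    let mut string = String::new();
    match matching_len {
        Some(chars_to_highlight) => {
            let mut chars = word.chars();
            string.push_str("<mark>");
            // push the part to highlight
            string.extend(chars.by_ref().take(chars_to_highlight));
            string.push_str("</mark>");
            // push the suffix after highlight
            string.extend(chars);
        }
        // no highlight
        None => string.push_str(word),
    }
    string
}

fn main() {
    assert_eq!(highlight("hello", Some(3)), "<mark>hel</mark>lo");
    assert_eq!(highlight("world", None), "world");
}
```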