Use chars for highlight instead of graphemes

Tokenizer v0.2.7 uses chars instead of graphemes for matching bytes, so the
highlighter now counts chars as well. As a result, the `unicode-segmentation`
dependency is no longer needed.

Also, oxidised the highlight code (rewrote it in more idiomatic Rust) :)

Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
Samyak S Sarnayak 2022-01-17 13:10:44 +05:30
parent 2d7607734e
commit c0313f3026
No known key found for this signature in database
GPG Key ID: 365873F2F0C6153B
2 changed files with 12 additions and 16 deletions

View File

@ -17,7 +17,6 @@ once_cell = "1.5.2"
rayon = "1.5.0" rayon = "1.5.0"
structopt = { version = "0.3.21", default-features = false, features = ["wrap_help"] } structopt = { version = "0.3.21", default-features = false, features = ["wrap_help"] }
tempfile = "3.2.0" tempfile = "3.2.0"
unicode-segmentation = "1.6.0"
# http server # http server
askama = "0.10.5" askama = "0.10.5"

View File

@ -34,7 +34,6 @@ use structopt::StructOpt;
use tokio::fs::File as TFile; use tokio::fs::File as TFile;
use tokio::io::AsyncWriteExt; use tokio::io::AsyncWriteExt;
use tokio::sync::broadcast; use tokio::sync::broadcast;
use unicode_segmentation::UnicodeSegmentation;
use warp::filters::ws::Message; use warp::filters::ws::Message;
use warp::http::Response; use warp::http::Response;
use warp::Filter; use warp::Filter;
@ -161,21 +160,19 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
let analyzed = self.analyzer.analyze(&old_string); let analyzed = self.analyzer.analyze(&old_string);
for (word, token) in analyzed.reconstruct() { for (word, token) in analyzed.reconstruct() {
if token.is_word() { if token.is_word() {
let chars_to_highlight = matching_words.matching_bytes(&token).unwrap_or(0); match matching_words.matching_bytes(&token) {
if chars_to_highlight > 0 { Some(chars_to_highlight) => {
let graphemes = word.graphemes(true); let mut chars = word.chars();
let chars = graphemes.clone().into_iter();
string.push_str("<mark>"); string.push_str("<mark>");
string.push_str( // push the part to highlight
chars.take(chars_to_highlight).collect::<String>().as_str(), string.extend(chars.by_ref().take(chars_to_highlight));
);
string.push_str("</mark>"); string.push_str("</mark>");
// push the suffix after highlight
let chars = graphemes.into_iter().skip(chars_to_highlight); string.extend(chars);
string.push_str(chars.collect::<String>().as_str()); }
} else { // no highlight
string.push_str(word); None => string.push_str(word),
} }
} else { } else {
string.push_str(word); string.push_str(word);