From 85bf5d113ca03cc95583d1186d65f4d0e6826f5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 5 Nov 2019 16:40:34 +0100 Subject: [PATCH 1/2] Fix a highlighting problem when query was longer than original text --- meilidb-core/src/query_builder.rs | 9 ++++++--- meilidb-core/src/raw_indexer.rs | 20 +++++--------------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs index 04301c574..42a292c6f 100644 --- a/meilidb-core/src/query_builder.rs +++ b/meilidb-core/src/query_builder.rs @@ -1,9 +1,9 @@ use hashbrown::HashMap; use std::convert::TryFrom; -use std::mem; use std::ops::Range; use std::rc::Rc; use std::time::{Duration, Instant}; +use std::{cmp, mem}; use fst::{IntoStreamer, Streamer}; use sdset::SetBuf; @@ -178,7 +178,7 @@ fn fetch_raw_documents( let distance = dfa.eval(input).to_u8(); let is_exact = *is_exact && distance == 0 && input.len() == *query_len; - let covered_area = if query.len() > input.len() { + let covered_area = if *query_len > input.len() { input.len() } else { prefix_damerau_levenshtein(query.as_bytes(), input).1 @@ -202,10 +202,13 @@ fn fetch_raw_documents( is_exact, }; + let covered_area = u16::try_from(covered_area).unwrap_or(u16::max_value()); + let covered_area = cmp::min(covered_area, di.char_length); + let highlight = Highlight { attribute: di.attribute, char_index: di.char_index, - char_length: u16::try_from(covered_area).unwrap_or(u16::max_value()), + char_length: covered_area, }; tmp_matches.push((di.document_id, id, match_, highlight)); diff --git a/meilidb-core/src/raw_indexer.rs b/meilidb-core/src/raw_indexer.rs index 3e0f212f7..f4304a33a 100644 --- a/meilidb-core/src/raw_indexer.rs +++ b/meilidb-core/src/raw_indexer.rs @@ -133,30 +133,20 @@ fn index_token( .or_insert_with(Vec::new) .push(docindex); docs_words.entry(id).or_insert_with(Vec::new).push(word); - } - None => return false, - } - if !lower.contains(is_cjk) { - 
let unidecoded = deunicode_with_tofu(&lower, ""); - if unidecoded != lower && !unidecoded.is_empty() { - let token = Token { - word: &unidecoded, - ..token - }; - - match token_to_docindex(id, attr, token) { - Some(docindex) => { - let word = Vec::from(token.word); + if !lower.contains(is_cjk) { + let unidecoded = deunicode_with_tofu(&lower, ""); + if unidecoded != lower && !unidecoded.is_empty() { + let word = Vec::from(unidecoded); words_doc_indexes .entry(word.clone()) .or_insert_with(Vec::new) .push(docindex); docs_words.entry(id).or_insert_with(Vec::new).push(word); } - None => return false, } } + None => return false, } } From 7541172d124036535d598fdb00078c27ceb23b0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 5 Nov 2019 16:40:48 +0100 Subject: [PATCH 2/2] Make the example show highlighted areas more explicitly --- meilidb-core/examples/from_file.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/meilidb-core/examples/from_file.rs b/meilidb-core/examples/from_file.rs index 49f1f2657..2ed5e5b9f 100644 --- a/meilidb-core/examples/from_file.rs +++ b/meilidb-core/examples/from_file.rs @@ -217,7 +217,11 @@ fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> { _ => unreachable!(), }; if highlighted { - stdout.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?; + stdout.set_color( + ColorSpec::new() + .set_fg(Some(Color::Yellow)) + .set_underline(true), + )?; } write!(&mut stdout, "{}", &text[start..end])?; stdout.reset()?;