Use new matcher in http-ui

This commit is contained in:
ManyTheFish 2022-03-30 10:50:23 +02:00
parent 734d0899d3
commit 29c5f76d7f
3 changed files with 26 additions and 29 deletions

View File

@ -25,7 +25,7 @@ use milli::update::{
ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
}; };
use milli::{ use milli::{
obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatchingWords, obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatcherBuilder,
SearchResult, SortError, SearchResult, SortError,
}; };
use once_cell::sync::OnceCell; use once_cell::sync::OnceCell;
@ -152,43 +152,25 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
Self { analyzer } Self { analyzer }
} }
fn highlight_value(&self, value: Value, matching_words: &MatchingWords) -> Value { fn highlight_value(&self, value: Value, matcher_builder: &MatcherBuilder) -> Value {
match value { match value {
Value::Null => Value::Null, Value::Null => Value::Null,
Value::Bool(boolean) => Value::Bool(boolean), Value::Bool(boolean) => Value::Bool(boolean),
Value::Number(number) => Value::Number(number), Value::Number(number) => Value::Number(number),
Value::String(old_string) => { Value::String(old_string) => {
let mut string = String::new();
let analyzed = self.analyzer.analyze(&old_string); let analyzed = self.analyzer.analyze(&old_string);
for (word, token) in analyzed.reconstruct() { let analyzed: Vec<_> = analyzed.tokens().collect();
if token.is_word() { let mut matcher = matcher_builder.build(&analyzed[..], &old_string);
match matching_words.matching_bytes(&token) {
Some(chars_to_highlight) => {
let mut chars = word.chars();
string.push_str("<mark>"); Value::String(matcher.format(true, true).to_string())
// push the part to highlight
string.extend(chars.by_ref().take(chars_to_highlight));
string.push_str("</mark>");
// push the suffix after highlight
string.extend(chars);
}
// no highlight
None => string.push_str(word),
}
} else {
string.push_str(word);
}
}
Value::String(string)
} }
Value::Array(values) => Value::Array( Value::Array(values) => Value::Array(
values.into_iter().map(|v| self.highlight_value(v, matching_words)).collect(), values.into_iter().map(|v| self.highlight_value(v, matcher_builder)).collect(),
), ),
Value::Object(object) => Value::Object( Value::Object(object) => Value::Object(
object object
.into_iter() .into_iter()
.map(|(k, v)| (k, self.highlight_value(v, matching_words))) .map(|(k, v)| (k, self.highlight_value(v, matcher_builder)))
.collect(), .collect(),
), ),
} }
@ -197,14 +179,14 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
fn highlight_record( fn highlight_record(
&self, &self,
object: &mut Map<String, Value>, object: &mut Map<String, Value>,
matching_words: &MatchingWords, matcher_builder: &MatcherBuilder,
attributes_to_highlight: &HashSet<String>, attributes_to_highlight: &HashSet<String>,
) { ) {
// TODO: do we need to create a string for elements that are not strings but need to be highlighted? // TODO: do we need to create a string for elements that are not strings but need to be highlighted?
for (key, value) in object.iter_mut() { for (key, value) in object.iter_mut() {
if attributes_to_highlight.contains(key) { if attributes_to_highlight.contains(key) {
let old_value = mem::take(value); let old_value = mem::take(value);
*value = self.highlight_value(old_value, matching_words); *value = self.highlight_value(old_value, matcher_builder);
} }
} }
} }
@ -819,12 +801,15 @@ async fn main() -> anyhow::Result<()> {
let stop_words = fst::Set::default(); let stop_words = fst::Set::default();
let highlighter = Highlighter::new(&stop_words); let highlighter = Highlighter::new(&stop_words);
let mut matcher_builder = MatcherBuilder::from_matching_words(matching_words);
matcher_builder.highlight_prefix("<mark>".to_string());
matcher_builder.highlight_suffix("</mark>".to_string());
for (_id, obkv) in index.documents(&rtxn, documents_ids).unwrap() { for (_id, obkv) in index.documents(&rtxn, documents_ids).unwrap() {
let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap(); let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap();
if !disable_highlighting { if !disable_highlighting {
highlighter.highlight_record( highlighter.highlight_record(
&mut object, &mut object,
&matching_words, &matcher_builder,
&attributes_to_highlight, &attributes_to_highlight,
); );
} }

View File

@ -36,7 +36,9 @@ pub use self::heed_codec::{
RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec, RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec,
}; };
pub use self::index::Index; pub use self::index::Index;
pub use self::search::{FacetDistribution, Filter, MatchingWords, Search, SearchResult}; pub use self::search::{
FacetDistribution, Filter, MatcherBuilder, MatchingWords, Search, SearchResult,
};
pub type Result<T> = std::result::Result<T, error::Error>; pub type Result<T> = std::result::Result<T, error::Error>;

View File

@ -34,6 +34,16 @@ impl MatcherBuilder {
} }
} }
/// Creates a `MatcherBuilder` that takes ownership of the given `MatchingWords`.
///
/// The crop size is initialised to `DEFAULT_CROP_SIZE`, while the crop marker and
/// the highlight prefix/suffix are left unset (`None`). These can be changed
/// afterwards through the builder's setters (e.g. `crop_size`).
pub fn from_matching_words(matching_words: MatchingWords) -> Self {
Self {
matching_words,
crop_size: DEFAULT_CROP_SIZE,
crop_marker: None,
highlight_prefix: None,
highlight_suffix: None,
}
}
pub fn crop_size(&mut self, word_count: usize) -> &Self { pub fn crop_size(&mut self, word_count: usize) -> &Self {
self.crop_size = word_count; self.crop_size = word_count;
self self