mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 19:57:30 +01:00
Make it compatible with the new milli highlighting
This commit is contained in:
parent
446b66b0fe
commit
b769877183
@ -402,19 +402,25 @@ fn compute_formatted<A: AsRef<[u8]>>(
|
|||||||
|
|
||||||
/// trait to allow unit testing of `compute_formatted`
|
/// trait to allow unit testing of `compute_formatted`
|
||||||
trait Matcher {
|
trait Matcher {
|
||||||
fn matches(&self, w: &str) -> bool;
|
fn matches(&self, w: &str) -> Option<usize>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// #[cfg(test)]
|
||||||
|
// impl Matcher for HashSet<String> {
|
||||||
|
// fn matches(&self, w: &str) -> bool {
|
||||||
|
// self.contains(w)
|
||||||
|
// }
|
||||||
|
// }
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
impl Matcher for HashSet<String> {
|
impl Matcher for HashMap<&str, Option<usize>> {
|
||||||
fn matches(&self, w: &str) -> bool {
|
fn matches(&self, w: &str) -> Option<usize> {
|
||||||
self.contains(w)
|
self.get(w).cloned().flatten()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Matcher for MatchingWords {
|
impl Matcher for MatchingWords {
|
||||||
fn matches(&self, w: &str) -> bool {
|
fn matches(&self, w: &str) -> Option<usize> {
|
||||||
self.matching_bytes(w).is_some()
|
self.matching_bytes(w)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -476,7 +482,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
|
|||||||
let mut buffer = VecDeque::new();
|
let mut buffer = VecDeque::new();
|
||||||
let mut tokens = analyzed.reconstruct().peekable();
|
let mut tokens = analyzed.reconstruct().peekable();
|
||||||
let mut taken_before = 0;
|
let mut taken_before = 0;
|
||||||
while let Some((word, token)) = tokens.next_if(|(_, token)| !matcher.matches(token.text())) {
|
while let Some((word, token)) = tokens.next_if(|(_, token)| !matcher.matches(token.text()).is_some()) {
|
||||||
buffer.push_back((word, token));
|
buffer.push_back((word, token));
|
||||||
taken_before += word.chars().count();
|
taken_before += word.chars().count();
|
||||||
while taken_before > crop_len {
|
while taken_before > crop_len {
|
||||||
@ -515,11 +521,14 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
|
|||||||
|
|
||||||
tokens
|
tokens
|
||||||
.map(|(word, token)| {
|
.map(|(word, token)| {
|
||||||
if need_to_highlight && token.is_word() && matcher.matches(token.text()) {
|
if need_to_highlight && token.is_word() && matcher.matches(token.text()).is_some() {
|
||||||
let mut new_word = String::new();
|
let mut new_word = String::new();
|
||||||
new_word.push_str(&self.marks.0);
|
new_word.push_str(&self.marks.0);
|
||||||
new_word.push_str(&word);
|
if let Some(match_len) = matcher.matches(token.text()) {
|
||||||
|
new_word.push_str(&word[..match_len]);
|
||||||
new_word.push_str(&self.marks.1);
|
new_word.push_str(&self.marks.1);
|
||||||
|
new_word.push_str(&word[match_len..]);
|
||||||
|
}
|
||||||
new_word
|
new_word
|
||||||
} else {
|
} else {
|
||||||
word.to_string()
|
word.to_string()
|
||||||
@ -672,7 +681,9 @@ mod test {
|
|||||||
let mut formatted_options = HashMap::new();
|
let mut formatted_options = HashMap::new();
|
||||||
formatted_options.insert(title, FormatOptions { highlight: true, crop: None });
|
formatted_options.insert(title, FormatOptions { highlight: true, crop: None });
|
||||||
|
|
||||||
let matching_words = HashSet::from_iter(Some(String::from("hobbit")));
|
// let matching_words = HashSet::from_iter(Some(String::from("hobbit")));
|
||||||
|
let mut matching_words = HashMap::new();
|
||||||
|
matching_words.insert("hobbit", Some(6));
|
||||||
|
|
||||||
let value = compute_formatted(
|
let value = compute_formatted(
|
||||||
&fields,
|
&fields,
|
||||||
@ -688,6 +699,49 @@ mod test {
|
|||||||
assert_eq!(value["author"], "J. R. R. Tolkien");
|
assert_eq!(value["author"], "J. R. R. Tolkien");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn formatted_with_highlight_in_word() {
|
||||||
|
let stop_words = fst::Set::default();
|
||||||
|
let formatter =
|
||||||
|
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
|
||||||
|
|
||||||
|
let mut fields = FieldsIdsMap::new();
|
||||||
|
let title = fields.insert("title").unwrap();
|
||||||
|
let author = fields.insert("author").unwrap();
|
||||||
|
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
let mut obkv = obkv::KvWriter::new(&mut buf);
|
||||||
|
obkv.insert(title, Value::String("The Hobbit".into()).to_string().as_bytes())
|
||||||
|
.unwrap();
|
||||||
|
obkv.finish().unwrap();
|
||||||
|
obkv = obkv::KvWriter::new(&mut buf);
|
||||||
|
obkv.insert(author, Value::String("J. R. R. Tolkien".into()).to_string().as_bytes())
|
||||||
|
.unwrap();
|
||||||
|
obkv.finish().unwrap();
|
||||||
|
|
||||||
|
let obkv = obkv::KvReader::new(&buf);
|
||||||
|
|
||||||
|
let ids_in_formatted = vec![title, author];
|
||||||
|
let mut formatted_options = HashMap::new();
|
||||||
|
formatted_options.insert(title, FormatOptions { highlight: true, crop: None });
|
||||||
|
|
||||||
|
let mut matching_words = HashMap::new();
|
||||||
|
matching_words.insert("hobbit", Some(3));
|
||||||
|
|
||||||
|
let value = compute_formatted(
|
||||||
|
&fields,
|
||||||
|
obkv,
|
||||||
|
&formatter,
|
||||||
|
&matching_words,
|
||||||
|
&ids_in_formatted,
|
||||||
|
&formatted_options,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(value["title"], "The <em>Hob</em>bit");
|
||||||
|
assert_eq!(value["author"], "J. R. R. Tolkien");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn formatted_with_crop_2() {
|
fn formatted_with_crop_2() {
|
||||||
let stop_words = fst::Set::default();
|
let stop_words = fst::Set::default();
|
||||||
@ -714,7 +768,8 @@ mod test {
|
|||||||
let mut formatted_options = HashMap::new();
|
let mut formatted_options = HashMap::new();
|
||||||
formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(2) });
|
formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(2) });
|
||||||
|
|
||||||
let matching_words = HashSet::from_iter(Some(String::from("potter")));
|
let mut matching_words = HashMap::new();
|
||||||
|
matching_words.insert("potter", Some(6));
|
||||||
|
|
||||||
let value = compute_formatted(
|
let value = compute_formatted(
|
||||||
&fields,
|
&fields,
|
||||||
@ -756,7 +811,8 @@ mod test {
|
|||||||
let mut formatted_options = HashMap::new();
|
let mut formatted_options = HashMap::new();
|
||||||
formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(10) });
|
formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(10) });
|
||||||
|
|
||||||
let matching_words = HashSet::from_iter(Some(String::from("potter")));
|
let mut matching_words = HashMap::new();
|
||||||
|
matching_words.insert("potter", Some(6));
|
||||||
|
|
||||||
let value = compute_formatted(
|
let value = compute_formatted(
|
||||||
&fields,
|
&fields,
|
||||||
@ -798,7 +854,8 @@ mod test {
|
|||||||
let mut formatted_options = HashMap::new();
|
let mut formatted_options = HashMap::new();
|
||||||
formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(0) });
|
formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(0) });
|
||||||
|
|
||||||
let matching_words = HashSet::from_iter(Some(String::from("potter")));
|
let mut matching_words = HashMap::new();
|
||||||
|
matching_words.insert("potter", Some(6));
|
||||||
|
|
||||||
let value = compute_formatted(
|
let value = compute_formatted(
|
||||||
&fields,
|
&fields,
|
||||||
@ -840,7 +897,8 @@ mod test {
|
|||||||
let mut formatted_options = HashMap::new();
|
let mut formatted_options = HashMap::new();
|
||||||
formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(1) });
|
formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(1) });
|
||||||
|
|
||||||
let matching_words = HashSet::from_iter(Some(String::from("and")));
|
let mut matching_words = HashMap::new();
|
||||||
|
matching_words.insert("and", Some(3));
|
||||||
|
|
||||||
let value = compute_formatted(
|
let value = compute_formatted(
|
||||||
&fields,
|
&fields,
|
||||||
@ -855,4 +913,47 @@ mod test {
|
|||||||
assert_eq!(value["title"], " <em>and</em> ");
|
assert_eq!(value["title"], " <em>and</em> ");
|
||||||
assert_eq!(value["author"], "J. K. Rowling");
|
assert_eq!(value["author"], "J. K. Rowling");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn formatted_with_crop_and_highlight_in_word() {
|
||||||
|
let stop_words = fst::Set::default();
|
||||||
|
let formatter =
|
||||||
|
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
|
||||||
|
|
||||||
|
let mut fields = FieldsIdsMap::new();
|
||||||
|
let title = fields.insert("title").unwrap();
|
||||||
|
let author = fields.insert("author").unwrap();
|
||||||
|
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
let mut obkv = obkv::KvWriter::new(&mut buf);
|
||||||
|
obkv.insert(title, Value::String("Harry Potter and the Half-Blood Prince".into()).to_string().as_bytes())
|
||||||
|
.unwrap();
|
||||||
|
obkv.finish().unwrap();
|
||||||
|
obkv = obkv::KvWriter::new(&mut buf);
|
||||||
|
obkv.insert(author, Value::String("J. K. Rowling".into()).to_string().as_bytes())
|
||||||
|
.unwrap();
|
||||||
|
obkv.finish().unwrap();
|
||||||
|
|
||||||
|
let obkv = obkv::KvReader::new(&buf);
|
||||||
|
|
||||||
|
let ids_in_formatted = vec![title, author];
|
||||||
|
let mut formatted_options = HashMap::new();
|
||||||
|
formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(9) });
|
||||||
|
|
||||||
|
let mut matching_words = HashMap::new();
|
||||||
|
matching_words.insert("blood", Some(3));
|
||||||
|
|
||||||
|
let value = compute_formatted(
|
||||||
|
&fields,
|
||||||
|
obkv,
|
||||||
|
&formatter,
|
||||||
|
&matching_words,
|
||||||
|
&ids_in_formatted,
|
||||||
|
&formatted_options,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(value["title"], "the Half-<em>Blo</em>od Prince");
|
||||||
|
assert_eq!(value["author"], "J. K. Rowling");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user