Fix a highlighting problem when the query was longer than the original text

Clément Renault 2019-11-05 16:40:34 +01:00
parent 89fd397903
commit 85bf5d113c
GPG Key ID: 92ADA4E935E71FA4 (no known key found for this signature in database)
2 changed files with 11 additions and 18 deletions

First changed file:

@@ -1,9 +1,9 @@
 use hashbrown::HashMap;
 use std::convert::TryFrom;
-use std::mem;
 use std::ops::Range;
 use std::rc::Rc;
 use std::time::{Duration, Instant};
+use std::{cmp, mem};
 
 use fst::{IntoStreamer, Streamer};
 use sdset::SetBuf;
@@ -178,7 +178,7 @@ fn fetch_raw_documents(
             let distance = dfa.eval(input).to_u8();
             let is_exact = *is_exact && distance == 0 && input.len() == *query_len;
 
-            let covered_area = if query.len() > input.len() {
+            let covered_area = if *query_len > input.len() {
                 input.len()
             } else {
                 prefix_damerau_levenshtein(query.as_bytes(), input).1
@@ -202,10 +202,13 @@ fn fetch_raw_documents(
                 is_exact,
             };
 
+            let covered_area = u16::try_from(covered_area).unwrap_or(u16::max_value());
+            let covered_area = cmp::min(covered_area, di.char_length);
+
             let highlight = Highlight {
                 attribute: di.attribute,
                 char_index: di.char_index,
-                char_length: u16::try_from(covered_area).unwrap_or(u16::max_value()),
+                char_length: covered_area,
             };
 
             tmp_matches.push((di.document_id, id, match_, highlight));
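
Taken together, these two hunks change the highlight computation in two ways: the covered area is compared against *query_len (the recorded length of the query word) instead of query.len(), and the final char_length is clamped to di.char_length so a highlight can never run past the end of the matched word. Below is a minimal, self-contained sketch of that clamping step; clamp_highlight_length and its parameters are hypothetical stand-ins for the values flowing through fetch_raw_documents, not part of the actual codebase:

    use std::cmp;
    use std::convert::TryFrom;

    /// Hypothetical helper mirroring the patched logic: `covered_area` is the
    /// number of bytes of the indexed word covered by the query, and
    /// `char_length` stands in for `di.char_length`, the real length of the
    /// word inside the document.
    fn clamp_highlight_length(covered_area: usize, char_length: u16) -> u16 {
        // Saturate to u16 first, as the patch does with unwrap_or(u16::max_value())...
        let covered_area = u16::try_from(covered_area).unwrap_or(u16::max_value());
        // ...then make sure the highlight never exceeds the original word.
        cmp::min(covered_area, char_length)
    }

    fn main() {
        // A query covering 10 bytes against a 4-character word: before the
        // fix the highlight length would have been 10, spilling past the word.
        assert_eq!(clamp_highlight_length(10, 4), 4);
        // A covered area shorter than the word passes through unchanged.
        assert_eq!(clamp_highlight_length(3, 4), 3);
    }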

Second changed file:

@@ -133,30 +133,20 @@ fn index_token(
                 .or_insert_with(Vec::new)
                 .push(docindex);
             docs_words.entry(id).or_insert_with(Vec::new).push(word);
-        }
-        None => return false,
-    }
-
-    if !lower.contains(is_cjk) {
-        let unidecoded = deunicode_with_tofu(&lower, "");
-        if unidecoded != lower && !unidecoded.is_empty() {
-            let token = Token {
-                word: &unidecoded,
-                ..token
-            };
-            match token_to_docindex(id, attr, token) {
-                Some(docindex) => {
-                    let word = Vec::from(token.word);
+            if !lower.contains(is_cjk) {
+                let unidecoded = deunicode_with_tofu(&lower, "");
+                if unidecoded != lower && !unidecoded.is_empty() {
+                    let word = Vec::from(unidecoded);
                     words_doc_indexes
                         .entry(word.clone())
                         .or_insert_with(Vec::new)
                         .push(docindex);
                     docs_words.entry(id).or_insert_with(Vec::new).push(word);
                 }
-                None => return false,
             }
         }
+        None => return false,
     }
 
     true
 }
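
This second change folds the deunicoded-word indexing into the Some(docindex) arm of the first token_to_docindex match: rather than building a second Token and deriving a second DocIndex for the unidecoded spelling, the new code reuses the docindex already computed for the lowercased word, so both spellings share the exact same position and highlight data. A rough sketch of the resulting flow, with a simplified (u64, usize) pair standing in for the crate's DocIndex type and a naive ASCII filter standing in for deunicode_with_tofu:

    use std::collections::{BTreeMap, HashMap};

    // Simplified stand-in for the crate's DocIndex: (document id, position).
    type DocIndex = (u64, usize);

    // Placeholder for deunicode_with_tofu: keep ASCII characters only.
    // The real function transliterates instead, e.g. "léo" -> "leo".
    fn deunicode(word: &str) -> String {
        word.chars().filter(char::is_ascii).collect()
    }

    fn index_word(
        id: u64,
        position: usize,
        word: &str,
        words_doc_indexes: &mut BTreeMap<Vec<u8>, Vec<DocIndex>>,
        docs_words: &mut HashMap<u64, Vec<Vec<u8>>>,
    ) {
        let lower = word.to_lowercase();

        // One docindex is computed for the lowercased word...
        let docindex: DocIndex = (id, position);
        let word = Vec::from(lower.as_bytes());
        words_doc_indexes.entry(word.clone()).or_insert_with(Vec::new).push(docindex);
        docs_words.entry(id).or_insert_with(Vec::new).push(word);

        // ...and the deunicoded spelling, when it differs, reuses that same
        // docindex instead of deriving a new one, mirroring the patch.
        let unidecoded = deunicode(&lower);
        if unidecoded != lower && !unidecoded.is_empty() {
            let word = Vec::from(unidecoded);
            words_doc_indexes.entry(word.clone()).or_insert_with(Vec::new).push(docindex);
            docs_words.entry(id).or_insert_with(Vec::new).push(word);
        }
    }

    fn main() {
        let mut words_doc_indexes = BTreeMap::new();
        let mut docs_words = HashMap::new();
        index_word(0, 3, "Léo", &mut words_doc_indexes, &mut docs_words);
        // Both spellings index the same (document, position) pair.
        assert_eq!(words_doc_indexes.len(), 2);
        assert!(words_doc_indexes.values().all(|v| v == &vec![(0, 3)]));
    }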