Use BTreeMap and remove ids_in_formatted

This commit is contained in:
Clémentine Urquizar 2021-06-16 19:30:06 +02:00
parent a0b022afee
commit 97909ce56e
No known key found for this signature in database
GPG Key ID: D8E7CC7422E77E1A

View File

@ -1,5 +1,5 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}; use std::collections::{BTreeMap, BTreeSet, HashSet, VecDeque};
use std::time::Instant; use std::time::Instant;
use anyhow::bail; use anyhow::bail;
@ -120,9 +120,9 @@ impl Index {
}; };
// The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
// but these attributes must be also // but these attributes must be also be present
// - present in the fields_ids_map // - in the fields_ids_map
// - present in the the displayed attributes // - in the the displayed attributes
let to_retrieve_ids: BTreeSet<_> = query let to_retrieve_ids: BTreeSet<_> = query
.attributes_to_retrieve .attributes_to_retrieve
.as_ref() .as_ref()
@ -140,29 +140,20 @@ impl Index {
.attributes_to_crop .attributes_to_crop
.unwrap_or_default(); .unwrap_or_default();
// Attributes in `formatted_options` correspond to the attributes that will be in `_formatted`
// These attributes are:
// - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
// - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
// But these attributes must be present in displayed attributes
let formatted_options = compute_formatted_options( let formatted_options = compute_formatted_options(
&attr_to_highlight, &attr_to_highlight,
&attr_to_crop, &attr_to_crop,
query.crop_length, query.crop_length,
&to_retrieve_ids,
&fields_ids_map, &fields_ids_map,
&displayed_ids, &displayed_ids,
); );
// All attributes present in `_formatted` are:
// - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
// - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
// But these attributes must be present in displayed attributes
let ids_in_formatted = formatted_options
.keys()
.cloned()
.collect::<BTreeSet<_>>()
.intersection(&displayed_ids)
.cloned()
.collect::<BTreeSet<_>>()
.union(&to_retrieve_ids)
.cloned()
.collect::<Vec<_>>();
let stop_words = fst::Set::default(); let stop_words = fst::Set::default();
let formatter = let formatter =
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>"))); Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
@ -174,7 +165,6 @@ impl Index {
obkv, obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&ids_in_formatted,
&formatted_options, &formatted_options,
)?; )?;
let hit = SearchHit { let hit = SearchHit {
@ -215,11 +205,12 @@ fn compute_formatted_options(
attr_to_highlight: &HashSet<String>, attr_to_highlight: &HashSet<String>,
attr_to_crop: &[String], attr_to_crop: &[String],
query_crop_length: usize, query_crop_length: usize,
to_retrieve_ids: &BTreeSet<u8>,
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
displayed_ids: &BTreeSet<u8>, displayed_ids: &BTreeSet<u8>,
) -> HashMap<FieldId, FormatOptions> { ) -> BTreeMap<FieldId, FormatOptions> {
let mut formatted_options = HashMap::new(); let mut formatted_options = BTreeMap::new();
formatted_options = add_highlight_to_formatted_options( formatted_options = add_highlight_to_formatted_options(
formatted_options, formatted_options,
@ -236,15 +227,23 @@ fn compute_formatted_options(
displayed_ids, displayed_ids,
); );
// Should not return `_formatted` if no valid attributes to highlight/crop
if !formatted_options.is_empty() {
formatted_options = add_non_formatted_ids_to_formatted_options(
formatted_options,
to_retrieve_ids,
);
}
formatted_options formatted_options
} }
fn add_highlight_to_formatted_options( fn add_highlight_to_formatted_options(
mut formatted_options: HashMap<FieldId, FormatOptions>, mut formatted_options: BTreeMap<FieldId, FormatOptions>,
attr_to_highlight: &HashSet<String>, attr_to_highlight: &HashSet<String>,
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
displayed_ids: &BTreeSet<u8>, displayed_ids: &BTreeSet<u8>,
) -> HashMap<FieldId, FormatOptions> { ) -> BTreeMap<FieldId, FormatOptions> {
for attr in attr_to_highlight { for attr in attr_to_highlight {
let new_format = FormatOptions { let new_format = FormatOptions {
highlight: true, highlight: true,
@ -259,19 +258,22 @@ fn add_highlight_to_formatted_options(
} }
if let Some(id) = fields_ids_map.id(&attr) { if let Some(id) = fields_ids_map.id(&attr) {
formatted_options.insert(id, new_format); if displayed_ids.contains(&id) {
formatted_options.insert(id, new_format);
}
} }
} }
formatted_options formatted_options
} }
fn add_crop_to_formatted_options( fn add_crop_to_formatted_options(
mut formatted_options: HashMap<FieldId, FormatOptions>, mut formatted_options: BTreeMap<FieldId, FormatOptions>,
attr_to_crop: &[String], attr_to_crop: &[String],
crop_length: usize, crop_length: usize,
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
displayed_ids: &BTreeSet<u8>, displayed_ids: &BTreeSet<u8>,
) -> HashMap<FieldId, FormatOptions> { ) -> BTreeMap<FieldId, FormatOptions> {
for attr in attr_to_crop { for attr in attr_to_crop {
let mut attr_name = attr.clone(); let mut attr_name = attr.clone();
let mut attr_len = crop_length; let mut attr_len = crop_length;
@ -298,19 +300,37 @@ fn add_crop_to_formatted_options(
} }
if let Some(id) = fields_ids_map.id(&attr_name) { if let Some(id) = fields_ids_map.id(&attr_name) {
formatted_options if displayed_ids.contains(&id) {
.entry(id) formatted_options
.and_modify(|f| f.crop = Some(attr_len)) .entry(id)
.or_insert(FormatOptions { .and_modify(|f| f.crop = Some(attr_len))
highlight: false, .or_insert(FormatOptions {
crop: Some(attr_len), highlight: false,
}); crop: Some(attr_len),
});
}
} }
} }
formatted_options formatted_options
} }
fn add_non_formatted_ids_to_formatted_options(
mut formatted_options: BTreeMap<FieldId, FormatOptions>,
to_retrieve_ids: &BTreeSet<u8>
) -> BTreeMap<FieldId, FormatOptions> {
for id in to_retrieve_ids {
formatted_options
.entry(*id)
.or_insert(FormatOptions {
highlight: false,
crop: None,
});
}
formatted_options
}
fn make_document( fn make_document(
attributes_to_retrieve: &BTreeSet<FieldId>, attributes_to_retrieve: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap, field_ids_map: &FieldsIdsMap,
@ -339,33 +359,28 @@ fn format_fields<A: AsRef<[u8]>>(
obkv: obkv::KvReader, obkv: obkv::KvReader,
formatter: &Formatter<A>, formatter: &Formatter<A>,
matching_words: &impl Matcher, matching_words: &impl Matcher,
ids_in_formatted: &[FieldId], formatted_options: &BTreeMap<FieldId, FormatOptions>,
formatted_options: &HashMap<FieldId, FormatOptions>,
) -> anyhow::Result<Document> { ) -> anyhow::Result<Document> {
let mut document = Document::new(); let mut document = Document::new();
if !formatted_options.is_empty() { for (id, format) in formatted_options {
for field in ids_in_formatted { if let Some(value) = obkv.get(*id) {
if let Some(value) = obkv.get(*field) { let mut value: Value = serde_json::from_slice(value)?;
let mut value: Value = serde_json::from_slice(value)?;
if let Some(format) = formatted_options.get(field) { value = formatter.format_value(
value = formatter.format_value( value,
value, matching_words,
matching_words, *format,
*format, );
);
}
// This unwrap must be safe since we got the ids from the fields_ids_map just // This unwrap must be safe since we got the ids from the fields_ids_map just
// before. // before.
let key = field_ids_map let key = field_ids_map
.name(*field) .name(*id)
.expect("Missing field name") .expect("Missing field name")
.to_string(); .to_string();
document.insert(key, value); document.insert(key, value);
}
} }
} }
@ -378,7 +393,7 @@ trait Matcher {
} }
#[cfg(test)] #[cfg(test)]
impl Matcher for HashMap<&str, Option<usize>> { impl Matcher for BTreeMap<&str, Option<usize>> {
fn matches(&self, w: &str) -> Option<usize> { fn matches(&self, w: &str) -> Option<usize> {
self.get(w).cloned().flatten() self.get(w).cloned().flatten()
} }
@ -564,8 +579,7 @@ mod test {
let obkv = obkv::KvReader::new(&buf); let obkv = obkv::KvReader::new(&buf);
let ids_in_formatted = Vec::new(); let formatted_options = BTreeMap::new();
let formatted_options = HashMap::new();
let matching_words = MatchingWords::default(); let matching_words = MatchingWords::default();
@ -574,7 +588,6 @@ mod test {
obkv, obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&ids_in_formatted,
&formatted_options, &formatted_options,
) )
.unwrap(); .unwrap();
@ -582,84 +595,6 @@ mod test {
assert!(value.is_empty()); assert!(value.is_empty());
} }
#[test]
fn no_formatted_with_ids() {
let stop_words = fst::Set::default();
let formatter =
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
let mut fields = FieldsIdsMap::new();
let id = fields.insert("test").unwrap();
let mut buf = Vec::new();
let mut obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(id, Value::String("hello".into()).to_string().as_bytes())
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf);
let ids_in_formatted = vec![id];
let formatted_options = HashMap::new();
let matching_words = MatchingWords::default();
let value = format_fields(
&fields,
obkv,
&formatter,
&matching_words,
&ids_in_formatted,
&formatted_options,
)
.unwrap();
assert!(value.is_empty());
}
#[test]
fn formatted_with_highlight() {
let stop_words = fst::Set::default();
let formatter =
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
let mut fields = FieldsIdsMap::new();
let title = fields.insert("title").unwrap();
let author = fields.insert("author").unwrap();
let mut buf = Vec::new();
let mut obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(title, Value::String("The Hobbit".into()).to_string().as_bytes())
.unwrap();
obkv.finish().unwrap();
obkv = obkv::KvWriter::new(&mut buf);
obkv.insert(author, Value::String("J. R. R. Tolkien".into()).to_string().as_bytes())
.unwrap();
obkv.finish().unwrap();
let obkv = obkv::KvReader::new(&buf);
let ids_in_formatted = vec![title, author];
let mut formatted_options = HashMap::new();
formatted_options.insert(title, FormatOptions { highlight: true, crop: None });
let mut matching_words = HashMap::new();
matching_words.insert("hobbit", Some(6));
let value = format_fields(
&fields,
obkv,
&formatter,
&matching_words,
&ids_in_formatted,
&formatted_options,
)
.unwrap();
assert_eq!(value["title"], "The <em>Hobbit</em>");
assert_eq!(value["author"], "J. R. R. Tolkien");
}
#[test] #[test]
fn formatted_with_highlight_in_word() { fn formatted_with_highlight_in_word() {
let stop_words = fst::Set::default(); let stop_words = fst::Set::default();
@ -682,11 +617,11 @@ mod test {
let obkv = obkv::KvReader::new(&buf); let obkv = obkv::KvReader::new(&buf);
let ids_in_formatted = vec![title, author]; let mut formatted_options = BTreeMap::new();
let mut formatted_options = HashMap::new();
formatted_options.insert(title, FormatOptions { highlight: true, crop: None }); formatted_options.insert(title, FormatOptions { highlight: true, crop: None });
formatted_options.insert(author, FormatOptions { highlight: false, crop: None });
let mut matching_words = HashMap::new(); let mut matching_words = BTreeMap::new();
matching_words.insert("hobbit", Some(3)); matching_words.insert("hobbit", Some(3));
let value = format_fields( let value = format_fields(
@ -694,7 +629,6 @@ mod test {
obkv, obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&ids_in_formatted,
&formatted_options, &formatted_options,
) )
.unwrap(); .unwrap();
@ -725,11 +659,11 @@ mod test {
let obkv = obkv::KvReader::new(&buf); let obkv = obkv::KvReader::new(&buf);
let ids_in_formatted = vec![title, author]; let mut formatted_options = BTreeMap::new();
let mut formatted_options = HashMap::new();
formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(2) }); formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(2) });
formatted_options.insert(author, FormatOptions { highlight: false, crop: None });
let mut matching_words = HashMap::new(); let mut matching_words = BTreeMap::new();
matching_words.insert("potter", Some(6)); matching_words.insert("potter", Some(6));
let value = format_fields( let value = format_fields(
@ -737,7 +671,6 @@ mod test {
obkv, obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&ids_in_formatted,
&formatted_options, &formatted_options,
) )
.unwrap(); .unwrap();
@ -768,11 +701,11 @@ mod test {
let obkv = obkv::KvReader::new(&buf); let obkv = obkv::KvReader::new(&buf);
let ids_in_formatted = vec![title, author]; let mut formatted_options = BTreeMap::new();
let mut formatted_options = HashMap::new();
formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(10) }); formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(10) });
formatted_options.insert(author, FormatOptions { highlight: false, crop: None });
let mut matching_words = HashMap::new(); let mut matching_words = BTreeMap::new();
matching_words.insert("potter", Some(6)); matching_words.insert("potter", Some(6));
let value = format_fields( let value = format_fields(
@ -780,7 +713,6 @@ mod test {
obkv, obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&ids_in_formatted,
&formatted_options, &formatted_options,
) )
.unwrap(); .unwrap();
@ -811,11 +743,11 @@ mod test {
let obkv = obkv::KvReader::new(&buf); let obkv = obkv::KvReader::new(&buf);
let ids_in_formatted = vec![title, author]; let mut formatted_options = BTreeMap::new();
let mut formatted_options = HashMap::new();
formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(0) }); formatted_options.insert(title, FormatOptions { highlight: false, crop: Some(0) });
formatted_options.insert(author, FormatOptions { highlight: false, crop: None });
let mut matching_words = HashMap::new(); let mut matching_words = BTreeMap::new();
matching_words.insert("potter", Some(6)); matching_words.insert("potter", Some(6));
let value = format_fields( let value = format_fields(
@ -823,7 +755,6 @@ mod test {
obkv, obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&ids_in_formatted,
&formatted_options, &formatted_options,
) )
.unwrap(); .unwrap();
@ -854,11 +785,11 @@ mod test {
let obkv = obkv::KvReader::new(&buf); let obkv = obkv::KvReader::new(&buf);
let ids_in_formatted = vec![title, author]; let mut formatted_options = BTreeMap::new();
let mut formatted_options = HashMap::new();
formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(1) }); formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(1) });
formatted_options.insert(author, FormatOptions { highlight: false, crop: None });
let mut matching_words = HashMap::new(); let mut matching_words = BTreeMap::new();
matching_words.insert("and", Some(3)); matching_words.insert("and", Some(3));
let value = format_fields( let value = format_fields(
@ -866,7 +797,6 @@ mod test {
obkv, obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&ids_in_formatted,
&formatted_options, &formatted_options,
) )
.unwrap(); .unwrap();
@ -897,11 +827,11 @@ mod test {
let obkv = obkv::KvReader::new(&buf); let obkv = obkv::KvReader::new(&buf);
let ids_in_formatted = vec![title, author]; let mut formatted_options = BTreeMap::new();
let mut formatted_options = HashMap::new();
formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(9) }); formatted_options.insert(title, FormatOptions { highlight: true, crop: Some(9) });
formatted_options.insert(author, FormatOptions { highlight: false, crop: None });
let mut matching_words = HashMap::new(); let mut matching_words = BTreeMap::new();
matching_words.insert("blood", Some(3)); matching_words.insert("blood", Some(3));
let value = format_fields( let value = format_fields(
@ -909,7 +839,6 @@ mod test {
obkv, obkv,
&formatter, &formatter,
&matching_words, &matching_words,
&ids_in_formatted,
&formatted_options, &formatted_options,
) )
.unwrap(); .unwrap();