Use BTreeSet instead of HashSet

This commit is contained in:
Clémentine Urquizar 2021-06-16 16:18:55 +02:00
parent 7b02fdaddc
commit dc5a3d4a62
No known key found for this signature in database
GPG Key ID: D8E7CC7422E77E1A
2 changed files with 21 additions and 32 deletions

View File

@ -1,11 +1,10 @@
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
use std::time::Instant; use std::time::Instant;
use anyhow::bail; use anyhow::bail;
use either::Either; use either::Either;
use heed::RoTxn; use heed::RoTxn;
use indexmap::IndexMap; use indexmap::IndexMap;
use itertools::Itertools;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords}; use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -32,7 +31,7 @@ pub struct SearchQuery {
pub offset: Option<usize>, pub offset: Option<usize>,
#[serde(default = "default_search_limit")] #[serde(default = "default_search_limit")]
pub limit: usize, pub limit: usize,
pub attributes_to_retrieve: Option<HashSet<String>>, pub attributes_to_retrieve: Option<BTreeSet<String>>,
pub attributes_to_crop: Option<Vec<String>>, pub attributes_to_crop: Option<Vec<String>>,
#[serde(default = "default_crop_length")] #[serde(default = "default_crop_length")]
pub crop_length: usize, pub crop_length: usize,
@ -101,11 +100,11 @@ impl Index {
let displayed_ids = self let displayed_ids = self
.displayed_fields_ids(&rtxn)? .displayed_fields_ids(&rtxn)?
.map(|fields| fields.into_iter().collect::<HashSet<_>>()) .map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
let fids = |attrs: &HashSet<String>| { let fids = |attrs: &BTreeSet<String>| {
let mut ids = HashSet::new(); let mut ids = BTreeSet::new();
for attr in attrs { for attr in attrs {
if attr == "*" { if attr == "*" {
ids = displayed_ids.clone(); ids = displayed_ids.clone();
@ -123,7 +122,7 @@ impl Index {
// but these attributes must be also // but these attributes must be also
// - present in the fields_ids_map // - present in the fields_ids_map
// - present in the the displayed attributes // - present in the the displayed attributes
let to_retrieve_ids: HashSet<_> = query let to_retrieve_ids: BTreeSet<_> = query
.attributes_to_retrieve .attributes_to_retrieve
.as_ref() .as_ref()
.map(fids) .map(fids)
@ -132,12 +131,6 @@ impl Index {
.cloned() .cloned()
.collect(); .collect();
let to_retrieve_ids_sorted: Vec<_> = to_retrieve_ids
.clone()
.into_iter()
.sorted()
.collect();
let attr_to_highlight = query let attr_to_highlight = query
.attributes_to_highlight .attributes_to_highlight
.unwrap_or_default(); .unwrap_or_default();
@ -161,13 +154,12 @@ impl Index {
let ids_in_formatted = formatted_options let ids_in_formatted = formatted_options
.keys() .keys()
.cloned() .cloned()
.collect::<HashSet<_>>() .collect::<BTreeSet<_>>()
.intersection(&displayed_ids) .intersection(&displayed_ids)
.cloned() .cloned()
.collect::<HashSet<_>>() .collect::<BTreeSet<_>>()
.union(&to_retrieve_ids) .union(&to_retrieve_ids)
.cloned() .cloned()
.sorted()
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let stop_words = fst::Set::default(); let stop_words = fst::Set::default();
@ -175,7 +167,7 @@ impl Index {
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>"))); Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
for (_id, obkv) in self.documents(&rtxn, documents_ids)? { for (_id, obkv) in self.documents(&rtxn, documents_ids)? {
let document = make_document(&to_retrieve_ids_sorted, &fields_ids_map, obkv)?; let document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?;
let formatted = format_fields( let formatted = format_fields(
&fields_ids_map, &fields_ids_map,
obkv, obkv,
@ -223,7 +215,7 @@ fn compute_formatted_options(
attr_to_crop: &[String], attr_to_crop: &[String],
query_crop_length: usize, query_crop_length: usize,
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
displayed_ids: &HashSet<u8>, displayed_ids: &BTreeSet<u8>,
) -> HashMap<FieldId, FormatOptions> { ) -> HashMap<FieldId, FormatOptions> {
let mut formatted_options = HashMap::new(); let mut formatted_options = HashMap::new();
@ -286,7 +278,7 @@ fn compute_formatted_options(
} }
fn make_document( fn make_document(
attributes_to_retrieve: &[FieldId], attributes_to_retrieve: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap, field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReader, obkv: obkv::KvReader,
) -> anyhow::Result<Document> { ) -> anyhow::Result<Document> {
@ -327,8 +319,7 @@ fn format_fields<A: AsRef<[u8]>>(
value = formatter.format_value( value = formatter.format_value(
value, value,
matching_words, matching_words,
format.highlight, *format,
format.crop,
); );
} }
@ -384,25 +375,24 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
&self, &self,
value: Value, value: Value,
matcher: &impl Matcher, matcher: &impl Matcher,
need_to_highlight: bool, format_options: FormatOptions,
need_to_crop: Option<usize>,
) -> Value { ) -> Value {
match value { match value {
Value::String(old_string) => { Value::String(old_string) => {
let value = let value =
self.format_string(old_string, matcher, need_to_highlight, need_to_crop); self.format_string(old_string, matcher, format_options);
Value::String(value) Value::String(value)
} }
Value::Array(values) => Value::Array( Value::Array(values) => Value::Array(
values values
.into_iter() .into_iter()
.map(|v| self.format_value(v, matcher, need_to_highlight, None)) .map(|v| self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None }))
.collect(), .collect(),
), ),
Value::Object(object) => Value::Object( Value::Object(object) => Value::Object(
object object
.into_iter() .into_iter()
.map(|(k, v)| (k, self.format_value(v, matcher, need_to_highlight, None))) .map(|(k, v)| (k, self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None })))
.collect(), .collect(),
), ),
value => value, value => value,
@ -413,12 +403,11 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
&self, &self,
s: String, s: String,
matcher: &impl Matcher, matcher: &impl Matcher,
need_to_highlight: bool, format_options: FormatOptions,
need_to_crop: Option<usize>,
) -> String { ) -> String {
let analyzed = self.analyzer.analyze(&s); let analyzed = self.analyzer.analyze(&s);
let tokens: Box<dyn Iterator<Item = (&str, Token)>> = match need_to_crop { let tokens: Box<dyn Iterator<Item = (&str, Token)>> = match format_options.crop {
Some(crop_len) => { Some(crop_len) => {
let mut buffer = VecDeque::new(); let mut buffer = VecDeque::new();
let mut tokens = analyzed.reconstruct().peekable(); let mut tokens = analyzed.reconstruct().peekable();
@ -462,7 +451,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
tokens tokens
.map(|(word, token)| { .map(|(word, token)| {
if need_to_highlight && token.is_word() && matcher.matches(token.text()).is_some() { if format_options.highlight && token.is_word() && matcher.matches(token.text()).is_some() {
let mut new_word = String::new(); let mut new_word = String::new();
new_word.push_str(&self.marks.0); new_word.push_str(&self.marks.0);
if let Some(match_len) = matcher.matches(token.text()) { if let Some(match_len) = matcher.matches(token.text()) {

View File

@ -1,4 +1,4 @@
use std::collections::HashSet; use std::collections::{BTreeSet, HashSet};
use std::convert::{TryFrom, TryInto}; use std::convert::{TryFrom, TryInto};
use actix_web::{get, post, web, HttpResponse}; use actix_web::{get, post, web, HttpResponse};
@ -36,7 +36,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
fn try_from(other: SearchQueryGet) -> anyhow::Result<Self> { fn try_from(other: SearchQueryGet) -> anyhow::Result<Self> {
let attributes_to_retrieve = other let attributes_to_retrieve = other
.attributes_to_retrieve .attributes_to_retrieve
.map(|attrs| attrs.split(',').map(String::from).collect::<HashSet<_>>()); .map(|attrs| attrs.split(',').map(String::from).collect::<BTreeSet<_>>());
let attributes_to_crop = other let attributes_to_crop = other
.attributes_to_crop .attributes_to_crop