Use BTreeSet instead of HashSet

This commit is contained in:
Clémentine Urquizar 2021-06-16 16:18:55 +02:00
parent 7b02fdaddc
commit dc5a3d4a62
No known key found for this signature in database
GPG Key ID: D8E7CC7422E77E1A
2 changed files with 21 additions and 32 deletions

View File

@ -1,11 +1,10 @@
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
use std::time::Instant;
use anyhow::bail;
use either::Either;
use heed::RoTxn;
use indexmap::IndexMap;
use itertools::Itertools;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords};
use serde::{Deserialize, Serialize};
@ -32,7 +31,7 @@ pub struct SearchQuery {
pub offset: Option<usize>,
#[serde(default = "default_search_limit")]
pub limit: usize,
pub attributes_to_retrieve: Option<HashSet<String>>,
pub attributes_to_retrieve: Option<BTreeSet<String>>,
pub attributes_to_crop: Option<Vec<String>>,
#[serde(default = "default_crop_length")]
pub crop_length: usize,
@ -101,11 +100,11 @@ impl Index {
let displayed_ids = self
.displayed_fields_ids(&rtxn)?
.map(|fields| fields.into_iter().collect::<HashSet<_>>())
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
let fids = |attrs: &HashSet<String>| {
let mut ids = HashSet::new();
let fids = |attrs: &BTreeSet<String>| {
let mut ids = BTreeSet::new();
for attr in attrs {
if attr == "*" {
ids = displayed_ids.clone();
@ -123,7 +122,7 @@ impl Index {
// but these attributes must be also
// - present in the fields_ids_map
// - present in the the displayed attributes
let to_retrieve_ids: HashSet<_> = query
let to_retrieve_ids: BTreeSet<_> = query
.attributes_to_retrieve
.as_ref()
.map(fids)
@ -132,12 +131,6 @@ impl Index {
.cloned()
.collect();
let to_retrieve_ids_sorted: Vec<_> = to_retrieve_ids
.clone()
.into_iter()
.sorted()
.collect();
let attr_to_highlight = query
.attributes_to_highlight
.unwrap_or_default();
@ -161,13 +154,12 @@ impl Index {
let ids_in_formatted = formatted_options
.keys()
.cloned()
.collect::<HashSet<_>>()
.collect::<BTreeSet<_>>()
.intersection(&displayed_ids)
.cloned()
.collect::<HashSet<_>>()
.collect::<BTreeSet<_>>()
.union(&to_retrieve_ids)
.cloned()
.sorted()
.collect::<Vec<_>>();
let stop_words = fst::Set::default();
@ -175,7 +167,7 @@ impl Index {
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
for (_id, obkv) in self.documents(&rtxn, documents_ids)? {
let document = make_document(&to_retrieve_ids_sorted, &fields_ids_map, obkv)?;
let document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?;
let formatted = format_fields(
&fields_ids_map,
obkv,
@ -223,7 +215,7 @@ fn compute_formatted_options(
attr_to_crop: &[String],
query_crop_length: usize,
fields_ids_map: &FieldsIdsMap,
displayed_ids: &HashSet<u8>,
displayed_ids: &BTreeSet<u8>,
) -> HashMap<FieldId, FormatOptions> {
let mut formatted_options = HashMap::new();
@ -286,7 +278,7 @@ fn compute_formatted_options(
}
fn make_document(
attributes_to_retrieve: &[FieldId],
attributes_to_retrieve: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReader,
) -> anyhow::Result<Document> {
@ -327,8 +319,7 @@ fn format_fields<A: AsRef<[u8]>>(
value = formatter.format_value(
value,
matching_words,
format.highlight,
format.crop,
*format,
);
}
@ -384,25 +375,24 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
&self,
value: Value,
matcher: &impl Matcher,
need_to_highlight: bool,
need_to_crop: Option<usize>,
format_options: FormatOptions,
) -> Value {
match value {
Value::String(old_string) => {
let value =
self.format_string(old_string, matcher, need_to_highlight, need_to_crop);
self.format_string(old_string, matcher, format_options);
Value::String(value)
}
Value::Array(values) => Value::Array(
values
.into_iter()
.map(|v| self.format_value(v, matcher, need_to_highlight, None))
.map(|v| self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None }))
.collect(),
),
Value::Object(object) => Value::Object(
object
.into_iter()
.map(|(k, v)| (k, self.format_value(v, matcher, need_to_highlight, None)))
.map(|(k, v)| (k, self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None })))
.collect(),
),
value => value,
@ -413,12 +403,11 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
&self,
s: String,
matcher: &impl Matcher,
need_to_highlight: bool,
need_to_crop: Option<usize>,
format_options: FormatOptions,
) -> String {
let analyzed = self.analyzer.analyze(&s);
let tokens: Box<dyn Iterator<Item = (&str, Token)>> = match need_to_crop {
let tokens: Box<dyn Iterator<Item = (&str, Token)>> = match format_options.crop {
Some(crop_len) => {
let mut buffer = VecDeque::new();
let mut tokens = analyzed.reconstruct().peekable();
@ -462,7 +451,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
tokens
.map(|(word, token)| {
if need_to_highlight && token.is_word() && matcher.matches(token.text()).is_some() {
if format_options.highlight && token.is_word() && matcher.matches(token.text()).is_some() {
let mut new_word = String::new();
new_word.push_str(&self.marks.0);
if let Some(match_len) = matcher.matches(token.text()) {

View File

@ -1,4 +1,4 @@
use std::collections::HashSet;
use std::collections::{BTreeSet, HashSet};
use std::convert::{TryFrom, TryInto};
use actix_web::{get, post, web, HttpResponse};
@ -36,7 +36,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
fn try_from(other: SearchQueryGet) -> anyhow::Result<Self> {
let attributes_to_retrieve = other
.attributes_to_retrieve
.map(|attrs| attrs.split(',').map(String::from).collect::<HashSet<_>>());
.map(|attrs| attrs.split(',').map(String::from).collect::<BTreeSet<_>>());
let attributes_to_crop = other
.attributes_to_crop