mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Use BTreeSet instead of HashSet
This commit is contained in:
parent
7b02fdaddc
commit
dc5a3d4a62
@ -1,11 +1,10 @@
|
|||||||
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
|
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use anyhow::bail;
|
use anyhow::bail;
|
||||||
use either::Either;
|
use either::Either;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use indexmap::IndexMap;
|
use indexmap::IndexMap;
|
||||||
use itertools::Itertools;
|
|
||||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
|
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
|
||||||
use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords};
|
use milli::{FilterCondition, FieldId, FieldsIdsMap, MatchingWords};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@ -32,7 +31,7 @@ pub struct SearchQuery {
|
|||||||
pub offset: Option<usize>,
|
pub offset: Option<usize>,
|
||||||
#[serde(default = "default_search_limit")]
|
#[serde(default = "default_search_limit")]
|
||||||
pub limit: usize,
|
pub limit: usize,
|
||||||
pub attributes_to_retrieve: Option<HashSet<String>>,
|
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||||
pub attributes_to_crop: Option<Vec<String>>,
|
pub attributes_to_crop: Option<Vec<String>>,
|
||||||
#[serde(default = "default_crop_length")]
|
#[serde(default = "default_crop_length")]
|
||||||
pub crop_length: usize,
|
pub crop_length: usize,
|
||||||
@ -101,11 +100,11 @@ impl Index {
|
|||||||
|
|
||||||
let displayed_ids = self
|
let displayed_ids = self
|
||||||
.displayed_fields_ids(&rtxn)?
|
.displayed_fields_ids(&rtxn)?
|
||||||
.map(|fields| fields.into_iter().collect::<HashSet<_>>())
|
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
|
||||||
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
||||||
|
|
||||||
let fids = |attrs: &HashSet<String>| {
|
let fids = |attrs: &BTreeSet<String>| {
|
||||||
let mut ids = HashSet::new();
|
let mut ids = BTreeSet::new();
|
||||||
for attr in attrs {
|
for attr in attrs {
|
||||||
if attr == "*" {
|
if attr == "*" {
|
||||||
ids = displayed_ids.clone();
|
ids = displayed_ids.clone();
|
||||||
@ -123,7 +122,7 @@ impl Index {
|
|||||||
// but these attributes must also be
|
// but these attributes must also be
|
||||||
// - present in the fields_ids_map
|
// - present in the fields_ids_map
|
||||||
// - present in the displayed attributes
|
// - present in the displayed attributes
|
||||||
let to_retrieve_ids: HashSet<_> = query
|
let to_retrieve_ids: BTreeSet<_> = query
|
||||||
.attributes_to_retrieve
|
.attributes_to_retrieve
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(fids)
|
.map(fids)
|
||||||
@ -132,12 +131,6 @@ impl Index {
|
|||||||
.cloned()
|
.cloned()
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let to_retrieve_ids_sorted: Vec<_> = to_retrieve_ids
|
|
||||||
.clone()
|
|
||||||
.into_iter()
|
|
||||||
.sorted()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let attr_to_highlight = query
|
let attr_to_highlight = query
|
||||||
.attributes_to_highlight
|
.attributes_to_highlight
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
@ -161,13 +154,12 @@ impl Index {
|
|||||||
let ids_in_formatted = formatted_options
|
let ids_in_formatted = formatted_options
|
||||||
.keys()
|
.keys()
|
||||||
.cloned()
|
.cloned()
|
||||||
.collect::<HashSet<_>>()
|
.collect::<BTreeSet<_>>()
|
||||||
.intersection(&displayed_ids)
|
.intersection(&displayed_ids)
|
||||||
.cloned()
|
.cloned()
|
||||||
.collect::<HashSet<_>>()
|
.collect::<BTreeSet<_>>()
|
||||||
.union(&to_retrieve_ids)
|
.union(&to_retrieve_ids)
|
||||||
.cloned()
|
.cloned()
|
||||||
.sorted()
|
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let stop_words = fst::Set::default();
|
let stop_words = fst::Set::default();
|
||||||
@ -175,7 +167,7 @@ impl Index {
|
|||||||
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
|
Formatter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
|
||||||
|
|
||||||
for (_id, obkv) in self.documents(&rtxn, documents_ids)? {
|
for (_id, obkv) in self.documents(&rtxn, documents_ids)? {
|
||||||
let document = make_document(&to_retrieve_ids_sorted, &fields_ids_map, obkv)?;
|
let document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?;
|
||||||
let formatted = format_fields(
|
let formatted = format_fields(
|
||||||
&fields_ids_map,
|
&fields_ids_map,
|
||||||
obkv,
|
obkv,
|
||||||
@ -223,7 +215,7 @@ fn compute_formatted_options(
|
|||||||
attr_to_crop: &[String],
|
attr_to_crop: &[String],
|
||||||
query_crop_length: usize,
|
query_crop_length: usize,
|
||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &FieldsIdsMap,
|
||||||
displayed_ids: &HashSet<u8>,
|
displayed_ids: &BTreeSet<u8>,
|
||||||
) -> HashMap<FieldId, FormatOptions> {
|
) -> HashMap<FieldId, FormatOptions> {
|
||||||
|
|
||||||
let mut formatted_options = HashMap::new();
|
let mut formatted_options = HashMap::new();
|
||||||
@ -286,7 +278,7 @@ fn compute_formatted_options(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn make_document(
|
fn make_document(
|
||||||
attributes_to_retrieve: &[FieldId],
|
attributes_to_retrieve: &BTreeSet<FieldId>,
|
||||||
field_ids_map: &FieldsIdsMap,
|
field_ids_map: &FieldsIdsMap,
|
||||||
obkv: obkv::KvReader,
|
obkv: obkv::KvReader,
|
||||||
) -> anyhow::Result<Document> {
|
) -> anyhow::Result<Document> {
|
||||||
@ -327,8 +319,7 @@ fn format_fields<A: AsRef<[u8]>>(
|
|||||||
value = formatter.format_value(
|
value = formatter.format_value(
|
||||||
value,
|
value,
|
||||||
matching_words,
|
matching_words,
|
||||||
format.highlight,
|
*format,
|
||||||
format.crop,
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -384,25 +375,24 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
|
|||||||
&self,
|
&self,
|
||||||
value: Value,
|
value: Value,
|
||||||
matcher: &impl Matcher,
|
matcher: &impl Matcher,
|
||||||
need_to_highlight: bool,
|
format_options: FormatOptions,
|
||||||
need_to_crop: Option<usize>,
|
|
||||||
) -> Value {
|
) -> Value {
|
||||||
match value {
|
match value {
|
||||||
Value::String(old_string) => {
|
Value::String(old_string) => {
|
||||||
let value =
|
let value =
|
||||||
self.format_string(old_string, matcher, need_to_highlight, need_to_crop);
|
self.format_string(old_string, matcher, format_options);
|
||||||
Value::String(value)
|
Value::String(value)
|
||||||
}
|
}
|
||||||
Value::Array(values) => Value::Array(
|
Value::Array(values) => Value::Array(
|
||||||
values
|
values
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|v| self.format_value(v, matcher, need_to_highlight, None))
|
.map(|v| self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None }))
|
||||||
.collect(),
|
.collect(),
|
||||||
),
|
),
|
||||||
Value::Object(object) => Value::Object(
|
Value::Object(object) => Value::Object(
|
||||||
object
|
object
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(k, v)| (k, self.format_value(v, matcher, need_to_highlight, None)))
|
.map(|(k, v)| (k, self.format_value(v, matcher, FormatOptions { highlight: format_options.highlight, crop: None })))
|
||||||
.collect(),
|
.collect(),
|
||||||
),
|
),
|
||||||
value => value,
|
value => value,
|
||||||
@ -413,12 +403,11 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
|
|||||||
&self,
|
&self,
|
||||||
s: String,
|
s: String,
|
||||||
matcher: &impl Matcher,
|
matcher: &impl Matcher,
|
||||||
need_to_highlight: bool,
|
format_options: FormatOptions,
|
||||||
need_to_crop: Option<usize>,
|
|
||||||
) -> String {
|
) -> String {
|
||||||
let analyzed = self.analyzer.analyze(&s);
|
let analyzed = self.analyzer.analyze(&s);
|
||||||
|
|
||||||
let tokens: Box<dyn Iterator<Item = (&str, Token)>> = match need_to_crop {
|
let tokens: Box<dyn Iterator<Item = (&str, Token)>> = match format_options.crop {
|
||||||
Some(crop_len) => {
|
Some(crop_len) => {
|
||||||
let mut buffer = VecDeque::new();
|
let mut buffer = VecDeque::new();
|
||||||
let mut tokens = analyzed.reconstruct().peekable();
|
let mut tokens = analyzed.reconstruct().peekable();
|
||||||
@ -462,7 +451,7 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
|
|||||||
|
|
||||||
tokens
|
tokens
|
||||||
.map(|(word, token)| {
|
.map(|(word, token)| {
|
||||||
if need_to_highlight && token.is_word() && matcher.matches(token.text()).is_some() {
|
if format_options.highlight && token.is_word() && matcher.matches(token.text()).is_some() {
|
||||||
let mut new_word = String::new();
|
let mut new_word = String::new();
|
||||||
new_word.push_str(&self.marks.0);
|
new_word.push_str(&self.marks.0);
|
||||||
if let Some(match_len) = matcher.matches(token.text()) {
|
if let Some(match_len) = matcher.matches(token.text()) {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::{BTreeSet, HashSet};
|
||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
|
|
||||||
use actix_web::{get, post, web, HttpResponse};
|
use actix_web::{get, post, web, HttpResponse};
|
||||||
@ -36,7 +36,7 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
|
|||||||
fn try_from(other: SearchQueryGet) -> anyhow::Result<Self> {
|
fn try_from(other: SearchQueryGet) -> anyhow::Result<Self> {
|
||||||
let attributes_to_retrieve = other
|
let attributes_to_retrieve = other
|
||||||
.attributes_to_retrieve
|
.attributes_to_retrieve
|
||||||
.map(|attrs| attrs.split(',').map(String::from).collect::<HashSet<_>>());
|
.map(|attrs| attrs.split(',').map(String::from).collect::<BTreeSet<_>>());
|
||||||
|
|
||||||
let attributes_to_crop = other
|
let attributes_to_crop = other
|
||||||
.attributes_to_crop
|
.attributes_to_crop
|
||||||
|
Loading…
Reference in New Issue
Block a user