mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
feat: Improve performances by reusing the documents HashMap
This commit is contained in:
parent
0814418710
commit
9dce74e9c8
@ -45,81 +45,49 @@ impl Document {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Pool {
|
fn matches_into_iter(matches: FnvHashMap<DocumentId, Vec<Match>>, limit: usize) -> vec::IntoIter<Document> {
|
||||||
documents: Vec<Document>,
|
let mut documents: Vec<_> = matches.into_iter().map(|(id, mut matches)| {
|
||||||
limit: usize,
|
matches.sort_unstable();
|
||||||
}
|
Document::from_sorted_matches(id, matches)
|
||||||
|
}).collect();
|
||||||
|
|
||||||
impl Pool {
|
let sorts = &[
|
||||||
pub fn new(query_size: usize, limit: usize) -> Self {
|
sum_of_typos,
|
||||||
Self {
|
number_of_words,
|
||||||
documents: Vec::new(),
|
words_proximity,
|
||||||
limit: limit,
|
sum_of_words_attribute,
|
||||||
}
|
sum_of_words_position,
|
||||||
}
|
exact,
|
||||||
|
];
|
||||||
|
|
||||||
// TODO remove the matches HashMap, not proud of it
|
{
|
||||||
pub fn extend(&mut self, matches: &mut FnvHashMap<DocumentId, Vec<Match>>) {
|
let mut groups = vec![documents.as_mut_slice()];
|
||||||
for doc in self.documents.iter_mut() {
|
|
||||||
if let Some(matches) = matches.remove(&doc.document_id) {
|
|
||||||
doc.matches.extend(matches);
|
|
||||||
doc.matches.sort_unstable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (id, mut matches) in matches.drain() {
|
for sort in sorts {
|
||||||
// note that matches are already sorted we do that by security
|
let mut temp = mem::replace(&mut groups, Vec::new());
|
||||||
// TODO remove this useless sort
|
let mut computed = 0;
|
||||||
matches.sort_unstable();
|
|
||||||
|
|
||||||
let document = Document::from_sorted_matches(id, matches);
|
for group in temp {
|
||||||
self.documents.push(document);
|
group.sort_unstable_by(sort);
|
||||||
}
|
for group in GroupByMut::new(group, |a, b| sort(a, b) == Ordering::Equal) {
|
||||||
}
|
computed += group.len();
|
||||||
}
|
groups.push(group);
|
||||||
|
if computed >= limit { break }
|
||||||
impl IntoIterator for Pool {
|
|
||||||
type Item = Document;
|
|
||||||
type IntoIter = vec::IntoIter<Self::Item>;
|
|
||||||
|
|
||||||
fn into_iter(mut self) -> Self::IntoIter {
|
|
||||||
let sorts = &[
|
|
||||||
sum_of_typos,
|
|
||||||
number_of_words,
|
|
||||||
words_proximity,
|
|
||||||
sum_of_words_attribute,
|
|
||||||
sum_of_words_position,
|
|
||||||
exact,
|
|
||||||
];
|
|
||||||
|
|
||||||
{
|
|
||||||
let mut groups = vec![self.documents.as_mut_slice()];
|
|
||||||
|
|
||||||
for sort in sorts {
|
|
||||||
let mut temp = mem::replace(&mut groups, Vec::new());
|
|
||||||
let mut computed = 0;
|
|
||||||
|
|
||||||
for group in temp {
|
|
||||||
group.sort_unstable_by(sort);
|
|
||||||
for group in GroupByMut::new(group, |a, b| sort(a, b) == Ordering::Equal) {
|
|
||||||
computed += group.len();
|
|
||||||
groups.push(group);
|
|
||||||
if computed >= self.limit { break }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.documents.truncate(self.limit);
|
|
||||||
self.documents.into_iter()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
documents.truncate(limit);
|
||||||
|
documents.into_iter()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum RankedStream<'m, 'v> {
|
pub enum RankedStream<'m, 'v> {
|
||||||
Fed {
|
Fed {
|
||||||
inner: UnionWithState<'m, 'v, u32>,
|
inner: UnionWithState<'m, 'v, u32>,
|
||||||
automatons: Vec<Levenshtein>,
|
automatons: Vec<Levenshtein>,
|
||||||
pool: Pool,
|
limit: usize,
|
||||||
|
matches: FnvHashMap<DocumentId, Vec<Match>>,
|
||||||
},
|
},
|
||||||
Pours {
|
Pours {
|
||||||
inner: vec::IntoIter<Document>,
|
inner: vec::IntoIter<Document>,
|
||||||
@ -135,12 +103,11 @@ impl<'m, 'v> RankedStream<'m, 'v> {
|
|||||||
op.push(stream);
|
op.push(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
let pool = Pool::new(automatons.len(), limit);
|
|
||||||
|
|
||||||
RankedStream::Fed {
|
RankedStream::Fed {
|
||||||
inner: op.union(),
|
inner: op.union(),
|
||||||
automatons: automatons,
|
automatons: automatons,
|
||||||
pool: pool,
|
limit: limit,
|
||||||
|
matches: FnvHashMap::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -149,14 +116,13 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
|
|||||||
type Item = Document;
|
type Item = Document;
|
||||||
|
|
||||||
fn next(&'a mut self) -> Option<Self::Item> {
|
fn next(&'a mut self) -> Option<Self::Item> {
|
||||||
let mut matches = FnvHashMap::default();
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
// TODO remove that when NLL are here !
|
// TODO remove that when NLL are here !
|
||||||
let mut transfert_pool = None;
|
let mut transfert_matches = None;
|
||||||
|
let mut transfert_limit = None;
|
||||||
|
|
||||||
match self {
|
match self {
|
||||||
RankedStream::Fed { inner, automatons, pool } => {
|
RankedStream::Fed { inner, automatons, limit, matches } => {
|
||||||
match inner.next() {
|
match inner.next() {
|
||||||
Some((string, indexed_values)) => {
|
Some((string, indexed_values)) => {
|
||||||
for iv in indexed_values {
|
for iv in indexed_values {
|
||||||
@ -183,15 +149,15 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
|
|||||||
is_exact: string.len() == automaton.query_len,
|
is_exact: string.len() == automaton.query_len,
|
||||||
};
|
};
|
||||||
matches.entry(di.document)
|
matches.entry(di.document)
|
||||||
.and_modify(|ms: &mut Vec<_>| ms.push(match_))
|
.or_insert_with(Vec::new)
|
||||||
.or_insert_with(|| vec![match_]);
|
.push(match_);
|
||||||
}
|
}
|
||||||
pool.extend(&mut matches);
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
// TODO remove this when NLL are here !
|
// TODO remove this when NLL are here !
|
||||||
transfert_pool = Some(mem::replace(pool, Pool::new(1, 1)));
|
transfert_matches = Some(mem::replace(matches, FnvHashMap::default()));
|
||||||
|
transfert_limit = Some(mem::replace(limit, 0));
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -201,9 +167,9 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// transform the `RankedStream` into a `Pours`
|
// transform the `RankedStream` into a `Pours`
|
||||||
if let Some(pool) = transfert_pool {
|
if let (Some(matches), Some(limit)) = (transfert_matches, transfert_limit) {
|
||||||
*self = RankedStream::Pours {
|
*self = RankedStream::Pours {
|
||||||
inner: pool.into_iter(),
|
inner: matches_into_iter(matches, limit).into_iter(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user