mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-22 03:07:27 +01:00
feat: Simplify the RankedStream code logic
This commit is contained in:
parent
9dce74e9c8
commit
34e0211567
@ -1,3 +1,5 @@
|
||||
#![feature(nll)]
|
||||
|
||||
extern crate fst;
|
||||
extern crate fnv;
|
||||
extern crate group_by;
|
||||
@ -77,7 +79,8 @@ pub struct Match {
|
||||
/// (i.e. at the start or the end of the attribute).
|
||||
///
|
||||
/// The index in the attribute is limited to a maximum of `2^32`
|
||||
/// this is because we index only the first 1000 words in an attribute.
|
||||
/// this is because we index only the first 1000 words
|
||||
/// in an attribute.
|
||||
pub attribute_index: u32,
|
||||
|
||||
/// Whether the word that matches is an exact match or a prefix.
|
||||
|
@ -60,20 +60,18 @@ fn matches_into_iter(matches: FnvHashMap<DocumentId, Vec<Match>>, limit: usize)
|
||||
exact,
|
||||
];
|
||||
|
||||
{
|
||||
let mut groups = vec![documents.as_mut_slice()];
|
||||
let mut groups = vec![documents.as_mut_slice()];
|
||||
|
||||
for sort in sorts {
|
||||
let mut temp = mem::replace(&mut groups, Vec::new());
|
||||
let mut computed = 0;
|
||||
for sort in sorts {
|
||||
let temp = mem::replace(&mut groups, Vec::new());
|
||||
let mut computed = 0;
|
||||
|
||||
for group in temp {
|
||||
group.sort_unstable_by(sort);
|
||||
for group in GroupByMut::new(group, |a, b| sort(a, b) == Ordering::Equal) {
|
||||
computed += group.len();
|
||||
groups.push(group);
|
||||
if computed >= limit { break }
|
||||
}
|
||||
for group in temp {
|
||||
group.sort_unstable_by(sort);
|
||||
for group in GroupByMut::new(group, |a, b| sort(a, b) == Ordering::Equal) {
|
||||
computed += group.len();
|
||||
groups.push(group);
|
||||
if computed >= limit { break }
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -82,7 +80,37 @@ fn matches_into_iter(matches: FnvHashMap<DocumentId, Vec<Match>>, limit: usize)
|
||||
documents.into_iter()
|
||||
}
|
||||
|
||||
pub enum RankedStream<'m, 'v> {
|
||||
pub struct RankedStream<'m, 'v>(RankedStreamInner<'m, 'v>);
|
||||
|
||||
impl<'m, 'v> RankedStream<'m, 'v> {
|
||||
pub fn new(map: &'m fst::Map, indexes: &'v DocIndexes, automatons: Vec<Levenshtein>, limit: usize) -> Self {
|
||||
let mut op = OpWithStateBuilder::new(indexes);
|
||||
|
||||
for automaton in automatons.iter().map(|l| l.dfa.clone()) {
|
||||
let stream = map.search(automaton).with_state();
|
||||
op.push(stream);
|
||||
}
|
||||
|
||||
let inner = RankedStreamInner::Fed {
|
||||
inner: op.union(),
|
||||
automatons: automatons,
|
||||
limit: limit,
|
||||
matches: FnvHashMap::default(),
|
||||
};
|
||||
|
||||
RankedStream(inner)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
|
||||
type Item = Document;
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
self.0.next()
|
||||
}
|
||||
}
|
||||
|
||||
enum RankedStreamInner<'m, 'v> {
|
||||
Fed {
|
||||
inner: UnionWithState<'m, 'v, u32>,
|
||||
automatons: Vec<Levenshtein>,
|
||||
@ -94,59 +122,27 @@ pub enum RankedStream<'m, 'v> {
|
||||
},
|
||||
}
|
||||
|
||||
impl<'m, 'v> RankedStream<'m, 'v> {
|
||||
pub fn new(map: &'m fst::Map, indexes: &'v DocIndexes, automatons: Vec<Levenshtein>, limit: usize) -> Self {
|
||||
let mut op = OpWithStateBuilder::new(indexes);
|
||||
|
||||
for automaton in automatons.iter().map(|l| l.dfa.clone()) {
|
||||
let stream = map.search(automaton).with_state();
|
||||
op.push(stream);
|
||||
}
|
||||
|
||||
RankedStream::Fed {
|
||||
inner: op.union(),
|
||||
automatons: automatons,
|
||||
limit: limit,
|
||||
matches: FnvHashMap::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
|
||||
impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStreamInner<'m, 'v> {
|
||||
type Item = Document;
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
// TODO remove that when NLL are here !
|
||||
let mut transfert_matches = None;
|
||||
let mut transfert_limit = None;
|
||||
|
||||
match self {
|
||||
RankedStream::Fed { inner, automatons, limit, matches } => {
|
||||
RankedStreamInner::Fed { inner, automatons, limit, matches } => {
|
||||
match inner.next() {
|
||||
Some((string, indexed_values)) => {
|
||||
for iv in indexed_values {
|
||||
|
||||
// TODO extend documents matches by batch of query_index
|
||||
// that way it will be possible to discard matches that
|
||||
// have an invalid distance *before* adding them
|
||||
// to the matches of the documents and, that way, avoid a sort
|
||||
|
||||
let automaton = &automatons[iv.index];
|
||||
let distance = automaton.dfa.distance(iv.state).to_u8();
|
||||
|
||||
// TODO remove the Pool system !
|
||||
// this is an internal Pool rule but
|
||||
// it is more efficient to test that here
|
||||
// if pool.limitation.is_reached() && distance != 0 { continue }
|
||||
|
||||
for di in iv.values {
|
||||
let match_ = Match {
|
||||
query_index: iv.index as u32,
|
||||
distance: distance,
|
||||
attribute: di.attribute,
|
||||
attribute_index: di.attribute_index,
|
||||
is_exact: string.len() == automaton.query_len,
|
||||
is_exact: distance == 0 && string.len() == automaton.query_len,
|
||||
};
|
||||
matches.entry(di.document)
|
||||
.or_insert_with(Vec::new)
|
||||
@ -155,23 +151,17 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
|
||||
}
|
||||
},
|
||||
None => {
|
||||
// TODO remove this when NLL are here !
|
||||
transfert_matches = Some(mem::replace(matches, FnvHashMap::default()));
|
||||
transfert_limit = Some(mem::replace(limit, 0));
|
||||
let matches = mem::replace(matches, FnvHashMap::default());
|
||||
*self = RankedStreamInner::Pours {
|
||||
inner: matches_into_iter(matches, *limit).into_iter()
|
||||
};
|
||||
},
|
||||
}
|
||||
},
|
||||
RankedStream::Pours { inner } => {
|
||||
RankedStreamInner::Pours { inner } => {
|
||||
return inner.next()
|
||||
},
|
||||
}
|
||||
|
||||
// transform the `RankedStream` into a `Pours`
|
||||
if let (Some(matches), Some(limit)) = (transfert_matches, transfert_limit) {
|
||||
*self = RankedStream::Pours {
|
||||
inner: matches_into_iter(matches, limit).into_iter(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user