mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-22 03:07:27 +01:00
feat: Simplify the RankedStream code logic
This commit is contained in:
parent
9dce74e9c8
commit
34e0211567
@ -1,3 +1,5 @@
|
|||||||
|
#![feature(nll)]
|
||||||
|
|
||||||
extern crate fst;
|
extern crate fst;
|
||||||
extern crate fnv;
|
extern crate fnv;
|
||||||
extern crate group_by;
|
extern crate group_by;
|
||||||
@ -77,7 +79,8 @@ pub struct Match {
|
|||||||
/// (i.e. at the start or the end of the attribute).
|
/// (i.e. at the start or the end of the attribute).
|
||||||
///
|
///
|
||||||
/// The index in the attribute is limited to a maximum of `2^32`
|
/// The index in the attribute is limited to a maximum of `2^32`
|
||||||
/// this is because we index only the first 1000 words in an attribute.
|
/// this is because we index only the first 1000 words
|
||||||
|
/// in an attribute.
|
||||||
pub attribute_index: u32,
|
pub attribute_index: u32,
|
||||||
|
|
||||||
/// Whether the word that matches is an exact match or a prefix.
|
/// Whether the word that matches is an exact match or a prefix.
|
||||||
|
@ -60,20 +60,18 @@ fn matches_into_iter(matches: FnvHashMap<DocumentId, Vec<Match>>, limit: usize)
|
|||||||
exact,
|
exact,
|
||||||
];
|
];
|
||||||
|
|
||||||
{
|
let mut groups = vec![documents.as_mut_slice()];
|
||||||
let mut groups = vec![documents.as_mut_slice()];
|
|
||||||
|
|
||||||
for sort in sorts {
|
for sort in sorts {
|
||||||
let mut temp = mem::replace(&mut groups, Vec::new());
|
let temp = mem::replace(&mut groups, Vec::new());
|
||||||
let mut computed = 0;
|
let mut computed = 0;
|
||||||
|
|
||||||
for group in temp {
|
for group in temp {
|
||||||
group.sort_unstable_by(sort);
|
group.sort_unstable_by(sort);
|
||||||
for group in GroupByMut::new(group, |a, b| sort(a, b) == Ordering::Equal) {
|
for group in GroupByMut::new(group, |a, b| sort(a, b) == Ordering::Equal) {
|
||||||
computed += group.len();
|
computed += group.len();
|
||||||
groups.push(group);
|
groups.push(group);
|
||||||
if computed >= limit { break }
|
if computed >= limit { break }
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -82,7 +80,37 @@ fn matches_into_iter(matches: FnvHashMap<DocumentId, Vec<Match>>, limit: usize)
|
|||||||
documents.into_iter()
|
documents.into_iter()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum RankedStream<'m, 'v> {
|
pub struct RankedStream<'m, 'v>(RankedStreamInner<'m, 'v>);
|
||||||
|
|
||||||
|
impl<'m, 'v> RankedStream<'m, 'v> {
|
||||||
|
pub fn new(map: &'m fst::Map, indexes: &'v DocIndexes, automatons: Vec<Levenshtein>, limit: usize) -> Self {
|
||||||
|
let mut op = OpWithStateBuilder::new(indexes);
|
||||||
|
|
||||||
|
for automaton in automatons.iter().map(|l| l.dfa.clone()) {
|
||||||
|
let stream = map.search(automaton).with_state();
|
||||||
|
op.push(stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
let inner = RankedStreamInner::Fed {
|
||||||
|
inner: op.union(),
|
||||||
|
automatons: automatons,
|
||||||
|
limit: limit,
|
||||||
|
matches: FnvHashMap::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
RankedStream(inner)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
|
||||||
|
type Item = Document;
|
||||||
|
|
||||||
|
fn next(&'a mut self) -> Option<Self::Item> {
|
||||||
|
self.0.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
enum RankedStreamInner<'m, 'v> {
|
||||||
Fed {
|
Fed {
|
||||||
inner: UnionWithState<'m, 'v, u32>,
|
inner: UnionWithState<'m, 'v, u32>,
|
||||||
automatons: Vec<Levenshtein>,
|
automatons: Vec<Levenshtein>,
|
||||||
@ -94,59 +122,27 @@ pub enum RankedStream<'m, 'v> {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'m, 'v> RankedStream<'m, 'v> {
|
impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStreamInner<'m, 'v> {
|
||||||
pub fn new(map: &'m fst::Map, indexes: &'v DocIndexes, automatons: Vec<Levenshtein>, limit: usize) -> Self {
|
|
||||||
let mut op = OpWithStateBuilder::new(indexes);
|
|
||||||
|
|
||||||
for automaton in automatons.iter().map(|l| l.dfa.clone()) {
|
|
||||||
let stream = map.search(automaton).with_state();
|
|
||||||
op.push(stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
RankedStream::Fed {
|
|
||||||
inner: op.union(),
|
|
||||||
automatons: automatons,
|
|
||||||
limit: limit,
|
|
||||||
matches: FnvHashMap::default(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
|
|
||||||
type Item = Document;
|
type Item = Document;
|
||||||
|
|
||||||
fn next(&'a mut self) -> Option<Self::Item> {
|
fn next(&'a mut self) -> Option<Self::Item> {
|
||||||
loop {
|
loop {
|
||||||
// TODO remove that when NLL are here !
|
|
||||||
let mut transfert_matches = None;
|
|
||||||
let mut transfert_limit = None;
|
|
||||||
|
|
||||||
match self {
|
match self {
|
||||||
RankedStream::Fed { inner, automatons, limit, matches } => {
|
RankedStreamInner::Fed { inner, automatons, limit, matches } => {
|
||||||
match inner.next() {
|
match inner.next() {
|
||||||
Some((string, indexed_values)) => {
|
Some((string, indexed_values)) => {
|
||||||
for iv in indexed_values {
|
for iv in indexed_values {
|
||||||
|
|
||||||
// TODO extend documents matches by batch of query_index
|
|
||||||
// that way it will be possible to discard matches that
|
|
||||||
// have an invalid distance *before* adding them
|
|
||||||
// to the matches of the documents and, that way, avoid a sort
|
|
||||||
|
|
||||||
let automaton = &automatons[iv.index];
|
let automaton = &automatons[iv.index];
|
||||||
let distance = automaton.dfa.distance(iv.state).to_u8();
|
let distance = automaton.dfa.distance(iv.state).to_u8();
|
||||||
|
|
||||||
// TODO remove the Pool system !
|
|
||||||
// this is an internal Pool rule but
|
|
||||||
// it is more efficient to test that here
|
|
||||||
// if pool.limitation.is_reached() && distance != 0 { continue }
|
|
||||||
|
|
||||||
for di in iv.values {
|
for di in iv.values {
|
||||||
let match_ = Match {
|
let match_ = Match {
|
||||||
query_index: iv.index as u32,
|
query_index: iv.index as u32,
|
||||||
distance: distance,
|
distance: distance,
|
||||||
attribute: di.attribute,
|
attribute: di.attribute,
|
||||||
attribute_index: di.attribute_index,
|
attribute_index: di.attribute_index,
|
||||||
is_exact: string.len() == automaton.query_len,
|
is_exact: distance == 0 && string.len() == automaton.query_len,
|
||||||
};
|
};
|
||||||
matches.entry(di.document)
|
matches.entry(di.document)
|
||||||
.or_insert_with(Vec::new)
|
.or_insert_with(Vec::new)
|
||||||
@ -155,23 +151,17 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
// TODO remove this when NLL are here !
|
let matches = mem::replace(matches, FnvHashMap::default());
|
||||||
transfert_matches = Some(mem::replace(matches, FnvHashMap::default()));
|
*self = RankedStreamInner::Pours {
|
||||||
transfert_limit = Some(mem::replace(limit, 0));
|
inner: matches_into_iter(matches, *limit).into_iter()
|
||||||
|
};
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
RankedStream::Pours { inner } => {
|
RankedStreamInner::Pours { inner } => {
|
||||||
return inner.next()
|
return inner.next()
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// transform the `RankedStream` into a `Pours`
|
|
||||||
if let (Some(matches), Some(limit)) = (transfert_matches, transfert_limit) {
|
|
||||||
*self = RankedStream::Pours {
|
|
||||||
inner: matches_into_iter(matches, limit).into_iter(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user