feat: Make querying always return limited results

Kerollmops 2018-06-24 15:36:24 +02:00 committed by Clément Renault
parent 8fab80048c
commit 0ce08c8790
3 changed files with 29 additions and 52 deletions

View File

@@ -4,10 +4,8 @@ extern crate test;
 extern crate fst;
 extern crate raptor;
 
-use std::path::Path;
-use std::{fs, env, io};
 use fst::Streamer;
-use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder};
+use raptor::{load_map, RankedStream, LevBuilder};
 
 #[bench]
 fn chauve_souris(b: &mut test::Bencher) {
@@ -23,7 +21,7 @@ fn chauve_souris(b: &mut test::Bencher) {
         automatons.push(lev);
     }
 
-    let mut stream = RankedStream::new(&map, &map.values(), automatons);
+    let mut stream = RankedStream::new(&map, &map.values(), automatons, 20);
     while let Some(document_id) = stream.next() {
         test::black_box(document_id);
     }

View File

@@ -2,10 +2,9 @@ extern crate env_logger;
 extern crate fst;
 extern crate raptor;
 
-use std::path::Path;
-use std::{fs, env, io};
+use std::env;
 use fst::Streamer;
-use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder};
+use raptor::{load_map, RankedStream, LevBuilder};
 
 fn main() {
     drop(env_logger::init());
@@ -24,13 +23,9 @@ fn main() {
         automatons.push(lev);
     }
 
-    let mut limit: Option<usize> = env::var("RAPTOR_OUTPUT_LIMIT").ok().and_then(|x| x.parse().ok());
-
-    let mut stream = RankedStream::new(&map, map.values(), automatons);
-
+    let limit: Option<usize> = env::var("RAPTOR_OUTPUT_LIMIT").ok().and_then(|x| x.parse().ok());
+    let mut stream = RankedStream::new(&map, map.values(), automatons, limit.unwrap_or(20));
     while let Some(document_id) = stream.next() {
-        if limit == Some(0) { println!("..."); break }
         println!("{:?}", document_id);
-        if let Some(ref mut limit) = limit { *limit -= 1 }
     }
 }
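For clarity, here is a standalone sketch of the limit resolution used in the example above: RAPTOR_OUTPUT_LIMIT is honoured when it parses as an integer, and the new hard default of 20 applies otherwise. The helper name is made up for illustration; the example inlines this expression.

use std::env;

// Made-up helper showing the limit resolution inlined in the example's main.
fn output_limit() -> usize {
    env::var("RAPTOR_OUTPUT_LIMIT")
        .ok()
        .and_then(|x| x.parse().ok())
        .unwrap_or(20) // unset or unparsable variable falls back to a cap of 20
}

fn main() {
    println!("limit = {}", output_limit());
}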

View File

@@ -142,8 +142,9 @@ pub struct Pool {
 #[derive(Debug, Copy, Clone)]
 enum Limitation {
     /// No limitation is specified.
-    Unspecified {
-        query_size: usize,
+    Unspecified { // FIXME rename that !
+        /// The maximum number of results to return.
+        limit: usize,
     },
 
     /// The limitation is specified but not reached.
@@ -178,33 +179,22 @@ impl Limitation {
     fn is_reached(&self) -> bool {
         self.reached().is_some()
     }
-
-    fn query_size(&self) -> usize {
-        match *self {
-            Limitation::Unspecified { query_size } => query_size,
-            _ => 1,
-        }
-    }
 }
 
 impl Pool {
-    pub fn new(query_size: usize) -> Self {
-        Self {
-            returned_documents: HashSet::new(),
-            documents: Vec::new(),
-            limitation: Limitation::Unspecified { query_size },
-        }
-    }
+    pub fn new(query_size: usize, limit: usize) -> Self {
+        assert!(query_size > 0, "query size can not be less that one");
+        assert!(limit > 0, "limit can not be less that one");
 
-    pub fn with_output_limit(query_size: usize, limit: usize) -> Self {
-        assert_eq!(query_size, 1, "limit can only be specified if the query size is 1");
+        let limitation = match query_size {
+            1 => Limitation::Specified { limit, matching_documents: 0 },
+            _ => Limitation::Unspecified { limit },
+        };
+
         Self {
             returned_documents: HashSet::new(),
             documents: Vec::new(),
-            limitation: Limitation::Specified {
-                limit: limit,
-                matching_documents: 0,
-            },
+            limitation: limitation,
         }
     }
 
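A self-contained sketch of the constructor logic introduced above, with the types trimmed down to what the illustration needs: a query size of one selects the Specified variant, any other size the (to-be-renamed) Unspecified variant, and both now carry a limit.

// Trimmed-down model of the Limitation selection done by the new Pool::new.
#[derive(Debug, Copy, Clone)]
enum Limitation {
    Unspecified { limit: usize },
    Specified { limit: usize, matching_documents: usize },
}

fn limitation_for(query_size: usize, limit: usize) -> Limitation {
    assert!(query_size > 0, "query size can not be less than one");
    assert!(limit > 0, "limit can not be less than one");

    match query_size {
        // A single-word query also tracks how many matching documents were returned.
        1 => Limitation::Specified { limit, matching_documents: 0 },
        // Any other query size only carries the cap.
        _ => Limitation::Unspecified { limit },
    }
}

fn main() {
    println!("{:?}", limitation_for(1, 20)); // Specified { limit: 20, matching_documents: 0 }
    println!("{:?}", limitation_for(3, 20)); // Unspecified { limit: 20 }
}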
@@ -268,17 +258,14 @@ impl IntoIterator for Pool {
     type IntoIter = vec::IntoIter<Self::Item>;
 
     fn into_iter(mut self) -> Self::IntoIter {
-        match self.limitation {
-            Limitation::Unspecified { .. } => self.documents.into_iter(),
-            Limitation::Specified { limit, .. } => {
-                self.documents.truncate(limit);
-                self.documents.into_iter()
-            },
-            Limitation::Reached { remaining } => {
-                self.documents.truncate(remaining);
-                self.documents.into_iter()
-            },
-        }
+        let limit = match self.limitation {
+            Limitation::Unspecified { limit } => limit,
+            Limitation::Specified { limit, .. } => limit,
+            Limitation::Reached { remaining } => remaining,
+        };
+
+        self.documents.truncate(limit);
+        self.documents.into_iter()
     }
 }
 
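This hunk carries the point of the commit: previously only the Specified and Reached variants truncated the result list, while Unspecified returned everything; now every variant yields a limit and the documents are always truncated. A minimal sketch of that invariant, with plain integers standing in for documents:

// Every limitation variant now yields a limit, and the documents are always
// truncated before the iterator is handed out.
fn limited(mut documents: Vec<u64>, limit: usize) -> Vec<u64> {
    documents.truncate(limit);
    documents
}

fn main() {
    assert_eq!(limited(vec![1, 2, 3, 4, 5], 3), vec![1, 2, 3]);
    assert_eq!(limited(vec![1, 2], 3), vec![1, 2]); // truncate never grows the list
}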
@@ -294,7 +281,7 @@ pub enum RankedStream<'m, 'v> {
 }
 
 impl<'m, 'v> RankedStream<'m, 'v> {
-    pub fn new(map: &'m DocIndexMap, values: &'v Values<DocIndex>, automatons: Vec<DFA>) -> Self {
+    pub fn new(map: &'m DocIndexMap, values: &'v Values<DocIndex>, automatons: Vec<DFA>, limit: usize) -> Self {
         let mut op = OpWithStateBuilder::new(values);
 
         for automaton in automatons.iter().cloned() {
@@ -302,10 +289,7 @@ impl<'m, 'v> RankedStream<'m, 'v> {
             op.push(stream);
         }
 
-        let pool = match automatons.len() {
-            1 => Pool::with_output_limit(automatons.len(), 20),
-            _ => Pool::new(automatons.len()),
-        };
+        let pool = Pool::new(automatons.len(), limit);
 
         RankedStream::Fed {
             inner: op.union(),
@@ -352,7 +336,7 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
                     }
                 },
                 None => {
-                    transfert_pool = Some(mem::replace(pool, Pool::new(0)));
+                    transfert_pool = Some(mem::replace(pool, Pool::new(1, 1)));
                 },
             }
         },
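The mem::replace call above needs a value to leave behind in place of the pool it moves out; with the new constructor both arguments must be non-zero, hence the Pool::new(1, 1) placeholder instead of the old Pool::new(0). A minimal sketch of that swap pattern, using a stripped-down stand-in for the real Pool:

use std::mem;

// Stripped-down stand-in for the real Pool: just enough state to show the swap.
struct Pool {
    documents: Vec<u64>,
}

impl Pool {
    fn new(_query_size: usize, _limit: usize) -> Self {
        Pool { documents: Vec::new() }
    }
}

fn take_pool(slot: &mut Pool) -> Pool {
    // Move the current pool out and leave a fresh placeholder behind,
    // mirroring `mem::replace(pool, Pool::new(1, 1))` in the hunk above.
    mem::replace(slot, Pool::new(1, 1))
}

fn main() {
    let mut slot = Pool { documents: vec![42] };
    let taken = take_pool(&mut slot);
    assert_eq!(taken.documents, vec![42]);
    assert!(slot.documents.is_empty());
}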