From 0ce08c8790b3fd0589d4bdc5171648cb1dd4ce1f Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Sun, 24 Jun 2018 15:36:24 +0200 Subject: [PATCH] feat: Make querying always return limited results --- raptor-search/benches/main.rs | 6 ++-- raptor-search/src/main.rs | 13 +++----- src/rank.rs | 62 +++++++++++++---------------------- 3 files changed, 29 insertions(+), 52 deletions(-) diff --git a/raptor-search/benches/main.rs b/raptor-search/benches/main.rs index e761252d5..f6a962e13 100644 --- a/raptor-search/benches/main.rs +++ b/raptor-search/benches/main.rs @@ -4,10 +4,8 @@ extern crate test; extern crate fst; extern crate raptor; -use std::path::Path; -use std::{fs, env, io}; use fst::Streamer; -use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder}; +use raptor::{load_map, RankedStream, LevBuilder}; #[bench] fn chauve_souris(b: &mut test::Bencher) { @@ -23,7 +21,7 @@ fn chauve_souris(b: &mut test::Bencher) { automatons.push(lev); } - let mut stream = RankedStream::new(&map, &map.values(), automatons); + let mut stream = RankedStream::new(&map, &map.values(), automatons, 20); while let Some(document_id) = stream.next() { test::black_box(document_id); } diff --git a/raptor-search/src/main.rs b/raptor-search/src/main.rs index ed57b507f..ad3cfad8f 100644 --- a/raptor-search/src/main.rs +++ b/raptor-search/src/main.rs @@ -2,10 +2,9 @@ extern crate env_logger; extern crate fst; extern crate raptor; -use std::path::Path; -use std::{fs, env, io}; +use std::env; use fst::Streamer; -use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder}; +use raptor::{load_map, RankedStream, LevBuilder}; fn main() { drop(env_logger::init()); @@ -24,13 +23,9 @@ fn main() { automatons.push(lev); } - let mut limit: Option = env::var("RAPTOR_OUTPUT_LIMIT").ok().and_then(|x| x.parse().ok()); - let mut stream = RankedStream::new(&map, map.values(), automatons); + let limit: Option = env::var("RAPTOR_OUTPUT_LIMIT").ok().and_then(|x| x.parse().ok()); + let mut stream = RankedStream::new(&map, map.values(), automatons, limit.unwrap_or(20)); while let Some(document_id) = stream.next() { - if limit == Some(0) { println!("..."); break } - println!("{:?}", document_id); - - if let Some(ref mut limit) = limit { *limit -= 1 } } } diff --git a/src/rank.rs b/src/rank.rs index d1dfb0336..567033072 100644 --- a/src/rank.rs +++ b/src/rank.rs @@ -142,8 +142,9 @@ pub struct Pool { #[derive(Debug, Copy, Clone)] enum Limitation { /// No limitation is specified. - Unspecified { - query_size: usize, + Unspecified { // FIXME rename that ! + /// The maximum number of results to return. + limit: usize, }, /// The limitation is specified but not reached. @@ -178,33 +179,22 @@ impl Limitation { fn is_reached(&self) -> bool { self.reached().is_some() } - - fn query_size(&self) -> usize { - match *self { - Limitation::Unspecified { query_size } => query_size, - _ => 1, - } - } } impl Pool { - pub fn new(query_size: usize) -> Self { - Self { - returned_documents: HashSet::new(), - documents: Vec::new(), - limitation: Limitation::Unspecified { query_size }, - } - } + pub fn new(query_size: usize, limit: usize) -> Self { + assert!(query_size > 0, "query size can not be less that one"); + assert!(limit > 0, "limit can not be less that one"); + + let limitation = match query_size { + 1 => Limitation::Specified { limit, matching_documents: 0 }, + _ => Limitation::Unspecified { limit }, + }; - pub fn with_output_limit(query_size: usize, limit: usize) -> Self { - assert_eq!(query_size, 1, "limit can only be specified if the query size is 1"); Self { returned_documents: HashSet::new(), documents: Vec::new(), - limitation: Limitation::Specified { - limit: limit, - matching_documents: 0, - }, + limitation: limitation, } } @@ -268,17 +258,14 @@ impl IntoIterator for Pool { type IntoIter = vec::IntoIter; fn into_iter(mut self) -> Self::IntoIter { - match self.limitation { - Limitation::Unspecified { .. } => self.documents.into_iter(), - Limitation::Specified { limit, .. } => { - self.documents.truncate(limit); - self.documents.into_iter() - }, - Limitation::Reached { remaining } => { - self.documents.truncate(remaining); - self.documents.into_iter() - }, - } + let limit = match self.limitation { + Limitation::Unspecified { limit } => limit, + Limitation::Specified { limit, .. } => limit, + Limitation::Reached { remaining } => remaining, + }; + + self.documents.truncate(limit); + self.documents.into_iter() } } @@ -294,7 +281,7 @@ pub enum RankedStream<'m, 'v> { } impl<'m, 'v> RankedStream<'m, 'v> { - pub fn new(map: &'m DocIndexMap, values: &'v Values, automatons: Vec) -> Self { + pub fn new(map: &'m DocIndexMap, values: &'v Values, automatons: Vec, limit: usize) -> Self { let mut op = OpWithStateBuilder::new(values); for automaton in automatons.iter().cloned() { @@ -302,10 +289,7 @@ impl<'m, 'v> RankedStream<'m, 'v> { op.push(stream); } - let pool = match automatons.len() { - 1 => Pool::with_output_limit(automatons.len(), 20), - _ => Pool::new(automatons.len()), - }; + let pool = Pool::new(automatons.len(), limit); RankedStream::Fed { inner: op.union(), @@ -352,7 +336,7 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> { } }, None => { - transfert_pool = Some(mem::replace(pool, Pool::new(0))); + transfert_pool = Some(mem::replace(pool, Pool::new(1, 1))); }, } },