fix: Remove stop-words from the serve examples

This commit is contained in:
Clément Renault 2018-10-21 16:42:19 +02:00
parent cf41b20fbb
commit 26dcfe1e54
6 changed files with 56 additions and 41 deletions

View file

@ -1,4 +1,5 @@
use std::ops::Deref;
use fst::Automaton;
use levenshtein_automata::{
LevenshteinAutomatonBuilder as LevBuilder,
@ -50,16 +51,40 @@ impl AutomatonExt for DfaExt {
}
}
pub fn build(query: &str) -> DfaExt {
/// Selects which kind of automaton `build_dfa_with_setting` constructs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PrefixSetting {
    /// Build the automaton through the builder's `build_prefix_dfa` method.
    Prefix,
    /// Build the automaton through the builder's `build_dfa` method.
    NoPrefix,
}
/// Builds the automaton for `query` and wraps it in a `DfaExt`.
///
/// The builder is chosen from the byte length of `query` (`str::len` counts
/// bytes, not characters): `LEVDIST0` for 0 to 4 bytes, `LEVDIST1` for 5 to 8
/// bytes and `LEVDIST2` for anything longer. `setting` then decides whether
/// that builder's `build_prefix_dfa` or `build_dfa` method is called.
///
/// The returned `DfaExt` records `query.len()` as its `query_len`.
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DfaExt {
    use self::PrefixSetting::{Prefix, NoPrefix};

    // NOTE(review): LEVDIST0/1/2 are defined outside this view; presumably
    // they are builders for a maximum edit distance of 0, 1 and 2 -- confirm.
    //
    // Each length bucket must dispatch on `setting`: an arm that calls
    // `build_prefix_dfa` unconditionally would shadow the arms below it and
    // silently ignore `NoPrefix`.
    let dfa = match query.len() {
        0 ..= 4 => match setting {
            Prefix => LEVDIST0.build_prefix_dfa(query),
            NoPrefix => LEVDIST0.build_dfa(query),
        },
        5 ..= 8 => match setting {
            Prefix => LEVDIST1.build_prefix_dfa(query),
            NoPrefix => LEVDIST1.build_dfa(query),
        },
        _ => match setting {
            Prefix => LEVDIST2.build_prefix_dfa(query),
            NoPrefix => LEVDIST2.build_dfa(query),
        },
    };

    DfaExt { query_len: query.len(), automaton: dfa }
}
/// Builds the `DfaExt` for `query` with the `PrefixSetting::Prefix` setting.
///
/// Thin wrapper around `build_dfa_with_setting`.
pub fn build_prefix_dfa(query: &str) -> DfaExt {
    let setting = PrefixSetting::Prefix;
    build_dfa_with_setting(query, setting)
}
/// Builds the `DfaExt` for `query` with the `PrefixSetting::NoPrefix` setting.
///
/// Thin wrapper around `build_dfa_with_setting`.
pub fn build_dfa(query: &str) -> DfaExt {
    let setting = PrefixSetting::NoPrefix;
    build_dfa_with_setting(query, setting)
}
pub trait AutomatonExt: Automaton {
fn eval<B: AsRef<[u8]>>(&self, s: B) -> Distance;
fn query_len(&self) -> usize;

View file

@ -57,6 +57,8 @@ impl Criterion for DocumentId {
}
}
// TODO: there are too many `Box`es here; can we use
// static references or static closures instead?
pub fn default() -> Vec<Box<dyn Criterion>> {
vec![
Box::new(SumOfTypos),

View file

@ -21,6 +21,11 @@ impl Document {
unsafe { Self::from_sorted_matches(doc, vec![match_]) }
}
/// Builds a document from `matches` supplied in any order.
///
/// The matches are sorted in place with an unstable sort and then handed to
/// `from_sorted_matches`, so callers need not pre-sort them.
pub fn from_matches(doc: DocumentId, mut matches: Vec<Match>) -> Self {
    matches.sort_unstable();
    let sorted = matches;

    // SAFETY: `sorted` was ordered by the `sort_unstable` call just above,
    // which is the precondition the name `from_sorted_matches` implies.
    unsafe { Self::from_sorted_matches(doc, sorted) }
}
/// Builds a document from `id` and a list of matches that is already sorted.
///
/// No sorting is performed here; the vector is stored as given.
///
/// # Safety
///
/// NOTE(review): the exact contract is not written down in the visible code.
/// Judging by the name and by `from_matches`, which calls `sort_unstable`
/// before delegating here, `matches` must be sorted in ascending `Ord`
/// order -- confirm. Debug builds check this; release builds do not.
pub unsafe fn from_sorted_matches(id: DocumentId, matches: Vec<Match>) -> Self {
    // Catch contract violations early in debug builds; compiled out in release.
    debug_assert!(
        matches.windows(2).all(|pair| pair[0] <= pair[1]),
        "from_sorted_matches called with unsorted matches"
    );

    Self { id, matches }
}

View file

@ -2,7 +2,7 @@ use std::collections::HashMap;
use std::hash::Hash;
use std::ops::Range;
use std::rc::Rc;
use std::{mem, vec, cmp};
use std::{mem, vec};
use fnv::FnvHashMap;
use fst::Streamer;
@ -11,10 +11,17 @@ use group_by::GroupByMut;
use crate::automaton::{DfaExt, AutomatonExt};
use crate::metadata::Metadata;
use crate::metadata::ops::OpBuilder;
use crate::rank::criterion::{self, Criterion};
use crate::rank::criterion::Criterion;
use crate::rank::Document;
use crate::{Match, DocumentId};
/// Clamps both bounds of `range` so that they lie within `big`.
///
/// Each bound is first capped at `big.end` and then raised to at least
/// `big.start`. The returned range is never inverted: if the clamped end
/// would fall before the clamped start (which happens when `range` itself is
/// inverted, e.g. `5..2`), the result collapses to the empty range
/// `start..start`.
fn clamp_range<T: Copy + Ord>(range: Range<T>, big: Range<T>) -> Range<T> {
    let clamp = |bound: T| bound.min(big.end).max(big.start);

    let start = clamp(range.start);
    // Slicing with `start > end` panics, so an inverted input must not be
    // passed through as an inverted output.
    let end = clamp(range.end).max(start);

    Range { start, end }
}
pub struct Config<'m, C, F> {
pub metadata: &'m Metadata,
pub automatons: Vec<DfaExt>,
@ -67,10 +74,7 @@ impl<'m, C, F> RankedStream<'m, C, F> {
}
}
matches.into_iter().map(|(id, mut matches)| {
matches.sort_unstable();
unsafe { Document::from_sorted_matches(id, matches) }
}).collect()
matches.into_iter().map(|(id, matches)| Document::from_matches(id, matches)).collect()
}
}
@ -92,10 +96,7 @@ where C: Criterion
}
}
let range = Range {
start: cmp::min(range.start, documents.len()),
end: cmp::min(range.end, documents.len()),
};
let range = clamp_range(range, 0..documents.len());
documents[range].to_vec()
}