mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Add more debug infos
This commit is contained in:
parent
ef6a4db182
commit
d17d4dc5ec
@ -7,6 +7,7 @@ use std::{cmp, fmt, vec};
|
|||||||
use fst::{IntoStreamer, Streamer};
|
use fst::{IntoStreamer, Streamer};
|
||||||
use levenshtein_automata::DFA;
|
use levenshtein_automata::DFA;
|
||||||
use meilisearch_tokenizer::{is_cjk, split_query_string};
|
use meilisearch_tokenizer::{is_cjk, split_query_string};
|
||||||
|
use log::debug;
|
||||||
|
|
||||||
use crate::database::MainT;
|
use crate::database::MainT;
|
||||||
use crate::error::MResult;
|
use crate::error::MResult;
|
||||||
@ -38,6 +39,10 @@ impl AutomatonProducer {
|
|||||||
synonyms_store,
|
synonyms_store,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
for (i, group) in automatons.iter().enumerate() {
|
||||||
|
debug!("all automatons: group {} automatons {:?}", i, group.automatons);
|
||||||
|
}
|
||||||
|
|
||||||
Ok((AutomatonProducer { automatons }, query_enhancer))
|
Ok((AutomatonProducer { automatons }, query_enhancer))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,7 +85,9 @@ pub struct Automaton {
|
|||||||
impl fmt::Debug for Automaton {
|
impl fmt::Debug for Automaton {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
f.debug_struct("Automaton")
|
f.debug_struct("Automaton")
|
||||||
|
.field("index", &self.index)
|
||||||
.field("query", &self.query)
|
.field("query", &self.query)
|
||||||
|
.field("is_prefix", &self.is_prefix)
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -58,6 +58,7 @@ where
|
|||||||
type Origin = usize;
|
type Origin = usize;
|
||||||
type RealLength = usize;
|
type RealLength = usize;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
struct FakeIntervalTree {
|
struct FakeIntervalTree {
|
||||||
intervals: Vec<(Range<usize>, (Origin, RealLength))>,
|
intervals: Vec<(Range<usize>, (Origin, RealLength))>,
|
||||||
}
|
}
|
||||||
@ -154,6 +155,7 @@ impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct QueryEnhancer {
|
pub struct QueryEnhancer {
|
||||||
origins: Vec<usize>,
|
origins: Vec<usize>,
|
||||||
real_to_origin: FakeIntervalTree,
|
real_to_origin: FakeIntervalTree,
|
||||||
|
@ -34,6 +34,14 @@ fn multiword_rewrite_matches(
|
|||||||
mut matches: Vec<(DocumentId, TmpMatch)>,
|
mut matches: Vec<(DocumentId, TmpMatch)>,
|
||||||
query_enhancer: &QueryEnhancer,
|
query_enhancer: &QueryEnhancer,
|
||||||
) -> SetBuf<(DocumentId, TmpMatch)> {
|
) -> SetBuf<(DocumentId, TmpMatch)> {
|
||||||
|
if true {
|
||||||
|
let before_sort = Instant::now();
|
||||||
|
matches.sort_unstable();
|
||||||
|
let matches = SetBuf::new_unchecked(matches);
|
||||||
|
debug!("sorting dirty matches took {:.02?}", before_sort.elapsed());
|
||||||
|
return matches;
|
||||||
|
}
|
||||||
|
|
||||||
let mut padded_matches = Vec::with_capacity(matches.len());
|
let mut padded_matches = Vec::with_capacity(matches.len());
|
||||||
|
|
||||||
// we sort the matches by word index to make them rewritable
|
// we sort the matches by word index to make them rewritable
|
||||||
@ -137,6 +145,10 @@ fn multiword_rewrite_matches(
|
|||||||
document_matches.sort_unstable();
|
document_matches.sort_unstable();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// With this check we can see that the loop above takes something
|
||||||
|
// like 43% of the search time even when no rewrite is needed.
|
||||||
|
// assert_eq!(before_matches, padded_matches);
|
||||||
|
|
||||||
SetBuf::new_unchecked(padded_matches)
|
SetBuf::new_unchecked(padded_matches)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -236,16 +248,28 @@ fn fetch_raw_documents(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
let before_rerewrite = Instant::now();
|
||||||
for (id, _, match_, highlight) in tmp_matches {
|
for (id, _, match_, highlight) in tmp_matches {
|
||||||
matches.push((id, match_));
|
matches.push((id, match_));
|
||||||
highlights.push((id, highlight));
|
highlights.push((id, highlight));
|
||||||
}
|
}
|
||||||
|
debug!("rerewrite took {:.02?}", before_rerewrite.elapsed());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
debug!("automatons_groups_loop took {:.02?}", before_automatons_groups_loop.elapsed());
|
debug!("automatons_groups_loop took {:.02?}", before_automatons_groups_loop.elapsed());
|
||||||
|
|
||||||
|
{
|
||||||
|
let mut cloned = matches.clone();
|
||||||
|
let before_sort_test = Instant::now();
|
||||||
|
cloned.sort_unstable_by_key(|(id, m)| (*id, m.query_index, m.distance));
|
||||||
|
debug!("sorting test took {:.02?}", before_sort_test.elapsed());
|
||||||
|
}
|
||||||
|
|
||||||
let before_multiword_rewrite_matches = Instant::now();
|
let before_multiword_rewrite_matches = Instant::now();
|
||||||
|
debug!("number of matches before rewrite {}", matches.len());
|
||||||
|
debug!("{:?}", query_enhancer);
|
||||||
let matches = multiword_rewrite_matches(matches, &query_enhancer);
|
let matches = multiword_rewrite_matches(matches, &query_enhancer);
|
||||||
|
debug!("number of matches after rewrite {}", matches.len());
|
||||||
debug!("multiword_rewrite_matches took {:.02?}", before_multiword_rewrite_matches.elapsed());
|
debug!("multiword_rewrite_matches took {:.02?}", before_multiword_rewrite_matches.elapsed());
|
||||||
|
|
||||||
let before_highlight_sorting = Instant::now();
|
let before_highlight_sorting = Instant::now();
|
||||||
@ -299,9 +323,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
synonyms_store: synonyms,
|
synonyms_store: synonyms,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|
||||||
pub fn with_filter<F>(&mut self, function: F)
|
pub fn with_filter<F>(&mut self, function: F)
|
||||||
where
|
where
|
||||||
F: Fn(DocumentId) -> bool + 'f,
|
F: Fn(DocumentId) -> bool + 'f,
|
||||||
@ -479,12 +501,16 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
group.sort_unstable_by(|a, b| criterion.evaluate(a, b));
|
group.sort_unstable_by(|a, b| criterion.evaluate(a, b));
|
||||||
|
|
||||||
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) {
|
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) {
|
||||||
|
debug!("criterion {} produced a group of size {}", criterion.name(), group.len());
|
||||||
|
|
||||||
documents_seen += group.len();
|
documents_seen += group.len();
|
||||||
groups.push(group);
|
groups.push(group);
|
||||||
|
|
||||||
|
|
||||||
// we have sort enough documents if the last document sorted is after
|
// we have sort enough documents if the last document sorted is after
|
||||||
// the end of the requested range, we can continue to the next criterion
|
// the end of the requested range, we can continue to the next criterion
|
||||||
if documents_seen >= range.end {
|
if documents_seen >= range.end {
|
||||||
|
@ -1,398 +0,0 @@
|
|||||||
use std::ops::Range;
|
|
||||||
use std::cmp::Ordering::{Less, Greater, Equal};
|
|
||||||
|
|
||||||
/// Return `true` if the specified range can accept the given replacements words.
|
|
||||||
/// Returns `false` if the replacements words are already present in the original query
|
|
||||||
/// or if there is fewer replacement words than the range to replace.
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// ## Ignored because already present in original
|
|
||||||
//
|
|
||||||
// new york city subway
|
|
||||||
// -------- ^^^^
|
|
||||||
// / \
|
|
||||||
// [new york city]
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// ## Ignored because smaller than the original
|
|
||||||
//
|
|
||||||
// new york city subway
|
|
||||||
// -------------
|
|
||||||
// \ /
|
|
||||||
// [new york]
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// ## Accepted because bigger than the original
|
|
||||||
//
|
|
||||||
// NYC subway
|
|
||||||
// ---
|
|
||||||
// / \
|
|
||||||
// / \
|
|
||||||
// / \
|
|
||||||
// / \
|
|
||||||
// / \
|
|
||||||
// [new york city]
|
|
||||||
//
|
|
||||||
fn rewrite_range_with<S, T>(query: &[S], range: Range<usize>, words: &[T]) -> bool
|
|
||||||
where S: AsRef<str>,
|
|
||||||
T: AsRef<str>,
|
|
||||||
{
|
|
||||||
if words.len() <= range.len() {
|
|
||||||
// there is fewer or equal replacement words
|
|
||||||
// than there is already in the replaced range
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// retrieve the part to rewrite but with the length
|
|
||||||
// of the replacement part
|
|
||||||
let original = query.iter().skip(range.start).take(words.len());
|
|
||||||
|
|
||||||
// check if the original query doesn't already contain
|
|
||||||
// the replacement words
|
|
||||||
!original.map(AsRef::as_ref).eq(words.iter().map(AsRef::as_ref))
|
|
||||||
}
|
|
||||||
|
|
||||||
type Origin = usize;
|
|
||||||
type RealLength = usize;
|
|
||||||
|
|
||||||
struct FakeIntervalTree {
|
|
||||||
intervals: Vec<(Range<usize>, (Origin, RealLength))>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FakeIntervalTree {
|
|
||||||
fn new(mut intervals: Vec<(Range<usize>, (Origin, RealLength))>) -> FakeIntervalTree {
|
|
||||||
intervals.sort_unstable_by_key(|(r, _)| (r.start, r.end));
|
|
||||||
FakeIntervalTree { intervals }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn query(&self, point: usize) -> Option<(Range<usize>, (Origin, RealLength))> {
|
|
||||||
let element = self.intervals.binary_search_by(|(r, _)| {
|
|
||||||
if point >= r.start {
|
|
||||||
if point < r.end { Equal } else { Less }
|
|
||||||
} else { Greater }
|
|
||||||
});
|
|
||||||
|
|
||||||
let n = match element { Ok(n) => n, Err(n) => n };
|
|
||||||
|
|
||||||
match self.intervals.get(n) {
|
|
||||||
Some((range, value)) if range.contains(&point) => Some((range.clone(), *value)),
|
|
||||||
_otherwise => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct QueryEnhancerBuilder<'a, S> {
|
|
||||||
query: &'a [S],
|
|
||||||
origins: Vec<usize>,
|
|
||||||
real_to_origin: Vec<(Range<usize>, (Origin, RealLength))>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
|
|
||||||
pub fn new(query: &[S]) -> QueryEnhancerBuilder<S> {
|
|
||||||
// we initialize origins query indices based on their positions
|
|
||||||
let origins: Vec<_> = (0..query.len() + 1).collect();
|
|
||||||
let real_to_origin = origins.iter().map(|&o| (o..o+1, (o, 1))).collect();
|
|
||||||
|
|
||||||
QueryEnhancerBuilder { query, origins, real_to_origin }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Update the final real to origin query indices mapping.
|
|
||||||
///
|
|
||||||
/// `range` is the original words range that this `replacement` words replace
|
|
||||||
/// and `real` is the first real query index of these replacement words.
|
|
||||||
pub fn declare<T>(&mut self, range: Range<usize>, real: usize, replacement: &[T])
|
|
||||||
where T: AsRef<str>,
|
|
||||||
{
|
|
||||||
// check if the range of original words
|
|
||||||
// can be rewritten with the replacement words
|
|
||||||
if rewrite_range_with(self.query, range.clone(), replacement) {
|
|
||||||
|
|
||||||
// this range can be replaced so we need to
|
|
||||||
// modify the origins accordingly
|
|
||||||
let offset = replacement.len() - range.len();
|
|
||||||
|
|
||||||
let previous_padding = self.origins[range.end - 1];
|
|
||||||
let current_offset = (self.origins[range.end] - 1) - previous_padding;
|
|
||||||
let diff = offset.saturating_sub(current_offset);
|
|
||||||
self.origins[range.end] += diff;
|
|
||||||
|
|
||||||
for r in &mut self.origins[range.end + 1..] {
|
|
||||||
*r += diff;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// we need to store the real number and origins relations
|
|
||||||
// this way it will be possible to know by how many
|
|
||||||
// we need to pad real query indices
|
|
||||||
let real_range = real..real + replacement.len().max(range.len());
|
|
||||||
let real_length = replacement.len();
|
|
||||||
self.real_to_origin.push((real_range, (range.start, real_length)));
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn build(self) -> QueryEnhancer {
|
|
||||||
QueryEnhancer {
|
|
||||||
origins: self.origins,
|
|
||||||
real_to_origin: FakeIntervalTree::new(self.real_to_origin),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct QueryEnhancer {
|
|
||||||
origins: Vec<usize>,
|
|
||||||
real_to_origin: FakeIntervalTree,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl QueryEnhancer {
|
|
||||||
/// Returns the query indices to use to replace this real query index.
|
|
||||||
pub fn replacement(&self, real: u32) -> Range<u32> {
|
|
||||||
let real = real as usize;
|
|
||||||
|
|
||||||
// query the fake interval tree with the real query index
|
|
||||||
let (range, (origin, real_length)) =
|
|
||||||
self.real_to_origin
|
|
||||||
.query(real)
|
|
||||||
.expect("real has never been declared");
|
|
||||||
|
|
||||||
// if `real` is the end bound of the range
|
|
||||||
if (range.start + real_length - 1) == real {
|
|
||||||
let mut count = range.len();
|
|
||||||
let mut new_origin = origin;
|
|
||||||
for (i, slice) in self.origins[new_origin..].windows(2).enumerate() {
|
|
||||||
let len = slice[1] - slice[0];
|
|
||||||
count = count.saturating_sub(len);
|
|
||||||
if count == 0 { new_origin = origin + i; break }
|
|
||||||
}
|
|
||||||
|
|
||||||
let n = real - range.start;
|
|
||||||
let start = self.origins[origin];
|
|
||||||
let end = self.origins[new_origin + 1];
|
|
||||||
let remaining = (end - start) - n;
|
|
||||||
|
|
||||||
Range { start: (start + n) as u32, end: (start + n + remaining) as u32 }
|
|
||||||
|
|
||||||
} else {
|
|
||||||
// just return the origin along with
|
|
||||||
// the real position of the word
|
|
||||||
let n = real as usize - range.start;
|
|
||||||
let origin = self.origins[origin];
|
|
||||||
|
|
||||||
Range { start: (origin + n) as u32, end: (origin + n + 1) as u32 }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn original_unmodified() {
|
|
||||||
let query = ["new", "york", "city", "subway"];
|
|
||||||
// 0 1 2 3
|
|
||||||
let mut builder = QueryEnhancerBuilder::new(&query);
|
|
||||||
|
|
||||||
// new york = new york city
|
|
||||||
builder.declare(0..2, 4, &["new", "york", "city"]);
|
|
||||||
// ^ 4 5 6
|
|
||||||
|
|
||||||
let enhancer = builder.build();
|
|
||||||
|
|
||||||
assert_eq!(enhancer.replacement(0), 0..1); // new
|
|
||||||
assert_eq!(enhancer.replacement(1), 1..2); // york
|
|
||||||
assert_eq!(enhancer.replacement(2), 2..3); // city
|
|
||||||
assert_eq!(enhancer.replacement(3), 3..4); // subway
|
|
||||||
assert_eq!(enhancer.replacement(4), 0..1); // new
|
|
||||||
assert_eq!(enhancer.replacement(5), 1..2); // york
|
|
||||||
assert_eq!(enhancer.replacement(6), 2..3); // city
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn simple_growing() {
|
|
||||||
let query = ["new", "york", "subway"];
|
|
||||||
// 0 1 2
|
|
||||||
let mut builder = QueryEnhancerBuilder::new(&query);
|
|
||||||
|
|
||||||
// new york = new york city
|
|
||||||
builder.declare(0..2, 3, &["new", "york", "city"]);
|
|
||||||
// ^ 3 4 5
|
|
||||||
|
|
||||||
let enhancer = builder.build();
|
|
||||||
|
|
||||||
assert_eq!(enhancer.replacement(0), 0..1); // new
|
|
||||||
assert_eq!(enhancer.replacement(1), 1..3); // york
|
|
||||||
assert_eq!(enhancer.replacement(2), 3..4); // subway
|
|
||||||
assert_eq!(enhancer.replacement(3), 0..1); // new
|
|
||||||
assert_eq!(enhancer.replacement(4), 1..2); // york
|
|
||||||
assert_eq!(enhancer.replacement(5), 2..3); // city
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn same_place_growings() {
|
|
||||||
let query = ["NY", "subway"];
|
|
||||||
// 0 1
|
|
||||||
let mut builder = QueryEnhancerBuilder::new(&query);
|
|
||||||
|
|
||||||
// NY = new york
|
|
||||||
builder.declare(0..1, 2, &["new", "york"]);
|
|
||||||
// ^ 2 3
|
|
||||||
|
|
||||||
// NY = new york city
|
|
||||||
builder.declare(0..1, 4, &["new", "york", "city"]);
|
|
||||||
// ^ 4 5 6
|
|
||||||
|
|
||||||
// NY = NYC
|
|
||||||
builder.declare(0..1, 7, &["NYC"]);
|
|
||||||
// ^ 7
|
|
||||||
|
|
||||||
// NY = new york city
|
|
||||||
builder.declare(0..1, 8, &["new", "york", "city"]);
|
|
||||||
// ^ 8 9 10
|
|
||||||
|
|
||||||
// subway = underground train
|
|
||||||
builder.declare(1..2, 11, &["underground", "train"]);
|
|
||||||
// ^ 11 12
|
|
||||||
|
|
||||||
let enhancer = builder.build();
|
|
||||||
|
|
||||||
assert_eq!(enhancer.replacement(0), 0..3); // NY
|
|
||||||
assert_eq!(enhancer.replacement(1), 3..5); // subway
|
|
||||||
assert_eq!(enhancer.replacement(2), 0..1); // new
|
|
||||||
assert_eq!(enhancer.replacement(3), 1..3); // york
|
|
||||||
assert_eq!(enhancer.replacement(4), 0..1); // new
|
|
||||||
assert_eq!(enhancer.replacement(5), 1..2); // york
|
|
||||||
assert_eq!(enhancer.replacement(6), 2..3); // city
|
|
||||||
assert_eq!(enhancer.replacement(7), 0..3); // NYC
|
|
||||||
assert_eq!(enhancer.replacement(8), 0..1); // new
|
|
||||||
assert_eq!(enhancer.replacement(9), 1..2); // york
|
|
||||||
assert_eq!(enhancer.replacement(10), 2..3); // city
|
|
||||||
assert_eq!(enhancer.replacement(11), 3..4); // underground
|
|
||||||
assert_eq!(enhancer.replacement(12), 4..5); // train
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn bigger_growing() {
|
|
||||||
let query = ["NYC", "subway"];
|
|
||||||
// 0 1
|
|
||||||
let mut builder = QueryEnhancerBuilder::new(&query);
|
|
||||||
|
|
||||||
// NYC = new york city
|
|
||||||
builder.declare(0..1, 2, &["new", "york", "city"]);
|
|
||||||
// ^ 2 3 4
|
|
||||||
|
|
||||||
let enhancer = builder.build();
|
|
||||||
|
|
||||||
assert_eq!(enhancer.replacement(0), 0..3); // NYC
|
|
||||||
assert_eq!(enhancer.replacement(1), 3..4); // subway
|
|
||||||
assert_eq!(enhancer.replacement(2), 0..1); // new
|
|
||||||
assert_eq!(enhancer.replacement(3), 1..2); // york
|
|
||||||
assert_eq!(enhancer.replacement(4), 2..3); // city
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn middle_query_growing() {
|
|
||||||
let query = ["great", "awesome", "NYC", "subway"];
|
|
||||||
// 0 1 2 3
|
|
||||||
let mut builder = QueryEnhancerBuilder::new(&query);
|
|
||||||
|
|
||||||
// NYC = new york city
|
|
||||||
builder.declare(2..3, 4, &["new", "york", "city"]);
|
|
||||||
// ^ 4 5 6
|
|
||||||
|
|
||||||
let enhancer = builder.build();
|
|
||||||
|
|
||||||
assert_eq!(enhancer.replacement(0), 0..1); // great
|
|
||||||
assert_eq!(enhancer.replacement(1), 1..2); // awesome
|
|
||||||
assert_eq!(enhancer.replacement(2), 2..5); // NYC
|
|
||||||
assert_eq!(enhancer.replacement(3), 5..6); // subway
|
|
||||||
assert_eq!(enhancer.replacement(4), 2..3); // new
|
|
||||||
assert_eq!(enhancer.replacement(5), 3..4); // york
|
|
||||||
assert_eq!(enhancer.replacement(6), 4..5); // city
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn end_query_growing() {
|
|
||||||
let query = ["NYC", "subway"];
|
|
||||||
// 0 1
|
|
||||||
let mut builder = QueryEnhancerBuilder::new(&query);
|
|
||||||
|
|
||||||
// NYC = new york city
|
|
||||||
builder.declare(1..2, 2, &["underground", "train"]);
|
|
||||||
// ^ 2 3
|
|
||||||
|
|
||||||
let enhancer = builder.build();
|
|
||||||
|
|
||||||
assert_eq!(enhancer.replacement(0), 0..1); // NYC
|
|
||||||
assert_eq!(enhancer.replacement(1), 1..3); // subway
|
|
||||||
assert_eq!(enhancer.replacement(2), 1..2); // underground
|
|
||||||
assert_eq!(enhancer.replacement(3), 2..3); // train
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn multiple_growings() {
|
|
||||||
let query = ["great", "awesome", "NYC", "subway"];
|
|
||||||
// 0 1 2 3
|
|
||||||
let mut builder = QueryEnhancerBuilder::new(&query);
|
|
||||||
|
|
||||||
// NYC = new york city
|
|
||||||
builder.declare(2..3, 4, &["new", "york", "city"]);
|
|
||||||
// ^ 4 5 6
|
|
||||||
|
|
||||||
// subway = underground train
|
|
||||||
builder.declare(3..4, 7, &["underground", "train"]);
|
|
||||||
// ^ 7 8
|
|
||||||
|
|
||||||
let enhancer = builder.build();
|
|
||||||
|
|
||||||
assert_eq!(enhancer.replacement(0), 0..1); // great
|
|
||||||
assert_eq!(enhancer.replacement(1), 1..2); // awesome
|
|
||||||
assert_eq!(enhancer.replacement(2), 2..5); // NYC
|
|
||||||
assert_eq!(enhancer.replacement(3), 5..7); // subway
|
|
||||||
assert_eq!(enhancer.replacement(4), 2..3); // new
|
|
||||||
assert_eq!(enhancer.replacement(5), 3..4); // york
|
|
||||||
assert_eq!(enhancer.replacement(6), 4..5); // city
|
|
||||||
assert_eq!(enhancer.replacement(7), 5..6); // underground
|
|
||||||
assert_eq!(enhancer.replacement(8), 6..7); // train
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn multiple_probable_growings() {
|
|
||||||
let query = ["great", "awesome", "NYC", "subway"];
|
|
||||||
// 0 1 2 3
|
|
||||||
let mut builder = QueryEnhancerBuilder::new(&query);
|
|
||||||
|
|
||||||
// NYC = new york city
|
|
||||||
builder.declare(2..3, 4, &["new", "york", "city"]);
|
|
||||||
// ^ 4 5 6
|
|
||||||
|
|
||||||
// subway = underground train
|
|
||||||
builder.declare(3..4, 7, &["underground", "train"]);
|
|
||||||
// ^ 7 8
|
|
||||||
|
|
||||||
// great awesome = good
|
|
||||||
builder.declare(0..2, 9, &["good"]);
|
|
||||||
// ^ 9
|
|
||||||
|
|
||||||
// awesome NYC = NY
|
|
||||||
builder.declare(1..3, 10, &["NY"]);
|
|
||||||
// ^^ 10
|
|
||||||
|
|
||||||
// NYC subway = metro
|
|
||||||
builder.declare(2..4, 11, &["metro"]);
|
|
||||||
// ^^ 11
|
|
||||||
|
|
||||||
let enhancer = builder.build();
|
|
||||||
|
|
||||||
assert_eq!(enhancer.replacement(0), 0..1); // great
|
|
||||||
assert_eq!(enhancer.replacement(1), 1..2); // awesome
|
|
||||||
assert_eq!(enhancer.replacement(2), 2..5); // NYC
|
|
||||||
assert_eq!(enhancer.replacement(3), 5..7); // subway
|
|
||||||
assert_eq!(enhancer.replacement(4), 2..3); // new
|
|
||||||
assert_eq!(enhancer.replacement(5), 3..4); // york
|
|
||||||
assert_eq!(enhancer.replacement(6), 4..5); // city
|
|
||||||
assert_eq!(enhancer.replacement(7), 5..6); // underground
|
|
||||||
assert_eq!(enhancer.replacement(8), 6..7); // train
|
|
||||||
assert_eq!(enhancer.replacement(9), 0..2); // good
|
|
||||||
assert_eq!(enhancer.replacement(10), 1..5); // NY
|
|
||||||
assert_eq!(enhancer.replacement(11), 2..5); // metro
|
|
||||||
}
|
|
||||||
}
|
|
@ -3,7 +3,6 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use sdset::SetBuf;
|
use sdset::SetBuf;
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
use log::debug;
|
|
||||||
|
|
||||||
use crate::{DocumentId, Highlight, TmpMatch, AttrCount};
|
use crate::{DocumentId, Highlight, TmpMatch, AttrCount};
|
||||||
|
|
||||||
@ -108,10 +107,7 @@ pub fn raw_documents_from(
|
|||||||
let matches = matches.linear_group_by_key(|(id, _)| *id);
|
let matches = matches.linear_group_by_key(|(id, _)| *id);
|
||||||
let highlights = highlights.linear_group_by_key(|(id, _)| *id);
|
let highlights = highlights.linear_group_by_key(|(id, _)| *id);
|
||||||
|
|
||||||
let mut loops_count = 0;
|
|
||||||
|
|
||||||
for (mgroup, hgroup) in matches.zip(highlights) {
|
for (mgroup, hgroup) in matches.zip(highlights) {
|
||||||
loops_count += 1;
|
|
||||||
assert_eq!(mgroup[0].0, hgroup[0].0);
|
assert_eq!(mgroup[0].0, hgroup[0].0);
|
||||||
|
|
||||||
let document_id = mgroup[0].0;
|
let document_id = mgroup[0].0;
|
||||||
@ -122,8 +118,8 @@ pub fn raw_documents_from(
|
|||||||
|
|
||||||
docs_ranges.push((document_id, Range { start, end }, highlights, fields_counts));
|
docs_ranges.push((document_id, Range { start, end }, highlights, fields_counts));
|
||||||
// TODO we could try to keep both data
|
// TODO we could try to keep both data
|
||||||
// - the data oriented one and the raw one,
|
// - the data oriented one and,
|
||||||
// - the one that comes from the arguments of this function
|
// - the raw one, the one that comes from the arguments of this function
|
||||||
// This way we would be able to only produce data oriented lazily.
|
// This way we would be able to only produce data oriented lazily.
|
||||||
//
|
//
|
||||||
// For example the default first criterion is `SumOfTypos`
|
// For example the default first criterion is `SumOfTypos`
|
||||||
@ -133,8 +129,6 @@ pub fn raw_documents_from(
|
|||||||
matches2.extend_from_slice(mgroup);
|
matches2.extend_from_slice(mgroup);
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!("loops_counts number is {}", loops_count);
|
|
||||||
|
|
||||||
let matches = Arc::new(matches2);
|
let matches = Arc::new(matches2);
|
||||||
docs_ranges
|
docs_ranges
|
||||||
.into_iter()
|
.into_iter()
|
||||||
|
Loading…
Reference in New Issue
Block a user