mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Merge pull request #165 from meilisearch/reorder-schema-attributes
Reorder schema attributes
This commit is contained in:
commit
e0cadaa68d
@ -1,5 +1,4 @@
|
||||
use std::hash::Hash;
|
||||
|
||||
use hashbrown::HashMap;
|
||||
|
||||
pub struct DistinctMap<K> {
|
||||
|
@ -4,6 +4,7 @@
|
||||
mod automaton;
|
||||
mod distinct_map;
|
||||
mod query_builder;
|
||||
mod reordered_attrs;
|
||||
mod store;
|
||||
pub mod criterion;
|
||||
|
||||
@ -59,73 +60,53 @@ pub struct DocIndex {
|
||||
///
|
||||
/// The order of the fields is important because it defines
|
||||
/// the way these structures are ordered relative to one another.
|
||||
///
|
||||
/// The word in itself is not important.
|
||||
// TODO: use data-oriented programming? Per-field arrays?
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Match {
|
||||
/// The word index in the query sentence.
|
||||
/// Same as the `attribute_index` but for the query words.
|
||||
///
|
||||
/// Used to retrieve the automaton that matches this word.
|
||||
pub query_index: u32,
|
||||
|
||||
/// The distance between the word and the query word
|
||||
/// (i.e. the Levenshtein distance).
|
||||
pub distance: u8,
|
||||
|
||||
pub struct Highlight {
|
||||
/// The attribute in the document where the word was found
|
||||
/// along with the index in it.
|
||||
pub attribute: u16,
|
||||
pub word_index: u16,
|
||||
|
||||
/// Whether the word that matches is an exact match or a prefix.
|
||||
pub is_exact: bool,
|
||||
|
||||
/// The position in bytes where the word was found
|
||||
/// along with the length of it.
|
||||
/// The position in bytes where the word was found.
|
||||
///
|
||||
/// It informs on the original word area in the text indexed
|
||||
/// without needing to run the tokenizer again.
|
||||
pub char_index: u16,
|
||||
|
||||
/// The length in bytes of the found word.
|
||||
///
|
||||
/// It informs on the original word area in the text indexed
|
||||
/// without needing to run the tokenizer again.
|
||||
pub char_length: u16,
|
||||
}
|
||||
|
||||
impl Match {
|
||||
pub fn zero() -> Self {
|
||||
Match {
|
||||
query_index: 0,
|
||||
distance: 0,
|
||||
attribute: 0,
|
||||
word_index: 0,
|
||||
is_exact: false,
|
||||
char_index: 0,
|
||||
char_length: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn max() -> Self {
|
||||
Match {
|
||||
query_index: u32::max_value(),
|
||||
distance: u8::max_value(),
|
||||
attribute: u16::max_value(),
|
||||
word_index: u16::max_value(),
|
||||
is_exact: true,
|
||||
char_index: u16::max_value(),
|
||||
char_length: u16::max_value(),
|
||||
}
|
||||
}
|
||||
#[doc(hidden)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct TmpMatch {
|
||||
pub query_index: u32,
|
||||
pub distance: u8,
|
||||
pub attribute: u16,
|
||||
pub word_index: u16,
|
||||
pub is_exact: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Document {
|
||||
pub id: DocumentId,
|
||||
pub matches: Vec<Match>,
|
||||
pub highlights: Vec<Highlight>,
|
||||
|
||||
#[cfg(test)]
|
||||
pub matches: Vec<TmpMatch>,
|
||||
}
|
||||
|
||||
impl Document {
|
||||
fn from_raw(raw: &RawDocument) -> Document {
|
||||
let len = raw.matches.range.len();
|
||||
#[cfg(not(test))]
|
||||
fn from_raw(raw: RawDocument) -> Document {
|
||||
Document { id: raw.id, highlights: raw.highlights }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn from_raw(raw: RawDocument) -> Document {
|
||||
let len = raw.query_index().len();
|
||||
let mut matches = Vec::with_capacity(len);
|
||||
|
||||
let query_index = raw.query_index();
|
||||
@ -133,23 +114,19 @@ impl Document {
|
||||
let attribute = raw.attribute();
|
||||
let word_index = raw.word_index();
|
||||
let is_exact = raw.is_exact();
|
||||
let char_index = raw.char_index();
|
||||
let char_length = raw.char_length();
|
||||
|
||||
for i in 0..len {
|
||||
let match_ = Match {
|
||||
let match_ = TmpMatch {
|
||||
query_index: query_index[i],
|
||||
distance: distance[i],
|
||||
attribute: attribute[i],
|
||||
word_index: word_index[i],
|
||||
is_exact: is_exact[i],
|
||||
char_index: char_index[i],
|
||||
char_length: char_length[i],
|
||||
};
|
||||
matches.push(match_);
|
||||
}
|
||||
|
||||
Document { id: raw.id, matches }
|
||||
Document { id: raw.id, matches, highlights: raw.highlights }
|
||||
}
|
||||
}
|
||||
|
||||
@ -157,11 +134,12 @@ impl Document {
|
||||
pub struct RawDocument {
|
||||
pub id: DocumentId,
|
||||
pub matches: SharedMatches,
|
||||
pub highlights: Vec<Highlight>,
|
||||
}
|
||||
|
||||
impl RawDocument {
|
||||
fn new(id: DocumentId, range: Range, matches: Arc<Matches>) -> RawDocument {
|
||||
RawDocument { id, matches: SharedMatches { range, matches } }
|
||||
fn new(id: DocumentId, matches: SharedMatches, highlights: Vec<Highlight>) -> RawDocument {
|
||||
RawDocument { id, matches, highlights }
|
||||
}
|
||||
|
||||
pub fn query_index(&self) -> &[u32] {
|
||||
@ -198,20 +176,6 @@ impl RawDocument {
|
||||
// can only be done in this module
|
||||
unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
|
||||
}
|
||||
|
||||
pub fn char_index(&self) -> &[u16] {
|
||||
let r = self.matches.range;
|
||||
// it is safe because construction/modifications
|
||||
// can only be done in this module
|
||||
unsafe { &self.matches.matches.char_index.get_unchecked(r.start..r.end) }
|
||||
}
|
||||
|
||||
pub fn char_length(&self) -> &[u16] {
|
||||
let r = self.matches.range;
|
||||
// it is safe because construction/modifications
|
||||
// can only be done in this module
|
||||
unsafe { &self.matches.matches.char_length.get_unchecked(r.start..r.end) }
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for RawDocument {
|
||||
@ -223,27 +187,30 @@ impl fmt::Debug for RawDocument {
|
||||
.field("attribute", &self.attribute())
|
||||
.field("word_index", &self.word_index())
|
||||
.field("is_exact", &self.is_exact())
|
||||
.field("char_index", &self.char_index())
|
||||
.field("char_length", &self.char_length())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn raw_documents_from_matches(matches: SetBuf<(DocumentId, Match)>) -> Vec<RawDocument> {
|
||||
let mut docs_ranges = Vec::<(_, Range)>::new();
|
||||
fn raw_documents_from_matches(matches: SetBuf<(DocumentId, TmpMatch, Highlight)>) -> Vec<RawDocument> {
|
||||
let mut docs_ranges: Vec<(_, Range, _)> = Vec::new();
|
||||
let mut matches2 = Matches::with_capacity(matches.len());
|
||||
|
||||
for group in matches.linear_group_by(|(a, _), (b, _)| a == b) {
|
||||
let id = group[0].0;
|
||||
let start = docs_ranges.last().map(|(_, r)| r.end).unwrap_or(0);
|
||||
for group in matches.linear_group_by(|(a, _, _), (b, _, _)| a == b) {
|
||||
let document_id = group[0].0;
|
||||
let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0);
|
||||
let end = start + group.len();
|
||||
docs_ranges.push((id, Range { start, end }));
|
||||
|
||||
let highlights = group.iter().map(|(_, _, h)| *h).collect();
|
||||
docs_ranges.push((document_id, Range { start, end }, highlights));
|
||||
|
||||
matches2.extend_from_slice(group);
|
||||
}
|
||||
|
||||
let matches = Arc::new(matches2);
|
||||
docs_ranges.into_iter().map(|(i, r)| RawDocument::new(i, r, matches.clone())).collect()
|
||||
docs_ranges.into_iter().map(|(i, range, highlights)| {
|
||||
let matches = SharedMatches { range, matches: matches.clone() };
|
||||
RawDocument::new(i, matches, highlights)
|
||||
}).collect()
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
@ -252,12 +219,6 @@ struct Range {
|
||||
end: usize,
|
||||
}
|
||||
|
||||
impl Range {
|
||||
fn len(self) -> usize {
|
||||
self.end - self.start
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SharedMatches {
|
||||
range: Range,
|
||||
@ -271,8 +232,6 @@ struct Matches {
|
||||
attribute: Vec<u16>,
|
||||
word_index: Vec<u16>,
|
||||
is_exact: Vec<bool>,
|
||||
char_index: Vec<u16>,
|
||||
char_length: Vec<u16>,
|
||||
}
|
||||
|
||||
impl Matches {
|
||||
@ -283,25 +242,20 @@ impl Matches {
|
||||
attribute: Vec::with_capacity(cap),
|
||||
word_index: Vec::with_capacity(cap),
|
||||
is_exact: Vec::with_capacity(cap),
|
||||
char_index: Vec::with_capacity(cap),
|
||||
char_length: Vec::with_capacity(cap),
|
||||
}
|
||||
}
|
||||
|
||||
fn extend_from_slice(&mut self, matches: &[(DocumentId, Match)]) {
|
||||
for (_, match_) in matches {
|
||||
fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch, Highlight)]) {
|
||||
for (_, match_, _) in matches {
|
||||
self.query_index.push(match_.query_index);
|
||||
self.distance.push(match_.distance);
|
||||
self.attribute.push(match_.attribute);
|
||||
self.word_index.push(match_.word_index);
|
||||
self.is_exact.push(match_.is_exact);
|
||||
self.char_index.push(match_.char_index);
|
||||
self.char_length.push(match_.char_length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -5,7 +5,7 @@ use std::time::Instant;
|
||||
use std::{cmp, mem};
|
||||
|
||||
use fst::{Streamer, IntoStreamer};
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use hashbrown::HashMap;
|
||||
use log::info;
|
||||
use meilidb_tokenizer::{is_cjk, split_query_string};
|
||||
use rayon::slice::ParallelSliceMut;
|
||||
@ -17,7 +17,8 @@ use crate::automaton::{build_dfa, build_prefix_dfa};
|
||||
use crate::distinct_map::{DistinctMap, BufferedDistinctMap};
|
||||
use crate::criterion::Criteria;
|
||||
use crate::raw_documents_from_matches;
|
||||
use crate::{Match, DocumentId, Store, RawDocument, Document};
|
||||
use crate::reordered_attrs::ReorderedAttrs;
|
||||
use crate::{TmpMatch, Highlight, DocumentId, Store, RawDocument, Document};
|
||||
|
||||
const NGRAMS: usize = 3;
|
||||
|
||||
@ -177,12 +178,12 @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<Vec<Automaton
|
||||
Ok(automatons)
|
||||
}
|
||||
|
||||
fn rewrite_matched_positions(matches: &mut [(DocumentId, Match)]) {
|
||||
for document_matches in matches.linear_group_by_mut(|(a, _), (b, _)| a == b) {
|
||||
fn rewrite_matched_positions(matches: &mut [(DocumentId, TmpMatch, Highlight)]) {
|
||||
for document_matches in matches.linear_group_by_mut(|(a, _, _), (b, _, _)| a == b) {
|
||||
let mut offset = 0;
|
||||
for query_indexes in document_matches.linear_group_by_mut(|(_, a), (_, b)| a.query_index == b.query_index) {
|
||||
for query_indexes in document_matches.linear_group_by_mut(|(_, a, _), (_, b, _)| a.query_index == b.query_index) {
|
||||
let word_index = query_indexes[0].1.word_index - offset as u16;
|
||||
for (_, match_) in query_indexes.iter_mut() {
|
||||
for (_, match_, _) in query_indexes.iter_mut() {
|
||||
match_.word_index = word_index;
|
||||
}
|
||||
offset += query_indexes.len() - 1;
|
||||
@ -193,7 +194,7 @@ fn rewrite_matched_positions(matches: &mut [(DocumentId, Match)]) {
|
||||
pub struct QueryBuilder<'c, S, FI = fn(DocumentId) -> bool> {
|
||||
store: S,
|
||||
criteria: Criteria<'c>,
|
||||
searchable_attrs: Option<HashSet<u16>>,
|
||||
searchable_attrs: Option<ReorderedAttrs>,
|
||||
filter: Option<FI>,
|
||||
}
|
||||
|
||||
@ -228,8 +229,8 @@ impl<'c, S, FI> QueryBuilder<'c, S, FI>
|
||||
}
|
||||
|
||||
pub fn add_searchable_attribute(&mut self, attribute: u16) {
|
||||
let attributes = self.searchable_attrs.get_or_insert_with(HashSet::new);
|
||||
attributes.insert(attribute);
|
||||
let reorders = self.searchable_attrs.get_or_insert_with(ReorderedAttrs::new);
|
||||
reorders.insert_attribute(attribute);
|
||||
}
|
||||
}
|
||||
|
||||
@ -239,6 +240,7 @@ where S: Store,
|
||||
fn query_all(&self, query: &str) -> Result<Vec<RawDocument>, S::Error> {
|
||||
let automatons = generate_automatons(query, &self.store)?;
|
||||
let words = self.store.words()?.as_fst();
|
||||
let searchables = self.searchable_attrs.as_ref();
|
||||
|
||||
let mut stream = {
|
||||
let mut op_builder = fst::raw::OpBuilder::new();
|
||||
@ -264,18 +266,21 @@ where S: Store,
|
||||
};
|
||||
|
||||
for di in doc_indexes.as_slice() {
|
||||
if self.searchable_attrs.as_ref().map_or(true, |r| r.contains(&di.attribute)) {
|
||||
let match_ = Match {
|
||||
let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
|
||||
if let Some(attribute) = attribute {
|
||||
let match_ = TmpMatch {
|
||||
query_index: query_index as u32,
|
||||
distance,
|
||||
attribute: di.attribute,
|
||||
attribute,
|
||||
word_index: di.word_index,
|
||||
is_exact,
|
||||
};
|
||||
let highlight = Highlight {
|
||||
attribute: di.attribute,
|
||||
char_index: di.char_index,
|
||||
char_length: di.char_length,
|
||||
};
|
||||
matches.push((di.document_id, match_));
|
||||
|
||||
matches.push((di.document_id, match_, highlight));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -286,7 +291,11 @@ where S: Store,
|
||||
rewrite_matched_positions(&mut matches);
|
||||
|
||||
let total_matches = matches.len();
|
||||
let padded_matches = SetBuf::from_dirty(matches);
|
||||
let padded_matches = {
|
||||
matches.par_sort_unstable();
|
||||
matches.dedup();
|
||||
SetBuf::new_unchecked(matches)
|
||||
};
|
||||
let raw_documents = raw_documents_from_matches(padded_matches);
|
||||
|
||||
info!("{} total documents to classify", raw_documents.len());
|
||||
@ -346,7 +355,7 @@ where S: Store,
|
||||
|
||||
let offset = cmp::min(documents.len(), range.start);
|
||||
let iter = documents.into_iter().skip(offset).take(range.len());
|
||||
Ok(iter.map(|d| Document::from_raw(&d)).collect())
|
||||
Ok(iter.map(|d| Document::from_raw(d)).collect())
|
||||
}
|
||||
}
|
||||
|
||||
@ -473,7 +482,7 @@ where S: Store,
|
||||
};
|
||||
|
||||
if distinct_accepted && seen.len() > range.start {
|
||||
out_documents.push(Document::from_raw(&document));
|
||||
out_documents.push(Document::from_raw(document));
|
||||
if out_documents.len() == range.len() { break }
|
||||
}
|
||||
}
|
||||
@ -616,11 +625,11 @@ mod tests {
|
||||
let results = builder.query("iphone from apple", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 1, word_index: 1, .. }));
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 2, word_index: 2, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -638,9 +647,9 @@ mod tests {
|
||||
let results = builder.query("hello", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -649,9 +658,9 @@ mod tests {
|
||||
let results = builder.query("bonjour", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -670,9 +679,9 @@ mod tests {
|
||||
let results = builder.query("sal", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -681,9 +690,9 @@ mod tests {
|
||||
let results = builder.query("bonj", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -713,9 +722,9 @@ mod tests {
|
||||
let results = builder.query("salutution", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -724,9 +733,9 @@ mod tests {
|
||||
let results = builder.query("saluttion", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -748,19 +757,19 @@ mod tests {
|
||||
let results = builder.query("hello", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 3, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 3, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 5, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 5, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -769,19 +778,19 @@ mod tests {
|
||||
let results = builder.query("bonjour", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 3, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 3, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 5, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 5, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -790,19 +799,19 @@ mod tests {
|
||||
let results = builder.query("salut", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 3, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 3, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
assert_matches!(matches.next(), Some(Match { query_index: 0, word_index: 5, .. }));
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 5, .. }));
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -828,18 +837,18 @@ mod tests {
|
||||
let results = builder.query("NY subway", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway
|
||||
assert_matches!(iter.next(), None); // position rewritten ^
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -848,18 +857,18 @@ mod tests {
|
||||
let results = builder.query("NYC subway", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway
|
||||
assert_matches!(iter.next(), None); // position rewritten ^
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -888,18 +897,18 @@ mod tests {
|
||||
let results = builder.query("NY subway", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), None); // position rewritten ^
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -908,18 +917,18 @@ mod tests {
|
||||
let results = builder.query("NYC subway", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), None); // position rewritten ^
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -950,20 +959,20 @@ mod tests {
|
||||
let results = builder.query("NY subway broken", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // underground = subway
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // train = subway
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 2, word_index: 3, .. })); // broken
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // underground = subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // train = subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 3, .. })); // broken
|
||||
assert_matches!(iter.next(), None); // position rewritten ^
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -972,19 +981,19 @@ mod tests {
|
||||
let results = builder.query("NYC subway", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // underground = subway
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // train = subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // underground = subway
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // train = subway
|
||||
assert_matches!(iter.next(), None); // position rewritten ^
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -1017,27 +1026,41 @@ mod tests {
|
||||
let results = builder.query("new york underground train broken", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, highlights }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
let mut highlights = highlights.into_iter();
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 0, .. })); // york
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 1, .. })); // underground
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 2, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 2, .. })); // train
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 3, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 3, .. })); // broken
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 4, .. }));
|
||||
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 1, .. })); // york
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 0, .. })); // new
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 0, char_index: 1, .. })); // york
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 2, word_index: 1, char_index: 2, .. })); // underground
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 3, word_index: 2, char_index: 3, .. })); // train
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 4, word_index: 3, char_index: 4, .. })); // broken
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NYC = new york
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, .. })); // subway = underground train
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 3, .. })); // broken
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NYC = new york
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 2, word_index: 2, .. })); // subway = underground train
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 4, word_index: 3, .. })); // broken
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY = new york
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 2, word_index: 1, .. })); // subway = underground train
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY = new york
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 1, .. })); // subway = underground train
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -1046,27 +1069,41 @@ mod tests {
|
||||
let results = builder.query("new york city underground train broken", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, highlights }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
let mut highlights = highlights.into_iter();
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 0, .. })); // york
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 1, .. })); // underground
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 2, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 2, .. })); // train
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 3, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 5, word_index: 3, .. })); // broken
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 4, .. }));
|
||||
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 1, .. })); // york
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 0, .. })); // new
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 0, char_index: 1, .. })); // york
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 3, word_index: 1, char_index: 2, .. })); // underground
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 4, word_index: 2, char_index: 3, .. })); // train
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 5, word_index: 3, char_index: 4, .. })); // broken
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NYC = new york city
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 2, .. })); // subway = underground train
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 5, word_index: 3, .. })); // broken
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NYC = new york city
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 3, word_index: 2, .. })); // subway = underground train
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 5, word_index: 3, .. })); // broken
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY = new york city
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 3, word_index: 1, .. })); // subway = underground train
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY = new york city
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 1, .. })); // subway = underground train
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -1086,15 +1123,15 @@ mod tests {
|
||||
let results = builder.query("telephone", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -1103,15 +1140,15 @@ mod tests {
|
||||
let results = builder.query("téléphone", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -1120,14 +1157,14 @@ mod tests {
|
||||
let results = builder.query("télephone", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, distance: 1, .. })); // téléphone
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, distance: 1, .. })); // téléphone
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -1144,11 +1181,11 @@ mod tests {
|
||||
let results = builder.query("i phone case", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, distance: 0, .. })); // iphone
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 0, distance: 1, .. })); // phone
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 2, word_index: 1, distance: 0, .. })); // case
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, distance: 0, .. })); // iphone
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 0, distance: 1, .. })); // phone
|
||||
assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 1, distance: 0, .. })); // case
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
@ -1167,11 +1204,17 @@ mod tests {
|
||||
let results = builder.query("portefeuille", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 0, .. })); // porte
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 1, .. })); // feuille
|
||||
assert_matches!(iter.next(), None);
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, highlights }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
let mut highlights = highlights.into_iter();
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // porte
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // feuille
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. }));
|
||||
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
|
||||
@ -1179,11 +1222,17 @@ mod tests {
|
||||
let results = builder.query("searchengine", 0..20).unwrap();
|
||||
let mut iter = results.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 0, .. })); // search
|
||||
assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, char_index: 1, .. })); // engine
|
||||
assert_matches!(iter.next(), None);
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, highlights }) => {
|
||||
let mut matches = matches.into_iter();
|
||||
let mut highlights = highlights.into_iter();
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // search
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. }));
|
||||
|
||||
assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // engine
|
||||
assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. }));
|
||||
|
||||
assert_matches!(matches.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
}
|
||||
|
24
meilidb-core/src/reordered_attrs.rs
Normal file
24
meilidb-core/src/reordered_attrs.rs
Normal file
@ -0,0 +1,24 @@
|
||||
/// Maps attribute ids to a position assigned in insertion order.
///
/// Every call to [`ReorderedAttrs::insert_attribute`] hands out the next
/// free position (0, 1, 2, ...). Attributes that were never inserted have
/// no position and [`ReorderedAttrs::get`] returns `None` for them.
#[derive(Default)]
pub struct ReorderedAttrs {
    /// Number of insertions performed so far; this is the position that
    /// the next inserted attribute receives.
    count: usize,
    /// Indexed by attribute id: `Some(position)` for an inserted
    /// attribute, `None` for ids that were never inserted.
    reorders: Vec<Option<u16>>,
}

impl ReorderedAttrs {
    /// Creates an empty mapping in which no attribute has a position.
    pub fn new() -> ReorderedAttrs {
        ReorderedAttrs { count: 0, reorders: Vec::new() }
    }

    /// Registers `attribute` and assigns it the next position.
    ///
    /// Attributes may be inserted in any order. Inserting an attribute that
    /// is already present overwrites its position with a fresh one.
    pub fn insert_attribute(&mut self, attribute: u16) {
        let index = usize::from(attribute);

        // Only ever grow the table: `Vec::resize` truncates when the target
        // length is shorter than the current one, which would silently drop
        // the positions of previously inserted, higher attribute ids.
        if index >= self.reorders.len() {
            self.reorders.resize(index + 1, None);
        }

        // NOTE(review): positions are stored as `u16`, so this cast wraps
        // after 65536 insertions; kept as in the original.
        self.reorders[index] = Some(self.count as u16);
        self.count += 1;
    }

    /// Returns the position assigned to `attribute`, or `None` if it was
    /// never inserted.
    pub fn get(&self, attribute: u16) -> Option<u16> {
        self.reorders
            .get(usize::from(attribute))
            .and_then(|slot| *slot)
    }
}
|
@ -11,7 +11,7 @@ use std::error::Error;
|
||||
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
use structopt::StructOpt;
|
||||
use meilidb_core::Match;
|
||||
use meilidb_core::Highlight;
|
||||
|
||||
use meilidb_data::Database;
|
||||
use meilidb_schema::SchemaAttr;
|
||||
@ -71,12 +71,12 @@ fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize)
|
||||
(byte_index, byte_length)
|
||||
}
|
||||
|
||||
fn create_highlight_areas(text: &str, matches: &[Match]) -> Vec<usize> {
|
||||
fn create_highlight_areas(text: &str, highlights: &[Highlight]) -> Vec<usize> {
|
||||
let mut byte_indexes = BTreeMap::new();
|
||||
|
||||
for match_ in matches {
|
||||
let char_index = match_.char_index as usize;
|
||||
let char_length = match_.char_length as usize;
|
||||
for highlight in highlights {
|
||||
let char_index = highlight.char_index as usize;
|
||||
let char_length = highlight.char_length as usize;
|
||||
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
|
||||
|
||||
match byte_indexes.entry(byte_index) {
|
||||
@ -111,26 +111,26 @@ fn create_highlight_areas(text: &str, matches: &[Match]) -> Vec<usize> {
|
||||
/// ```
|
||||
fn crop_text(
|
||||
text: &str,
|
||||
matches: impl IntoIterator<Item=Match>,
|
||||
highlights: impl IntoIterator<Item=Highlight>,
|
||||
context: usize,
|
||||
) -> (String, Vec<Match>)
|
||||
) -> (String, Vec<Highlight>)
|
||||
{
|
||||
let mut matches = matches.into_iter().peekable();
|
||||
let mut highlights = highlights.into_iter().peekable();
|
||||
|
||||
let char_index = matches.peek().map(|m| m.char_index as usize).unwrap_or(0);
|
||||
let char_index = highlights.peek().map(|m| m.char_index as usize).unwrap_or(0);
|
||||
let start = char_index.saturating_sub(context);
|
||||
let text = text.chars().skip(start).take(context * 2).collect();
|
||||
|
||||
let matches = matches
|
||||
let highlights = highlights
|
||||
.take_while(|m| {
|
||||
(m.char_index as usize) + (m.char_length as usize) <= start + (context * 2)
|
||||
})
|
||||
.map(|match_| {
|
||||
Match { char_index: match_.char_index - start as u16, ..match_ }
|
||||
.map(|highlight| {
|
||||
Highlight { char_index: highlight.char_index - start as u16, ..highlight }
|
||||
})
|
||||
.collect();
|
||||
|
||||
(text, matches)
|
||||
(text, highlights)
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
@ -168,7 +168,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
let number_of_documents = documents.len();
|
||||
for mut doc in documents {
|
||||
|
||||
doc.matches.sort_unstable_by_key(|m| (m.char_index, m.char_index));
|
||||
doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length));
|
||||
|
||||
let start_retrieve = Instant::now();
|
||||
let result = index.document::<Document>(Some(&fields), doc.id);
|
||||
@ -180,11 +180,11 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
print!("{}: ", name);
|
||||
|
||||
let attr = schema.attribute(&name).unwrap();
|
||||
let matches = doc.matches.iter()
|
||||
let highlights = doc.highlights.iter()
|
||||
.filter(|m| SchemaAttr::new(m.attribute) == attr)
|
||||
.cloned();
|
||||
let (text, matches) = crop_text(&text, matches, opt.char_context);
|
||||
let areas = create_highlight_areas(&text, &matches);
|
||||
let (text, highlights) = crop_text(&text, highlights, opt.char_context);
|
||||
let areas = create_highlight_areas(&text, &highlights);
|
||||
display_highlights(&text, &areas)?;
|
||||
println!();
|
||||
}
|
||||
@ -194,8 +194,8 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
}
|
||||
|
||||
let mut matching_attributes = HashSet::new();
|
||||
for _match in doc.matches {
|
||||
let attr = SchemaAttr::new(_match.attribute);
|
||||
for highlight in doc.highlights {
|
||||
let attr = SchemaAttr::new(highlight.attribute);
|
||||
let name = schema.attribute_name(attr);
|
||||
matching_attributes.insert(name);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user