mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Remove unused files
This commit is contained in:
parent
f118d7e067
commit
6bf6b40495
@ -3,8 +3,6 @@
|
|||||||
mod criterion;
|
mod criterion;
|
||||||
mod external_documents_ids;
|
mod external_documents_ids;
|
||||||
mod fields_ids_map;
|
mod fields_ids_map;
|
||||||
mod mdfs;
|
|
||||||
mod query_tokens;
|
|
||||||
mod search;
|
mod search;
|
||||||
mod update_store;
|
mod update_store;
|
||||||
pub mod facet;
|
pub mod facet;
|
||||||
|
@ -1,163 +0,0 @@
|
|||||||
use std::collections::hash_map::Entry::{Occupied, Vacant};
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::mem;
|
|
||||||
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use crate::Index;
|
|
||||||
|
|
||||||
/// A mana depth first search implementation.
|
|
||||||
pub struct Mdfs<'a> {
|
|
||||||
index: &'a Index,
|
|
||||||
rtxn: &'a heed::RoTxn<'a>,
|
|
||||||
words: &'a [(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
|
|
||||||
union_cache: HashMap<(usize, u8), RoaringBitmap>,
|
|
||||||
candidates: RoaringBitmap,
|
|
||||||
mana: u32,
|
|
||||||
max_mana: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Mdfs<'a> {
|
|
||||||
pub fn new(
|
|
||||||
index: &'a Index,
|
|
||||||
rtxn: &'a heed::RoTxn,
|
|
||||||
words: &'a [(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
|
|
||||||
candidates: RoaringBitmap,
|
|
||||||
) -> Mdfs<'a>
|
|
||||||
{
|
|
||||||
// Compute the number of pairs (windows) we have for this list of words.
|
|
||||||
let mana = words.len().saturating_sub(1) as u32;
|
|
||||||
let max_mana = mana * 8;
|
|
||||||
Mdfs { index, rtxn, words, union_cache: HashMap::new(), candidates, mana, max_mana }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Iterator for Mdfs<'a> {
|
|
||||||
type Item = anyhow::Result<(u32, RoaringBitmap)>;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
// If there is less or only one word therefore the only
|
|
||||||
// possible documents that we can return are the candidates.
|
|
||||||
if self.words.len() <= 1 {
|
|
||||||
if self.candidates.is_empty() { return None }
|
|
||||||
return Some(Ok((0, mem::take(&mut self.candidates))));
|
|
||||||
}
|
|
||||||
|
|
||||||
while self.mana <= self.max_mana {
|
|
||||||
let mut answer = RoaringBitmap::new();
|
|
||||||
let result = mdfs_step(
|
|
||||||
&self.index,
|
|
||||||
&self.rtxn,
|
|
||||||
self.mana,
|
|
||||||
self.words,
|
|
||||||
&self.candidates,
|
|
||||||
&self.candidates,
|
|
||||||
&mut self.union_cache,
|
|
||||||
&mut answer,
|
|
||||||
);
|
|
||||||
|
|
||||||
match result {
|
|
||||||
Ok(()) => {
|
|
||||||
// We always increase the mana for the next loop.
|
|
||||||
let proximity = self.mana;
|
|
||||||
self.mana += 1;
|
|
||||||
|
|
||||||
// If no documents were found we must not return and continue
|
|
||||||
// the search with more mana.
|
|
||||||
if !answer.is_empty() {
|
|
||||||
|
|
||||||
// We remove the answered documents from the list of
|
|
||||||
// candidates to be sure we don't search for them again.
|
|
||||||
self.candidates.difference_with(&answer);
|
|
||||||
|
|
||||||
// We return the answer.
|
|
||||||
return Some(Ok((proximity, answer)));
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Err(e) => return Some(Err(e)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn mdfs_step(
|
|
||||||
index: &Index,
|
|
||||||
rtxn: &heed::RoTxn,
|
|
||||||
mana: u32,
|
|
||||||
words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
|
|
||||||
candidates: &RoaringBitmap,
|
|
||||||
parent_docids: &RoaringBitmap,
|
|
||||||
union_cache: &mut HashMap<(usize, u8), RoaringBitmap>,
|
|
||||||
answer: &mut RoaringBitmap,
|
|
||||||
) -> anyhow::Result<()>
|
|
||||||
{
|
|
||||||
use std::cmp::{min, max};
|
|
||||||
|
|
||||||
let (words1, words2) = (&words[0].0, &words[1].0);
|
|
||||||
let pairs = words_pair_combinations(words1, words2);
|
|
||||||
let tail = &words[1..];
|
|
||||||
let nb_children = tail.len() as u32 - 1;
|
|
||||||
|
|
||||||
// The minimum amount of mana that you must consume is at least 1 and the
|
|
||||||
// amount of mana that your children can consume. Because the last child must
|
|
||||||
// consume the remaining mana, it is mandatory that there not too much at the end.
|
|
||||||
let min_proximity = max(1, mana.saturating_sub(nb_children * 8)) as u8;
|
|
||||||
|
|
||||||
// The maximum amount of mana that you can use is 8 or the remaining amount of
|
|
||||||
// mana minus your children, as you can't just consume all the mana,
|
|
||||||
// your children must have at least 1 mana.
|
|
||||||
let max_proximity = min(8, mana - nb_children) as u8;
|
|
||||||
|
|
||||||
for proximity in min_proximity..=max_proximity {
|
|
||||||
let mut docids = match union_cache.entry((words.len(), proximity)) {
|
|
||||||
Occupied(entry) => entry.get().clone(),
|
|
||||||
Vacant(entry) => {
|
|
||||||
let mut docids = RoaringBitmap::new();
|
|
||||||
if proximity == 8 {
|
|
||||||
docids = candidates.clone();
|
|
||||||
} else {
|
|
||||||
for (w1, w2) in pairs.iter().cloned() {
|
|
||||||
let key = (w1, w2, proximity);
|
|
||||||
if let Some(di) = index.word_pair_proximity_docids.get(rtxn, &key)? {
|
|
||||||
docids.union_with(&di);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
entry.insert(docids).clone()
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// We must be sure that we only return docids that are present in the candidates.
|
|
||||||
docids.intersect_with(parent_docids);
|
|
||||||
|
|
||||||
if !docids.is_empty() {
|
|
||||||
let mana = mana.checked_sub(proximity as u32).unwrap();
|
|
||||||
if tail.len() < 2 {
|
|
||||||
// We are the last pair, we return without recuring as we don't have any child.
|
|
||||||
answer.union_with(&docids);
|
|
||||||
return Ok(());
|
|
||||||
} else {
|
|
||||||
return mdfs_step(index, rtxn, mana, tail, candidates, &docids, union_cache, answer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn words_pair_combinations<'h>(
|
|
||||||
w1: &'h HashMap<String, (u8, RoaringBitmap)>,
|
|
||||||
w2: &'h HashMap<String, (u8, RoaringBitmap)>,
|
|
||||||
) -> Vec<(&'h str, &'h str)>
|
|
||||||
{
|
|
||||||
let mut pairs = Vec::new();
|
|
||||||
for (w1, (_typos, docids1)) in w1 {
|
|
||||||
for (w2, (_typos, docids2)) in w2 {
|
|
||||||
if !docids1.is_disjoint(&docids2) {
|
|
||||||
pairs.push((w1.as_str(), w2.as_str()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pairs
|
|
||||||
}
|
|
@ -1,217 +0,0 @@
|
|||||||
use meilisearch_tokenizer::{Token, TokenKind};
|
|
||||||
|
|
||||||
/// Tells whether the tokens currently read are inside or outside a pair of
/// double quotes.
#[derive(Debug)]
enum State {
    /// Outside of any quoted section.
    Free,
    /// Inside a quoted section.
    Quoted,
}

impl State {
    /// Toggles between `Free` and `Quoted`.
    fn swap(&mut self) {
        *self = match *self {
            State::Free => State::Quoted,
            State::Quoted => State::Free,
        };
    }
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
|
||||||
pub enum QueryToken<'a> {
|
|
||||||
Free(Token<'a>),
|
|
||||||
Quoted(Token<'a>),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn query_tokens<'a>(mut tokens: impl Iterator<Item = Token<'a>>) -> impl Iterator<Item = QueryToken<'a>> {
|
|
||||||
let mut state = State::Free;
|
|
||||||
let f = move || {
|
|
||||||
loop {
|
|
||||||
let token = tokens.next()?;
|
|
||||||
match token.kind() {
|
|
||||||
_ if token.text().trim() == "\"" => state.swap(),
|
|
||||||
TokenKind::Word => {
|
|
||||||
let token = match state {
|
|
||||||
State::Quoted => QueryToken::Quoted(token),
|
|
||||||
State::Free => QueryToken::Free(token),
|
|
||||||
};
|
|
||||||
return Some(token);
|
|
||||||
},
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
std::iter::from_fn(f)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;
    use QueryToken::{Quoted, Free};
    use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
    use fst::Set;

    /// Asserts that a `QueryToken` is of the given variant and wraps a token
    /// with the given text.
    macro_rules! assert_eq_query_token {
        ($test:expr, Quoted($val:literal)) => {
            match $test {
                Quoted(val) => assert_eq!(val.text(), $val),
                Free(val) => panic!("expected Quoted(\"{}\"), found Free(\"{}\")", $val, val.text()),
            }
        };

        ($test:expr, Free($val:literal)) => {
            match $test {
                Quoted(val) => panic!("expected Free(\"{}\"), found Quoted(\"{}\")", $val, val.text()),
                Free(val) => assert_eq!(val.text(), $val),
            }
        };
    }

    /// Analyzes `$query` with the default analyzer (no stop words) and checks
    /// that `query_tokens` yields exactly the listed tokens, in order.
    macro_rules! assert_query_yields {
        ($query:expr => [$($kind:ident($val:literal)),* $(,)?]) => {{
            let stop_words = Set::default();
            let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
            let analyzed = analyzer.analyze($query);
            let tokens = analyzed.tokens();
            let mut iter = query_tokens(tokens);
            $( assert_eq_query_token!(iter.next().unwrap(), $kind($val)); )*
            assert!(iter.next().is_none());
        }};
    }

    #[test]
    fn empty() {
        assert_query_yields!("" => []);
        assert_query_yields!(" " => []);
    }

    #[test]
    fn one_quoted_string() {
        assert_query_yields!("\"hello\"" => [Quoted("hello")]);
    }

    #[test]
    fn one_pending_quoted_string() {
        assert_query_yields!("\"hello" => [Quoted("hello")]);
    }

    #[test]
    fn one_non_quoted_string() {
        assert_query_yields!("hello" => [Free("hello")]);
    }

    #[test]
    fn quoted_directly_followed_by_free_strings() {
        assert_query_yields!("\"hello\"world" => [Quoted("hello"), Free("world")]);
    }

    #[test]
    fn free_directly_followed_by_quoted_strings() {
        assert_query_yields!("hello\"world\"" => [Free("hello"), Quoted("world")]);
    }

    #[test]
    fn free_followed_by_quoted_strings() {
        assert_query_yields!("hello \"world\"" => [Free("hello"), Quoted("world")]);
    }

    #[test]
    fn multiple_spaces_separated_strings() {
        assert_query_yields!("hello world " => [Free("hello"), Free("world")]);
    }

    #[test]
    fn multi_interleaved_quoted_free_strings() {
        assert_query_yields!("hello \"world\" coucou \"monde\"" => [
            Free("hello"),
            Quoted("world"),
            Free("coucou"),
            Quoted("monde"),
        ]);
    }

    #[test]
    fn multi_quoted_strings() {
        assert_query_yields!("\"hello world\" coucou \"monde est beau\"" => [
            Quoted("hello"),
            Quoted("world"),
            Free("coucou"),
            Quoted("monde"),
            Quoted("est"),
            Quoted("beau"),
        ]);
    }

    #[test]
    fn chinese() {
        assert_query_yields!("汽车男生" => [Free("汽车"), Free("男生")]);
    }
}
|
|
Loading…
Reference in New Issue
Block a user