mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Merge #227
227: Replace Consecutive by Phrase in query tree r=Kerollmops a=ManyTheFish

Replace `Consecutive` with `Phrase` in the query tree in order to remove theoretical bugs caused by the `Consecutive` enum type.

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
This commit is contained in:
commit
3e6c05fe13
@ -578,7 +578,6 @@ fn linear_compute_candidates(
|
|||||||
fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
|
fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
|
||||||
let mut min_rank = u64::max_value();
|
let mut min_rank = u64::max_value();
|
||||||
for branch in branches {
|
for branch in branches {
|
||||||
|
|
||||||
let branch_len = branch.len();
|
let branch_len = branch.len();
|
||||||
let mut branch_rank = Vec::with_capacity(branch_len);
|
let mut branch_rank = Vec::with_capacity(branch_len);
|
||||||
for derivates in branch {
|
for derivates in branch {
|
||||||
@ -661,7 +660,7 @@ fn linear_compute_candidates(
|
|||||||
|
|
||||||
// TODO can we keep refs of Query
|
// TODO can we keep refs of Query
|
||||||
fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
||||||
use crate::search::criteria::Operation::{And, Or, Consecutive};
|
use crate::search::criteria::Operation::{And, Or, Phrase};
|
||||||
|
|
||||||
fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree {
|
fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree {
|
||||||
match tail.split_first() {
|
match tail.split_first() {
|
||||||
@ -683,7 +682,7 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
|||||||
|
|
||||||
fn recurse(op: &Operation) -> FlattenedQueryTree {
|
fn recurse(op: &Operation) -> FlattenedQueryTree {
|
||||||
match op {
|
match op {
|
||||||
And(ops) | Consecutive(ops) => {
|
And(ops) => {
|
||||||
ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t))
|
ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t))
|
||||||
},
|
},
|
||||||
Or(_, ops) => if ops.iter().all(|op| op.query().is_some()) {
|
Or(_, ops) => if ops.iter().all(|op| op.query().is_some()) {
|
||||||
@ -691,6 +690,12 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
|||||||
} else {
|
} else {
|
||||||
ops.iter().map(recurse).flatten().collect()
|
ops.iter().map(recurse).flatten().collect()
|
||||||
},
|
},
|
||||||
|
Phrase(words) => {
|
||||||
|
let queries = words.iter().map(|word| {
|
||||||
|
vec![Query {prefix: false, kind: QueryKind::exact(word.clone())}]
|
||||||
|
}).collect();
|
||||||
|
vec![queries]
|
||||||
|
}
|
||||||
Operation::Query(query) => vec![vec![vec![query.clone()]]],
|
Operation::Query(query) => vec![vec![vec![query.clone()]]],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use anyhow::bail;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}};
|
use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}};
|
||||||
@ -239,7 +238,7 @@ pub fn resolve_query_tree<'t>(
|
|||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> anyhow::Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
use Operation::{And, Consecutive, Or, Query};
|
use Operation::{And, Phrase, Or, Query};
|
||||||
|
|
||||||
match query_tree {
|
match query_tree {
|
||||||
And(ops) => {
|
And(ops) => {
|
||||||
@ -261,26 +260,23 @@ pub fn resolve_query_tree<'t>(
|
|||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
},
|
},
|
||||||
Consecutive(ops) => {
|
Phrase(words) => {
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
let mut first_loop = true;
|
let mut first_loop = true;
|
||||||
for slice in ops.windows(2) {
|
for slice in words.windows(2) {
|
||||||
match (&slice[0], &slice[1]) {
|
let (left, right) = (&slice[0], &slice[1]);
|
||||||
(Operation::Query(left), Operation::Query(right)) => {
|
match ctx.word_pair_proximity_docids(left, right, 1)? {
|
||||||
match query_pair_proximity_docids(ctx, left, right, 1, wdcache)? {
|
Some(pair_docids) => {
|
||||||
pair_docids if pair_docids.is_empty() => {
|
if pair_docids.is_empty() {
|
||||||
return Ok(RoaringBitmap::new())
|
return Ok(RoaringBitmap::new());
|
||||||
},
|
} else if first_loop {
|
||||||
pair_docids if first_loop => {
|
candidates = pair_docids;
|
||||||
candidates = pair_docids;
|
first_loop = false;
|
||||||
first_loop = false;
|
} else {
|
||||||
},
|
candidates &= pair_docids;
|
||||||
pair_docids => {
|
|
||||||
candidates.intersect_with(&pair_docids);
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
_ => bail!("invalid consecutive query type"),
|
None => return Ok(RoaringBitmap::new())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
|
@ -171,12 +171,33 @@ fn resolve_candidates<'t>(
|
|||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>>
|
) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>>
|
||||||
{
|
{
|
||||||
use Operation::{And, Consecutive, Or, Query};
|
use Operation::{And, Phrase, Or};
|
||||||
|
|
||||||
let result = match query_tree {
|
let result = match query_tree {
|
||||||
And(ops) => mdfs(ctx, ops, proximity, cache, wdcache)?,
|
And(ops) => mdfs(ctx, ops, proximity, cache, wdcache)?,
|
||||||
Consecutive(ops) => if proximity == 0 {
|
Phrase(words) => if proximity == 0 {
|
||||||
mdfs(ctx, ops, 0, cache, wdcache)?
|
let most_left = words.first().map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||||
|
let most_right = words.last().map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||||
|
let mut candidates = None;
|
||||||
|
for slice in words.windows(2) {
|
||||||
|
let (left, right) = (&slice[0], &slice[1]);
|
||||||
|
match ctx.word_pair_proximity_docids(left, right, 1)? {
|
||||||
|
Some(pair_docids) => {
|
||||||
|
match candidates.as_mut() {
|
||||||
|
Some(candidates) => *candidates &= pair_docids,
|
||||||
|
None => candidates = Some(pair_docids),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
candidates = None;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
match (most_left, most_right, candidates) {
|
||||||
|
(Some(l), Some(r), Some(c)) => vec![(l, r, c)],
|
||||||
|
_otherwise => Default::default(),
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
Default::default()
|
Default::default()
|
||||||
},
|
},
|
||||||
@ -188,7 +209,7 @@ fn resolve_candidates<'t>(
|
|||||||
}
|
}
|
||||||
output
|
output
|
||||||
},
|
},
|
||||||
Query(q) => if proximity == 0 {
|
Operation::Query(q) => if proximity == 0 {
|
||||||
let candidates = query_docids(ctx, q, wdcache)?;
|
let candidates = query_docids(ctx, q, wdcache)?;
|
||||||
vec![(q.clone(), q.clone(), candidates)]
|
vec![(q.clone(), q.clone(), candidates)]
|
||||||
} else {
|
} else {
|
||||||
@ -306,14 +327,9 @@ fn resolve_plane_sweep_candidates(
|
|||||||
) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>>
|
) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>>
|
||||||
{
|
{
|
||||||
/// FIXME may be buggy with query like "new new york"
|
/// FIXME may be buggy with query like "new new york"
|
||||||
fn plane_sweep<'a>(
|
fn plane_sweep(
|
||||||
ctx: &dyn Context,
|
groups_positions: Vec<Vec<(Position, u8, Position)>>,
|
||||||
operations: &'a [Operation],
|
|
||||||
docid: DocumentId,
|
|
||||||
consecutive: bool,
|
consecutive: bool,
|
||||||
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
|
|
||||||
words_positions: &HashMap<String, RoaringBitmap>,
|
|
||||||
wdcache: &mut WordDerivationsCache,
|
|
||||||
) -> anyhow::Result<Vec<(Position, u8, Position)>>
|
) -> anyhow::Result<Vec<(Position, u8, Position)>>
|
||||||
{
|
{
|
||||||
fn compute_groups_proximity(
|
fn compute_groups_proximity(
|
||||||
@ -362,13 +378,9 @@ fn resolve_plane_sweep_candidates(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let groups_len = operations.len();
|
let groups_len = groups_positions.len();
|
||||||
let mut groups_positions = Vec::with_capacity(groups_len);
|
|
||||||
|
|
||||||
for operation in operations {
|
let mut groups_positions: Vec<_> = groups_positions.into_iter().map(|pos| pos.into_iter()).collect();
|
||||||
let positions = resolve_operation(ctx, operation, docid, rocache, words_positions, wdcache)?;
|
|
||||||
groups_positions.push(positions.into_iter());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pop top elements of each list.
|
// Pop top elements of each list.
|
||||||
let mut current = Vec::with_capacity(groups_len);
|
let mut current = Vec::with_capacity(groups_len);
|
||||||
@ -441,15 +453,32 @@ fn resolve_plane_sweep_candidates(
|
|||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Vec<(Position, u8, Position)>>
|
) -> anyhow::Result<Vec<(Position, u8, Position)>>
|
||||||
{
|
{
|
||||||
use Operation::{And, Consecutive, Or};
|
use Operation::{And, Phrase, Or};
|
||||||
|
|
||||||
if let Some(result) = rocache.get(query_tree) {
|
if let Some(result) = rocache.get(query_tree) {
|
||||||
return Ok(result.clone());
|
return Ok(result.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
let result = match query_tree {
|
let result = match query_tree {
|
||||||
And(ops) => plane_sweep(ctx, ops, docid, false, rocache, words_positions, wdcache)?,
|
And(ops) => {
|
||||||
Consecutive(ops) => plane_sweep(ctx, ops, docid, true, rocache, words_positions, wdcache)?,
|
let mut groups_positions = Vec::with_capacity(ops.len());
|
||||||
|
for operation in ops {
|
||||||
|
let positions = resolve_operation(ctx, operation, docid, rocache, words_positions, wdcache)?;
|
||||||
|
groups_positions.push(positions);
|
||||||
|
}
|
||||||
|
plane_sweep(groups_positions, false)?
|
||||||
|
},
|
||||||
|
Phrase(words) => {
|
||||||
|
let mut groups_positions = Vec::with_capacity(words.len());
|
||||||
|
for word in words {
|
||||||
|
let positions = match words_positions.get(word) {
|
||||||
|
Some(positions) => positions.iter().map(|p| (p, 0, p)).collect(),
|
||||||
|
None => vec![],
|
||||||
|
};
|
||||||
|
groups_positions.push(positions);
|
||||||
|
}
|
||||||
|
plane_sweep(groups_positions, true)?
|
||||||
|
},
|
||||||
Or(_, ops) => {
|
Or(_, ops) => {
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
for op in ops {
|
for op in ops {
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
use std::{borrow::Cow, collections::HashMap, mem::take};
|
use std::{borrow::Cow, collections::HashMap, mem::take};
|
||||||
|
|
||||||
use anyhow::bail;
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
@ -13,7 +12,6 @@ use super::{
|
|||||||
CriterionParameters,
|
CriterionParameters,
|
||||||
CriterionResult,
|
CriterionResult,
|
||||||
query_docids,
|
query_docids,
|
||||||
query_pair_proximity_docids,
|
|
||||||
resolve_query_tree,
|
resolve_query_tree,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -174,12 +172,14 @@ fn alterate_query_tree(
|
|||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<()>
|
) -> anyhow::Result<()>
|
||||||
{
|
{
|
||||||
use Operation::{And, Consecutive, Or};
|
use Operation::{And, Phrase, Or};
|
||||||
|
|
||||||
match operation {
|
match operation {
|
||||||
And(ops) | Consecutive(ops) | Or(_, ops) => {
|
And(ops) | Or(_, ops) => {
|
||||||
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
|
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
|
||||||
},
|
},
|
||||||
|
// Because Phrases don't allow typos, no alteration can be done.
|
||||||
|
Phrase(_words) => return Ok(()),
|
||||||
Operation::Query(q) => {
|
Operation::Query(q) => {
|
||||||
if let QueryKind::Tolerant { typo, word } = &q.kind {
|
if let QueryKind::Tolerant { typo, word } = &q.kind {
|
||||||
// if no typo is allowed we don't call word_derivations function,
|
// if no typo is allowed we don't call word_derivations function,
|
||||||
@ -228,32 +228,29 @@ fn resolve_candidates<'t>(
|
|||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> anyhow::Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
use Operation::{And, Consecutive, Or, Query};
|
use Operation::{And, Phrase, Or, Query};
|
||||||
|
|
||||||
match query_tree {
|
match query_tree {
|
||||||
And(ops) => {
|
And(ops) => {
|
||||||
mdfs(ctx, ops, number_typos, cache, wdcache)
|
mdfs(ctx, ops, number_typos, cache, wdcache)
|
||||||
},
|
},
|
||||||
Consecutive(ops) => {
|
Phrase(words) => {
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
let mut first_loop = true;
|
let mut first_loop = true;
|
||||||
for slice in ops.windows(2) {
|
for slice in words.windows(2) {
|
||||||
match (&slice[0], &slice[1]) {
|
let (left, right) = (&slice[0], &slice[1]);
|
||||||
(Operation::Query(left), Operation::Query(right)) => {
|
match ctx.word_pair_proximity_docids(left, right, 1)? {
|
||||||
match query_pair_proximity_docids(ctx, left, right, 1, wdcache)? {
|
Some(pair_docids) => {
|
||||||
pair_docids if pair_docids.is_empty() => {
|
if pair_docids.is_empty() {
|
||||||
return Ok(RoaringBitmap::new())
|
return Ok(RoaringBitmap::new());
|
||||||
},
|
} else if first_loop {
|
||||||
pair_docids if first_loop => {
|
candidates = pair_docids;
|
||||||
candidates = pair_docids;
|
first_loop = false;
|
||||||
first_loop = false;
|
} else {
|
||||||
},
|
candidates &= pair_docids;
|
||||||
pair_docids => {
|
|
||||||
candidates.intersect_with(&pair_docids);
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
_ => bail!("invalid consecutive query type"),
|
None => return Ok(RoaringBitmap::new())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
|
@ -52,13 +52,18 @@ impl MatchingWords {
|
|||||||
fn fetch_queries(tree: &Operation) -> HashSet<(&str, u8, IsPrefix)> {
|
fn fetch_queries(tree: &Operation) -> HashSet<(&str, u8, IsPrefix)> {
|
||||||
fn resolve_ops<'a>(tree: &'a Operation, out: &mut HashSet<(&'a str, u8, IsPrefix)>) {
|
fn resolve_ops<'a>(tree: &'a Operation, out: &mut HashSet<(&'a str, u8, IsPrefix)>) {
|
||||||
match tree {
|
match tree {
|
||||||
Operation::Or(_, ops) | Operation::And(ops) | Operation::Consecutive(ops) => {
|
Operation::Or(_, ops) | Operation::And(ops) => {
|
||||||
ops.as_slice().iter().for_each(|op| resolve_ops(op, out));
|
ops.as_slice().iter().for_each(|op| resolve_ops(op, out));
|
||||||
},
|
},
|
||||||
Operation::Query(Query { prefix, kind }) => {
|
Operation::Query(Query { prefix, kind }) => {
|
||||||
let typo = if kind.is_exact() { 0 } else { kind.typo() };
|
let typo = if kind.is_exact() { 0 } else { kind.typo() };
|
||||||
out.insert((kind.word(), typo, *prefix));
|
out.insert((kind.word(), typo, *prefix));
|
||||||
},
|
},
|
||||||
|
Operation::Phrase(words) => {
|
||||||
|
for word in words {
|
||||||
|
out.insert((word, 0, false));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,7 +15,8 @@ type IsPrefix = bool;
|
|||||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
pub enum Operation {
|
pub enum Operation {
|
||||||
And(Vec<Operation>),
|
And(Vec<Operation>),
|
||||||
Consecutive(Vec<Operation>),
|
// series of consecutive, non-prefix, exact words
|
||||||
|
Phrase(Vec<String>),
|
||||||
Or(IsOptionalWord, Vec<Operation>),
|
Or(IsOptionalWord, Vec<Operation>),
|
||||||
Query(Query),
|
Query(Query),
|
||||||
}
|
}
|
||||||
@ -28,9 +29,8 @@ impl fmt::Debug for Operation {
|
|||||||
writeln!(f, "{:1$}AND", "", depth * 2)?;
|
writeln!(f, "{:1$}AND", "", depth * 2)?;
|
||||||
children.iter().try_for_each(|c| pprint_tree(f, c, depth + 1))
|
children.iter().try_for_each(|c| pprint_tree(f, c, depth + 1))
|
||||||
},
|
},
|
||||||
Operation::Consecutive(children) => {
|
Operation::Phrase(children) => {
|
||||||
writeln!(f, "{:1$}CONSECUTIVE", "", depth * 2)?;
|
writeln!(f, "{:2$}PHRASE {:?}", "", children, depth * 2)
|
||||||
children.iter().try_for_each(|c| pprint_tree(f, c, depth + 1))
|
|
||||||
},
|
},
|
||||||
Operation::Or(true, children) => {
|
Operation::Or(true, children) => {
|
||||||
writeln!(f, "{:1$}OR(WORD)", "", depth * 2)?;
|
writeln!(f, "{:1$}OR(WORD)", "", depth * 2)?;
|
||||||
@ -49,14 +49,6 @@ impl fmt::Debug for Operation {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Operation {
|
impl Operation {
|
||||||
fn phrase(words: Vec<String>) -> Operation {
|
|
||||||
Operation::consecutive(
|
|
||||||
words.into_iter().map(|s| {
|
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(s) })
|
|
||||||
}).collect()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn and(mut ops: Vec<Self>) -> Self {
|
fn and(mut ops: Vec<Self>) -> Self {
|
||||||
if ops.len() == 1 {
|
if ops.len() == 1 {
|
||||||
ops.pop().unwrap()
|
ops.pop().unwrap()
|
||||||
@ -73,11 +65,11 @@ impl Operation {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn consecutive(mut ops: Vec<Self>) -> Self {
|
fn phrase(mut words: Vec<String>) -> Self {
|
||||||
if ops.len() == 1 {
|
if words.len() == 1 {
|
||||||
ops.pop().unwrap()
|
Self::Query(Query { prefix: false, kind: QueryKind::exact(words.pop().unwrap()) })
|
||||||
} else {
|
} else {
|
||||||
Self::Consecutive(ops)
|
Self::Phrase(words)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -256,10 +248,10 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(best.map(|(_, left, right)| Operation::Consecutive(
|
Ok(best.map(|(_, left, right)| Operation::Phrase(
|
||||||
vec![
|
vec![
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(left.to_string()) }),
|
left.to_string(),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(right.to_string()) })
|
right.to_string()
|
||||||
]
|
]
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
@ -494,24 +486,26 @@ fn create_primitive_query(query: TokenStream, stop_words: Option<Set<&[u8]>>, wo
|
|||||||
|
|
||||||
/// Returns the maximum number of typos that this Operation allows.
|
/// Returns the maximum number of typos that this Operation allows.
|
||||||
pub fn maximum_typo(operation: &Operation) -> usize {
|
pub fn maximum_typo(operation: &Operation) -> usize {
|
||||||
use Operation::{Or, And, Query, Consecutive};
|
use Operation::{Or, And, Query, Phrase};
|
||||||
match operation {
|
match operation {
|
||||||
Or(_, ops) => ops.iter().map(maximum_typo).max().unwrap_or(0),
|
Or(_, ops) => ops.iter().map(maximum_typo).max().unwrap_or(0),
|
||||||
And(ops) | Consecutive(ops) => ops.iter().map(maximum_typo).sum::<usize>(),
|
And(ops) => ops.iter().map(maximum_typo).sum::<usize>(),
|
||||||
Query(q) => q.kind.typo() as usize,
|
Query(q) => q.kind.typo() as usize,
|
||||||
|
// no typo allowed in phrases
|
||||||
|
Phrase(_) => 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the maximum proximity that this Operation allows.
|
/// Returns the maximum proximity that this Operation allows.
|
||||||
pub fn maximum_proximity(operation: &Operation) -> usize {
|
pub fn maximum_proximity(operation: &Operation) -> usize {
|
||||||
use Operation::{Or, And, Query, Consecutive};
|
use Operation::{Or, And, Query, Phrase};
|
||||||
match operation {
|
match operation {
|
||||||
Or(_, ops) => ops.iter().map(maximum_proximity).max().unwrap_or(0),
|
Or(_, ops) => ops.iter().map(maximum_proximity).max().unwrap_or(0),
|
||||||
And(ops) => {
|
And(ops) => {
|
||||||
ops.iter().map(maximum_proximity).sum::<usize>()
|
ops.iter().map(maximum_proximity).sum::<usize>()
|
||||||
+ ops.len().saturating_sub(1) * 7
|
+ ops.len().saturating_sub(1) * 7
|
||||||
},
|
},
|
||||||
Query(_) | Consecutive(_) => 0,
|
Query(_) | Phrase(_) => 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -765,9 +759,9 @@ mod test {
|
|||||||
let expected = Operation::Or(false, vec![
|
let expected = Operation::Or(false, vec![
|
||||||
Operation::And(vec![
|
Operation::And(vec![
|
||||||
Operation::Or(false, vec![
|
Operation::Or(false, vec![
|
||||||
Operation::Consecutive(vec![
|
Operation::Phrase(vec![
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("word".to_string()) }),
|
"word".to_string(),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
|
"split".to_string(),
|
||||||
]),
|
]),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "wordsplit".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "wordsplit".to_string()) }),
|
||||||
]),
|
]),
|
||||||
@ -789,9 +783,9 @@ mod test {
|
|||||||
let tokens = result.tokens();
|
let tokens = result.tokens();
|
||||||
|
|
||||||
let expected = Operation::And(vec![
|
let expected = Operation::And(vec![
|
||||||
Operation::Consecutive(vec![
|
Operation::Phrase(vec![
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }),
|
"hey".to_string(),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("friends".to_string()) }),
|
"friends".to_string(),
|
||||||
]),
|
]),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
|
||||||
]);
|
]);
|
||||||
@ -809,13 +803,13 @@ mod test {
|
|||||||
let tokens = result.tokens();
|
let tokens = result.tokens();
|
||||||
|
|
||||||
let expected = Operation::And(vec![
|
let expected = Operation::And(vec![
|
||||||
Operation::Consecutive(vec![
|
Operation::Phrase(vec![
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }),
|
"hey".to_string(),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("friends".to_string()) }),
|
"friends".to_string(),
|
||||||
]),
|
]),
|
||||||
Operation::Consecutive(vec![
|
Operation::Phrase(vec![
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
|
"wooop".to_string(),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
|
"wooop".to_string(),
|
||||||
]),
|
]),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@ -870,9 +864,9 @@ mod test {
|
|||||||
let result = analyzer.analyze(query);
|
let result = analyzer.analyze(query);
|
||||||
let tokens = result.tokens();
|
let tokens = result.tokens();
|
||||||
|
|
||||||
let expected = Operation::Consecutive(vec![
|
let expected = Operation::Phrase(vec![
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }),
|
"hey".to_string(),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("my".to_string()) }),
|
"my".to_string(),
|
||||||
]);
|
]);
|
||||||
let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
@ -940,9 +934,9 @@ mod test {
|
|||||||
let tokens = result.tokens();
|
let tokens = result.tokens();
|
||||||
|
|
||||||
let expected = Operation::And(vec![
|
let expected = Operation::And(vec![
|
||||||
Operation::Consecutive(vec![
|
Operation::Phrase(vec![
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }),
|
"hey".to_string(),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("my".to_string()) }),
|
"my".to_string(),
|
||||||
]),
|
]),
|
||||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact("good".to_string()) }),
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("good".to_string()) }),
|
||||||
]);
|
]);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user