mod compute_derivations; mod ntypo_subset; mod parse_query; mod phrase; use super::interner::{DedupInterner, Interned}; use super::{limits, SearchContext}; use crate::Result; use std::collections::BTreeSet; use std::ops::RangeInclusive; use either::Either; pub use ntypo_subset::NTypoTermSubset; pub use parse_query::{located_query_terms_from_string, make_ngram, number_of_typos_allowed}; pub use phrase::Phrase; use compute_derivations::partially_initialized_term_from_word; /// A set of word derivations attached to a location in the search query. #[derive(Clone, PartialEq, Eq, Hash)] pub struct LocatedQueryTermSubset { pub term_subset: QueryTermSubset, pub positions: RangeInclusive, pub term_ids: RangeInclusive, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct QueryTermSubset { original: Interned, zero_typo_subset: NTypoTermSubset, one_typo_subset: NTypoTermSubset, two_typo_subset: NTypoTermSubset, } #[derive(Clone, PartialEq, Eq, Hash)] pub struct QueryTerm { original: Interned, ngram_words: Option>>, max_nbr_typos: u8, is_prefix: bool, zero_typo: ZeroTypoTerm, // May not be computed yet one_typo: Lazy, // May not be computed yet two_typo: Lazy, } // SubTerms will be in a dedup interner #[derive(Default, Clone, PartialEq, Eq, Hash)] struct ZeroTypoTerm { /// The original phrase, if any phrase: Option>, /// A single word equivalent to the original term, with zero typos exact: Option>, /// All the words that contain the original word as prefix prefix_of: BTreeSet>, /// All the synonyms of the original word or phrase synonyms: BTreeSet>, /// A prefix in the prefix databases matching the original word use_prefix_db: Option>, } #[derive(Default, Clone, PartialEq, Eq, Hash)] struct OneTypoTerm { /// The original word split into multiple consecutive words split_words: Option>, /// Words that are 1 typo away from the original word one_typo: BTreeSet>, } #[derive(Default, Clone, PartialEq, Eq, Hash)] struct TwoTypoTerm { /// Words that are 2 typos away from the original word two_typos: BTreeSet>, } #[derive(Clone, PartialEq, Eq, Hash)] pub enum Lazy { Uninit, Init(T), } impl Lazy { pub fn is_init(&self) -> bool { match self { Lazy::Uninit => false, Lazy::Init(_) => true, } } pub fn is_uninit(&self) -> bool { match self { Lazy::Uninit => true, Lazy::Init(_) => false, } } } #[derive(Clone, Copy)] pub enum ExactTerm { Phrase(Interned), Word(Interned), } impl ExactTerm { pub fn interned_words<'ctx>( &self, ctx: &'ctx SearchContext<'ctx>, ) -> impl Iterator>> + 'ctx { match *self { ExactTerm::Phrase(phrase) => { let phrase = ctx.phrase_interner.get(phrase); Either::Left(phrase.words.iter().copied()) } ExactTerm::Word(word) => Either::Right(std::iter::once(Some(word))), } } } impl QueryTermSubset { pub fn exact_term(&self, ctx: &SearchContext) -> Option { let full_query_term = ctx.term_interner.get(self.original); if full_query_term.ngram_words.is_some() { return None; } // TODO: included in subset if let Some(phrase) = full_query_term.zero_typo.phrase { self.zero_typo_subset.contains_phrase(phrase).then_some(ExactTerm::Phrase(phrase)) } else if let Some(word) = full_query_term.zero_typo.exact { self.zero_typo_subset.contains_word(word).then_some(ExactTerm::Word(word)) } else { None } } pub fn empty(for_term: Interned) -> Self { Self { original: for_term, zero_typo_subset: NTypoTermSubset::Nothing, one_typo_subset: NTypoTermSubset::Nothing, two_typo_subset: NTypoTermSubset::Nothing, } } pub fn full(for_term: Interned) -> Self { Self { original: for_term, zero_typo_subset: NTypoTermSubset::All, one_typo_subset: NTypoTermSubset::All, two_typo_subset: NTypoTermSubset::All, } } pub fn union(&mut self, other: &Self) { assert!(self.original == other.original); self.zero_typo_subset.union(&other.zero_typo_subset); self.one_typo_subset.union(&other.one_typo_subset); self.two_typo_subset.union(&other.two_typo_subset); } pub fn intersect(&mut self, other: &Self) { assert!(self.original == other.original); self.zero_typo_subset.intersect(&other.zero_typo_subset); self.one_typo_subset.intersect(&other.one_typo_subset); self.two_typo_subset.intersect(&other.two_typo_subset); } pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option> { let original = ctx.term_interner.get(self.original); let Some(use_prefix_db) = original.zero_typo.use_prefix_db else { return None }; match &self.zero_typo_subset { NTypoTermSubset::All => Some(use_prefix_db), NTypoTermSubset::Subset { words, phrases: _ } => { // TODO: use a subset of prefix words instead if words.contains(&use_prefix_db) { Some(use_prefix_db) } else { None } } NTypoTermSubset::Nothing => None, } } pub fn all_single_words_except_prefix_db( &self, ctx: &mut SearchContext, ) -> Result>> { let mut result = BTreeSet::default(); // TODO: a compute_partially funtion if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() { self.original.compute_fully_if_needed(ctx)?; } let original = ctx.term_interner.get_mut(self.original); if !self.zero_typo_subset.is_empty() { let ZeroTypoTerm { phrase: _, exact: zero_typo, prefix_of, synonyms: _, use_prefix_db: _, } = &original.zero_typo; result.extend(zero_typo.iter().copied()); result.extend(prefix_of.iter().copied()); }; match &self.one_typo_subset { NTypoTermSubset::All => { let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else { panic!() }; result.extend(one_typo.iter().copied()) } NTypoTermSubset::Subset { words, phrases: _ } => { let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else { panic!() }; result.extend(one_typo.intersection(words)); } NTypoTermSubset::Nothing => {} }; match &self.two_typo_subset { NTypoTermSubset::All => { let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() }; result.extend(two_typos.iter().copied()); } NTypoTermSubset::Subset { words, phrases: _ } => { let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() }; result.extend(two_typos.intersection(words)); } NTypoTermSubset::Nothing => {} }; Ok(result) } pub fn all_phrases(&self, ctx: &mut SearchContext) -> Result>> { let mut result = BTreeSet::default(); if !self.one_typo_subset.is_empty() { // TODO: compute less than fully if possible self.original.compute_fully_if_needed(ctx)?; } let original = ctx.term_interner.get_mut(self.original); let ZeroTypoTerm { phrase, exact: _, prefix_of: _, synonyms, use_prefix_db: _ } = &original.zero_typo; result.extend(phrase.iter().copied()); result.extend(synonyms.iter().copied()); if !self.one_typo_subset.is_empty() { let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo else { panic!(); }; result.extend(split_words.iter().copied()); } Ok(result) } pub fn original_phrase(&self, ctx: &SearchContext) -> Option> { let t = ctx.term_interner.get(self.original); if let Some(p) = t.zero_typo.phrase { if self.zero_typo_subset.contains_phrase(p) { return Some(p); } } None } pub fn max_nbr_typos(&self, ctx: &SearchContext) -> u8 { let t = ctx.term_interner.get(self.original); match t.max_nbr_typos { 0 => 0, 1 => { if self.one_typo_subset.is_empty() { 0 } else { 1 } } 2 => { if self.two_typo_subset.is_empty() { if self.one_typo_subset.is_empty() { 0 } else { 1 } } else { 2 } } _ => panic!(), } } pub fn clear_zero_typo_subset(&mut self) { self.zero_typo_subset = NTypoTermSubset::Nothing; } pub fn clear_one_typo_subset(&mut self) { self.one_typo_subset = NTypoTermSubset::Nothing; } pub fn clear_two_typo_subset(&mut self) { self.two_typo_subset = NTypoTermSubset::Nothing; } pub fn description(&self, ctx: &SearchContext) -> String { let t = ctx.term_interner.get(self.original); ctx.word_interner.get(t.original).to_owned() } } impl ZeroTypoTerm { fn is_empty(&self) -> bool { let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, use_prefix_db } = self; phrase.is_none() && zero_typo.is_none() && prefix_of.is_empty() && synonyms.is_empty() && use_prefix_db.is_none() } } impl OneTypoTerm { fn is_empty(&self) -> bool { let OneTypoTerm { split_words, one_typo } = self; one_typo.is_empty() && split_words.is_none() } } impl TwoTypoTerm { fn is_empty(&self) -> bool { let TwoTypoTerm { two_typos } = self; two_typos.is_empty() } } impl QueryTerm { fn is_empty(&self) -> bool { let Lazy::Init(one_typo) = &self.one_typo else { return false; }; let Lazy::Init(two_typo) = &self.two_typo else { return false; }; self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty() } } impl Interned { /// Return the original word from the given query term fn original_single_word(self, ctx: &SearchContext) -> Option> { let self_ = ctx.term_interner.get(self); if self_.ngram_words.is_some() { None } else { Some(self_.original) } } } /// A query term coupled with its position in the user's search query. #[derive(Clone)] pub struct LocatedQueryTerm { pub value: Interned, pub positions: RangeInclusive, } impl LocatedQueryTerm { /// Return `true` iff the term is empty pub fn is_empty(&self, interner: &DedupInterner) -> bool { interner.get(self.value).is_empty() } }