diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs
index 9f612f239..575d5b0bf 100644
--- a/milli/src/search/new/logger/detailed.rs
+++ b/milli/src/search/new/logger/detailed.rs
@@ -6,7 +6,7 @@ use std::time::Instant;
 use rand::random;
 use roaring::RoaringBitmap;
 
-use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
+use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
 use crate::search::new::ranking_rule_graph::{
     Edge, EdgeCondition, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
     TypoGraph,
@@ -432,70 +432,70 @@ results.{random} {{
         file: &mut File,
     ) {
         match &node {
-            QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
-                QueryTerm::Phrase { phrase } => {
+            QueryNode::Term(LocatedQueryTerm { value, .. }) => {
+                let QueryTerm {
+                    original,
+                    zero_typo,
+                    one_typo,
+                    two_typos,
+                    use_prefix_db,
+                    synonyms,
+                    split_words,
+                    prefix_of,
+                    is_prefix: _,
+                    is_ngram: _,
+                    phrase,
+                } = ctx.term_interner.get(*value);
+
+                let original = ctx.word_interner.get(*original);
+                writeln!(
+                    file,
+                    "{node_idx} : \"{original}\" {{
+shape: class"
+                )
+                .unwrap();
+                for w in zero_typo.iter().copied() {
+                    let w = ctx.word_interner.get(w);
+                    writeln!(file, "\"{w}\" : 0").unwrap();
+                }
+                for w in prefix_of.iter().copied() {
+                    let w = ctx.word_interner.get(w);
+                    writeln!(file, "\"{w}\" : 0P").unwrap();
+                }
+                for w in one_typo.iter().copied() {
+                    let w = ctx.word_interner.get(w);
+                    writeln!(file, "\"{w}\" : 1").unwrap();
+                }
+                for w in two_typos.iter().copied() {
+                    let w = ctx.word_interner.get(w);
+                    writeln!(file, "\"{w}\" : 2").unwrap();
+                }
+                if let Some(phrase) = phrase {
                     let phrase = ctx.phrase_interner.get(*phrase);
                     let phrase_str = phrase.description(&ctx.word_interner);
-                    writeln!(file, "{node_idx} : \"{phrase_str}\"").unwrap();
+                    writeln!(file, "\"{phrase_str}\" : phrase").unwrap();
                 }
-                QueryTerm::Word { derivations } => {
-                    let WordDerivations {
-                        original,
-                        zero_typo,
-                        one_typo,
-                        two_typos,
-                        use_prefix_db,
-                        synonyms,
-                        split_words,
-                        prefix_of,
-                        is_prefix: _,
-                    } = ctx.derivations_interner.get(*derivations);
-
-                    let original = ctx.word_interner.get(*original);
-                    writeln!(
-                        file,
-                        "{node_idx} : \"{original}\" {{
-shape: class"
-                    )
-                    .unwrap();
-                    for w in zero_typo.iter().copied() {
-                        let w = ctx.word_interner.get(w);
-                        writeln!(file, "\"{w}\" : 0").unwrap();
-                    }
-                    for w in prefix_of.iter().copied() {
-                        let w = ctx.word_interner.get(w);
-                        writeln!(file, "\"{w}\" : 0P").unwrap();
-                    }
-                    for w in one_typo.iter().copied() {
-                        let w = ctx.word_interner.get(w);
-                        writeln!(file, "\"{w}\" : 1").unwrap();
-                    }
-                    for w in two_typos.iter().copied() {
-                        let w = ctx.word_interner.get(w);
-                        writeln!(file, "\"{w}\" : 2").unwrap();
-                    }
-                    if let Some(split_words) = split_words {
-                        let phrase = ctx.phrase_interner.get(*split_words);
-                        let phrase_str = phrase.description(&ctx.word_interner);
-                        writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
-                    }
-                    for synonym in synonyms.iter().copied() {
-                        let phrase = ctx.phrase_interner.get(synonym);
-                        let phrase_str = phrase.description(&ctx.word_interner);
-                        writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
-                    }
-                    if let Some(use_prefix_db) = use_prefix_db {
-                        let p = ctx.word_interner.get(*use_prefix_db);
-                        writeln!(file, "use prefix DB : {p}").unwrap();
-                    }
-                    for (d, edges) in distances.iter() {
-                        writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>())
-                            .unwrap();
-                    }
-
-                    writeln!(file, "}}").unwrap();
+                if let Some(split_words) = split_words {
+                    let phrase = ctx.phrase_interner.get(*split_words);
+                    let phrase_str = phrase.description(&ctx.word_interner);
+                    writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
                 }
-            },
+                for synonym in synonyms.iter().copied() {
+                    let phrase = ctx.phrase_interner.get(synonym);
+                    let phrase_str = phrase.description(&ctx.word_interner);
+                    writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
+                }
+                if let Some(use_prefix_db) = use_prefix_db {
+                    let p = ctx.word_interner.get(*use_prefix_db);
+                    writeln!(file, "use prefix DB : {p}").unwrap();
+                }
+                for (d, edges) in distances.iter() {
+                    writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>())
+                        .unwrap();
+                }
+
+                writeln!(file, "}}").unwrap();
+            }
             QueryNode::Deleted => panic!(),
             QueryNode::Start => {
                 writeln!(file, "{node_idx} : START").unwrap();
@@ -600,32 +600,20 @@ shape: class"
                 graph.edges_store[edge_idx as usize].as_ref().unwrap();
             let source_node = &graph.query_graph.nodes[*source_node as usize];
             let source_node_desc = match source_node {
-                QueryNode::Term(term) => match term.value {
-                    QueryTerm::Phrase { phrase } => {
-                        let phrase = ctx.phrase_interner.get(phrase);
-                        phrase.description(&ctx.word_interner)
-                    }
-                    QueryTerm::Word { derivations } => {
-                        let derivations = ctx.derivations_interner.get(derivations);
-                        ctx.word_interner.get(derivations.original).to_owned()
-                    }
-                },
+                QueryNode::Term(term) => {
+                    let term = ctx.term_interner.get(term.value);
+                    ctx.word_interner.get(term.original).to_owned()
+                }
                 QueryNode::Deleted => panic!(),
                 QueryNode::Start => "START".to_owned(),
                 QueryNode::End => "END".to_owned(),
             };
             let dest_node = &graph.query_graph.nodes[*dest_node as usize];
             let dest_node_desc = match dest_node {
-                QueryNode::Term(term) => match term.value {
-                    QueryTerm::Phrase { phrase } => {
-                        let phrase = ctx.phrase_interner.get(phrase);
-                        phrase.description(&ctx.word_interner)
-                    }
-                    QueryTerm::Word { derivations } => {
-                        let derivations = ctx.derivations_interner.get(derivations);
-                        ctx.word_interner.get(derivations.original).to_owned()
-                    }
-                },
+                QueryNode::Term(term) => {
+                    let term = ctx.term_interner.get(term.value);
+                    ctx.word_interner.get(term.original).to_owned()
+                }
                 QueryNode::Deleted => panic!(),
                 QueryNode::Start => "START".to_owned(),
                 QueryNode::End => "END".to_owned(),
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs
index 323b8eb62..1eaa6d347 100644
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -27,7 +27,7 @@ pub use ranking_rules::{bucket_sort, RankingRule, RankingRuleOutput, RankingRule
 use roaring::RoaringBitmap;
 
 use self::interner::Interner;
-use self::query_term::{Phrase, WordDerivations};
+use self::query_term::{Phrase, QueryTerm};
 use self::ranking_rules::PlaceholderQuery;
 use self::resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache};
 use crate::search::new::graph_based_ranking_rule::{Proximity, Typo};
@@ -41,8 +41,8 @@ pub struct SearchContext<'ctx> {
     pub db_cache: DatabaseCache<'ctx>,
     pub word_interner: Interner<String>,
     pub phrase_interner: Interner<Phrase>,
-    pub derivations_interner: Interner<WordDerivations>,
-    pub query_term_docids: QueryTermDocIdsCache,
+    pub term_interner: Interner<QueryTerm>,
+    pub term_docids: QueryTermDocIdsCache,
 }
 impl<'ctx> SearchContext<'ctx> {
     pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self {
@@ -52,8 +52,8 @@ impl<'ctx> SearchContext<'ctx> {
             db_cache: <_>::default(),
             word_interner: <_>::default(),
             phrase_interner: <_>::default(),
-            derivations_interner: <_>::default(),
-            query_term_docids: <_>::default(),
+            term_interner: <_>::default(),
+            term_docids: <_>::default(),
         }
     }
 }
diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs
index f76feb80b..7bed15571 100644
--- a/milli/src/search/new/query_graph.rs
+++ b/milli/src/search/new/query_graph.rs
@@ -45,7 +45,7 @@ For the search query `sunflower`, we need to register the following things:
 - and also the couple of adjacent words `sun flower`
 - as well as all the user-defined synonyms of `sunflower`
 
-All these derivations of a word will be stored in [`WordDerivations`].
+All these derivations of a word will be stored in [`QueryTerm`].
 
 ## Example 2:
 For the search query `summer house by`.
@@ -148,7 +148,7 @@ impl QueryGraph {
             let mut new_nodes = vec![];
             let new_node_idx = graph.add_node(&prev0, QueryNode::Term(term0.clone()));
             new_nodes.push(new_node_idx);
-            if term0.is_empty(&ctx.derivations_interner) {
+            if term0.is_empty(&ctx.term_interner) {
                 empty_nodes.push(new_node_idx);
             }
diff --git a/milli/src/search/new/query_term.rs b/milli/src/search/new/query_term.rs
index 467752012..c6cb81131 100644
--- a/milli/src/search/new/query_term.rs
+++ b/milli/src/search/new/query_term.rs
@@ -30,16 +30,20 @@ impl Phrase {
 /// A structure storing all the different ways to match
 /// a term in the user's search query.
 #[derive(Clone, PartialEq, Eq, Hash)]
-pub struct WordDerivations {
+pub struct QueryTerm {
     /// The original terms, for debugging purposes
     pub original: Interned<String>,
+    /// Whether the term is an ngram
+    pub is_ngram: bool,
+    /// Whether the term can be only the prefix of a word
     pub is_prefix: bool,
-
-    /// A single word equivalent to the original one, with zero typos
+    /// The original phrase, if any
+    pub phrase: Option<Interned<Phrase>>,
+    /// A single word equivalent to the original term, with zero typos
     pub zero_typo: Option<Interned<String>>,
     /// All the words that contain the original word as prefix
     pub prefix_of: Box<[Interned<String>]>,
-    /// All the synonyms of the original word
+    /// All the synonyms of the original word or phrase
    pub synonyms: Box<[Interned<Phrase>]>,
 
     /// The original word split into multiple consecutive words
@@ -54,10 +58,15 @@ pub struct QueryTerm {
     /// A prefix in the prefix databases matching the original word
     pub use_prefix_db: Option<Interned<String>>,
 }
-impl WordDerivations {
-    pub fn empty(word_interner: &mut Interner<String>, original: &str) -> Self {
+impl QueryTerm {
+    pub fn phrase(
+        word_interner: &mut Interner<String>,
+        phrase_interner: &mut Interner<Phrase>,
+        phrase: Phrase,
+    ) -> Self {
         Self {
-            original: word_interner.insert(original.to_owned()),
+            original: word_interner.insert(phrase.description(word_interner)),
+            phrase: Some(phrase_interner.insert(phrase)),
             is_prefix: false,
             zero_typo: None,
             prefix_of: Box::new([]),
@@ -66,12 +75,28 @@ impl QueryTerm {
             one_typo: Box::new([]),
             two_typos: Box::new([]),
             use_prefix_db: None,
+            is_ngram: false,
+        }
+    }
+    pub fn empty(word_interner: &mut Interner<String>, original: &str) -> Self {
+        Self {
+            original: word_interner.insert(original.to_owned()),
+            phrase: None,
+            is_prefix: false,
+            zero_typo: None,
+            prefix_of: Box::new([]),
+            synonyms: Box::new([]),
+            split_words: None,
+            one_typo: Box::new([]),
+            two_typos: Box::new([]),
+            use_prefix_db: None,
+            is_ngram: false,
         }
     }
     /// Return an iterator over all the single words derived from the original word.
     ///
     /// This excludes synonyms, split words, and words stored in the prefix databases.
-    pub fn all_single_word_derivations_except_prefix_db(
+    pub fn all_single_words_except_prefix_db(
         &'_ self,
     ) -> impl Iterator<Item = Interned<String>> + Clone + '_ {
         self.zero_typo
@@ -84,7 +109,7 @@
     /// Return an iterator over all the single words derived from the original word.
     ///
     /// This excludes synonyms, split words, and words stored in the prefix databases.
-    pub fn all_phrase_derivations(&'_ self) -> impl Iterator<Item = Interned<Phrase>> + Clone + '_ {
+    pub fn all_phrases(&'_ self) -> impl Iterator<Item = Interned<Phrase>> + Clone + '_ {
         self.split_words.iter().chain(self.synonyms.iter()).copied()
     }
     pub fn is_empty(&self) -> bool {
@@ -98,15 +123,15 @@
     }
 }
 
-/// Compute the word derivations for the given word
-pub fn word_derivations(
+/// Compute the query term for the given word
+pub fn query_term_from_word(
     ctx: &mut SearchContext,
     word: &str,
     max_typo: u8,
     is_prefix: bool,
-) -> Result<WordDerivations> {
+) -> Result<QueryTerm> {
     if word.len() > MAX_WORD_LENGTH {
-        return Ok(WordDerivations::empty(&mut ctx.word_interner, word));
+        return Ok(QueryTerm::empty(&mut ctx.word_interner, word));
     }
 
     let fst = ctx.index.words_fst(ctx.txn)?;
@@ -223,8 +248,9 @@ pub fn word_derivations(
         })
         .collect();
 
-    Ok(WordDerivations {
+    Ok(QueryTerm {
         original: word_interned,
+        phrase: None,
         is_prefix,
         zero_typo,
         prefix_of: prefix_of.into_boxed_slice(),
@@ -233,6 +259,7 @@ pub fn word_derivations(
         one_typo: one_typo.into_boxed_slice(),
         two_typos: two_typos.into_boxed_slice(),
         use_prefix_db,
+        is_ngram: false,
     })
 }
 
@@ -266,35 +293,13 @@ fn split_best_frequency(
     Ok(best.map(|(_, left, right)| (left.to_owned(), right.to_owned())))
 }
 
-#[derive(Clone, PartialEq, Eq, Hash)]
-pub enum QueryTerm {
-    Phrase { phrase: Interned<Phrase> },
-    // TODO: change to `Interned`?
-    Word { derivations: Interned<WordDerivations> },
-}
-
 impl QueryTerm {
-    pub fn is_prefix(&self, derivations_interner: &Interner<WordDerivations>) -> bool {
-        match self {
-            QueryTerm::Phrase { .. } => false,
-            QueryTerm::Word { derivations } => derivations_interner.get(*derivations).is_prefix,
-        }
-    }
     /// Return the original word from the given query term
-    pub fn original_single_word(
-        &self,
-        derivations_interner: &Interner<WordDerivations>,
-    ) -> Option<Interned<String>> {
-        match self {
-            QueryTerm::Phrase { phrase: _ } => None,
-            QueryTerm::Word { derivations } => {
-                let derivations = derivations_interner.get(*derivations);
-                if derivations.is_empty() {
-                    None
-                } else {
-                    Some(derivations.original)
-                }
-            }
+    pub fn original_single_word(&self) -> Option<Interned<String>> {
+        if self.phrase.is_some() || self.is_ngram {
+            None
+        } else {
+            Some(self.original)
         }
     }
 }
 
@@ -302,19 +307,14 @@ impl QueryTerm {
 /// A query term coupled with its position in the user's search query.
 #[derive(Clone)]
 pub struct LocatedQueryTerm {
-    pub value: QueryTerm,
+    pub value: Interned<QueryTerm>,
     pub positions: RangeInclusive<i8>,
 }
 
 impl LocatedQueryTerm {
-    /// Return `true` iff the word derivations within the query term are empty
-    pub fn is_empty(&self, interner: &Interner<WordDerivations>) -> bool {
-        match self.value {
-            // TODO: phrases should be greedily computed, so that they can be excluded from
-            // the query graph right from the start?
-            QueryTerm::Phrase { phrase: _ } => false,
-            QueryTerm::Word { derivations, .. } => interner.get(derivations).is_empty(),
-        }
+    /// Return `true` iff the term is empty
+    pub fn is_empty(&self, interner: &Interner<QueryTerm>) -> bool {
+        interner.get(self.value).is_empty()
     }
 }
 
@@ -360,18 +360,16 @@ pub fn located_query_terms_from_string<'ctx>(
             } else {
                 let word = ctx.word_interner.insert(token.lemma().to_string());
                 // TODO: in a phrase, check that every word exists
-                // otherwise return WordDerivations::Empty
+                // otherwise return an empty term
                 phrase.push(Some(word));
             }
         } else if peekable.peek().is_some() {
             match token.kind {
                 TokenKind::Word => {
                     let word = token.lemma();
-                    let derivations = word_derivations(ctx, word, nbr_typos(word), false)?;
+                    let term = query_term_from_word(ctx, word, nbr_typos(word), false)?;
                     let located_term = LocatedQueryTerm {
-                        value: QueryTerm::Word {
-                            derivations: ctx.derivations_interner.insert(derivations),
-                        },
+                        value: ctx.term_interner.insert(term),
                         positions: position..=position,
                     };
                     located_terms.push(located_term);
@@ -380,11 +378,9 @@ pub fn located_query_terms_from_string<'ctx>(
             }
         } else {
             let word = token.lemma();
-            let derivations = word_derivations(ctx, word, nbr_typos(word), true)?;
+            let term = query_term_from_word(ctx, word, nbr_typos(word), true)?;
             let located_term = LocatedQueryTerm {
-                value: QueryTerm::Word {
-                    derivations: ctx.derivations_interner.insert(derivations),
-                },
+                value: ctx.term_interner.insert(term),
                 positions: position..=position,
             };
             located_terms.push(located_term);
@@ -408,11 +404,11 @@ pub fn located_query_terms_from_string<'ctx>(
                 if !phrase.is_empty() && (quote_count > 0 || separator_kind == SeparatorKind::Hard) {
                     let located_query_term = LocatedQueryTerm {
-                        value: QueryTerm::Phrase {
-                            phrase: ctx
-                                .phrase_interner
-                                .insert(Phrase { words: mem::take(&mut phrase) }),
-                        },
+                        value: ctx.term_interner.insert(QueryTerm::phrase(
+                            &mut ctx.word_interner,
+                            &mut ctx.phrase_interner,
+                            Phrase { words: mem::take(&mut phrase) },
+                        )),
                         positions: phrase_start..=phrase_end,
                     };
                     located_terms.push(located_query_term);
@@ -425,9 +421,11 @@
     // If a quote is never closed, we consider all of the end of the query as a phrase.
     if !phrase.is_empty() {
         let located_query_term = LocatedQueryTerm {
-            value: QueryTerm::Phrase {
-                phrase: ctx.phrase_interner.insert(Phrase { words: mem::take(&mut phrase) }),
-            },
+            value: ctx.term_interner.insert(QueryTerm::phrase(
+                &mut ctx.word_interner,
+                &mut ctx.phrase_interner,
+                Phrase { words: mem::take(&mut phrase) },
+            )),
             positions: phrase_start..=phrase_end,
         };
         located_terms.push(located_query_term);
@@ -474,8 +472,7 @@ pub fn make_ngram(
     }
     let mut words_interned = vec![];
     for term in terms {
-        if let Some(original_term_word) = term.value.original_single_word(&ctx.derivations_interner)
-        {
+        if let Some(original_term_word) = ctx.term_interner.get(term.value).original_single_word() {
             words_interned.push(original_term_word);
         } else {
             return Ok(None);
         }
     }
@@ -486,121 +483,40 @@ pub fn make_ngram(
     let start = *terms.first().as_ref().unwrap().positions.start();
     let end = *terms.last().as_ref().unwrap().positions.end();
-    let is_prefix = terms.last().as_ref().unwrap().value.is_prefix(&ctx.derivations_interner);
+    let is_prefix = ctx.term_interner.get(terms.last().as_ref().unwrap().value).is_prefix;
     let ngram_str = words.join("");
     if ngram_str.len() > MAX_WORD_LENGTH {
         return Ok(None);
     }
 
-    let mut derivations = word_derivations(
+    let mut term = query_term_from_word(
         ctx,
         &ngram_str,
         number_of_typos_allowed(ngram_str.as_str()).saturating_sub(terms.len() as u8),
         is_prefix,
     )?;
-    derivations.original = ctx.word_interner.insert(words.join(" "));
+    term.original = ctx.word_interner.insert(words.join(" "));
     // Now add the synonyms
     let index_synonyms = ctx.index.synonyms(ctx.txn)?;
-    let mut derivations_synonyms = derivations.synonyms.to_vec();
-    derivations_synonyms.extend(
-        index_synonyms.get(&words).cloned().unwrap_or_default().into_iter().map(|words| {
+    let mut term_synonyms = term.synonyms.to_vec();
+    term_synonyms.extend(index_synonyms.get(&words).cloned().unwrap_or_default().into_iter().map(
+        |words| {
             let words = words.into_iter().map(|w| Some(ctx.word_interner.insert(w))).collect();
             ctx.phrase_interner.insert(Phrase { words })
-        }),
-    );
-    derivations.synonyms = derivations_synonyms.into_boxed_slice();
-    if let Some(split_words) = derivations.split_words {
+        },
+    ));
+    term.synonyms = term_synonyms.into_boxed_slice();
+    if let Some(split_words) = term.split_words {
         let split_words = ctx.phrase_interner.get(split_words);
         if split_words.words == words_interned.iter().map(|&i| Some(i)).collect::<Vec<_>>() {
-            derivations.split_words = None;
+            term.split_words = None;
         }
     }
-    if derivations.is_empty() {
+    if term.is_empty() {
         return Ok(None);
     }
-    let term = LocatedQueryTerm {
-        value: QueryTerm::Word { derivations: ctx.derivations_interner.insert(derivations) },
-        positions: start..=end,
-    };
+    term.is_ngram = true;
+    let term = LocatedQueryTerm { value: ctx.term_interner.insert(term), positions: start..=end };
 
     Ok(Some(term))
 }
-
-// // TODO: return a word derivations instead?
-// pub fn ngram2(
-//     ctx: &mut SearchContext,
-//     x: &LocatedQueryTerm,
-//     y: &LocatedQueryTerm,
-//     number_of_typos_allowed: impl Fn(&str) -> u8,
-// ) -> Result<Option<LocatedQueryTerm>> {
-//     if *x.positions.end() != y.positions.start() - 1 {
-//         return Ok(None);
-//     }
-//     match (
-//         x.value.original_single_word(&ctx.word_interner, &ctx.derivations_interner),
-//         y.value.original_single_word(&ctx.word_interner, &ctx.derivations_interner),
-//     ) {
-//         (Some(w1), Some(w2)) => {
-//             let ngram2_str = format!("{w1}{w2}");
-//             let mut derivations = word_derivations(
-//                 ctx,
-//                 &ngram2_str,
-//                 number_of_typos_allowed(ngram2_str.as_str()).saturating_sub(1),
-//                 y.value.is_prefix(&ctx.derivations_interner),
-//             )?;
-//             // Now add the synonyms
-//             let index_synonyms = ctx.index.synonyms(ctx.txn)?;
-//             let mut derivations_synonyms = derivations.synonyms.to_vec();
-//             derivations_synonyms.extend(
-//                 index_synonyms
-//                     .get(&vec![w1.to_owned(), w2.to_owned()])
-//                     .cloned()
-//                     .unwrap_or_default()
-//                     .into_iter()
-//                     .map(|words| {
-//                         let words =
-//                             words.into_iter().map(|w| Some(ctx.word_interner.insert(w))).collect();
-//                         ctx.phrase_interner.insert(Phrase { words })
-//                     }),
-//             );
-
-//             let term = LocatedQueryTerm {
-//                 value: QueryTerm::Word {
-//                     derivations: ctx.derivations_interner.insert(derivations),
-//                 },
-//                 positions: *x.positions.start()..=*y.positions.end(),
-//             };
-
-//             Ok(Some(term))
-//         }
-//         _ => Ok(None),
-//     }
-// }
-
-// // TODO: return a word derivations instead?
-// pub fn ngram3(
-//     ctx: &mut SearchContext,
-//     x: &LocatedQueryTerm,
-//     y: &LocatedQueryTerm,
-//     z: &LocatedQueryTerm,
-// ) -> Option<(Interned<String>, RangeInclusive<i8>)> {
-//     if *x.positions.end() != y.positions.start() - 1
-//         || *y.positions.end() != z.positions.start() - 1
-//     {
-//         return None;
-//     }
-//     match (
-//         &x.value.original_single_word(&ctx.word_interner, &ctx.derivations_interner),
-//         &y.value.original_single_word(&ctx.word_interner, &ctx.derivations_interner),
-//         &z.value.original_single_word(&ctx.word_interner, &ctx.derivations_interner),
-//     ) {
-//         (Some(w1), Some(w2), Some(w3)) => {
-//             let term = (
-//                 ctx.word_interner.insert(format!("{w1}{w2}{w3}")),
-//                 *x.positions.start()..=*z.positions.end(),
-//             );
-//             Some(term)
-//         }
-//         _ => None,
-//     }
-// }
diff --git a/milli/src/search/new/ranking_rule_graph/build.rs b/milli/src/search/new/ranking_rule_graph/build.rs
index 7b00fc445..286a98ab1 100644
--- a/milli/src/search/new/ranking_rule_graph/build.rs
+++ b/milli/src/search/new/ranking_rule_graph/build.rs
@@ -22,28 +22,21 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
         let mut edges_store = vec![];
         let mut edges_of_node = vec![];
 
-        for (node_idx, node) in graph_nodes.iter().enumerate() {
+        for (source_idx, source_node) in graph_nodes.iter().enumerate() {
             edges_of_node.push(HashSet::new());
             let new_edges = edges_of_node.last_mut().unwrap();
 
-            let Some(source_node_data) = G::build_step_visit_source_node(ctx, node)? else { continue };
-
-            for successor_idx in graph_edges[node_idx].successors.iter() {
-                let dest_node = &graph_nodes[successor_idx as usize];
-                let edges = G::build_step_visit_destination_node(
-                    ctx,
-                    &mut conditions_interner,
-                    dest_node,
-                    &source_node_data,
-                )?;
+            for dest_idx in graph_edges[source_idx].successors.iter() {
+                let dest_node = &graph_nodes[dest_idx as usize];
+                let edges = G::build_edges(ctx, &mut conditions_interner, source_node, dest_node)?;
 
                 if edges.is_empty() {
                     continue;
                 }
 
                 for (cost, condition) in edges {
                     edges_store.push(Some(Edge {
-                        source_node: node_idx as u16,
-                        dest_node: successor_idx,
+                        source_node: source_idx as u16,
+                        dest_node: dest_idx,
                         cost,
                         condition,
                     }));
diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs
index 16c75b072..ee93bee13 100644
--- a/milli/src/search/new/ranking_rule_graph/mod.rs
+++ b/milli/src/search/new/ranking_rule_graph/mod.rs
@@ -80,11 +80,6 @@ pub trait RankingRuleGraphTrait: Sized {
     /// in [`resolve_edge_condition`](RankingRuleGraphTrait::resolve_edge_condition).
     type EdgeCondition: Sized + Clone + PartialEq + Eq + Hash;
 
-    /// A structure used in the construction of the graph, created when a
-    /// query graph source node is visited. It is used to determine the cost
-    /// and condition of a ranking rule edge when the destination node is visited.
-    type BuildVisitedFromNode;
-
     /// Return the label of the given edge condition, to be used when visualising
     /// the ranking rule graph.
     fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String;
@@ -97,22 +92,13 @@ pub trait RankingRuleGraphTrait: Sized {
         universe: &RoaringBitmap,
     ) -> Result<RoaringBitmap>;
 
-    /// Prepare to build the edges outgoing from `source_node`.
-    ///
-    /// This call is followed by zero, one or more calls to [`build_step_visit_destination_node`](RankingRuleGraphTrait::build_step_visit_destination_node),
-    /// which builds the actual edges.
-    fn build_step_visit_source_node<'ctx>(
-        ctx: &mut SearchContext<'ctx>,
-        source_node: &QueryNode,
-    ) -> Result<Option<Self::BuildVisitedFromNode>>;
-
     /// Return the cost and condition of the edges going from the previously visited node
     /// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
-    fn build_step_visit_destination_node<'from_data, 'ctx: 'from_data>(
+    fn build_edges<'ctx>(
         ctx: &mut SearchContext<'ctx>,
         conditions_interner: &mut Interner<Self::EdgeCondition>,
+        source_node: &QueryNode,
         dest_node: &QueryNode,
-        source_node_data: &'from_data Self::BuildVisitedFromNode,
     ) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
 
     fn log_state(
diff --git a/milli/src/search/new/ranking_rule_graph/proximity/build.rs b/milli/src/search/new/ranking_rule_graph/proximity/build.rs
index d3a219948..b8042c408 100644
--- a/milli/src/search/new/ranking_rule_graph/proximity/build.rs
+++ b/milli/src/search/new/ranking_rule_graph/proximity/build.rs
@@ -4,89 +4,40 @@ use std::collections::BTreeMap;
 use super::ProximityEdge;
 use crate::search::new::db_cache::DatabaseCache;
 use crate::search::new::interner::{Interned, Interner};
-use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
+use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
 use crate::search::new::ranking_rule_graph::proximity::WordPair;
 use crate::search::new::ranking_rule_graph::EdgeCondition;
 use crate::search::new::{QueryNode, SearchContext};
 use crate::Result;
 use heed::RoTxn;
 
-pub fn visit_from_node(
-    ctx: &mut SearchContext,
-    from_node: &QueryNode,
-) -> Result<Option<(Vec<(Option<Interned<Phrase>>, Interned<String>)>, i8)>> {
-    let SearchContext { derivations_interner, .. } = ctx;
-
-    let (left_phrase, left_derivations, left_end_position) = match from_node {
-        QueryNode::Term(LocatedQueryTerm { value: value1, positions: pos1 }) => {
-            match value1 {
-                QueryTerm::Word { derivations } => {
-                    (None, derivations_interner.get(*derivations).clone(), *pos1.end())
-                }
-                QueryTerm::Phrase { phrase: phrase_interned } => {
-                    let phrase = ctx.phrase_interner.get(*phrase_interned);
-                    if let Some(original) = *phrase.words.last().unwrap() {
-                        (
-                            Some(*phrase_interned),
-                            WordDerivations {
-                                original,
-                                zero_typo: Some(original),
-                                one_typo: Box::new([]),
-                                two_typos: Box::new([]),
-                                use_prefix_db: None,
-                                synonyms: Box::new([]),
-                                split_words: None,
-                                is_prefix: false,
-                                prefix_of: Box::new([]),
-                            },
-                            *pos1.end(),
-                        )
-                    } else {
-                        // No word pairs if the phrase does not have a regular word as its last term
-                        return Ok(None);
-                    }
-                }
-            }
-        }
-        QueryNode::Start => (None, WordDerivations::empty(&mut ctx.word_interner, ""), -1),
-        _ => return Ok(None),
-    };
-
-    // left term cannot be a prefix
-    assert!(left_derivations.use_prefix_db.is_none() && !left_derivations.is_prefix);
-
-    let last_word_left_phrase = if let Some(left_phrase_interned) = left_phrase {
-        let left_phrase = ctx.phrase_interner.get(left_phrase_interned);
-        left_phrase.words.last().copied().unwrap()
-    } else {
-        None
-    };
-    let left_single_word_iter: Vec<(Option<Interned<Phrase>>, Interned<String>)> = left_derivations
-        .all_single_word_derivations_except_prefix_db()
-        .chain(last_word_left_phrase.iter().copied())
-        .map(|w| (left_phrase, w))
-        .collect();
-    let left_phrase_iter: Vec<(Option<Interned<Phrase>>, Interned<String>)> = left_derivations
-        .all_phrase_derivations()
-        .map(|left_phrase_interned: Interned<Phrase>| {
-            let left_phrase = ctx.phrase_interner.get(left_phrase_interned);
-            let last_word_left_phrase: Interned<String> =
-                left_phrase.words.last().unwrap().unwrap();
-            let r: (Option<Interned<Phrase>>, Interned<String>) =
-                (Some(left_phrase_interned), last_word_left_phrase);
-            r
-        })
-        .collect();
-    let mut left_word_iter = left_single_word_iter;
-    left_word_iter.extend(left_phrase_iter);
-
-    Ok(Some((left_word_iter, left_end_position)))
+fn last_word_of_term_iter<'t>(
+    t: &'t QueryTerm,
+    phrase_interner: &'t Interner<Phrase>,
+) -> impl Iterator<Item = (Option<Interned<Phrase>>, Interned<String>)> + 't {
+    t.all_single_words_except_prefix_db().map(|w| (None, w)).chain(t.all_phrases().flat_map(
+        move |p| {
+            let phrase = phrase_interner.get(p);
+            phrase.words.last().unwrap().map(|last| (Some(p), last))
+        },
+    ))
+}
+fn first_word_of_term_iter<'t>(
+    t: &'t QueryTerm,
+    phrase_interner: &'t Interner<Phrase>,
+) -> impl Iterator<Item = (Interned<String>, Option<Interned<Phrase>>)> + 't {
+    t.all_single_words_except_prefix_db().map(|w| (w, None)).chain(t.all_phrases().flat_map(
+        move |p| {
+            let phrase = phrase_interner.get(p);
+            phrase.words.first().unwrap().map(|first| (first, Some(p)))
+        },
+    ))
 }
 
-pub fn build_step_visit_destination_node<'ctx, 'from_data>(
+pub fn build_edges<'ctx>(
     ctx: &mut SearchContext<'ctx>,
     conditions_interner: &mut Interner<ProximityEdge>,
-    from_node_data: &'from_data (Vec<(Option<Interned<Phrase>>, Interned<String>)>, i8),
+    from_node: &QueryNode,
     to_node: &QueryNode,
 ) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
     let SearchContext {
@@ -95,9 +46,19 @@ pub fn build_step_visit_destination_node<'ctx, 'from_data>(
         db_cache,
         word_interner,
         phrase_interner,
-        derivations_interner,
-        query_term_docids: _,
+        term_interner,
+        term_docids: _,
     } = ctx;
+
+    let (left_term, left_end_position) = match from_node {
+        QueryNode::Term(LocatedQueryTerm { value, positions }) => {
+            (term_interner.get(*value), *positions.end())
+        }
+        QueryNode::Deleted => return Ok(vec![]),
+        QueryNode::Start => return Ok(vec![(0, EdgeCondition::Unconditional)]),
+        QueryNode::End => return Ok(vec![]),
+    };
+
     let right_term = match &to_node {
         QueryNode::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
         QueryNode::Deleted | QueryNode::Start => return Ok(vec![]),
         QueryNode::Term(term) => term,
     };
     let LocatedQueryTerm { value: right_value, positions: right_positions } = right_term;
 
-    let (right_phrase, right_derivations, right_start_position, right_ngram_length) =
-        match right_value {
-            QueryTerm::Word { derivations } => (
-                None,
-                derivations_interner.get(*derivations).clone(),
-                *right_positions.start(),
-                right_positions.len(),
-            ),
-            QueryTerm::Phrase { phrase: right_phrase_interned } => {
-                let right_phrase = phrase_interner.get(*right_phrase_interned);
-                if let Some(original) = *right_phrase.words.first().unwrap() {
-                    (
-                        Some(*right_phrase_interned),
-                        WordDerivations {
-                            original,
-                            zero_typo: Some(original),
-                            one_typo: Box::new([]),
-                            two_typos: Box::new([]),
-                            use_prefix_db: None,
-                            synonyms: Box::new([]),
-                            split_words: None,
-                            is_prefix: false,
-                            prefix_of: Box::new([]),
-                        },
-                        *right_positions.start(),
-                        1,
-                    )
-                } else {
-                    // No word pairs if the phrase does not have a regular word as its first term
-                    return Ok(vec![]);
-                }
-            }
-        };
-
-    let (left_derivations, left_end_position) = from_node_data;
+    let (right_term, right_start_position, right_ngram_length) =
+        (term_interner.get(*right_value), *right_positions.start(), right_positions.len());
 
     if left_end_position + 1 != right_start_position {
         // We want to ignore this pair of terms
         // Unconditionally walk through the edge without computing the docids
         // This can happen when, in a query like `the sun flowers are beautiful`, the term
-        // `flowers` is removed by the words ranking rule due to the terms matching strategy.
+        // `flowers` is removed by the `words` ranking rule.
         // The remaining query graph represents `the sun .. are beautiful`
         // but `sun` and `are` have no proximity condition between them
         return Ok(vec![(0, EdgeCondition::Unconditional)]);
     }
 
     let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new();
 
-    if let Some(right_prefix) = right_derivations.use_prefix_db {
-        for (left_phrase, left_word) in left_derivations.iter().copied() {
+    if let Some(right_prefix) = right_term.use_prefix_db {
+        for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
             add_prefix_edges(
                 index,
                 txn,
@@ -172,37 +100,12 @@ pub fn build_step_visit_destination_node<'ctx, 'from_data>(
     // TODO: add safeguard in case the cartesian product is too large!
     // even if we restrict the word derivations to a maximum of 100, the size of the
     // cartesian product could reach a maximum of 10_000 derivations, which is way too much.
-    // mMaybe prioritise the product of zero typo derivations, then the product of zero-typo/one-typo
+    // Maybe prioritise the product of zero typo derivations, then the product of zero-typo/one-typo
     // + one-typo/zero-typo, then one-typo/one-typo, then ... until an arbitrary limit has been
     // reached
 
-    let first_word_right_phrase = if let Some(right_phrase_interned) = right_phrase {
-        let right_phrase = phrase_interner.get(right_phrase_interned);
-        right_phrase.words.first().copied().unwrap()
-    } else {
-        None
-    };
-    let right_single_word_iter: Vec<(Option<Interned<Phrase>>, Interned<String>)> =
-        right_derivations
-            .all_single_word_derivations_except_prefix_db()
-            .chain(first_word_right_phrase.iter().copied())
-            .map(|w| (right_phrase, w))
-            .collect();
-    let right_phrase_iter: Vec<(Option<Interned<Phrase>>, Interned<String>)> = right_derivations
-        .all_phrase_derivations()
-        .map(|right_phrase_interned: Interned<Phrase>| {
-            let right_phrase = phrase_interner.get(right_phrase_interned);
-            let first_word_right_phrase: Interned<String> =
-                right_phrase.words.first().unwrap().unwrap();
-            let r: (Option<Interned<Phrase>>, Interned<String>) =
-                (Some(right_phrase_interned), first_word_right_phrase);
-            r
-        })
-        .collect();
-    let mut right_word_iter = right_single_word_iter;
-    right_word_iter.extend(right_phrase_iter);
-
-    for (left_phrase, left_word) in left_derivations.iter().copied() {
-        for (right_phrase, right_word) in right_word_iter.iter().copied() {
+    for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
+        for (right_word, right_phrase) in first_word_of_term_iter(right_term, phrase_interner) {
             add_non_prefix_edges(
                 index,
                 txn,
diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs
index 8dfe805c7..0acee0329 100644
--- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs
+++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs
@@ -29,7 +29,7 @@ pub fn compute_docids<'ctx>(
             .unwrap_or_default();
         if !docids.is_empty() {
             for phrase in phrases {
-                docids &= ctx.query_term_docids.get_phrase_docids(
+                docids &= ctx.term_docids.get_phrase_docids(
                     index,
                     txn,
                     db_cache,
@@ -56,7 +56,7 @@ pub fn compute_docids<'ctx>(
             .unwrap_or_default();
         if !docids.is_empty() {
             for phrase in phrases {
-                docids &= ctx.query_term_docids.get_phrase_docids(
+                docids &= ctx.term_docids.get_phrase_docids(
                     index,
                     txn,
                     db_cache,
diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs
index 876bd3ac0..2cfee0b65 100644
--- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs
+++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs
@@ -40,7 +40,6 @@ pub enum ProximityGraph {}
 
 impl RankingRuleGraphTrait for ProximityGraph {
     type EdgeCondition = ProximityEdge;
-    type BuildVisitedFromNode = (Vec<(Option<Interned<Phrase>>, Interned<String>)>, i8);
 
     fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
         let ProximityEdge { pairs, proximity } = edge;
@@ -55,25 +54,13 @@ impl RankingRuleGraphTrait for ProximityGraph {
         compute_docids::compute_docids(ctx, edge, universe)
     }
 
-    fn build_step_visit_source_node<'ctx>(
-        ctx: &mut SearchContext<'ctx>,
-        from_node: &QueryNode,
-    ) -> Result<Option<Self::BuildVisitedFromNode>> {
-        build::visit_from_node(ctx, from_node)
-    }
-
-    fn build_step_visit_destination_node<'from_data, 'ctx: 'from_data>(
+    fn build_edges<'ctx>(
         ctx: &mut SearchContext<'ctx>,
         conditions_interner: &mut Interner<Self::EdgeCondition>,
+        source_node: &QueryNode,
         dest_node: &QueryNode,
-        source_node_data: &'from_data Self::BuildVisitedFromNode,
     ) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
-        build::build_step_visit_destination_node(
-            ctx,
-            conditions_interner,
-            source_node_data,
-            dest_node,
-        )
+        build::build_edges(ctx, conditions_interner, source_node, dest_node)
     }
 
     fn log_state(
diff --git a/milli/src/search/new/ranking_rule_graph/typo/mod.rs b/milli/src/search/new/ranking_rule_graph/typo/mod.rs
index 9b80cd314..6b832f9b2 100644
--- a/milli/src/search/new/ranking_rule_graph/typo/mod.rs
+++ b/milli/src/search/new/ranking_rule_graph/typo/mod.rs
@@ -4,28 +4,24 @@ use super::empty_paths_cache::EmptyPathsCache;
 use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
 use crate::search::new::interner::{Interned, Interner};
 use crate::search::new::logger::SearchLogger;
-use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
+use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
 use crate::search::new::small_bitmap::SmallBitmap;
 use crate::search::new::{QueryGraph, QueryNode, SearchContext};
 use crate::Result;
 
 #[derive(Clone, PartialEq, Eq, Hash)]
-pub enum TypoEdge {
-    Phrase { phrase: Interned<Phrase> },
-    Word { derivations: Interned<WordDerivations>, nbr_typos: u8 },
+pub struct TypoEdge {
+    term: Interned<QueryTerm>,
+    nbr_typos: u8,
 }
 
 pub enum TypoGraph {}
 
 impl RankingRuleGraphTrait for TypoGraph {
     type EdgeCondition = TypoEdge;
-    type BuildVisitedFromNode = ();
 
     fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
-        match edge {
-            TypoEdge::Phrase { .. } => ", 0 typos".to_owned(),
-            TypoEdge::Word { nbr_typos, .. } => format!(", {nbr_typos} typos"),
-        }
+        format!(", {} typos", edge.nbr_typos)
     }
 
     fn resolve_edge_condition<'db_cache, 'ctx>(
@@ -39,124 +35,101 @@ impl RankingRuleGraphTrait for TypoGraph {
             db_cache,
             word_interner,
             phrase_interner,
-            derivations_interner,
-            query_term_docids,
+            term_interner,
+            term_docids: query_term_docids,
         } = ctx;
-        match edge {
-            &TypoEdge::Phrase { phrase } => Ok(universe
-                & query_term_docids.get_phrase_docids(
-                    index,
-                    txn,
-                    db_cache,
-                    word_interner,
-                    phrase_interner,
-                    phrase,
-                )?),
-            TypoEdge::Word { derivations, .. } => {
-                let docids = universe
-                    & query_term_docids.get_word_derivations_docids(
-                        index,
-                        txn,
-                        db_cache,
-                        word_interner,
-                        derivations_interner,
-                        phrase_interner,
-                        *derivations,
-                    )?;
-                Ok(docids)
-            }
-        }
+        let docids = universe
+            & query_term_docids.get_query_term_docids(
+                index,
+                txn,
+                db_cache,
+                word_interner,
+                term_interner,
+                phrase_interner,
+                edge.term,
+            )?;
+
+        Ok(docids)
     }
 
-    fn build_step_visit_source_node<'ctx>(
-        _ctx: &mut SearchContext<'ctx>,
-        _from_node: &QueryNode,
-    ) -> Result<Option<Self::BuildVisitedFromNode>> {
-        Ok(Some(()))
-    }
-
-    fn build_step_visit_destination_node<'from_data, 'ctx: 'from_data>(
+    fn build_edges<'ctx>(
         ctx: &mut SearchContext<'ctx>,
         conditions_interner: &mut Interner<Self::EdgeCondition>,
+        _from_node: &QueryNode,
         to_node: &QueryNode,
-        _from_node_data: &'from_data Self::BuildVisitedFromNode,
     ) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
-        let SearchContext { derivations_interner, .. } = ctx;
+        let SearchContext { term_interner, .. } = ctx;
         match to_node {
-            QueryNode::Term(LocatedQueryTerm { value, positions }) => match *value {
-                QueryTerm::Phrase { phrase } => Ok(vec![(
-                    0,
-                    EdgeCondition::Conditional(
-                        conditions_interner.insert(TypoEdge::Phrase { phrase }),
-                    ),
-                )]),
-                QueryTerm::Word { derivations } => {
-                    let mut edges = vec![];
-                    // Ngrams have a base typo cost
-                    // 2-gram -> equivalent to 1 typo
-                    // 3-gram -> equivalent to 2 typos
-                    let base_cost = positions.len().max(2) as u8;
+            QueryNode::Term(LocatedQueryTerm { value, positions }) => {
+                let mut edges = vec![];
+                // Ngrams have a base typo cost
+                // 2-gram -> equivalent to 1 typo
+                // 3-gram -> equivalent to 2 typos
+                let base_cost = positions.len().max(2) as u8;
 
-                    for nbr_typos in 0..=2 {
-                        let derivations = derivations_interner.get(derivations).clone();
-                        let new_derivations = match nbr_typos {
-                            0 => WordDerivations {
-                                original: derivations.original,
-                                is_prefix: derivations.is_prefix,
-                                zero_typo: derivations.zero_typo,
-                                prefix_of: derivations.prefix_of,
-                                synonyms: derivations.synonyms,
+                for nbr_typos in 0..=2 {
+                    let term = term_interner.get(*value).clone();
+                    let new_term = match nbr_typos {
+                        0 => QueryTerm {
+                            original: term.original,
+                            is_prefix: term.is_prefix,
+                            zero_typo: term.zero_typo,
+                            prefix_of: term.prefix_of,
+                            synonyms: term.synonyms,
+                            split_words: None,
+                            one_typo: Box::new([]),
+                            two_typos: Box::new([]),
+                            use_prefix_db: term.use_prefix_db,
+                            is_ngram: term.is_ngram,
+                            phrase: term.phrase,
+                        },
+                        1 => {
+                            // What about split words and synonyms here?
+                            QueryTerm {
+                                original: term.original,
+                                is_prefix: false,
+                                zero_typo: None,
+                                prefix_of: Box::new([]),
+                                synonyms: Box::new([]),
+                                split_words: term.split_words,
+                                one_typo: term.one_typo,
+                                two_typos: Box::new([]),
+                                use_prefix_db: None, // false because all items from use_prefix_db have 0 typos
+                                is_ngram: term.is_ngram,
+                                phrase: None,
+                            }
+                        }
+                        2 => {
+                            // What about split words and synonyms here?
+                            QueryTerm {
+                                original: term.original,
+                                zero_typo: None,
+                                is_prefix: false,
+                                prefix_of: Box::new([]),
+                                synonyms: Box::new([]),
                                 split_words: None,
                                 one_typo: Box::new([]),
-                                two_typos: Box::new([]),
-                                use_prefix_db: derivations.use_prefix_db,
-                            },
-                            1 => {
-                                // What about split words and synonyms here?
-                                WordDerivations {
-                                    original: derivations.original,
-                                    is_prefix: false,
-                                    zero_typo: None,
-                                    prefix_of: Box::new([]),
-                                    synonyms: Box::new([]),
-                                    split_words: derivations.split_words,
-                                    one_typo: derivations.one_typo,
-                                    two_typos: Box::new([]),
-                                    use_prefix_db: None, // false because all items from use_prefix_db have 0 typos
-                                }
+                                two_typos: term.two_typos,
+                                use_prefix_db: None, // false because all items from use_prefix_db have 0 typos
+                                is_ngram: term.is_ngram,
+                                phrase: None,
                             }
-                            2 => {
-                                // What about split words and synonyms here?
-                                WordDerivations {
-                                    original: derivations.original,
-                                    zero_typo: None,
-                                    is_prefix: false,
-                                    prefix_of: Box::new([]),
-                                    synonyms: Box::new([]),
-                                    split_words: None,
-                                    one_typo: Box::new([]),
-                                    two_typos: derivations.two_typos,
-                                    use_prefix_db: None, // false because all items from use_prefix_db have 0 typos
-                                }
-                            }
-                            _ => panic!(),
-                        };
-                        if !new_derivations.is_empty() {
-                            edges.push((
-                                nbr_typos as u8 + base_cost,
-                                EdgeCondition::Conditional(conditions_interner.insert(
-                                    TypoEdge::Word {
-                                        derivations: derivations_interner.insert(new_derivations),
-                                        nbr_typos: nbr_typos as u8,
-                                    },
-                                )),
-                            ))
                         }
+                        _ => panic!(),
+                    };
+                    if !new_term.is_empty() {
+                        edges.push((
+                            nbr_typos as u8 + base_cost,
+                            EdgeCondition::Conditional(conditions_interner.insert(TypoEdge {
+                                term: term_interner.insert(new_term),
+                                nbr_typos: nbr_typos as u8,
+                            })),
+                        ))
                     }
-                    Ok(edges)
                 }
-            },
+                Ok(edges)
+            }
             QueryNode::End => Ok(vec![(0, EdgeCondition::Unconditional)]),
             QueryNode::Deleted | QueryNode::Start => panic!(),
         }
diff --git a/milli/src/search/new/resolve_query_graph.rs b/milli/src/search/new/resolve_query_graph.rs
index 0ebeaa6df..5ce6ecec2 100644
--- a/milli/src/search/new/resolve_query_graph.rs
+++ b/milli/src/search/new/resolve_query_graph.rs
@@ -4,12 +4,12 @@ use std::collections::VecDeque;
 
 use fxhash::FxHashMap;
 use heed::{BytesDecode, RoTxn};
-use roaring::{MultiOps, RoaringBitmap};
+use roaring::RoaringBitmap;
 
 use super::db_cache::DatabaseCache;
 use super::interner::{Interned, Interner};
 use super::query_graph::QUERY_GRAPH_NODE_LENGTH_LIMIT;
-use super::query_term::{Phrase, QueryTerm, WordDerivations};
+use super::query_term::{Phrase, QueryTerm};
 use super::small_bitmap::SmallBitmap;
 use super::{QueryGraph, QueryNode, SearchContext};
 use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec};
@@ -17,7 +17,7 @@ use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec};
 #[derive(Default)]
 pub struct QueryTermDocIdsCache {
     pub phrases: FxHashMap<Interned<Phrase>, RoaringBitmap>,
-    pub derivations: FxHashMap<Interned<WordDerivations>, RoaringBitmap>,
+    pub terms: FxHashMap<Interned<QueryTerm>, RoaringBitmap>,
 }
 impl QueryTermDocIdsCache {
     /// Get the document ids associated with the given phrase
@@ -38,109 +38,53 @@ impl QueryTermDocIdsCache {
         let docids = &self.phrases[&phrase];
         Ok(docids)
     }
-
-    /// Get the document ids associated with the given word derivations
-    pub fn get_word_derivations_docids<'s, 'ctx>(
+    /// Get the document ids associated with the given term
+    pub fn get_query_term_docids<'s, 'ctx>(
         &'s mut self,
         index: &Index,
         txn: &'ctx RoTxn,
         db_cache: &mut DatabaseCache<'ctx>,
         word_interner: &Interner<String>,
-        derivations_interner: &Interner<WordDerivations>,
+        term_interner: &Interner<QueryTerm>,
         phrase_interner: &Interner<Phrase>,
-        derivations: Interned<WordDerivations>,
+        term_interned: Interned<QueryTerm>,
     ) -> Result<&'s RoaringBitmap> {
-        if self.derivations.contains_key(&derivations) {
-            return Ok(&self.derivations[&derivations]);
+        if self.terms.contains_key(&term_interned) {
+            return Ok(&self.terms[&term_interned]);
         };
-        let WordDerivations {
-            original: _,
-            is_prefix: _,
-            zero_typo,
-            prefix_of,
-            synonyms,
-            split_words,
-            one_typo,
-            two_typos,
-            use_prefix_db,
-        } = derivations_interner.get(derivations);
-        let mut or_docids = vec![];
-        for word in zero_typo
-            .iter()
-            .chain(prefix_of.iter())
-            .chain(one_typo.iter())
-            .chain(two_typos.iter())
-            .copied()
-        {
+        let mut docids = RoaringBitmap::new();
+
+        let term = term_interner.get(term_interned);
+        for word in term.all_single_words_except_prefix_db() {
             if let Some(word_docids) = db_cache.get_word_docids(index, txn, word_interner, word)? {
-                or_docids.push(word_docids);
+                docids |=
+                    RoaringBitmapCodec::bytes_decode(word_docids).ok_or(heed::Error::Decoding)?;
             }
         }
-        if let Some(prefix) = use_prefix_db {
+        for phrase in term.all_phrases() {
+            docids |= self.get_phrase_docids(
+                index,
+                txn,
+                db_cache,
+                word_interner,
+                phrase_interner,
+                phrase,
+            )?;
+        }
+
+        if let Some(prefix) = term.use_prefix_db {
             if let Some(prefix_docids) =
-                db_cache.get_word_prefix_docids(index, txn, word_interner, *prefix)?
+                db_cache.get_word_prefix_docids(index, txn, word_interner, prefix)?
             {
-                or_docids.push(prefix_docids);
+                docids |=
+                    RoaringBitmapCodec::bytes_decode(prefix_docids).ok_or(heed::Error::Decoding)?;
             }
         }
-        let mut docids = or_docids
-            .into_iter()
-            .map(|slice| RoaringBitmapCodec::bytes_decode(slice).unwrap())
-            .collect::<Vec<_>>();
-        for synonym in synonyms.iter().copied() {
-            // TODO: cache resolve_phrase?
-            docids.push(resolve_phrase(
-                index,
-                txn,
-                db_cache,
-                word_interner,
-                phrase_interner,
-                synonym,
-            )?);
-        }
-        if let Some(split_words) = split_words {
-            docids.push(resolve_phrase(
-                index,
-                txn,
-                db_cache,
-                word_interner,
-                phrase_interner,
-                *split_words,
-            )?);
-        }
-        let docids = MultiOps::union(docids);
-        let _ = self.derivations.insert(derivations, docids);
-        let docids = &self.derivations[&derivations];
+        let _ = self.terms.insert(term_interned, docids);
+        let docids = &self.terms[&term_interned];
         Ok(docids)
     }
-
-    /// Get the document ids associated with the given query term.
-    fn get_query_term_docids<'s, 'ctx>(
-        &'s mut self,
-        index: &Index,
-        txn: &'ctx RoTxn,
-        db_cache: &mut DatabaseCache<'ctx>,
-        word_interner: &Interner<String>,
-        derivations_interner: &Interner<WordDerivations>,
-        phrase_interner: &Interner<Phrase>,
-        term: &QueryTerm,
-    ) -> Result<&'s RoaringBitmap> {
-        match *term {
-            QueryTerm::Phrase { phrase } => {
-                self.get_phrase_docids(index, txn, db_cache, word_interner, phrase_interner, phrase)
-            }
-            QueryTerm::Word { derivations } => self.get_word_derivations_docids(
-                index,
-                txn,
-                db_cache,
-                word_interner,
-                derivations_interner,
-                phrase_interner,
-                derivations,
-            ),
-        }
-    }
 }
 
 pub fn resolve_query_graph<'ctx>(
@@ -154,8 +98,8 @@ pub fn resolve_query_graph<'ctx>(
         db_cache,
         word_interner,
         phrase_interner,
-        derivations_interner,
-        query_term_docids,
+        term_interner,
+        term_docids: query_term_docids,
         ..
     } = ctx;
     // TODO: there is a faster way to compute this big
@@ -183,16 +127,16 @@ pub fn resolve_query_graph<'ctx>(
         let node_docids = match n {
             QueryNode::Term(located_term) => {
-                let derivations_docids = query_term_docids.get_query_term_docids(
+                let term_docids = query_term_docids.get_query_term_docids(
                     index,
                     txn,
                     db_cache,
                     word_interner,
-                    derivations_interner,
+                    term_interner,
                     phrase_interner,
-                    &located_term.value,
+                    located_term.value,
                 )?;
-                predecessors_docids & derivations_docids
+                predecessors_docids & term_docids
             }
             QueryNode::Deleted => {
                panic!()
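
The net effect of this patch is that a query term is now a single interned `QueryTerm` carrying all of its derivations directly (plus the `phrase` and `is_ngram` metadata), instead of an enum pointing into a separate `WordDerivations` interner. The sketch below is not part of the patch; it is a hypothetical caller written against the types and methods shown above, only to illustrate how consumers resolve a term after the refactor:

fn describe_term(ctx: &SearchContext, term: Interned<QueryTerm>) -> String {
    // A single lookup in `term_interner` replaces the old two-step
    // `QueryTerm::Word { derivations }` -> `derivations_interner` indirection.
    let term = ctx.term_interner.get(term);
    let original = ctx.word_interner.get(term.original);
    // These two iterators (defined in query_term.rs above) now cover all
    // derivations uniformly, whether the term came from a word or a phrase.
    let n_words = term.all_single_words_except_prefix_db().count();
    let n_phrases = term.all_phrases().count();
    format!("{original}: {n_words} word derivations, {n_phrases} phrase derivations")
}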