mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Reorganise initialisation of ranking rules + rename PathsMap -> PathSet
This commit is contained in:
parent
9ec9c204d3
commit
5155fd2bf1
@ -40,12 +40,25 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::logger::SearchLogger;
|
||||
use super::ranking_rule_graph::{
|
||||
EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait,
|
||||
EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, ProximityGraph,
|
||||
};
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
|
||||
impl Default for GraphBasedRankingRule<ProximityGraph> {
|
||||
fn default() -> Self {
|
||||
Self::new("proximity".to_owned())
|
||||
}
|
||||
}
|
||||
pub type Typo = GraphBasedRankingRule<TypoGraph>;
|
||||
impl Default for GraphBasedRankingRule<TypoGraph> {
|
||||
fn default() -> Self {
|
||||
Self::new("typo".to_owned())
|
||||
}
|
||||
}
|
||||
|
||||
/// A generic graph-based ranking rule
|
||||
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
|
||||
id: String,
|
||||
|
@ -98,7 +98,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
fn initial_universe(&mut self, universe: &RoaringBitmap) {
|
||||
self.initial_universe = Some(universe.clone());
|
||||
}
|
||||
fn ranking_rules(&mut self, rr: &[&mut dyn RankingRule<QueryGraph>]) {
|
||||
fn ranking_rules(&mut self, rr: &[Box<dyn RankingRule<QueryGraph>>]) {
|
||||
self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect());
|
||||
}
|
||||
|
||||
|
@ -19,7 +19,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
fn initial_universe(&mut self, universe: &RoaringBitmap);
|
||||
|
||||
/// Logs the ranking rules used to perform the search query
|
||||
fn ranking_rules(&mut self, rr: &[&mut dyn RankingRule<Q>]);
|
||||
fn ranking_rules(&mut self, rr: &[Box<dyn RankingRule<Q>>]);
|
||||
|
||||
/// Logs the start of a ranking rule's iteration.
|
||||
fn start_iteration_ranking_rule<'transaction>(
|
||||
@ -90,7 +90,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
|
||||
fn initial_universe(&mut self, _universe: &RoaringBitmap) {}
|
||||
|
||||
fn ranking_rules(&mut self, _rr: &[&mut dyn RankingRule<Q>]) {}
|
||||
fn ranking_rules(&mut self, _rr: &[Box<dyn RankingRule<Q>>]) {}
|
||||
|
||||
fn start_iteration_ranking_rule<'transaction>(
|
||||
&mut self,
|
||||
|
@ -17,7 +17,7 @@ mod words;
|
||||
|
||||
pub use logger::{DefaultSearchLogger, SearchLogger};
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
use charabia::Tokenize;
|
||||
use db_cache::DatabaseCache;
|
||||
@ -28,10 +28,10 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use self::interner::Interner;
|
||||
use self::query_term::{Phrase, WordDerivations};
|
||||
use self::ranking_rules::PlaceholderQuery;
|
||||
use self::resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache};
|
||||
use crate::search::new::graph_based_ranking_rule::GraphBasedRankingRule;
|
||||
use crate::search::new::graph_based_ranking_rule::{Proximity, Typo};
|
||||
use crate::search::new::query_term::located_query_terms_from_string;
|
||||
use crate::search::new::ranking_rule_graph::{ProximityGraph, TypoGraph};
|
||||
use crate::search::new::words::Words;
|
||||
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
||||
|
||||
@ -88,7 +88,9 @@ fn resolve_maximally_reduced_query_graph<'search>(
|
||||
TermsMatchingStrategy::All => vec![],
|
||||
};
|
||||
// don't remove the first term
|
||||
positions_to_remove.remove(0);
|
||||
if !positions_to_remove.is_empty() {
|
||||
positions_to_remove.remove(0);
|
||||
}
|
||||
loop {
|
||||
if positions_to_remove.is_empty() {
|
||||
break;
|
||||
@ -102,48 +104,172 @@ fn resolve_maximally_reduced_query_graph<'search>(
|
||||
|
||||
Ok(docids)
|
||||
}
|
||||
fn get_ranking_rules_for_placeholder_search<'search>(
|
||||
ctx: &SearchContext<'search>,
|
||||
) -> Result<Vec<Box<dyn RankingRule<'search, PlaceholderQuery>>>> {
|
||||
// let sort = false;
|
||||
// let mut asc = HashSet::new();
|
||||
// let mut desc = HashSet::new();
|
||||
let /*mut*/ ranking_rules: Vec<Box<dyn RankingRule<PlaceholderQuery>>> = vec![];
|
||||
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
|
||||
for rr in settings_ranking_rules {
|
||||
// Add Words before any of: typo, proximity, attribute, exactness
|
||||
match rr {
|
||||
crate::Criterion::Words
|
||||
| crate::Criterion::Typo
|
||||
| crate::Criterion::Attribute
|
||||
| crate::Criterion::Proximity
|
||||
| crate::Criterion::Exactness => continue,
|
||||
crate::Criterion::Sort => todo!(),
|
||||
crate::Criterion::Asc(_) => todo!(),
|
||||
crate::Criterion::Desc(_) => todo!(),
|
||||
}
|
||||
}
|
||||
Ok(ranking_rules)
|
||||
}
|
||||
fn get_ranking_rules_for_query_graph_search<'search>(
|
||||
ctx: &SearchContext<'search>,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
) -> Result<Vec<Box<dyn RankingRule<'search, QueryGraph>>>> {
|
||||
// query graph search
|
||||
let mut words = false;
|
||||
let mut typo = false;
|
||||
let mut proximity = false;
|
||||
let sort = false;
|
||||
let attribute = false;
|
||||
let exactness = false;
|
||||
let mut asc = HashSet::new();
|
||||
let mut desc = HashSet::new();
|
||||
|
||||
let mut ranking_rules: Vec<Box<dyn RankingRule<QueryGraph>>> = vec![];
|
||||
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
|
||||
for rr in settings_ranking_rules {
|
||||
// Add Words before any of: typo, proximity, attribute, exactness
|
||||
match rr {
|
||||
crate::Criterion::Typo
|
||||
| crate::Criterion::Attribute
|
||||
| crate::Criterion::Proximity
|
||||
| crate::Criterion::Exactness => {
|
||||
if !words {
|
||||
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
|
||||
words = true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
match rr {
|
||||
crate::Criterion::Words => {
|
||||
if words {
|
||||
continue;
|
||||
}
|
||||
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
|
||||
words = true;
|
||||
}
|
||||
crate::Criterion::Typo => {
|
||||
if typo {
|
||||
continue;
|
||||
}
|
||||
typo = true;
|
||||
ranking_rules.push(Box::<Typo>::default());
|
||||
}
|
||||
crate::Criterion::Proximity => {
|
||||
if proximity {
|
||||
continue;
|
||||
}
|
||||
proximity = true;
|
||||
ranking_rules.push(Box::<Proximity>::default());
|
||||
}
|
||||
crate::Criterion::Attribute => {
|
||||
if attribute {
|
||||
continue;
|
||||
}
|
||||
todo!();
|
||||
// attribute = false;
|
||||
}
|
||||
crate::Criterion::Sort => {
|
||||
if sort {
|
||||
continue;
|
||||
}
|
||||
todo!();
|
||||
// sort = false;
|
||||
}
|
||||
crate::Criterion::Exactness => {
|
||||
if exactness {
|
||||
continue;
|
||||
}
|
||||
todo!();
|
||||
// exactness = false;
|
||||
}
|
||||
crate::Criterion::Asc(field) => {
|
||||
if asc.contains(&field) {
|
||||
continue;
|
||||
}
|
||||
asc.insert(field);
|
||||
todo!();
|
||||
}
|
||||
crate::Criterion::Desc(field) => {
|
||||
if desc.contains(&field) {
|
||||
continue;
|
||||
}
|
||||
desc.insert(field);
|
||||
todo!();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(ranking_rules)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn execute_search<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
query: &str,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
filters: Option<Filter>,
|
||||
from: usize,
|
||||
length: usize,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
|
||||
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
) -> Result<Vec<u32>> {
|
||||
assert!(!query.is_empty());
|
||||
let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None)?;
|
||||
let graph = QueryGraph::from_query(ctx, query_terms)?;
|
||||
|
||||
logger.initial_query(&graph);
|
||||
|
||||
let universe = if let Some(filters) = filters {
|
||||
filters.evaluate(ctx.txn, ctx.index)?
|
||||
} else {
|
||||
ctx.index.documents_ids(ctx.txn)?
|
||||
};
|
||||
|
||||
let universe = resolve_maximally_reduced_query_graph(
|
||||
ctx,
|
||||
&universe,
|
||||
&graph,
|
||||
TermsMatchingStrategy::Last,
|
||||
logger,
|
||||
)?;
|
||||
// TODO: create ranking rules here
|
||||
// TODO: other way to tell whether it is a placeholder search
|
||||
// This way of doing things is not correct because if someone searches
|
||||
// for a word that does not appear in any document, the word will be removed
|
||||
// from the graph and thus its number of nodes will be == 2
|
||||
// But in that case, we should return no results.
|
||||
//
|
||||
// The search is a placeholder search only if there are no tokens?
|
||||
if graph.nodes.len() > 2 {
|
||||
let universe = resolve_maximally_reduced_query_graph(
|
||||
ctx,
|
||||
&universe,
|
||||
&graph,
|
||||
terms_matching_strategy,
|
||||
query_graph_logger,
|
||||
)?;
|
||||
|
||||
logger.initial_universe(&universe);
|
||||
|
||||
let words = &mut Words::new(TermsMatchingStrategy::Last);
|
||||
// let sort = &mut Sort::new(index, txn, "release_date".to_owned(), true)?;
|
||||
let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned());
|
||||
let typo = &mut GraphBasedRankingRule::<TypoGraph>::new("typo".to_owned());
|
||||
// TODO: ranking rules given as argument
|
||||
let ranking_rules: Vec<&mut dyn RankingRule<'search, QueryGraph>> =
|
||||
vec![words, typo, proximity /*sort*/];
|
||||
|
||||
bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, logger)
|
||||
let ranking_rules = get_ranking_rules_for_query_graph_search(ctx, terms_matching_strategy)?;
|
||||
bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)
|
||||
} else {
|
||||
let ranking_rules = get_ranking_rules_for_placeholder_search(ctx)?;
|
||||
bucket_sort(
|
||||
ctx,
|
||||
ranking_rules,
|
||||
&PlaceholderQuery,
|
||||
&universe,
|
||||
from,
|
||||
length,
|
||||
placeholder_search_logger,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@ -182,10 +308,11 @@ mod tests {
|
||||
let results = execute_search(
|
||||
&mut ctx,
|
||||
"zero config",
|
||||
TermsMatchingStrategy::Last,
|
||||
None,
|
||||
0,
|
||||
20,
|
||||
// &mut DefaultSearchLogger,
|
||||
&mut DefaultSearchLogger,
|
||||
&mut logger,
|
||||
)
|
||||
.unwrap();
|
||||
@ -279,10 +406,11 @@ mod tests {
|
||||
let results = execute_search(
|
||||
&mut ctx,
|
||||
"releases from poison by the government",
|
||||
TermsMatchingStrategy::Last,
|
||||
None,
|
||||
0,
|
||||
20,
|
||||
// &mut DefaultSearchLogger,
|
||||
&mut DefaultSearchLogger,
|
||||
&mut logger,
|
||||
)
|
||||
.unwrap();
|
||||
|
@ -1,4 +1,4 @@
|
||||
use super::paths_map::PathSet;
|
||||
use super::path_set::PathSet;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
|
||||
/// A cache which stores sufficient conditions for a path
|
||||
@ -10,7 +10,7 @@ pub struct EmptyPathsCache {
|
||||
pub empty_edges: SmallBitmap,
|
||||
/// A set of path prefixes that resolve to no documents.
|
||||
pub empty_prefixes: PathSet,
|
||||
/// A set of empty couple of edge indexes that resolve to no documents.
|
||||
/// A set of empty couples of edge indexes that resolve to no documents.
|
||||
pub empty_couple_edges: Vec<SmallBitmap>,
|
||||
}
|
||||
impl EmptyPathsCache {
|
||||
|
@ -9,7 +9,7 @@ mod build;
|
||||
mod cheapest_paths;
|
||||
mod edge_docids_cache;
|
||||
mod empty_paths_cache;
|
||||
mod paths_map;
|
||||
mod path_set;
|
||||
|
||||
/// Implementation of the `proximity` ranking rule
|
||||
mod proximity;
|
||||
|
@ -70,16 +70,15 @@ pub struct RankingRuleOutput<Q> {
|
||||
|
||||
pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
mut ranking_rules: Vec<&mut dyn RankingRule<'search, Q>>,
|
||||
query_graph: &Q,
|
||||
mut ranking_rules: Vec<Box<dyn RankingRule<'search, Q>>>,
|
||||
query: &Q,
|
||||
universe: &RoaringBitmap,
|
||||
from: usize,
|
||||
length: usize,
|
||||
logger: &mut dyn SearchLogger<Q>,
|
||||
) -> Result<Vec<u32>> {
|
||||
logger.initial_query(query_graph);
|
||||
|
||||
logger.ranking_rules(&ranking_rules);
|
||||
logger.initial_universe(universe);
|
||||
|
||||
let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? {
|
||||
ctx.index.fields_ids_map(ctx.txn)?.id(field)
|
||||
@ -92,8 +91,8 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>(
|
||||
}
|
||||
|
||||
let ranking_rules_len = ranking_rules.len();
|
||||
logger.start_iteration_ranking_rule(0, ranking_rules[0], query_graph, universe);
|
||||
ranking_rules[0].start_iteration(ctx, logger, universe, query_graph)?;
|
||||
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
|
||||
ranking_rules[0].start_iteration(ctx, logger, universe, query)?;
|
||||
|
||||
let mut ranking_rule_universes: Vec<RoaringBitmap> =
|
||||
vec![RoaringBitmap::default(); ranking_rules_len];
|
||||
@ -109,7 +108,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>(
|
||||
assert!(ranking_rule_universes[cur_ranking_rule_index].is_empty());
|
||||
logger.end_iteration_ranking_rule(
|
||||
cur_ranking_rule_index,
|
||||
ranking_rules[cur_ranking_rule_index],
|
||||
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||
&ranking_rule_universes[cur_ranking_rule_index],
|
||||
);
|
||||
ranking_rule_universes[cur_ranking_rule_index].clear();
|
||||
@ -149,7 +148,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>(
|
||||
// then just skip the bucket
|
||||
logger.skip_bucket_ranking_rule(
|
||||
cur_ranking_rule_index,
|
||||
ranking_rules[cur_ranking_rule_index],
|
||||
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||
&candidates,
|
||||
);
|
||||
} else {
|
||||
@ -159,7 +158,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>(
|
||||
all_candidates.split_at(from - cur_offset);
|
||||
logger.skip_bucket_ranking_rule(
|
||||
cur_ranking_rule_index,
|
||||
ranking_rules[cur_ranking_rule_index],
|
||||
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||
&skipped_candidates.into_iter().collect(),
|
||||
);
|
||||
let candidates = candidates
|
||||
@ -186,7 +185,6 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>(
|
||||
// anything, just extend the results and go back to the parent ranking rule.
|
||||
if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 {
|
||||
maybe_add_to_results!(&ranking_rule_universes[cur_ranking_rule_index]);
|
||||
ranking_rule_universes[cur_ranking_rule_index].clear();
|
||||
back!();
|
||||
continue;
|
||||
}
|
||||
@ -198,7 +196,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>(
|
||||
|
||||
logger.next_bucket_ranking_rule(
|
||||
cur_ranking_rule_index,
|
||||
ranking_rules[cur_ranking_rule_index],
|
||||
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||
&ranking_rule_universes[cur_ranking_rule_index],
|
||||
&next_bucket.candidates,
|
||||
);
|
||||
@ -218,7 +216,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>(
|
||||
ranking_rule_universes[cur_ranking_rule_index] = next_bucket.candidates.clone();
|
||||
logger.start_iteration_ranking_rule(
|
||||
cur_ranking_rule_index,
|
||||
ranking_rules[cur_ranking_rule_index],
|
||||
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||
&next_bucket.query,
|
||||
&ranking_rule_universes[cur_ranking_rule_index],
|
||||
);
|
||||
|
Loading…
x
Reference in New Issue
Block a user