mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Add typo ranking rule to new search impl
This commit is contained in:
parent
71f18e4379
commit
caa1e1b923
@ -70,14 +70,16 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
|
||||
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
||||
assert!(universe.len() > 1);
|
||||
let mut state = self.state.take().unwrap();
|
||||
|
||||
let Some(mut cheapest_paths_state) = state.cheapest_paths_state.take() else {
|
||||
if state.cheapest_paths_state.is_none() {
|
||||
return Ok(None);
|
||||
};
|
||||
}
|
||||
|
||||
let mut paths = PathsMap::default();
|
||||
|
||||
while paths.is_empty() {
|
||||
let Some(cheapest_paths_state) = state.cheapest_paths_state.take() else {
|
||||
break;
|
||||
};
|
||||
if let Some(next_cheapest_paths_state) = cheapest_paths_state
|
||||
.compute_paths_of_next_lowest_cost(
|
||||
&mut state.graph,
|
||||
@ -85,13 +87,15 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
|
||||
&mut paths,
|
||||
)
|
||||
{
|
||||
cheapest_paths_state = next_cheapest_paths_state;
|
||||
state.cheapest_paths_state = Some(next_cheapest_paths_state);
|
||||
} else {
|
||||
self.state = None;
|
||||
return Ok(None);
|
||||
break;
|
||||
}
|
||||
}
|
||||
state.cheapest_paths_state = Some(cheapest_paths_state);
|
||||
|
||||
if paths.is_empty() && state.cheapest_paths_state.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
G::log_state(&state.graph, &paths, &state.empty_paths_cache, logger);
|
||||
|
||||
|
@ -3,6 +3,7 @@ use roaring::RoaringBitmap;
|
||||
use std::fs::File;
|
||||
use std::{io::Write, path::PathBuf};
|
||||
|
||||
use crate::new::ranking_rule_graph::typo::TypoGraph;
|
||||
use crate::new::{QueryNode, QueryGraph};
|
||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::new::ranking_rule_graph::empty_paths_cache::EmptyPathsCache;
|
||||
@ -38,6 +39,11 @@ pub enum SearchEvents {
|
||||
paths: PathsMap<u64>,
|
||||
empty_paths_cache: EmptyPathsCache,
|
||||
},
|
||||
TypoState {
|
||||
graph: RankingRuleGraph<TypoGraph>,
|
||||
paths: PathsMap<u64>,
|
||||
empty_paths_cache: EmptyPathsCache,
|
||||
},
|
||||
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap },
|
||||
}
|
||||
|
||||
@ -132,7 +138,10 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone(), empty_paths_cache: empty_paths_cache.clone() })
|
||||
}
|
||||
|
||||
|
||||
fn log_typo_state(&mut self, query_graph: &RankingRuleGraph<TypoGraph>, paths_map: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache) {
|
||||
self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.clone(), empty_paths_cache: empty_paths_cache.clone() })
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl DetailedSearchLogger {
|
||||
@ -251,7 +260,20 @@ results.{random} {{
|
||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||
Self::proximity_graph_d2_description(graph, paths, empty_paths_cache, &mut new_file);
|
||||
Self::ranking_rule_graph_d2_description(graph, paths, empty_paths_cache, &mut new_file);
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{id} {{
|
||||
link: \"{id}.d2.svg\"
|
||||
}}").unwrap();
|
||||
},
|
||||
SearchEvents::TypoState { graph, paths, empty_paths_cache } => {
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
let cur_activated_id = activated_id(×tamp);
|
||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||
Self::ranking_rule_graph_d2_description(graph, paths, empty_paths_cache, &mut new_file);
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{id} {{
|
||||
@ -309,7 +331,7 @@ shape: class").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
fn proximity_graph_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache, file: &mut File) {
|
||||
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, paths: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache, file: &mut File) {
|
||||
writeln!(file,"direction: right").unwrap();
|
||||
|
||||
writeln!(file, "Proximity Graph {{").unwrap();
|
||||
@ -333,7 +355,7 @@ shape: class").unwrap();
|
||||
writeln!(file,
|
||||
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
|
||||
cost = edge.cost,
|
||||
edge_label = ProximityGraph::graphviz_edge_details_label(details)
|
||||
edge_label = R::graphviz_edge_details_label(details)
|
||||
).unwrap();
|
||||
}
|
||||
}
|
||||
@ -354,7 +376,7 @@ shape: class").unwrap();
|
||||
}
|
||||
writeln!(file, "}}").unwrap();
|
||||
}
|
||||
fn edge_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, edge_idx: u32, file: &mut File) -> String {
|
||||
fn edge_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, paths_idx: &str, edge_idx: u32, file: &mut File) -> String {
|
||||
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
|
||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
||||
let from_node_desc = match from_node {
|
||||
@ -382,7 +404,7 @@ shape: class").unwrap();
|
||||
}}").unwrap();
|
||||
edge_id
|
||||
}
|
||||
fn paths_d2_description<T>(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, paths: &PathsMap<T>, file: &mut File) {
|
||||
fn paths_d2_description<R: RankingRuleGraphTrait, T>(graph: &RankingRuleGraph<R>, paths_idx: &str, paths: &PathsMap<T>, file: &mut File) {
|
||||
for (edge_idx, rest) in paths.nodes.iter() {
|
||||
let edge_id = Self::edge_d2_description(graph, paths_idx, *edge_idx, file);
|
||||
for (dest_edge_idx, _) in rest.nodes.iter() {
|
||||
|
@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
|
||||
use super::{
|
||||
ranking_rule_graph::{
|
||||
empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, proximity::ProximityGraph,
|
||||
RankingRuleGraph,
|
||||
typo::TypoGraph, RankingRuleGraph,
|
||||
},
|
||||
RankingRule, RankingRuleQueryTrait,
|
||||
};
|
||||
@ -61,6 +61,14 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
_empty_paths_cache: &EmptyPathsCache,
|
||||
) {
|
||||
}
|
||||
|
||||
fn log_typo_state(
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||
paths: &PathsMap<u64>,
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
) {
|
||||
}
|
||||
}
|
||||
|
||||
pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
@ -104,4 +112,11 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
paths: &PathsMap<u64>,
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
);
|
||||
|
||||
fn log_typo_state(
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||
paths: &PathsMap<u64>,
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
);
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ pub mod empty_paths_cache;
|
||||
pub mod paths_map;
|
||||
pub mod proximity;
|
||||
pub mod resolve_paths;
|
||||
pub mod typo;
|
||||
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
|
131
milli/src/search/new/ranking_rule_graph/typo/mod.rs
Normal file
131
milli/src/search/new/ranking_rule_graph/typo/mod.rs
Normal file
@ -0,0 +1,131 @@
|
||||
use heed::{BytesDecode, RoTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::paths_map::PathsMap;
|
||||
use super::{EdgeDetails, RankingRuleGraphTrait};
|
||||
use crate::new::db_cache::DatabaseCache;
|
||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::new::QueryNode;
|
||||
use crate::{Index, Result, RoaringBitmapCodec};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum TypoEdge {
|
||||
Phrase,
|
||||
Word { derivations: WordDerivations, nbr_typos: u8 },
|
||||
}
|
||||
|
||||
/// Marker type for the typo ranking rule graph.
///
/// It has no variants, so it can never be instantiated; it only exists to
/// carry the `RankingRuleGraphTrait` implementation.
pub enum TypoGraph {}
|
||||
|
||||
impl RankingRuleGraphTrait for TypoGraph {
|
||||
type EdgeDetails = TypoEdge;
|
||||
type BuildVisitedFromNode = ();
|
||||
|
||||
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String {
|
||||
match edge {
|
||||
TypoEdge::Phrase => format!(", 0 typos"),
|
||||
TypoEdge::Word { nbr_typos, .. } => format!(", {nbr_typos} typos"),
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_docids<'db_cache, 'transaction>(
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
edge: &Self::EdgeDetails,
|
||||
) -> Result<roaring::RoaringBitmap> {
|
||||
match edge {
|
||||
TypoEdge::Phrase => todo!(),
|
||||
TypoEdge::Word { derivations, nbr_typos } => {
|
||||
let words = match nbr_typos {
|
||||
0 => &derivations.zero_typo,
|
||||
1 => &derivations.one_typo,
|
||||
2 => &derivations.two_typos,
|
||||
_ => panic!(),
|
||||
};
|
||||
let mut docids = RoaringBitmap::new();
|
||||
for word in words.iter() {
|
||||
let Some(bytes) = db_cache.get_word_docids(index, txn, word)? else { continue };
|
||||
let bitmap =
|
||||
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
|
||||
docids |= bitmap;
|
||||
}
|
||||
if *nbr_typos == 0 {
|
||||
if let Some(bytes) =
|
||||
db_cache.get_prefix_docids(index, txn, &derivations.original)?
|
||||
{
|
||||
let bitmap =
|
||||
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
|
||||
docids |= bitmap;
|
||||
}
|
||||
}
|
||||
Ok(docids)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn build_visit_from_node<'transaction>(
|
||||
_index: &Index,
|
||||
_txn: &'transaction RoTxn,
|
||||
_db_cache: &mut DatabaseCache<'transaction>,
|
||||
from_node: &QueryNode,
|
||||
) -> Result<Option<Self::BuildVisitedFromNode>> {
|
||||
Ok(Some(()))
|
||||
}
|
||||
|
||||
fn build_visit_to_node<'from_data, 'transaction: 'from_data>(
|
||||
index: &Index,
|
||||
txn: &'transaction RoTxn,
|
||||
db_cache: &mut DatabaseCache<'transaction>,
|
||||
to_node: &QueryNode,
|
||||
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
|
||||
match to_node {
|
||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
|
||||
QueryTerm::Phrase(_) => Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase))]),
|
||||
QueryTerm::Word { derivations } => {
|
||||
let mut edges = vec![];
|
||||
if !derivations.zero_typo.is_empty() || derivations.use_prefix_db {
|
||||
edges.push((
|
||||
0,
|
||||
EdgeDetails::Data(TypoEdge::Word {
|
||||
derivations: derivations.clone(),
|
||||
nbr_typos: 0,
|
||||
}),
|
||||
))
|
||||
}
|
||||
if !derivations.one_typo.is_empty() {
|
||||
edges.push((
|
||||
1,
|
||||
EdgeDetails::Data(TypoEdge::Word {
|
||||
derivations: derivations.clone(),
|
||||
nbr_typos: 1,
|
||||
}),
|
||||
))
|
||||
}
|
||||
if !derivations.two_typos.is_empty() {
|
||||
edges.push((
|
||||
2,
|
||||
EdgeDetails::Data(TypoEdge::Word {
|
||||
derivations: derivations.clone(),
|
||||
nbr_typos: 2,
|
||||
}),
|
||||
))
|
||||
}
|
||||
Ok(edges)
|
||||
}
|
||||
},
|
||||
QueryNode::End => Ok(vec![(0, EdgeDetails::Unconditional)]),
|
||||
QueryNode::Deleted | QueryNode::Start => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn log_state(
|
||||
graph: &super::RankingRuleGraph<Self>,
|
||||
paths: &PathsMap<u64>,
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
logger: &mut dyn crate::new::logger::SearchLogger<crate::new::QueryGraph>,
|
||||
) {
|
||||
logger.log_typo_state(graph, paths, empty_paths_cache);
|
||||
}
|
||||
}
|
@ -7,6 +7,7 @@ use super::logger::SearchLogger;
|
||||
use super::QueryGraph;
|
||||
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
|
||||
use crate::new::ranking_rule_graph::proximity::ProximityGraph;
|
||||
use crate::new::ranking_rule_graph::typo::TypoGraph;
|
||||
use crate::new::words::Words;
|
||||
use crate::search::new::sort::Sort;
|
||||
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
||||
@ -125,9 +126,10 @@ pub fn execute_search<'transaction>(
|
||||
let words = &mut Words::new(TermsMatchingStrategy::Last);
|
||||
let sort = &mut Sort::new(index, txn, "release_date".to_owned(), true)?;
|
||||
let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned());
|
||||
let typo = &mut GraphBasedRankingRule::<TypoGraph>::new("typo".to_owned());
|
||||
// TODO: ranking rules given as argument
|
||||
let mut ranking_rules: Vec<&mut dyn RankingRule<'transaction, QueryGraph>> =
|
||||
vec![words, proximity, sort];
|
||||
vec![words, typo, proximity, sort];
|
||||
|
||||
logger.ranking_rules(&ranking_rules);
|
||||
|
||||
@ -152,7 +154,7 @@ pub fn execute_search<'transaction>(
|
||||
|
||||
macro_rules! back {
|
||||
() => {
|
||||
assert!(candidates[cur_ranking_rule_index].is_empty());
|
||||
// assert!(candidates[cur_ranking_rule_index].is_empty());
|
||||
logger.end_iteration_ranking_rule(
|
||||
cur_ranking_rule_index,
|
||||
ranking_rules[cur_ranking_rule_index],
|
||||
@ -230,6 +232,7 @@ pub fn execute_search<'transaction>(
|
||||
);
|
||||
|
||||
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(index, txn, db_cache, logger, &candidates[cur_ranking_rule_index])? else {
|
||||
// TODO: add remaining candidates automatically here?
|
||||
back!();
|
||||
continue;
|
||||
};
|
||||
@ -346,7 +349,7 @@ mod tests {
|
||||
let mut db_cache = DatabaseCache::default();
|
||||
|
||||
let query_graph =
|
||||
make_query_graph(&index, &txn, &mut db_cache, "released from prison by the government")
|
||||
make_query_graph(&index, &txn, &mut db_cache, "releases from poison by the government")
|
||||
.unwrap();
|
||||
|
||||
let mut logger = DetailedSearchLogger::new("log");
|
||||
|
Loading…
Reference in New Issue
Block a user