mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Add typo ranking rule to new search impl
This commit is contained in:
parent
71f18e4379
commit
caa1e1b923
@ -70,14 +70,16 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
|
|||||||
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
||||||
assert!(universe.len() > 1);
|
assert!(universe.len() > 1);
|
||||||
let mut state = self.state.take().unwrap();
|
let mut state = self.state.take().unwrap();
|
||||||
|
if state.cheapest_paths_state.is_none() {
|
||||||
let Some(mut cheapest_paths_state) = state.cheapest_paths_state.take() else {
|
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
}
|
||||||
|
|
||||||
let mut paths = PathsMap::default();
|
let mut paths = PathsMap::default();
|
||||||
|
|
||||||
while paths.is_empty() {
|
while paths.is_empty() {
|
||||||
|
let Some(cheapest_paths_state) = state.cheapest_paths_state.take() else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
if let Some(next_cheapest_paths_state) = cheapest_paths_state
|
if let Some(next_cheapest_paths_state) = cheapest_paths_state
|
||||||
.compute_paths_of_next_lowest_cost(
|
.compute_paths_of_next_lowest_cost(
|
||||||
&mut state.graph,
|
&mut state.graph,
|
||||||
@ -85,13 +87,15 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
|
|||||||
&mut paths,
|
&mut paths,
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
cheapest_paths_state = next_cheapest_paths_state;
|
state.cheapest_paths_state = Some(next_cheapest_paths_state);
|
||||||
} else {
|
} else {
|
||||||
self.state = None;
|
break;
|
||||||
return Ok(None);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
state.cheapest_paths_state = Some(cheapest_paths_state);
|
|
||||||
|
if paths.is_empty() && state.cheapest_paths_state.is_none() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
G::log_state(&state.graph, &paths, &state.empty_paths_cache, logger);
|
G::log_state(&state.graph, &paths, &state.empty_paths_cache, logger);
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ use roaring::RoaringBitmap;
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::{io::Write, path::PathBuf};
|
use std::{io::Write, path::PathBuf};
|
||||||
|
|
||||||
|
use crate::new::ranking_rule_graph::typo::TypoGraph;
|
||||||
use crate::new::{QueryNode, QueryGraph};
|
use crate::new::{QueryNode, QueryGraph};
|
||||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||||
use crate::new::ranking_rule_graph::empty_paths_cache::EmptyPathsCache;
|
use crate::new::ranking_rule_graph::empty_paths_cache::EmptyPathsCache;
|
||||||
@ -38,6 +39,11 @@ pub enum SearchEvents {
|
|||||||
paths: PathsMap<u64>,
|
paths: PathsMap<u64>,
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: EmptyPathsCache,
|
||||||
},
|
},
|
||||||
|
TypoState {
|
||||||
|
graph: RankingRuleGraph<TypoGraph>,
|
||||||
|
paths: PathsMap<u64>,
|
||||||
|
empty_paths_cache: EmptyPathsCache,
|
||||||
|
},
|
||||||
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap },
|
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap },
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -132,6 +138,9 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone(), empty_paths_cache: empty_paths_cache.clone() })
|
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone(), empty_paths_cache: empty_paths_cache.clone() })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn log_typo_state(&mut self, query_graph: &RankingRuleGraph<TypoGraph>, paths_map: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache) {
|
||||||
|
self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.clone(), empty_paths_cache: empty_paths_cache.clone() })
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -251,7 +260,20 @@ results.{random} {{
|
|||||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||||
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||||
Self::proximity_graph_d2_description(graph, paths, empty_paths_cache, &mut new_file);
|
Self::ranking_rule_graph_d2_description(graph, paths, empty_paths_cache, &mut new_file);
|
||||||
|
writeln!(
|
||||||
|
&mut file,
|
||||||
|
"{id} {{
|
||||||
|
link: \"{id}.d2.svg\"
|
||||||
|
}}").unwrap();
|
||||||
|
},
|
||||||
|
SearchEvents::TypoState { graph, paths, empty_paths_cache } => {
|
||||||
|
let cur_ranking_rule = timestamp.len() - 1;
|
||||||
|
let cur_activated_id = activated_id(&timestamp);
|
||||||
|
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||||
|
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||||
|
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||||
|
Self::ranking_rule_graph_d2_description(graph, paths, empty_paths_cache, &mut new_file);
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut file,
|
&mut file,
|
||||||
"{id} {{
|
"{id} {{
|
||||||
@ -309,7 +331,7 @@ shape: class").unwrap();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn proximity_graph_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache, file: &mut File) {
|
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, paths: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache, file: &mut File) {
|
||||||
writeln!(file,"direction: right").unwrap();
|
writeln!(file,"direction: right").unwrap();
|
||||||
|
|
||||||
writeln!(file, "Proximity Graph {{").unwrap();
|
writeln!(file, "Proximity Graph {{").unwrap();
|
||||||
@ -333,7 +355,7 @@ shape: class").unwrap();
|
|||||||
writeln!(file,
|
writeln!(file,
|
||||||
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
|
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
|
||||||
cost = edge.cost,
|
cost = edge.cost,
|
||||||
edge_label = ProximityGraph::graphviz_edge_details_label(details)
|
edge_label = R::graphviz_edge_details_label(details)
|
||||||
).unwrap();
|
).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -354,7 +376,7 @@ shape: class").unwrap();
|
|||||||
}
|
}
|
||||||
writeln!(file, "}}").unwrap();
|
writeln!(file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
fn edge_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, edge_idx: u32, file: &mut File) -> String {
|
fn edge_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, paths_idx: &str, edge_idx: u32, file: &mut File) -> String {
|
||||||
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
|
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
|
||||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
||||||
let from_node_desc = match from_node {
|
let from_node_desc = match from_node {
|
||||||
@ -382,7 +404,7 @@ shape: class").unwrap();
|
|||||||
}}").unwrap();
|
}}").unwrap();
|
||||||
edge_id
|
edge_id
|
||||||
}
|
}
|
||||||
fn paths_d2_description<T>(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, paths: &PathsMap<T>, file: &mut File) {
|
fn paths_d2_description<R: RankingRuleGraphTrait, T>(graph: &RankingRuleGraph<R>, paths_idx: &str, paths: &PathsMap<T>, file: &mut File) {
|
||||||
for (edge_idx, rest) in paths.nodes.iter() {
|
for (edge_idx, rest) in paths.nodes.iter() {
|
||||||
let edge_id = Self::edge_d2_description(graph, paths_idx, *edge_idx, file);
|
let edge_id = Self::edge_d2_description(graph, paths_idx, *edge_idx, file);
|
||||||
for (dest_edge_idx, _) in rest.nodes.iter() {
|
for (dest_edge_idx, _) in rest.nodes.iter() {
|
||||||
|
@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
|
|||||||
use super::{
|
use super::{
|
||||||
ranking_rule_graph::{
|
ranking_rule_graph::{
|
||||||
empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, proximity::ProximityGraph,
|
empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, proximity::ProximityGraph,
|
||||||
RankingRuleGraph,
|
typo::TypoGraph, RankingRuleGraph,
|
||||||
},
|
},
|
||||||
RankingRule, RankingRuleQueryTrait,
|
RankingRule, RankingRuleQueryTrait,
|
||||||
};
|
};
|
||||||
@ -61,6 +61,14 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
|||||||
_empty_paths_cache: &EmptyPathsCache,
|
_empty_paths_cache: &EmptyPathsCache,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn log_typo_state(
|
||||||
|
&mut self,
|
||||||
|
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
|
paths: &PathsMap<u64>,
|
||||||
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
|
) {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||||
@ -104,4 +112,11 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
|||||||
paths: &PathsMap<u64>,
|
paths: &PathsMap<u64>,
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
fn log_typo_state(
|
||||||
|
&mut self,
|
||||||
|
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
|
paths: &PathsMap<u64>,
|
||||||
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@ pub mod empty_paths_cache;
|
|||||||
pub mod paths_map;
|
pub mod paths_map;
|
||||||
pub mod proximity;
|
pub mod proximity;
|
||||||
pub mod resolve_paths;
|
pub mod resolve_paths;
|
||||||
|
pub mod typo;
|
||||||
|
|
||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
|
131
milli/src/search/new/ranking_rule_graph/typo/mod.rs
Normal file
131
milli/src/search/new/ranking_rule_graph/typo/mod.rs
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
use heed::{BytesDecode, RoTxn};
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use super::empty_paths_cache::EmptyPathsCache;
|
||||||
|
use super::paths_map::PathsMap;
|
||||||
|
use super::{EdgeDetails, RankingRuleGraphTrait};
|
||||||
|
use crate::new::db_cache::DatabaseCache;
|
||||||
|
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||||
|
use crate::new::QueryNode;
|
||||||
|
use crate::{Index, Result, RoaringBitmapCodec};
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub enum TypoEdge {
|
||||||
|
Phrase,
|
||||||
|
Word { derivations: WordDerivations, nbr_typos: u8 },
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum TypoGraph {}
|
||||||
|
|
||||||
|
impl RankingRuleGraphTrait for TypoGraph {
|
||||||
|
type EdgeDetails = TypoEdge;
|
||||||
|
type BuildVisitedFromNode = ();
|
||||||
|
|
||||||
|
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String {
|
||||||
|
match edge {
|
||||||
|
TypoEdge::Phrase => format!(", 0 typos"),
|
||||||
|
TypoEdge::Word { nbr_typos, .. } => format!(", {nbr_typos} typos"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compute_docids<'db_cache, 'transaction>(
|
||||||
|
index: &Index,
|
||||||
|
txn: &'transaction RoTxn,
|
||||||
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
edge: &Self::EdgeDetails,
|
||||||
|
) -> Result<roaring::RoaringBitmap> {
|
||||||
|
match edge {
|
||||||
|
TypoEdge::Phrase => todo!(),
|
||||||
|
TypoEdge::Word { derivations, nbr_typos } => {
|
||||||
|
let words = match nbr_typos {
|
||||||
|
0 => &derivations.zero_typo,
|
||||||
|
1 => &derivations.one_typo,
|
||||||
|
2 => &derivations.two_typos,
|
||||||
|
_ => panic!(),
|
||||||
|
};
|
||||||
|
let mut docids = RoaringBitmap::new();
|
||||||
|
for word in words.iter() {
|
||||||
|
let Some(bytes) = db_cache.get_word_docids(index, txn, word)? else { continue };
|
||||||
|
let bitmap =
|
||||||
|
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
|
||||||
|
docids |= bitmap;
|
||||||
|
}
|
||||||
|
if *nbr_typos == 0 {
|
||||||
|
if let Some(bytes) =
|
||||||
|
db_cache.get_prefix_docids(index, txn, &derivations.original)?
|
||||||
|
{
|
||||||
|
let bitmap =
|
||||||
|
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
|
||||||
|
docids |= bitmap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(docids)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_visit_from_node<'transaction>(
|
||||||
|
_index: &Index,
|
||||||
|
_txn: &'transaction RoTxn,
|
||||||
|
_db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
from_node: &QueryNode,
|
||||||
|
) -> Result<Option<Self::BuildVisitedFromNode>> {
|
||||||
|
Ok(Some(()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_visit_to_node<'from_data, 'transaction: 'from_data>(
|
||||||
|
index: &Index,
|
||||||
|
txn: &'transaction RoTxn,
|
||||||
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
to_node: &QueryNode,
|
||||||
|
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||||
|
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
|
||||||
|
match to_node {
|
||||||
|
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
|
||||||
|
QueryTerm::Phrase(_) => Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase))]),
|
||||||
|
QueryTerm::Word { derivations } => {
|
||||||
|
let mut edges = vec![];
|
||||||
|
if !derivations.zero_typo.is_empty() || derivations.use_prefix_db {
|
||||||
|
edges.push((
|
||||||
|
0,
|
||||||
|
EdgeDetails::Data(TypoEdge::Word {
|
||||||
|
derivations: derivations.clone(),
|
||||||
|
nbr_typos: 0,
|
||||||
|
}),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
if !derivations.one_typo.is_empty() {
|
||||||
|
edges.push((
|
||||||
|
1,
|
||||||
|
EdgeDetails::Data(TypoEdge::Word {
|
||||||
|
derivations: derivations.clone(),
|
||||||
|
nbr_typos: 1,
|
||||||
|
}),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
if !derivations.two_typos.is_empty() {
|
||||||
|
edges.push((
|
||||||
|
2,
|
||||||
|
EdgeDetails::Data(TypoEdge::Word {
|
||||||
|
derivations: derivations.clone(),
|
||||||
|
nbr_typos: 2,
|
||||||
|
}),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
Ok(edges)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
QueryNode::End => Ok(vec![(0, EdgeDetails::Unconditional)]),
|
||||||
|
QueryNode::Deleted | QueryNode::Start => panic!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn log_state(
|
||||||
|
graph: &super::RankingRuleGraph<Self>,
|
||||||
|
paths: &PathsMap<u64>,
|
||||||
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
|
logger: &mut dyn crate::new::logger::SearchLogger<crate::new::QueryGraph>,
|
||||||
|
) {
|
||||||
|
logger.log_typo_state(graph, paths, empty_paths_cache);
|
||||||
|
}
|
||||||
|
}
|
@ -7,6 +7,7 @@ use super::logger::SearchLogger;
|
|||||||
use super::QueryGraph;
|
use super::QueryGraph;
|
||||||
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
|
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
|
||||||
use crate::new::ranking_rule_graph::proximity::ProximityGraph;
|
use crate::new::ranking_rule_graph::proximity::ProximityGraph;
|
||||||
|
use crate::new::ranking_rule_graph::typo::TypoGraph;
|
||||||
use crate::new::words::Words;
|
use crate::new::words::Words;
|
||||||
use crate::search::new::sort::Sort;
|
use crate::search::new::sort::Sort;
|
||||||
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
||||||
@ -125,9 +126,10 @@ pub fn execute_search<'transaction>(
|
|||||||
let words = &mut Words::new(TermsMatchingStrategy::Last);
|
let words = &mut Words::new(TermsMatchingStrategy::Last);
|
||||||
let sort = &mut Sort::new(index, txn, "release_date".to_owned(), true)?;
|
let sort = &mut Sort::new(index, txn, "release_date".to_owned(), true)?;
|
||||||
let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned());
|
let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned());
|
||||||
|
let typo = &mut GraphBasedRankingRule::<TypoGraph>::new("typo".to_owned());
|
||||||
// TODO: ranking rules given as argument
|
// TODO: ranking rules given as argument
|
||||||
let mut ranking_rules: Vec<&mut dyn RankingRule<'transaction, QueryGraph>> =
|
let mut ranking_rules: Vec<&mut dyn RankingRule<'transaction, QueryGraph>> =
|
||||||
vec![words, proximity, sort];
|
vec![words, typo, proximity, sort];
|
||||||
|
|
||||||
logger.ranking_rules(&ranking_rules);
|
logger.ranking_rules(&ranking_rules);
|
||||||
|
|
||||||
@ -152,7 +154,7 @@ pub fn execute_search<'transaction>(
|
|||||||
|
|
||||||
macro_rules! back {
|
macro_rules! back {
|
||||||
() => {
|
() => {
|
||||||
assert!(candidates[cur_ranking_rule_index].is_empty());
|
// assert!(candidates[cur_ranking_rule_index].is_empty());
|
||||||
logger.end_iteration_ranking_rule(
|
logger.end_iteration_ranking_rule(
|
||||||
cur_ranking_rule_index,
|
cur_ranking_rule_index,
|
||||||
ranking_rules[cur_ranking_rule_index],
|
ranking_rules[cur_ranking_rule_index],
|
||||||
@ -230,6 +232,7 @@ pub fn execute_search<'transaction>(
|
|||||||
);
|
);
|
||||||
|
|
||||||
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(index, txn, db_cache, logger, &candidates[cur_ranking_rule_index])? else {
|
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(index, txn, db_cache, logger, &candidates[cur_ranking_rule_index])? else {
|
||||||
|
// TODO: add remaining candidates automatically here?
|
||||||
back!();
|
back!();
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
@ -346,7 +349,7 @@ mod tests {
|
|||||||
let mut db_cache = DatabaseCache::default();
|
let mut db_cache = DatabaseCache::default();
|
||||||
|
|
||||||
let query_graph =
|
let query_graph =
|
||||||
make_query_graph(&index, &txn, &mut db_cache, "released from prison by the government")
|
make_query_graph(&index, &txn, &mut db_cache, "releases from poison by the government")
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let mut logger = DetailedSearchLogger::new("log");
|
let mut logger = DetailedSearchLogger::new("log");
|
||||||
|
Loading…
Reference in New Issue
Block a user