mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
Integrate the new Highlighter in the search
This commit is contained in:
parent
ebe23b04c9
commit
9c5f64769a
@ -97,8 +97,8 @@ pub use self::heed_codec::{
|
|||||||
};
|
};
|
||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::{
|
pub use self::search::{
|
||||||
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord,
|
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
|
||||||
MatchingWords, Search, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||||
|
@ -289,8 +289,7 @@ mod tests {
|
|||||||
|
|
||||||
use charabia::TokenKind;
|
use charabia::TokenKind;
|
||||||
|
|
||||||
use super::*;
|
use super::{MatchingWords, *};
|
||||||
use crate::MatchingWords;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_bytes_to_highlight() {
|
fn test_bytes_to_highlight() {
|
||||||
|
@ -5,9 +5,7 @@ use once_cell::sync::Lazy;
|
|||||||
use roaring::bitmap::RoaringBitmap;
|
use roaring::bitmap::RoaringBitmap;
|
||||||
|
|
||||||
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
|
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
|
||||||
pub use self::matches::{
|
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
|
||||||
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
|
|
||||||
};
|
|
||||||
use crate::{
|
use crate::{
|
||||||
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
|
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
|
||||||
};
|
};
|
||||||
@ -109,9 +107,9 @@ impl<'a> Search<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(&self) -> Result<SearchResult> {
|
pub fn execute(&self) -> Result<SearchResult> {
|
||||||
let mut ctx = SearchContext::new(self.index, self.rtxn);
|
let ctx = SearchContext::new(self.index, self.rtxn);
|
||||||
execute_search(
|
execute_search(
|
||||||
&mut ctx,
|
ctx,
|
||||||
&self.query,
|
&self.query,
|
||||||
self.terms_matching_strategy,
|
self.terms_matching_strategy,
|
||||||
self.exhaustive_number_hits,
|
self.exhaustive_number_hits,
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use std::cmp::Reverse;
|
use std::cmp::Reverse;
|
||||||
|
use std::fmt;
|
||||||
use std::ops::RangeInclusive;
|
use std::ops::RangeInclusive;
|
||||||
|
|
||||||
use charabia::Token;
|
use charabia::Token;
|
||||||
@ -23,6 +24,7 @@ pub struct LocatedMatchingWords {
|
|||||||
|
|
||||||
/// Structure created from a query tree
|
/// Structure created from a query tree
|
||||||
/// referencing words that match the given query tree.
|
/// referencing words that match the given query tree.
|
||||||
|
#[derive(Default)]
|
||||||
pub struct MatchingWords {
|
pub struct MatchingWords {
|
||||||
word_interner: DedupInterner<String>,
|
word_interner: DedupInterner<String>,
|
||||||
phrase_interner: DedupInterner<Phrase>,
|
phrase_interner: DedupInterner<Phrase>,
|
||||||
@ -240,6 +242,40 @@ impl<'a> PartialMatch<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for MatchingWords {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
let MatchingWords { word_interner, phrase_interner, phrases, words } = self;
|
||||||
|
|
||||||
|
let phrases: Vec<_> = phrases
|
||||||
|
.iter()
|
||||||
|
.map(|p| {
|
||||||
|
(
|
||||||
|
phrase_interner
|
||||||
|
.get(p.value)
|
||||||
|
.words
|
||||||
|
.iter()
|
||||||
|
.map(|w| w.map_or("STOP_WORD", |w| word_interner.get(w)))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(" "),
|
||||||
|
p.positions.clone(),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let words: Vec<_> = words
|
||||||
|
.iter()
|
||||||
|
.flat_map(|w| {
|
||||||
|
w.value
|
||||||
|
.iter()
|
||||||
|
.map(|s| (word_interner.get(*s), w.positions.clone(), w.is_prefix))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
f.debug_struct("MatchingWords").field("phrases", &phrases).field("words", &words).finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) mod tests {
|
pub(crate) mod tests {
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use charabia::{SeparatorKind, Token, Tokenizer};
|
use charabia::{SeparatorKind, Token, Tokenizer};
|
||||||
use matching_words::{MatchType, MatchingWords, PartialMatch, WordId};
|
pub use matching_words::MatchingWords;
|
||||||
|
use matching_words::{MatchType, PartialMatch, WordId};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
use super::query_term::LocatedQueryTerm;
|
use super::query_term::LocatedQueryTerm;
|
||||||
@ -23,12 +24,7 @@ pub struct MatcherBuilder<'a, A> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, A> MatcherBuilder<'a, A> {
|
impl<'a, A> MatcherBuilder<'a, A> {
|
||||||
pub fn new(
|
pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'a, 'a, A>) -> Self {
|
||||||
ctx: SearchContext,
|
|
||||||
located_terms: Vec<LocatedQueryTerm>,
|
|
||||||
tokenizer: Tokenizer<'a, 'a, A>,
|
|
||||||
) -> Self {
|
|
||||||
let matching_words = MatchingWords::new(ctx, located_terms);
|
|
||||||
Self {
|
Self {
|
||||||
matching_words,
|
matching_words,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
@ -514,7 +510,8 @@ mod tests {
|
|||||||
let tokenizer = TokenizerBuilder::new().build();
|
let tokenizer = TokenizerBuilder::new().build();
|
||||||
let tokens = tokenizer.tokenize(query);
|
let tokens = tokenizer.tokenize(query);
|
||||||
let query_terms = located_query_terms_from_string(&mut ctx, tokens, None).unwrap();
|
let query_terms = located_query_terms_from_string(&mut ctx, tokens, None).unwrap();
|
||||||
Self::new(ctx, query_terms, TokenizerBuilder::new().build())
|
let matching_words = MatchingWords::new(ctx, query_terms);
|
||||||
|
Self::new(matching_words, TokenizerBuilder::new().build())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ mod graph_based_ranking_rule;
|
|||||||
mod interner;
|
mod interner;
|
||||||
mod limits;
|
mod limits;
|
||||||
mod logger;
|
mod logger;
|
||||||
mod matches;
|
pub mod matches;
|
||||||
mod query_graph;
|
mod query_graph;
|
||||||
mod query_term;
|
mod query_term;
|
||||||
mod ranking_rule_graph;
|
mod ranking_rule_graph;
|
||||||
@ -271,7 +271,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
|||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn execute_search(
|
pub fn execute_search(
|
||||||
ctx: &mut SearchContext,
|
mut ctx: SearchContext,
|
||||||
query: &Option<String>,
|
query: &Option<String>,
|
||||||
terms_matching_strategy: TermsMatchingStrategy,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
exhaustive_number_hits: bool,
|
exhaustive_number_hits: bool,
|
||||||
@ -284,21 +284,22 @@ pub fn execute_search(
|
|||||||
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
|
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) -> Result<SearchResult> {
|
) -> Result<SearchResult> {
|
||||||
let mut universe = if let Some(filters) = filters {
|
let mut universe = if let Some(filters) = filters {
|
||||||
filters.evaluate(ctx.txn, ctx.index)?
|
filters.evaluate(&mut ctx.txn, &mut ctx.index)?
|
||||||
} else {
|
} else {
|
||||||
ctx.index.documents_ids(ctx.txn)?
|
ctx.index.documents_ids(&mut ctx.txn)?
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let mut located_query_terms = None;
|
||||||
let documents_ids = if let Some(query) = query {
|
let documents_ids = if let Some(query) = query {
|
||||||
// We make sure that the analyzer is aware of the stop words
|
// We make sure that the analyzer is aware of the stop words
|
||||||
// this ensures that the query builder is able to properly remove them.
|
// this ensures that the query builder is able to properly remove them.
|
||||||
let mut tokbuilder = TokenizerBuilder::new();
|
let mut tokbuilder = TokenizerBuilder::new();
|
||||||
let stop_words = ctx.index.stop_words(ctx.txn)?;
|
let stop_words = &mut ctx.index.stop_words(&mut ctx.txn)?;
|
||||||
if let Some(ref stop_words) = stop_words {
|
if let Some(ref stop_words) = stop_words {
|
||||||
tokbuilder.stop_words(stop_words);
|
tokbuilder.stop_words(stop_words);
|
||||||
}
|
}
|
||||||
|
|
||||||
let script_lang_map = ctx.index.script_language(ctx.txn)?;
|
let script_lang_map = &mut ctx.index.script_language(&mut ctx.txn)?;
|
||||||
if !script_lang_map.is_empty() {
|
if !script_lang_map.is_empty() {
|
||||||
tokbuilder.allow_list(&script_lang_map);
|
tokbuilder.allow_list(&script_lang_map);
|
||||||
}
|
}
|
||||||
@ -306,27 +307,31 @@ pub fn execute_search(
|
|||||||
let tokenizer = tokbuilder.build();
|
let tokenizer = tokbuilder.build();
|
||||||
let tokens = tokenizer.tokenize(query);
|
let tokens = tokenizer.tokenize(query);
|
||||||
|
|
||||||
let query_terms = located_query_terms_from_string(ctx, tokens, words_limit)?;
|
let query_terms = located_query_terms_from_string(&mut ctx, tokens, words_limit)?;
|
||||||
let graph = QueryGraph::from_query(ctx, &query_terms)?;
|
let graph = QueryGraph::from_query(&mut ctx, &query_terms)?;
|
||||||
|
located_query_terms = Some(query_terms);
|
||||||
|
|
||||||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
check_sort_criteria(&mut ctx, sort_criteria.as_ref())?;
|
||||||
|
|
||||||
universe = resolve_maximally_reduced_query_graph(
|
universe = resolve_maximally_reduced_query_graph(
|
||||||
ctx,
|
&mut ctx,
|
||||||
&universe,
|
&universe,
|
||||||
&graph,
|
&graph,
|
||||||
terms_matching_strategy,
|
terms_matching_strategy,
|
||||||
query_graph_logger,
|
query_graph_logger,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let ranking_rules =
|
let ranking_rules = get_ranking_rules_for_query_graph_search(
|
||||||
get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?;
|
&mut ctx,
|
||||||
|
sort_criteria,
|
||||||
|
terms_matching_strategy,
|
||||||
|
)?;
|
||||||
|
|
||||||
bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)?
|
bucket_sort(&mut ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)?
|
||||||
} else {
|
} else {
|
||||||
let ranking_rules = get_ranking_rules_for_placeholder_search(ctx, sort_criteria)?;
|
let ranking_rules = get_ranking_rules_for_placeholder_search(&mut ctx, sort_criteria)?;
|
||||||
bucket_sort(
|
bucket_sort(
|
||||||
ctx,
|
&mut ctx,
|
||||||
ranking_rules,
|
ranking_rules,
|
||||||
&PlaceholderQuery,
|
&PlaceholderQuery,
|
||||||
&universe,
|
&universe,
|
||||||
@ -340,19 +345,20 @@ pub fn execute_search(
|
|||||||
// is requested and a distinct attribute is set.
|
// is requested and a distinct attribute is set.
|
||||||
let mut candidates = universe;
|
let mut candidates = universe;
|
||||||
if exhaustive_number_hits {
|
if exhaustive_number_hits {
|
||||||
if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
|
if let Some(f) = &mut ctx.index.distinct_field(&mut ctx.txn)? {
|
||||||
if let Some(distinct_fid) = ctx.index.fields_ids_map(ctx.txn)?.id(f) {
|
if let Some(distinct_fid) = ctx.index.fields_ids_map(&mut ctx.txn)?.id(f) {
|
||||||
candidates = apply_distinct_rule(ctx, distinct_fid, &candidates)?.remaining;
|
candidates = apply_distinct_rule(&mut ctx, distinct_fid, &candidates)?.remaining;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(SearchResult {
|
// consume context and located_query_terms to build MatchingWords.
|
||||||
// TODO: correct matching words
|
let matching_words = match located_query_terms {
|
||||||
matching_words: MatchingWords::default(),
|
Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
|
||||||
candidates,
|
None => MatchingWords::default(),
|
||||||
documents_ids,
|
};
|
||||||
})
|
|
||||||
|
Ok(SearchResult { matching_words, candidates, documents_ids })
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>) -> Result<()> {
|
fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>) -> Result<()> {
|
||||||
|
Loading…
Reference in New Issue
Block a user