mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00
Merge branch 'main' into release-v1.14.0-tmp
This commit is contained in:
commit
b025f1bcf1
113 changed files with 1268 additions and 852 deletions
|
@ -358,7 +358,7 @@ impl<'a> FacetDistribution<'a> {
|
|||
) -> bool {
|
||||
// If the field is not filterable, we don't want to compute the facet distribution.
|
||||
if !matching_features(name, filterable_attributes_rules)
|
||||
.map_or(false, |(_, features)| features.is_filterable())
|
||||
.is_some_and(|(_, features)| features.is_filterable())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -378,13 +378,21 @@ impl<'a> FacetDistribution<'a> {
|
|||
filterable_attributes_rules: &[FilterableAttributesRule],
|
||||
) -> Result<()> {
|
||||
let mut invalid_facets = BTreeSet::new();
|
||||
let mut matching_rule_indices = HashMap::new();
|
||||
|
||||
if let Some(facets) = &self.facets {
|
||||
for field in facets.keys() {
|
||||
let is_valid_filterable_field =
|
||||
matching_features(field, filterable_attributes_rules)
|
||||
.map_or(false, |(_, features)| features.is_filterable());
|
||||
if !is_valid_filterable_field {
|
||||
let matched_rule = matching_features(field, filterable_attributes_rules);
|
||||
let is_filterable = matched_rule.is_some_and(|(_, f)| f.is_filterable());
|
||||
|
||||
if !is_filterable {
|
||||
invalid_facets.insert(field.to_string());
|
||||
|
||||
// If the field matched a rule but that rule doesn't enable filtering,
|
||||
// store the rule index for better error messages
|
||||
if let Some((rule_index, _)) = matched_rule {
|
||||
matching_rule_indices.insert(field.to_string(), rule_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -400,6 +408,7 @@ impl<'a> FacetDistribution<'a> {
|
|||
return Err(Error::UserError(UserError::InvalidFacetsDistribution {
|
||||
invalid_facets_name: invalid_facets,
|
||||
valid_patterns,
|
||||
matching_rule_indices,
|
||||
}));
|
||||
}
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
|||
docids: &'bitmap mut RoaringBitmap,
|
||||
}
|
||||
|
||||
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
impl<'t> FacetRangeSearch<'t, '_, '_> {
|
||||
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
|
||||
let left_key =
|
||||
FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
|
||||
|
|
|
@ -62,7 +62,7 @@ struct AscendingFacetSort<'t, 'e> {
|
|||
)>,
|
||||
}
|
||||
|
||||
impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
|
||||
impl<'t> Iterator for AscendingFacetSort<'t, '_> {
|
||||
type Item = Result<(RoaringBitmap, &'t [u8])>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
|
|
|
@ -66,15 +66,15 @@ enum FilterError<'a> {
|
|||
ParseGeoError(BadGeoError),
|
||||
TooDeep,
|
||||
}
|
||||
impl<'a> std::error::Error for FilterError<'a> {}
|
||||
impl std::error::Error for FilterError<'_> {}
|
||||
|
||||
impl<'a> From<BadGeoError> for FilterError<'a> {
|
||||
impl From<BadGeoError> for FilterError<'_> {
|
||||
fn from(geo_error: BadGeoError) -> Self {
|
||||
FilterError::ParseGeoError(geo_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Display for FilterError<'a> {
|
||||
impl Display for FilterError<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::AttributeNotFilterable { attribute, filterable_patterns } => {
|
||||
|
@ -237,7 +237,7 @@ impl<'a> Filter<'a> {
|
|||
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
|
||||
let attribute = fid.value();
|
||||
if matching_features(attribute, &filterable_attributes_rules)
|
||||
.map_or(false, |(_, features)| features.is_filterable())
|
||||
.is_some_and(|(_, features)| features.is_filterable())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -461,7 +461,7 @@ impl<'a> Filter<'a> {
|
|||
filterable_attribute_rules: &[FilterableAttributesRule],
|
||||
universe: Option<&RoaringBitmap>,
|
||||
) -> Result<RoaringBitmap> {
|
||||
if universe.map_or(false, |u| u.is_empty()) {
|
||||
if universe.is_some_and(|u| u.is_empty()) {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
|
||||
|
|
|
@ -75,9 +75,11 @@ impl<'a> SearchForFacetValues<'a> {
|
|||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
|
||||
if !matching_features(&self.facet, &filterable_attributes_rules)
|
||||
.map_or(false, |(_, features)| features.is_facet_searchable())
|
||||
{
|
||||
let matched_rule = matching_features(&self.facet, &filterable_attributes_rules);
|
||||
let is_facet_searchable =
|
||||
matched_rule.is_some_and(|(_, features)| features.is_facet_searchable());
|
||||
|
||||
if !is_facet_searchable {
|
||||
let matching_field_names =
|
||||
filtered_matching_patterns(&filterable_attributes_rules, &|features| {
|
||||
features.is_facet_searchable()
|
||||
|
@ -85,10 +87,14 @@ impl<'a> SearchForFacetValues<'a> {
|
|||
let (valid_patterns, hidden_fields) =
|
||||
index.remove_hidden_fields(rtxn, matching_field_names)?;
|
||||
|
||||
// Get the matching rule index if any rule matched the attribute
|
||||
let matching_rule_index = matched_rule.map(|(rule_index, _)| rule_index);
|
||||
|
||||
return Err(UserError::InvalidFacetSearchFacetName {
|
||||
field: self.facet.clone(),
|
||||
valid_patterns,
|
||||
hidden_fields,
|
||||
matching_rule_index,
|
||||
}
|
||||
.into());
|
||||
};
|
||||
|
@ -129,7 +135,7 @@ impl<'a> SearchForFacetValues<'a> {
|
|||
|
||||
if authorize_typos && field_authorizes_typos {
|
||||
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
|
||||
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
|
||||
if exact_words_fst.is_some_and(|fst| fst.contains(query)) {
|
||||
if fst.contains(query) {
|
||||
self.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
|
|
|
@ -151,7 +151,7 @@ impl ScoreWithRatioResult {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> Search<'a> {
|
||||
impl Search<'_> {
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
|
||||
pub fn execute_hybrid(&self, semantic_ratio: f32) -> Result<(SearchResult, Option<u32>)> {
|
||||
// TODO: find classier way to achieve that than to reset vector and query params
|
||||
|
|
|
@ -190,9 +190,10 @@ impl<'a> Search<'a> {
|
|||
if let Some(distinct) = &self.distinct {
|
||||
let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?;
|
||||
// check if the distinct field is in the filterable fields
|
||||
if !matching_features(distinct, &filterable_fields)
|
||||
.map_or(false, |(_, features)| features.is_filterable())
|
||||
{
|
||||
let matched_rule = matching_features(distinct, &filterable_fields);
|
||||
let is_filterable = matched_rule.is_some_and(|(_, features)| features.is_filterable());
|
||||
|
||||
if !is_filterable {
|
||||
// if not, remove the hidden fields from the filterable fields to generate the error message
|
||||
let matching_patterns =
|
||||
filtered_matching_patterns(&filterable_fields, &|features| {
|
||||
|
@ -200,11 +201,16 @@ impl<'a> Search<'a> {
|
|||
});
|
||||
let (valid_patterns, hidden_fields) =
|
||||
ctx.index.remove_hidden_fields(ctx.txn, matching_patterns)?;
|
||||
|
||||
// Get the matching rule index if any rule matched the attribute
|
||||
let matching_rule_index = matched_rule.map(|(rule_index, _)| rule_index);
|
||||
|
||||
// and return the error
|
||||
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
|
||||
field: distinct.clone(),
|
||||
valid_patterns,
|
||||
hidden_fields,
|
||||
matching_rule_index,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -537,7 +537,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||
fid: u16,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
// if the requested fid isn't in the restricted list, return None.
|
||||
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
|
||||
if self.restricted_fids.as_ref().is_some_and(|fids| !fids.contains(&fid)) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
|
@ -558,7 +558,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||
fid: u16,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
// if the requested fid isn't in the restricted list, return None.
|
||||
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
|
||||
if self.restricted_fids.as_ref().is_some_and(|fids| !fids.contains(&fid)) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
|
|
|
@ -72,7 +72,7 @@ pub fn find_best_match_interval(matches: &[Match], crop_size: usize) -> [&Match;
|
|||
let interval_score = get_interval_score(&matches[interval_first..=interval_last]);
|
||||
let is_interval_score_better = &best_interval
|
||||
.as_ref()
|
||||
.map_or(true, |MatchIntervalWithScore { score, .. }| interval_score > *score);
|
||||
.is_none_or(|MatchIntervalWithScore { score, .. }| interval_score > *score);
|
||||
|
||||
if *is_interval_score_better {
|
||||
best_interval = Some(MatchIntervalWithScore {
|
||||
|
|
|
@ -8,6 +8,7 @@ use std::cmp::{max, min};
|
|||
|
||||
use charabia::{Language, SeparatorKind, Token, Tokenizer};
|
||||
use either::Either;
|
||||
use itertools::Itertools;
|
||||
pub use matching_words::MatchingWords;
|
||||
use matching_words::{MatchType, PartialMatch};
|
||||
use r#match::{Match, MatchPosition};
|
||||
|
@ -122,7 +123,7 @@ pub struct Matcher<'t, 'tokenizer, 'b, 'lang> {
|
|||
matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
|
||||
}
|
||||
|
||||
impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
||||
impl<'t> Matcher<'t, '_, '_, '_> {
|
||||
/// Iterates over tokens and save any of them that matches the query.
|
||||
fn compute_matches(&mut self) -> &mut Self {
|
||||
/// some words are counted as matches only if they are close together and in the good order,
|
||||
|
@ -229,8 +230,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||
.iter()
|
||||
.map(|m| MatchBounds {
|
||||
start: tokens[m.get_first_token_pos()].byte_start,
|
||||
// TODO: Why is this in chars, while start is in bytes?
|
||||
length: m.char_count,
|
||||
length: self.calc_byte_length(tokens, m),
|
||||
indices: if array_indices.is_empty() {
|
||||
None
|
||||
} else {
|
||||
|
@ -241,6 +241,18 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||
}
|
||||
}
|
||||
|
||||
fn calc_byte_length(&self, tokens: &[Token<'t>], m: &Match) -> usize {
|
||||
(m.get_first_token_pos()..=m.get_last_token_pos())
|
||||
.flat_map(|i| match &tokens[i].char_map {
|
||||
Some(char_map) => {
|
||||
char_map.iter().map(|(original, _)| *original as usize).collect_vec()
|
||||
}
|
||||
None => tokens[i].lemma().chars().map(|c| c.len_utf8()).collect_vec(),
|
||||
})
|
||||
.take(m.char_count)
|
||||
.sum()
|
||||
}
|
||||
|
||||
/// Returns the bounds in byte index of the crop window.
|
||||
fn crop_bounds(&self, tokens: &[Token<'_>], matches: &[Match], crop_size: usize) -> [usize; 2] {
|
||||
let (
|
||||
|
|
|
@ -327,7 +327,7 @@ impl QueryGraph {
|
|||
let mut peekable = term_with_frequency.into_iter().peekable();
|
||||
while let Some((idx, frequency)) = peekable.next() {
|
||||
term_weight.insert(idx, weight);
|
||||
if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
|
||||
if peekable.peek().is_some_and(|(_, f)| frequency != *f) {
|
||||
weight += 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -398,7 +398,7 @@ fn split_best_frequency(
|
|||
let right = ctx.word_interner.insert(right.to_owned());
|
||||
|
||||
if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(None, left, right, 1)? {
|
||||
if best.map_or(true, |(old, _, _)| frequency > old) {
|
||||
if best.is_none_or(|(old, _, _)| frequency > old) {
|
||||
best = Some((frequency, left, right));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -203,7 +203,7 @@ pub fn number_of_typos_allowed<'ctx>(
|
|||
Ok(Box::new(move |word: &str| {
|
||||
if !authorize_typos
|
||||
|| word.len() < min_len_one_typo as usize
|
||||
|| exact_words.as_ref().map_or(false, |fst| fst.contains(word))
|
||||
|| exact_words.as_ref().is_some_and(|fst| fst.contains(word))
|
||||
{
|
||||
0
|
||||
} else if word.len() < min_len_two_typos as usize {
|
||||
|
|
|
@ -17,7 +17,7 @@ use crate::Result;
|
|||
pub struct PhraseDocIdsCache {
|
||||
pub cache: FxHashMap<Interned<Phrase>, RoaringBitmap>,
|
||||
}
|
||||
impl<'ctx> SearchContext<'ctx> {
|
||||
impl SearchContext<'_> {
|
||||
/// Get the document ids associated with the given phrase
|
||||
pub fn get_phrase_docids(&mut self, phrase: Interned<Phrase>) -> Result<&RoaringBitmap> {
|
||||
if self.phrase_docids.cache.contains_key(&phrase) {
|
||||
|
|
|
@ -263,7 +263,7 @@ impl SmallBitmapInternal {
|
|||
|
||||
pub fn contains(&self, x: u16) -> bool {
|
||||
let (set, x) = self.get_set_index(x);
|
||||
set & 0b1 << x != 0
|
||||
set & (0b1 << x) != 0
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, x: u16) {
|
||||
|
@ -381,7 +381,7 @@ pub enum SmallBitmapInternalIter<'b> {
|
|||
Tiny(u64),
|
||||
Small { cur: u64, next: &'b [u64], base: u16 },
|
||||
}
|
||||
impl<'b> Iterator for SmallBitmapInternalIter<'b> {
|
||||
impl Iterator for SmallBitmapInternalIter<'_> {
|
||||
type Item = u16;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue