Merge branch 'main' into release-v1.14.0-tmp

This commit is contained in:
Tamo 2025-04-14 12:35:47 +02:00 committed by GitHub
commit b025f1bcf1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
113 changed files with 1268 additions and 852 deletions

View file

@ -358,7 +358,7 @@ impl<'a> FacetDistribution<'a> {
) -> bool {
// If the field is not filterable, we don't want to compute the facet distribution.
if !matching_features(name, filterable_attributes_rules)
.map_or(false, |(_, features)| features.is_filterable())
.is_some_and(|(_, features)| features.is_filterable())
{
return false;
}
@ -378,13 +378,21 @@ impl<'a> FacetDistribution<'a> {
filterable_attributes_rules: &[FilterableAttributesRule],
) -> Result<()> {
let mut invalid_facets = BTreeSet::new();
let mut matching_rule_indices = HashMap::new();
if let Some(facets) = &self.facets {
for field in facets.keys() {
let is_valid_filterable_field =
matching_features(field, filterable_attributes_rules)
.map_or(false, |(_, features)| features.is_filterable());
if !is_valid_filterable_field {
let matched_rule = matching_features(field, filterable_attributes_rules);
let is_filterable = matched_rule.is_some_and(|(_, f)| f.is_filterable());
if !is_filterable {
invalid_facets.insert(field.to_string());
// If the field matched a rule but that rule doesn't enable filtering,
// store the rule index for better error messages
if let Some((rule_index, _)) = matched_rule {
matching_rule_indices.insert(field.to_string(), rule_index);
}
}
}
}
@ -400,6 +408,7 @@ impl<'a> FacetDistribution<'a> {
return Err(Error::UserError(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_facets,
valid_patterns,
matching_rule_indices,
}));
}

View file

@ -79,7 +79,7 @@ struct FacetRangeSearch<'t, 'b, 'bitmap> {
docids: &'bitmap mut RoaringBitmap,
}
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
impl<'t> FacetRangeSearch<'t, '_, '_> {
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
let left_key =
FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };

View file

@ -62,7 +62,7 @@ struct AscendingFacetSort<'t, 'e> {
)>,
}
impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
impl<'t> Iterator for AscendingFacetSort<'t, '_> {
type Item = Result<(RoaringBitmap, &'t [u8])>;
fn next(&mut self) -> Option<Self::Item> {

View file

@ -66,15 +66,15 @@ enum FilterError<'a> {
ParseGeoError(BadGeoError),
TooDeep,
}
impl<'a> std::error::Error for FilterError<'a> {}
impl std::error::Error for FilterError<'_> {}
impl<'a> From<BadGeoError> for FilterError<'a> {
impl From<BadGeoError> for FilterError<'_> {
fn from(geo_error: BadGeoError) -> Self {
FilterError::ParseGeoError(geo_error)
}
}
impl<'a> Display for FilterError<'a> {
impl Display for FilterError<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::AttributeNotFilterable { attribute, filterable_patterns } => {
@ -237,7 +237,7 @@ impl<'a> Filter<'a> {
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
let attribute = fid.value();
if matching_features(attribute, &filterable_attributes_rules)
.map_or(false, |(_, features)| features.is_filterable())
.is_some_and(|(_, features)| features.is_filterable())
{
continue;
}
@ -461,7 +461,7 @@ impl<'a> Filter<'a> {
filterable_attribute_rules: &[FilterableAttributesRule],
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
if universe.map_or(false, |u| u.is_empty()) {
if universe.is_some_and(|u| u.is_empty()) {
return Ok(RoaringBitmap::new());
}

View file

@ -75,9 +75,11 @@ impl<'a> SearchForFacetValues<'a> {
let rtxn = self.search_query.rtxn;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
if !matching_features(&self.facet, &filterable_attributes_rules)
.map_or(false, |(_, features)| features.is_facet_searchable())
{
let matched_rule = matching_features(&self.facet, &filterable_attributes_rules);
let is_facet_searchable =
matched_rule.is_some_and(|(_, features)| features.is_facet_searchable());
if !is_facet_searchable {
let matching_field_names =
filtered_matching_patterns(&filterable_attributes_rules, &|features| {
features.is_facet_searchable()
@ -85,10 +87,14 @@ impl<'a> SearchForFacetValues<'a> {
let (valid_patterns, hidden_fields) =
index.remove_hidden_fields(rtxn, matching_field_names)?;
// Get the matching rule index if any rule matched the attribute
let matching_rule_index = matched_rule.map(|(rule_index, _)| rule_index);
return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),
valid_patterns,
hidden_fields,
matching_rule_index,
}
.into());
};
@ -129,7 +135,7 @@ impl<'a> SearchForFacetValues<'a> {
if authorize_typos && field_authorizes_typos {
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
if exact_words_fst.is_some_and(|fst| fst.contains(query)) {
if fst.contains(query) {
self.fetch_original_facets_using_normalized(
fid,

View file

@ -151,7 +151,7 @@ impl ScoreWithRatioResult {
}
}
impl<'a> Search<'a> {
impl Search<'_> {
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
pub fn execute_hybrid(&self, semantic_ratio: f32) -> Result<(SearchResult, Option<u32>)> {
// TODO: find classier way to achieve that than to reset vector and query params

View file

@ -190,9 +190,10 @@ impl<'a> Search<'a> {
if let Some(distinct) = &self.distinct {
let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?;
// check if the distinct field is in the filterable fields
if !matching_features(distinct, &filterable_fields)
.map_or(false, |(_, features)| features.is_filterable())
{
let matched_rule = matching_features(distinct, &filterable_fields);
let is_filterable = matched_rule.is_some_and(|(_, features)| features.is_filterable());
if !is_filterable {
// if not, remove the hidden fields from the filterable fields to generate the error message
let matching_patterns =
filtered_matching_patterns(&filterable_fields, &|features| {
@ -200,11 +201,16 @@ impl<'a> Search<'a> {
});
let (valid_patterns, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, matching_patterns)?;
// Get the matching rule index if any rule matched the attribute
let matching_rule_index = matched_rule.map(|(rule_index, _)| rule_index);
// and return the error
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
field: distinct.clone(),
valid_patterns,
hidden_fields,
matching_rule_index,
}));
}
}

View file

@ -537,7 +537,7 @@ impl<'ctx> SearchContext<'ctx> {
fid: u16,
) -> Result<Option<RoaringBitmap>> {
// if the requested fid isn't in the restricted list, return None.
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
if self.restricted_fids.as_ref().is_some_and(|fids| !fids.contains(&fid)) {
return Ok(None);
}
@ -558,7 +558,7 @@ impl<'ctx> SearchContext<'ctx> {
fid: u16,
) -> Result<Option<RoaringBitmap>> {
// if the requested fid isn't in the restricted list, return None.
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
if self.restricted_fids.as_ref().is_some_and(|fids| !fids.contains(&fid)) {
return Ok(None);
}

View file

@ -72,7 +72,7 @@ pub fn find_best_match_interval(matches: &[Match], crop_size: usize) -> [&Match;
let interval_score = get_interval_score(&matches[interval_first..=interval_last]);
let is_interval_score_better = &best_interval
.as_ref()
.map_or(true, |MatchIntervalWithScore { score, .. }| interval_score > *score);
.is_none_or(|MatchIntervalWithScore { score, .. }| interval_score > *score);
if *is_interval_score_better {
best_interval = Some(MatchIntervalWithScore {

View file

@ -8,6 +8,7 @@ use std::cmp::{max, min};
use charabia::{Language, SeparatorKind, Token, Tokenizer};
use either::Either;
use itertools::Itertools;
pub use matching_words::MatchingWords;
use matching_words::{MatchType, PartialMatch};
use r#match::{Match, MatchPosition};
@ -122,7 +123,7 @@ pub struct Matcher<'t, 'tokenizer, 'b, 'lang> {
matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
}
impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
impl<'t> Matcher<'t, '_, '_, '_> {
/// Iterates over tokens and save any of them that matches the query.
fn compute_matches(&mut self) -> &mut Self {
/// some words are counted as matches only if they are close together and in the good order,
@ -229,8 +230,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
.iter()
.map(|m| MatchBounds {
start: tokens[m.get_first_token_pos()].byte_start,
// TODO: Why is this in chars, while start is in bytes?
length: m.char_count,
length: self.calc_byte_length(tokens, m),
indices: if array_indices.is_empty() {
None
} else {
@ -241,6 +241,18 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
}
}
/// Computes the length, in bytes, covered by the first `m.char_count` characters of the
/// match `m`, walking the tokens from its first token position to its last one (inclusive).
///
/// For each token, one length is produced per character:
/// - when the token carries a `char_map`, the first component of each entry is used
///   (presumably the byte length of the original, non-normalized character — confirm
///   against the tokenizer documentation);
/// - otherwise the UTF-8 length of each character of the token's lemma is used.
///
/// Only the first `m.char_count` per-character lengths are summed.
fn calc_byte_length(&self, tokens: &[Token<'t>], m: &Match) -> usize {
    let token_positions = m.get_first_token_pos()..=m.get_last_token_pos();

    // Per-character byte lengths, token after token. Both branches are collected into a
    // `Vec` so that they share a single concrete type inside `flat_map`.
    let char_byte_lengths = token_positions.flat_map(|pos| {
        let token = &tokens[pos];
        if let Some(char_map) = &token.char_map {
            char_map.iter().map(|(original, _)| *original as usize).collect_vec()
        } else {
            token.lemma().chars().map(|c| c.len_utf8()).collect_vec()
        }
    });

    // The match may end before the last token does: stop after `char_count` characters.
    char_byte_lengths.take(m.char_count).sum()
}
/// Returns the bounds in byte index of the crop window.
fn crop_bounds(&self, tokens: &[Token<'_>], matches: &[Match], crop_size: usize) -> [usize; 2] {
let (

View file

@ -327,7 +327,7 @@ impl QueryGraph {
let mut peekable = term_with_frequency.into_iter().peekable();
while let Some((idx, frequency)) = peekable.next() {
term_weight.insert(idx, weight);
if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
if peekable.peek().is_some_and(|(_, f)| frequency != *f) {
weight += 1;
}
}

View file

@ -398,7 +398,7 @@ fn split_best_frequency(
let right = ctx.word_interner.insert(right.to_owned());
if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(None, left, right, 1)? {
if best.map_or(true, |(old, _, _)| frequency > old) {
if best.is_none_or(|(old, _, _)| frequency > old) {
best = Some((frequency, left, right));
}
}

View file

@ -203,7 +203,7 @@ pub fn number_of_typos_allowed<'ctx>(
Ok(Box::new(move |word: &str| {
if !authorize_typos
|| word.len() < min_len_one_typo as usize
|| exact_words.as_ref().map_or(false, |fst| fst.contains(word))
|| exact_words.as_ref().is_some_and(|fst| fst.contains(word))
{
0
} else if word.len() < min_len_two_typos as usize {

View file

@ -17,7 +17,7 @@ use crate::Result;
pub struct PhraseDocIdsCache {
pub cache: FxHashMap<Interned<Phrase>, RoaringBitmap>,
}
impl<'ctx> SearchContext<'ctx> {
impl SearchContext<'_> {
/// Get the document ids associated with the given phrase
pub fn get_phrase_docids(&mut self, phrase: Interned<Phrase>) -> Result<&RoaringBitmap> {
if self.phrase_docids.cache.contains_key(&phrase) {

View file

@ -263,7 +263,7 @@ impl SmallBitmapInternal {
pub fn contains(&self, x: u16) -> bool {
let (set, x) = self.get_set_index(x);
set & 0b1 << x != 0
set & (0b1 << x) != 0
}
pub fn insert(&mut self, x: u16) {
@ -381,7 +381,7 @@ pub enum SmallBitmapInternalIter<'b> {
Tiny(u64),
Small { cur: u64, next: &'b [u64], base: u16 },
}
impl<'b> Iterator for SmallBitmapInternalIter<'b> {
impl Iterator for SmallBitmapInternalIter<'_> {
type Item = u16;
fn next(&mut self) -> Option<Self::Item> {