From c0e987979a95307082dfb28e18460b54921a08ea Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 30 Apr 2025 17:28:49 +0200 Subject: [PATCH] Allow lexicographic string comparisons --- crates/milli/src/search/facet/filter.rs | 79 ++++++++++++++++++------- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index 3505f7d4a..969f00ec0 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -1,10 +1,11 @@ use std::collections::BTreeSet; use std::fmt::{Debug, Display}; -use std::ops::Bound::{self, Excluded, Included}; +use std::ops::Bound::{self, Excluded, Included, Unbounded}; use either::Either; pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token}; use heed::types::LazyDecode; +use heed::BytesEncode; use memchr::memmem::Finder; use roaring::{MultiOps, RoaringBitmap}; use serde_json::Value; @@ -14,7 +15,7 @@ use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::{Error, UserError}; use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::heed_codec::facet::{ - FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ @@ -271,7 +272,7 @@ impl<'a> Filter<'a> { // as the facets values are all in the same database and prefixed by the // field id and the level. - let (left, right) = match operator { + let (number_bounds, (left_str, right_str)) = match operator { // return an error if the filter is not allowed for this field Condition::GreaterThan(_) | Condition::GreaterThanOrEqual(_) @@ -305,17 +306,37 @@ impl<'a> Filter<'a> { )); } Condition::GreaterThan(val) => { - (Excluded(val.parse_finite_float()?), Included(f64::MAX)) + let number = val.parse_finite_float().ok(); + let number_bounds = number.map(|number| (Excluded(number), Included(f64::MAX))); + let str_bounds = (Excluded(val.value()), Unbounded); + (number_bounds, str_bounds) } Condition::GreaterThanOrEqual(val) => { - (Included(val.parse_finite_float()?), Included(f64::MAX)) + let number = val.parse_finite_float().ok(); + let number_bounds = number.map(|number| (Included(number), Included(f64::MAX))); + let str_bounds = (Included(val.value()), Unbounded); + (number_bounds, str_bounds) + } + Condition::LowerThan(val) => { + let number = val.parse_finite_float().ok(); + let number_bounds = number.map(|number| (Included(f64::MIN), Excluded(number))); + let str_bounds = (Unbounded, Excluded(val.value())); + (number_bounds, str_bounds) } - Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.parse_finite_float()?)), Condition::LowerThanOrEqual(val) => { - (Included(f64::MIN), Included(val.parse_finite_float()?)) + let number = val.parse_finite_float().ok(); + let number_bounds = number.map(|number| (Included(f64::MIN), Included(number))); + let str_bounds = (Unbounded, Included(val.value())); + (number_bounds, str_bounds) } Condition::Between { from, to } => { - (Included(from.parse_finite_float()?), Included(to.parse_finite_float()?)) + let from_number = from.parse_finite_float().ok(); + let to_number = to.parse_finite_float().ok(); + + let number_bounds = + from_number.zip(to_number).map(|(from, to)| (Included(from), Included(to))); + let str_bounds = (Included(from.value()), Included(to.value())); + (number_bounds, str_bounds) } Condition::Null => { let is_null = index.null_faceted_documents_ids(rtxn, field_id)?; @@ -415,29 +436,47 @@ impl<'a> Filter<'a> { }; let mut output = RoaringBitmap::new(); - Self::explore_facet_number_levels( + + if let Some((left_number, right_number)) = number_bounds { + Self::explore_facet_levels( + rtxn, + numbers_db, + field_id, + &left_number, + &right_number, + universe, + &mut output, + )?; + } + + Self::explore_facet_levels( rtxn, - numbers_db, + strings_db, field_id, - left, - right, + &left_str, + &right_str, universe, &mut output, )?; + Ok(output) } /// Aggregates the documents ids that are part of the specified range automatically /// going deeper through the levels. - fn explore_facet_number_levels( - rtxn: &heed::RoTxn<'_>, - db: heed::Database, FacetGroupValueCodec>, + fn explore_facet_levels<'data, BoundCodec>( + rtxn: &'data heed::RoTxn<'data>, + db: heed::Database, FacetGroupValueCodec>, field_id: FieldId, - left: Bound, - right: Bound, + left: &'data Bound<>::EItem>, + right: &'data Bound<>::EItem>, universe: Option<&RoaringBitmap>, output: &mut RoaringBitmap, - ) -> Result<()> { + ) -> Result<()> + where + BoundCodec: for<'b> BytesEncode<'b>, + for<'b> >::EItem: Sized + PartialOrd, + { match (left, right) { // lower TO upper when lower > upper must return no result (Included(l), Included(r)) if l > r => return Ok(()), @@ -446,8 +485,8 @@ impl<'a> Filter<'a> { (Excluded(l), Included(r)) if l >= r => return Ok(()), (_, _) => (), } - facet_range_search::find_docids_of_facet_within_bounds::( - rtxn, db, field_id, &left, &right, universe, output, + facet_range_search::find_docids_of_facet_within_bounds::( + rtxn, db, field_id, left, right, universe, output, )?; Ok(())