Introduce a new OrderByMap struct to simplify the sort by usage

This commit is contained in:
Clément Renault 2024-03-12 11:01:46 +01:00
parent 69c118ef76
commit d3a95ea2f6
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
5 changed files with 73 additions and 33 deletions

View File

@ -671,27 +671,16 @@ pub fn perform_search(
let sort_facet_values_by = let sort_facet_values_by =
index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?;
let default_sort_facet_values_by =
sort_facet_values_by.get("*").copied().unwrap_or_default();
if fields.iter().all(|f| f != "*") { if fields.iter().all(|f| f != "*") {
let fields: Vec<_> = fields let fields: Vec<_> =
.iter() fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect();
.map(|n| {
(
n,
sort_facet_values_by
.get(n)
.copied()
.unwrap_or(default_sort_facet_values_by),
)
})
.collect();
facet_distribution.facets(fields); facet_distribution.facets(fields);
} }
let distribution = facet_distribution let distribution = facet_distribution
.candidates(candidates) .candidates(candidates)
.default_order_by(default_sort_facet_values_by) .default_order_by(sort_facet_values_by.get("*"))
.execute()?; .execute()?;
let stats = facet_distribution.compute_stats()?; let stats = facet_distribution.compute_stats()?;
(Some(distribution), Some(stats)) (Some(distribution), Some(stats))
@ -726,13 +715,7 @@ pub fn perform_facet_search(
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?; let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?;
let sort_by = { let sort_by = index.sort_facet_values_by(&rtxn)?.get(&facet_name);
let sorts = index.sort_facet_values_by(&rtxn)?;
sorts
.get(&facet_name)
.copied()
.unwrap_or_else(|| sorts.get("*").copied().unwrap_or_default())
};
let mut facet_search = let mut facet_search =
SearchForFacetValues::new(facet_name, search, sort_by, search_query.hybrid.is_some()); SearchForFacetValues::new(facet_name, search, sort_by, search_query.hybrid.is_some());
if let Some(facet_query) = &facet_query { if let Some(facet_query) = &facet_query {

View File

@ -20,13 +20,13 @@ use crate::heed_codec::facet::{
use crate::heed_codec::{ use crate::heed_codec::{
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec, BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
}; };
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision; use crate::proximity::ProximityPrecision;
use crate::vector::EmbeddingConfig; use crate::vector::EmbeddingConfig;
use crate::{ use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64,
BEU32, BEU64,
}; };
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@ -1373,21 +1373,19 @@ impl Index {
self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET) self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET)
} }
pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> { pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<OrderByMap> {
let mut orders = self let orders = self
.main .main
.remap_types::<Str, SerdeJson<HashMap<String, OrderBy>>>() .remap_types::<Str, SerdeJson<OrderByMap>>()
.get(txn, main_key::SORT_FACET_VALUES_BY)? .get(txn, main_key::SORT_FACET_VALUES_BY)?
.unwrap_or_default(); .unwrap_or_default();
// Insert the default ordering if it is not already overwritten by the user.
orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic);
Ok(orders) Ok(orders)
} }
pub(crate) fn put_sort_facet_values_by( pub(crate) fn put_sort_facet_values_by(
&self, &self,
txn: &mut RwTxn, txn: &mut RwTxn,
val: &HashMap<String, OrderBy>, val: &OrderByMap,
) -> heed::Result<()> { ) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val) self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val)
} }

View File

@ -16,6 +16,7 @@ pub mod facet;
mod fields_ids_map; mod fields_ids_map;
pub mod heed_codec; pub mod heed_codec;
pub mod index; pub mod index;
pub mod order_by_map;
pub mod prompt; pub mod prompt;
pub mod proximity; pub mod proximity;
pub mod score_details; pub mod score_details;

57
milli/src/order_by_map.rs Normal file
View File

@ -0,0 +1,57 @@
use std::collections::{hash_map, HashMap};
use std::iter::FromIterator;
use serde::{Deserialize, Deserializer, Serialize};
use crate::OrderBy;
/// A map from a name to the [`OrderBy`] that applies to it.
///
/// The special `"*"` key holds the fallback ordering returned by
/// [`OrderByMap::get`] for any key without an explicit entry.
#[derive(Serialize)]
pub struct OrderByMap(HashMap<String, OrderBy>);
impl OrderByMap {
pub fn get(&self, key: impl AsRef<str>) -> OrderBy {
self.0
.get(key.as_ref())
.copied()
.unwrap_or_else(|| self.0.get("*").copied().unwrap_or_default())
}
pub fn insert(&mut self, key: String, value: OrderBy) -> Option<OrderBy> {
self.0.insert(key, value)
}
}
impl Default for OrderByMap {
fn default() -> Self {
let mut map = HashMap::new();
map.insert("*".to_string(), OrderBy::Lexicographic);
OrderByMap(map)
}
}
impl FromIterator<(String, OrderBy)> for OrderByMap {
fn from_iter<T: IntoIterator<Item = (String, OrderBy)>>(iter: T) -> Self {
OrderByMap(iter.into_iter().collect())
}
}
impl IntoIterator for OrderByMap {
type Item = (String, OrderBy);
type IntoIter = hash_map::IntoIter<String, OrderBy>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<'de> Deserialize<'de> for OrderByMap {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let mut map = Deserialize::deserialize(deserializer).map(OrderByMap)?;
// Insert the default ordering if it is not already overwritten by the user.
map.0.entry("*".to_string()).or_insert(OrderBy::default());
Ok(map)
}
}

View File

@ -14,12 +14,13 @@ use super::IndexerConfig;
use crate::criterion::Criterion; use crate::criterion::Criterion;
use crate::error::UserError; use crate::error::UserError;
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision; use crate::proximity::ProximityPrecision;
use crate::update::index_documents::IndexDocumentsMethod; use crate::update::index_documents::IndexDocumentsMethod;
use crate::update::{IndexDocuments, UpdateIndexingStep}; use crate::update::{IndexDocuments, UpdateIndexingStep};
use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings}; use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings};
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
use crate::{FieldsIdsMap, Index, OrderBy, Result}; use crate::{FieldsIdsMap, Index, Result};
#[derive(Debug, Clone, PartialEq, Eq, Copy)] #[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum Setting<T> { pub enum Setting<T> {
@ -145,7 +146,7 @@ pub struct Settings<'a, 't, 'i> {
/// Attributes on which typo tolerance is disabled. /// Attributes on which typo tolerance is disabled.
exact_attributes: Setting<HashSet<String>>, exact_attributes: Setting<HashSet<String>>,
max_values_per_facet: Setting<usize>, max_values_per_facet: Setting<usize>,
sort_facet_values_by: Setting<HashMap<String, OrderBy>>, sort_facet_values_by: Setting<OrderByMap>,
pagination_max_total_hits: Setting<usize>, pagination_max_total_hits: Setting<usize>,
proximity_precision: Setting<ProximityPrecision>, proximity_precision: Setting<ProximityPrecision>,
embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>, embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
@ -340,7 +341,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.max_values_per_facet = Setting::Reset; self.max_values_per_facet = Setting::Reset;
} }
pub fn set_sort_facet_values_by(&mut self, value: HashMap<String, OrderBy>) { pub fn set_sort_facet_values_by(&mut self, value: OrderByMap) {
self.sort_facet_values_by = Setting::Set(value); self.sort_facet_values_by = Setting::Set(value);
} }