Introduce a new OrderByMap struct to simplify the sort by usage

This commit is contained in:
Clément Renault 2024-03-12 11:01:46 +01:00
parent 69c118ef76
commit d3a95ea2f6
No known key found for this signature in database
GPG key ID: F250A4C4E3AE5F5F
5 changed files with 73 additions and 33 deletions

View file

@ -20,13 +20,13 @@ use crate::heed_codec::facet::{
use crate::heed_codec::{
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
use crate::vector::EmbeddingConfig;
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16,
BEU32, BEU64,
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@ -1373,21 +1373,19 @@ impl Index {
self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET)
}
pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> {
let mut orders = self
pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<OrderByMap> {
let orders = self
.main
.remap_types::<Str, SerdeJson<HashMap<String, OrderBy>>>()
.remap_types::<Str, SerdeJson<OrderByMap>>()
.get(txn, main_key::SORT_FACET_VALUES_BY)?
.unwrap_or_default();
// Insert the default ordering if it is not already overwritten by the user.
orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic);
Ok(orders)
}
pub(crate) fn put_sort_facet_values_by(
&self,
txn: &mut RwTxn,
val: &HashMap<String, OrderBy>,
val: &OrderByMap,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val)
}

View file

@ -16,6 +16,7 @@ pub mod facet;
mod fields_ids_map;
pub mod heed_codec;
pub mod index;
pub mod order_by_map;
pub mod prompt;
pub mod proximity;
pub mod score_details;

57
milli/src/order_by_map.rs Normal file
View file

@ -0,0 +1,57 @@
use std::collections::{hash_map, HashMap};
use std::iter::FromIterator;
use serde::{Deserialize, Deserializer, Serialize};
use crate::OrderBy;
#[derive(Serialize)]
pub struct OrderByMap(HashMap<String, OrderBy>);
impl OrderByMap {
pub fn get(&self, key: impl AsRef<str>) -> OrderBy {
self.0
.get(key.as_ref())
.copied()
.unwrap_or_else(|| self.0.get("*").copied().unwrap_or_default())
}
pub fn insert(&mut self, key: String, value: OrderBy) -> Option<OrderBy> {
self.0.insert(key, value)
}
}
impl Default for OrderByMap {
fn default() -> Self {
let mut map = HashMap::new();
map.insert("*".to_string(), OrderBy::Lexicographic);
OrderByMap(map)
}
}
impl FromIterator<(String, OrderBy)> for OrderByMap {
fn from_iter<T: IntoIterator<Item = (String, OrderBy)>>(iter: T) -> Self {
OrderByMap(iter.into_iter().collect())
}
}
impl IntoIterator for OrderByMap {
type Item = (String, OrderBy);
type IntoIter = hash_map::IntoIter<String, OrderBy>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<'de> Deserialize<'de> for OrderByMap {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let mut map = Deserialize::deserialize(deserializer).map(OrderByMap)?;
// Insert the default ordering if it is not already overwritten by the user.
map.0.entry("*".to_string()).or_insert(OrderBy::default());
Ok(map)
}
}

View file

@ -14,12 +14,13 @@ use super::IndexerConfig;
use crate::criterion::Criterion;
use crate::error::UserError;
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
use crate::update::index_documents::IndexDocumentsMethod;
use crate::update::{IndexDocuments, UpdateIndexingStep};
use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings};
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
use crate::{FieldsIdsMap, Index, OrderBy, Result};
use crate::{FieldsIdsMap, Index, Result};
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum Setting<T> {
@ -145,7 +146,7 @@ pub struct Settings<'a, 't, 'i> {
/// Attributes on which typo tolerance is disabled.
exact_attributes: Setting<HashSet<String>>,
max_values_per_facet: Setting<usize>,
sort_facet_values_by: Setting<HashMap<String, OrderBy>>,
sort_facet_values_by: Setting<OrderByMap>,
pagination_max_total_hits: Setting<usize>,
proximity_precision: Setting<ProximityPrecision>,
embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
@ -340,7 +341,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.max_values_per_facet = Setting::Reset;
}
pub fn set_sort_facet_values_by(&mut self, value: HashMap<String, OrderBy>) {
pub fn set_sort_facet_values_by(&mut self, value: OrderByMap) {
self.sort_facet_values_by = Setting::Set(value);
}