From d3a95ea2f66ae90f62385b9b52bf39f66358cf19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 12 Mar 2024 11:01:46 +0100 Subject: [PATCH] Introduce a new OrderByMap struct to simplify the sort by usage --- meilisearch/src/search.rs | 27 ++++------------- milli/src/index.rs | 14 ++++----- milli/src/lib.rs | 1 + milli/src/order_by_map.rs | 57 ++++++++++++++++++++++++++++++++++++ milli/src/update/settings.rs | 7 +++-- 5 files changed, 73 insertions(+), 33 deletions(-) create mode 100644 milli/src/order_by_map.rs diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 6e253baad..8f3df04e0 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -671,27 +671,16 @@ pub fn perform_search( let sort_facet_values_by = index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; - let default_sort_facet_values_by = - sort_facet_values_by.get("*").copied().unwrap_or_default(); if fields.iter().all(|f| f != "*") { - let fields: Vec<_> = fields - .iter() - .map(|n| { - ( - n, - sort_facet_values_by - .get(n) - .copied() - .unwrap_or(default_sort_facet_values_by), - ) - }) - .collect(); + let fields: Vec<_> = + fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect(); facet_distribution.facets(fields); } + let distribution = facet_distribution .candidates(candidates) - .default_order_by(default_sort_facet_values_by) + .default_order_by(sort_facet_values_by.get("*")) .execute()?; let stats = facet_distribution.compute_stats()?; (Some(distribution), Some(stats)) @@ -726,13 +715,7 @@ pub fn perform_facet_search( let rtxn = index.read_txn()?; let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?; - let sort_by = { - let sorts = index.sort_facet_values_by(&rtxn)?; - sorts - .get(&facet_name) - .copied() - .unwrap_or_else(|| sorts.get("*").copied().unwrap_or_default()) - }; + let sort_by = index.sort_facet_values_by(&rtxn)?.get(&facet_name); let mut facet_search = SearchForFacetValues::new(facet_name, search, sort_by, search_query.hybrid.is_some()); if let Some(facet_query) = &facet_query { diff --git a/milli/src/index.rs b/milli/src/index.rs index 6ad39dcb1..2c3977403 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -20,13 +20,13 @@ use crate::heed_codec::facet::{ use crate::heed_codec::{ BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec, }; +use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use crate::vector::EmbeddingConfig; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, - OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, - BEU32, BEU64, + Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64, }; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; @@ -1373,21 +1373,19 @@ impl Index { self.main.remap_key_type::().delete(txn, main_key::MAX_VALUES_PER_FACET) } - pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result> { - let mut orders = self + pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result { + let orders = self .main - .remap_types::>>() + .remap_types::>() .get(txn, main_key::SORT_FACET_VALUES_BY)? .unwrap_or_default(); - // Insert the default ordering if it is not already overwritten by the user. - orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic); Ok(orders) } pub(crate) fn put_sort_facet_values_by( &self, txn: &mut RwTxn, - val: &HashMap, + val: &OrderByMap, ) -> heed::Result<()> { self.main.remap_types::>().put(txn, main_key::SORT_FACET_VALUES_BY, &val) } diff --git a/milli/src/lib.rs b/milli/src/lib.rs index f6b398304..be79a7e86 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -16,6 +16,7 @@ pub mod facet; mod fields_ids_map; pub mod heed_codec; pub mod index; +pub mod order_by_map; pub mod prompt; pub mod proximity; pub mod score_details; diff --git a/milli/src/order_by_map.rs b/milli/src/order_by_map.rs new file mode 100644 index 000000000..287e62c3a --- /dev/null +++ b/milli/src/order_by_map.rs @@ -0,0 +1,57 @@ +use std::collections::{hash_map, HashMap}; +use std::iter::FromIterator; + +use serde::{Deserialize, Deserializer, Serialize}; + +use crate::OrderBy; + +#[derive(Serialize)] +pub struct OrderByMap(HashMap); + +impl OrderByMap { + pub fn get(&self, key: impl AsRef) -> OrderBy { + self.0 + .get(key.as_ref()) + .copied() + .unwrap_or_else(|| self.0.get("*").copied().unwrap_or_default()) + } + + pub fn insert(&mut self, key: String, value: OrderBy) -> Option { + self.0.insert(key, value) + } +} + +impl Default for OrderByMap { + fn default() -> Self { + let mut map = HashMap::new(); + map.insert("*".to_string(), OrderBy::Lexicographic); + OrderByMap(map) + } +} + +impl FromIterator<(String, OrderBy)> for OrderByMap { + fn from_iter>(iter: T) -> Self { + OrderByMap(iter.into_iter().collect()) + } +} + +impl IntoIterator for OrderByMap { + type Item = (String, OrderBy); + type IntoIter = hash_map::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl<'de> Deserialize<'de> for OrderByMap { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let mut map = Deserialize::deserialize(deserializer).map(OrderByMap)?; + // Insert the default ordering if it is not already overwritten by the user. + map.0.entry("*".to_string()).or_insert(OrderBy::default()); + Ok(map) + } +} diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 3cad79467..dcf41970e 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -14,12 +14,13 @@ use super::IndexerConfig; use crate::criterion::Criterion; use crate::error::UserError; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; +use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings}; use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; -use crate::{FieldsIdsMap, Index, OrderBy, Result}; +use crate::{FieldsIdsMap, Index, Result}; #[derive(Debug, Clone, PartialEq, Eq, Copy)] pub enum Setting { @@ -145,7 +146,7 @@ pub struct Settings<'a, 't, 'i> { /// Attributes on which typo tolerance is disabled. exact_attributes: Setting>, max_values_per_facet: Setting, - sort_facet_values_by: Setting>, + sort_facet_values_by: Setting, pagination_max_total_hits: Setting, proximity_precision: Setting, embedder_settings: Setting>>, @@ -340,7 +341,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.max_values_per_facet = Setting::Reset; } - pub fn set_sort_facet_values_by(&mut self, value: HashMap) { + pub fn set_sort_facet_values_by(&mut self, value: OrderByMap) { self.sort_facet_values_by = Setting::Set(value); }