MeiliSearch/crates/milli/src/score_details.rs

use std::cmp::Ordering;

use itertools::Itertools;
use serde::Serialize;

use crate::distance_between_two_points;

/// Score information produced by one ranking rule for one document.
///
/// The variants that carry (or convert to) a [`Rank`] contribute to the numeric score, see
/// [`ScoreDetails::rank`]. `Sort`, `GeoSort` and `Vector` have no rank.
#[derive(Debug, Clone, PartialEq)]
pub enum ScoreDetails {
    /// Serialized under the `words` key by `to_json_map`.
    Words(Words),
    /// Serialized under the `typo` key by `to_json_map`.
    Typo(Typo),
    /// Serialized under the `proximity` key by `to_json_map`.
    Proximity(Rank),
    /// Serialized under the `attribute` key as `attributeRankingOrderScore`.
    Fid(Rank),
    /// Added to the `attribute` entry created by `Fid`, as `queryWordDistanceScore`.
    Position(Rank),
    /// Serialized under the `exactness` key, with the variant as `matchType`.
    ExactAttribute(ExactAttribute),
    /// Added to the `exactness` entry created by `ExactAttribute` when its `matchType` is
    /// `NoExactMatch`.
    ExactWords(ExactWords),
    /// Serialized under a `<field>:<asc|desc>` key (or a hidden key when redacted).
    Sort(Sort),
    /// Serialized under the `vectorSort` key.
    Vector(Vector),
    /// Serialized under a `_geoPoint(<a>, <b>):<asc|desc>` key.
    GeoSort(GeoSort),

    /// Returned when we don't have the time to finish applying all the subsequent ranking-rules
    ///
    /// Treated as the rank `0` out of `1` when computing scores.
    Skipped,
}

/// Item yielded by [`ScoreDetails::score_values`].
#[derive(Clone, Copy)]
pub enum ScoreValue<'a> {
    /// A numeric score: either the local score of a run of merged ranks, or a vector similarity.
    Score(f64),
    /// Borrowed details of a sort rule, passed through unchanged.
    Sort(&'a Sort),
    /// Borrowed details of a geo sort rule, passed through unchanged.
    GeoSort(&'a GeoSort),
}

/// Intermediate representation used by `ScoreDetails::score_values`.
///
/// Unlike [`ScoreValue`], ranks are kept as [`Rank`] so that consecutive ranks can be merged
/// before being turned into a score.
enum RankOrValue<'a> {
    /// A rank that may still be merged with adjacent ranks.
    Rank(Rank),
    /// Borrowed details of a sort rule.
    Sort(&'a Sort),
    /// Borrowed details of a geo sort rule.
    GeoSort(&'a GeoSort),
    /// An already-computed score (used for vector similarity); never merged.
    Score(f64),
}

impl ScoreDetails {
    pub fn local_score(&self) -> Option<f64> {
        self.rank().map(Rank::local_score)
    }

    /// Returns the [`Rank`] associated with this detail.
    ///
    /// `Sort`, `GeoSort` and `Vector` have no rank and yield `None`. `Skipped` is reported as
    /// rank `0` out of `1`.
    pub fn rank(&self) -> Option<Rank> {
        let rank = match self {
            ScoreDetails::Sort(_) | ScoreDetails::GeoSort(_) | ScoreDetails::Vector(_) => {
                return None;
            }
            // These variants already store their rank.
            ScoreDetails::Proximity(rank)
            | ScoreDetails::Fid(rank)
            | ScoreDetails::Position(rank) => *rank,
            ScoreDetails::Words(words) => words.rank(),
            ScoreDetails::Typo(typo) => typo.rank(),
            ScoreDetails::ExactAttribute(exact_attribute) => exact_attribute.rank(),
            ScoreDetails::ExactWords(exact_words) => exact_words.rank(),
            ScoreDetails::Skipped => Rank { rank: 0, max_rank: 1 },
        };
        Some(rank)
    }

    /// Returns the first numeric score yielded by [`ScoreDetails::score_values`] for `details`.
    ///
    /// Sort and geo sort values are skipped. When no numeric score is found, `1.0` is returned.
    pub fn global_score<'a>(details: impl Iterator<Item = &'a Self> + 'a) -> f64 {
        for value in Self::score_values(details) {
            if let ScoreValue::Score(score) = value {
                return score;
            }
        }
        1.0f64
    }

    pub fn score_values<'a>(
        details: impl Iterator<Item = &'a Self> + 'a,
    ) -> impl Iterator<Item = ScoreValue<'a>> + 'a {
        details
            .map(ScoreDetails::rank_or_value)
            .coalesce(|left, right| match (left, right) {
                (RankOrValue::Rank(left), RankOrValue::Rank(right)) => {
                    Ok(RankOrValue::Rank(Rank::merge(left, right)))
                }
                (left, right) => Err((left, right)),
            })
            .map(|rank_or_value| match rank_or_value {
                RankOrValue::Rank(r) => ScoreValue::Score(r.local_score()),
                RankOrValue::Sort(s) => ScoreValue::Sort(s),
                RankOrValue::GeoSort(g) => ScoreValue::GeoSort(g),
                RankOrValue::Score(s) => ScoreValue::Score(s),
            })
    }

    /// Converts this detail into the intermediate form consumed by `score_values`.
    ///
    /// A vector detail becomes a `Score` holding its similarity, or `0.0` when it has none.
    fn rank_or_value(&self) -> RankOrValue<'_> {
        let rank = match self {
            ScoreDetails::Sort(sort) => return RankOrValue::Sort(sort),
            ScoreDetails::GeoSort(geo_sort) => return RankOrValue::GeoSort(geo_sort),
            ScoreDetails::Vector(vector) => {
                let similarity = vector.similarity.map_or(0.0f64, f64::from);
                return RankOrValue::Score(similarity);
            }
            ScoreDetails::Proximity(rank)
            | ScoreDetails::Fid(rank)
            | ScoreDetails::Position(rank) => *rank,
            ScoreDetails::Words(words) => words.rank(),
            ScoreDetails::Typo(typo) => typo.rank(),
            ScoreDetails::ExactAttribute(exact_attribute) => exact_attribute.rank(),
            ScoreDetails::ExactWords(exact_words) => exact_words.rank(),
            ScoreDetails::Skipped => Rank { rank: 0, max_rank: 1 },
        };
        RankOrValue::Rank(rank)
    }

    /// Builds the JSON object describing the contribution of each detail, keyed by rule name.
    ///
    /// Every entry carries an `order` field, taken from a counter that starts at 0 and is
    /// incremented once per created entry. `Position` and `ExactWords` do not create an entry:
    /// they enrich the `attribute` and `exactness` entries created by the preceding `Fid` and
    /// `ExactAttribute` details, and leave the counter untouched.
    ///
    /// # Panics
    ///
    /// - If `Position` is not preceded by `Fid`
    /// - If `ExactWords` is not preceded by `ExactAttribute`
    pub fn to_json_map<'a>(
        details: impl Iterator<Item = &'a Self>,
    ) -> serde_json::Map<String, serde_json::Value> {
        // Position of the next entry to be created.
        let mut order = 0;
        // Rank of the last `Fid` seen, needed to compute the combined `attribute` score.
        let mut fid_details = None;
        let mut details_map = serde_json::Map::default();
        for details in details {
            match details {
                ScoreDetails::Words(words) => {
                    let words_details = serde_json::json!({
                            "order": order,
                            "matchingWords": words.matching_words,
                            "maxMatchingWords": words.max_matching_words,
                            "score": words.rank().local_score(),
                    });
                    details_map.insert("words".into(), words_details);
                    order += 1;
                }
                ScoreDetails::Typo(typo) => {
                    let typo_details = serde_json::json!({
                        "order": order,
                        "typoCount": typo.typo_count,
                        "maxTypoCount": typo.max_typo_count,
                        "score": typo.rank().local_score(),
                    });
                    details_map.insert("typo".into(), typo_details);
                    order += 1;
                }
                ScoreDetails::Proximity(proximity) => {
                    let proximity_details = serde_json::json!({
                        "order": order,
                        "score": proximity.local_score(),
                    });
                    details_map.insert("proximity".into(), proximity_details);
                    order += 1;
                }
                ScoreDetails::Fid(fid) => {
                    // copy the rank for future use in Position.
                    fid_details = Some(*fid);
                    // For now, fid is a virtual rule always followed by the "position" rule
                    // NOTE: this binding shadows the outer `fid_details` for the rest of this arm
                    // only; the outer variable keeps the rank stored just above.
                    let fid_details = serde_json::json!({
                        "order": order,
                        "attributeRankingOrderScore": fid.local_score(),
                    });
                    details_map.insert("attribute".into(), fid_details);
                    order += 1;
                }
                ScoreDetails::Position(position) => {
                    // For now, position is a virtual rule always preceded by the "fid" rule
                    let attribute_details = details_map
                        .get_mut("attribute")
                        .expect("position not preceded by attribute");
                    let attribute_details = attribute_details
                        .as_object_mut()
                        .expect("attribute details was not an object");
                    let Some(fid_details) = fid_details else {
                        unimplemented!("position not preceded by attribute");
                    };

                    attribute_details
                        .insert("queryWordDistanceScore".into(), position.local_score().into());
                    // the overall attribute score combines the fid rank with the position rank
                    let score = Rank::global_score([fid_details, *position].iter().copied());
                    attribute_details.insert("score".into(), score.into());

                    // do not update the order since this was already done by fid
                }
                ScoreDetails::ExactAttribute(exact_attribute) => {
                    let exactness_details = serde_json::json!({
                        "order": order,
                        "matchType": exact_attribute,
                        "score": exact_attribute.rank().local_score(),
                    });
                    details_map.insert("exactness".into(), exactness_details);
                    order += 1;
                }
                ScoreDetails::ExactWords(details) => {
                    // For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule
                    let exactness_details = details_map
                        .get_mut("exactness")
                        .expect("Exactness not preceded by exactAttribute");
                    let exactness_details = exactness_details
                        .as_object_mut()
                        .expect("exactness details was not an object");
                    // the word counts are only reported when the attribute match type is
                    // `NoExactMatch`; otherwise the entry is left as written by ExactAttribute
                    if exactness_details.get("matchType").expect("missing 'matchType'")
                        == &serde_json::json!(ExactAttribute::NoExactMatch)
                    {
                        let score = Rank::global_score(
                            [ExactAttribute::NoExactMatch.rank(), details.rank()].iter().copied(),
                        );
                        // tiny detail, but we want the score to be the last displayed field,
                        // so we're removing it here, adding the other fields, then adding the new score
                        exactness_details.remove("score");
                        exactness_details
                            .insert("matchingWords".into(), details.matching_words.into());
                        exactness_details
                            .insert("maxMatchingWords".into(), details.max_matching_words.into());
                        exactness_details.insert("score".into(), score.into());
                    }
                    // do not update the order since this was already done by exactAttribute
                }
                ScoreDetails::Sort(details) => {
                    // a redacted rule exposes neither the field name nor the value
                    let sort = if details.redacted {
                        format!("<hidden-rule-{order}>")
                    } else {
                        format!(
                            "{}:{}",
                            details.field_name,
                            if details.ascending { "asc" } else { "desc" }
                        )
                    };
                    let value =
                        if details.redacted { "<hidden>".into() } else { details.value.clone() };
                    let sort_details = serde_json::json!({
                        "order": order,
                        "value": value,
                    });
                    details_map.insert(sort, sort_details);
                    order += 1;
                }
                ScoreDetails::GeoSort(details) => {
                    let sort = format!(
                        "_geoPoint({}, {}):{}",
                        details.target_point[0],
                        details.target_point[1],
                        if details.ascending { "asc" } else { "desc" }
                    );
                    // a document without a geo value is reported with a null point
                    let point = if let Some(value) = details.value {
                        serde_json::json!({ "lat": value[0], "lng": value[1]})
                    } else {
                        serde_json::Value::Null
                    };
                    let sort_details = serde_json::json!({
                        "order": order,
                        "value": point,
                        "distance": details.distance(),
                    });
                    details_map.insert(sort, sort_details);
                    order += 1;
                }
                ScoreDetails::Vector(s) => {
                    let similarity = s.similarity.as_ref();

                    let details = serde_json::json!({
                        "order": order,
                        "similarity": similarity,
                    });
                    details_map.insert("vectorSort".into(), details);
                    order += 1;
                }
                ScoreDetails::Skipped => {
                    details_map
                        .insert("skipped".to_string(), serde_json::json!({ "order": order }));
                    order += 1;
                }
            }
        }
        details_map
    }
}

/// The strategy to compute scores.
///
/// It makes sense to pass down this strategy to the internals of the search, because
/// some optimizations (today, mainly skipping ranking rules for universes of a single document)
/// are not correct to do when computing the scores.
///
/// This strategy could feasibly be extended to differentiate between the normalized score and the
/// detailed scores, but it is not useful today as the normalized score is *derived from* the
/// detailed scores.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ScoringStrategy {
    /// Don't compute scores. This is the default strategy.
    #[default]
    Skip,
    /// Compute detailed scores.
    Detailed,
}

/// Details of the `words` rule: a count of matching words and its maximum.
///
/// The two counts map one-to-one onto the fields of a [`Rank`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Words {
    pub matching_words: u32,
    pub max_matching_words: u32,
}

impl Words {
    /// Returns the rank whose `rank` / `max_rank` are the matching / maximum word counts.
    pub fn rank(&self) -> Rank {
        let Self { matching_words, max_matching_words } = *self;
        Rank { rank: matching_words, max_rank: max_matching_words }
    }

    /// Inverse of [`Words::rank`].
    pub(crate) fn from_rank(rank: Rank) -> Self {
        let Rank { rank: matching_words, max_rank: max_matching_words } = rank;
        Self { matching_words, max_matching_words }
    }
}

/// Same shape as [`Words`], with slightly different semantics.
///
/// In exactness, a document can have 0 matching words and still get a non-zero score: that
/// happens when no word from the query appears exactly in the document. The conversion to a
/// [`Rank`] is therefore shifted by one.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ExactWords {
    pub matching_words: u32,
    pub max_matching_words: u32,
}

impl ExactWords {
    /// Returns the rank for these counts, each shifted up by one.
    pub fn rank(&self) -> Rank {
        let Self { matching_words, max_matching_words } = *self;
        // Shifting by one maps "0 matching words" onto the last rank (1) rather than rank 0.
        Rank { rank: matching_words + 1, max_rank: max_matching_words + 1 }
    }

    /// Inverse of [`ExactWords::rank`]; subtractions saturate at 0.
    pub(crate) fn from_rank(rank: Rank) -> Self {
        // The last rank (1) stands for 0 exactly-matching words, and the first rank (`max_rank`)
        // stands for `max_rank - 1` exactly-matching words.
        let matching_words = rank.rank.saturating_sub(1);
        let max_matching_words = rank.max_rank.saturating_sub(1);
        Self { matching_words, max_matching_words }
    }
}

/// Details of the `typo` rule: a typo count and its maximum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Typo {
    pub typo_count: u32,
    pub max_typo_count: u32,
}

impl Typo {
    /// Returns the rank for these counts: fewer typos means a higher rank.
    ///
    /// `max_rank` is `max_typo_count + 1`, and `rank` is `max_rank - typo_count`, saturating at 0.
    pub fn rank(&self) -> Rank {
        let max_rank = self.max_typo_count + 1;
        Rank { rank: max_rank.saturating_sub(self.typo_count), max_rank }
    }

    /// Inverse of [`Typo::rank`]; subtractions saturate at 0.
    ///
    /// Derivation:
    ///
    /// ```text
    /// max_rank = max_typo + 1   =>   max_typo = max_rank - 1
    /// rank     = max_typo - typo + 1
    ///          = max_rank - typo
    /// typo     = max_rank - rank
    /// ```
    pub fn from_rank(rank: Rank) -> Typo {
        let Rank { rank, max_rank } = rank;
        Typo {
            typo_count: max_rank.saturating_sub(rank),
            max_typo_count: max_rank.saturating_sub(1),
        }
    }
}

/// An ordinal position within a ranking rule, together with the best position achievable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Rank {
    /// The ordinal rank, such that `max_rank` is the first rank, and 0 is the last rank.
    ///
    /// The higher the better. Documents with a rank of 0 have a score of 0 and are typically never returned
    /// (they don't match the query).
    pub rank: u32,
    /// The maximum possible rank. Documents with this rank have a score of 1.
    ///
    /// The max rank should not be 0.
    pub max_rank: u32,
}

impl Rank {
    /// Returns `rank / max_rank` as a floating point score.
    ///
    /// `max_rank` is not checked: a `max_rank` of 0 yields NaN or infinity.
    pub fn local_score(self) -> f64 {
        f64::from(self.rank) / f64::from(self.max_rank)
    }

    /// Merges all `details` from outermost to innermost and returns the resulting local score.
    ///
    /// An empty iterator yields `1.0`.
    pub fn global_score(details: impl Iterator<Item = Self>) -> f64 {
        details.fold(Rank { rank: 1, max_rank: 1 }, Rank::merge).local_score()
    }

    /// Combines an `outer` rank with an `inner` one that refines it.
    ///
    /// Each outer position is subdivided into `inner.max_rank` positions, so the result is
    /// `(outer.rank - 1) * inner.max_rank + inner.rank` out of `outer.max_rank * inner.max_rank`.
    ///
    /// All operations saturate rather than overflow. When a long chain of rules exceeds
    /// `u32::MAX`, the result loses precision but, as long as both inputs satisfy
    /// `rank <= max_rank`, so does the output, and the score stays within `[0, 1]`.
    pub fn merge(outer: Rank, inner: Rank) -> Rank {
        let rank = outer
            .rank
            .saturating_sub(1)
            .saturating_mul(inner.max_rank)
            .saturating_add(inner.rank);
        let max_rank = outer.max_rank.saturating_mul(inner.max_rank);

        Rank { rank, max_rank }
    }
}

/// How a document matched in the `exactAttribute` part of the exactness rule.
///
/// Serialized in camelCase (e.g. `noExactMatch`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum ExactAttribute {
    ExactMatch,
    MatchesStart,
    NoExactMatch,
}

impl ExactAttribute {
    /// Returns the rank of this match type out of 3: `ExactMatch` is 3, `MatchesStart` is 2 and
    /// `NoExactMatch` is 1.
    pub fn rank(&self) -> Rank {
        const MAX_RANK: u32 = 3;

        let rank = match self {
            ExactAttribute::NoExactMatch => 1,
            ExactAttribute::MatchesStart => 2,
            ExactAttribute::ExactMatch => MAX_RANK,
        };
        Rank { rank, max_rank: MAX_RANK }
    }
}

/// Details of a sort rule for one document.
#[derive(Debug, Clone, PartialEq)]
pub struct Sort {
    /// Name of the sorted field; used in the JSON key as `<field_name>:<asc|desc>`.
    pub field_name: String,
    /// Sort direction: `true` is rendered as `asc`, `false` as `desc`.
    pub ascending: bool,
    /// When set, `ScoreDetails::to_json_map` hides both the field name and the value.
    pub redacted: bool,
    /// Value of the field for this document.
    pub value: serde_json::Value,
}

impl PartialOrd for Sort {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        if self.ascending != other.ascending {
            return None;
        }
        match (&self.value, &other.value) {
            (serde_json::Value::Null, serde_json::Value::Null) => Some(Ordering::Equal),
            (serde_json::Value::Null, _) => Some(Ordering::Less),
            (_, serde_json::Value::Null) => Some(Ordering::Greater),
            // numbers are always before strings
            (serde_json::Value::Number(_), serde_json::Value::String(_)) => Some(Ordering::Greater),
            (serde_json::Value::String(_), serde_json::Value::Number(_)) => Some(Ordering::Less),
            (serde_json::Value::Number(left), serde_json::Value::Number(right)) => {
                // FIXME: unwrap permitted here?
                let order = left.as_f64().unwrap().partial_cmp(&right.as_f64().unwrap())?;
                // 12 < 42, and when ascending, we want to see 12 first, so the smallest.
                // Hence, when ascending, smaller is better
                Some(if self.ascending { order.reverse() } else { order })
            }
            (serde_json::Value::String(left), serde_json::Value::String(right)) => {
                let order = left.cmp(right);
                // Taking e.g. "a" and "z"
                // "a" < "z", and when ascending, we want to see "a" first, so the smallest.
                // Hence, when ascending, smaller is better
                Some(if self.ascending { order.reverse() } else { order })
            }
            _ => None,
        }
    }
}

/// Details of a geo sort rule for one document.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct GeoSort {
    /// Point the documents are sorted against; rendered as `_geoPoint(<[0]>, <[1]>)`.
    // NOTE(review): presumably `[lat, lng]` like `value` — confirm against the caller.
    pub target_point: [f64; 2],
    /// Sort direction: `true` is rendered as `asc`, `false` as `desc`.
    pub ascending: bool,
    /// Geo point of the document, serialized as `lat` (`[0]`) and `lng` (`[1]`);
    /// `None` is serialized as `null`.
    pub value: Option<[f64; 2]>,
}

/// Orders geo sort details so that the *better ranked* document compares as `Greater`.
///
/// Returns `None` when the sort directions differ or the distances are not comparable.
/// A document without a distance is always worse than one with a distance.
impl PartialOrd for GeoSort {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        if self.ascending != other.ascending {
            return None;
        }

        let (ours, theirs) = match (self.distance(), other.distance()) {
            (None, None) => return Some(Ordering::Equal),
            (None, Some(_)) => return Some(Ordering::Less),
            (Some(_), None) => return Some(Ordering::Greater),
            (Some(ours), Some(theirs)) => (ours, theirs),
        };

        let by_distance = ours.partial_cmp(&theirs)?;
        // when ascending, the one with the smallest distance has the best score
        Some(if self.ascending { by_distance.reverse() } else { by_distance })
    }
}

/// Details of a vector sort for one document.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub struct Vector {
    /// Similarity reported as `similarity` under the `vectorSort` key.
    ///
    /// When absent, `ScoreDetails::score_values` uses a score of `0.0`.
    pub similarity: Option<f32>,
}

impl GeoSort {
    /// Returns the distance between the target point and the document's point, as computed by
    /// `distance_between_two_points`, or `None` when the document has no geo value.
    pub fn distance(&self) -> Option<f64> {
        let point = self.value?;
        Some(distance_between_two_points(&self.target_point, &point))
    }
}
Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`use std::cmp::Ordering;`

			`use itertools::Itertools;`
Add score_details 2023-06-06 18:06:02 +02:00			`use serde::Serialize;`

			`use crate::distance_between_two_points;`

			`#[derive(Debug, Clone, PartialEq)]`
			`pub enum ScoreDetails {`
			`Words(Words),`
			`Typo(Typo),`
			`Proximity(Rank),`
			`Fid(Rank),`
			`Position(Rank),`
			`ExactAttribute(ExactAttribute),`
Add missing exactness.matchingWords, exactness.maxMatchingWords 2023-07-04 15:53:38 +02:00			`ExactWords(ExactWords),`
Add score_details 2023-06-06 18:06:02 +02:00			`Sort(Sort),`
Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`Vector(Vector),`
Add score_details 2023-06-06 18:06:02 +02:00			`GeoSort(GeoSort),`
fix the search cutoff and add a test 2024-03-14 17:34:46 +01:00
			`/// Returned when we don't have the time to finish applying all the subsequent ranking-rules`
			`Skipped,`
Add score_details 2023-06-06 18:06:02 +02:00			`}`

Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`#[derive(Clone, Copy)]`
			`pub enum ScoreValue<'a> {`
			`Score(f64),`
			`Sort(&'a Sort),`
			`GeoSort(&'a GeoSort),`
			`}`

			`enum RankOrValue<'a> {`
			`Rank(Rank),`
			`Sort(&'a Sort),`
			`GeoSort(&'a GeoSort),`
			`Score(f64),`
			`}`

Add score_details 2023-06-06 18:06:02 +02:00			`impl ScoreDetails {`
			`pub fn local_score(&self) -> Option<f64> {`
			`self.rank().map(Rank::local_score)`
			`}`

			`pub fn rank(&self) -> Option<Rank> {`
			`match self {`
			`ScoreDetails::Words(details) => Some(details.rank()),`
			`ScoreDetails::Typo(details) => Some(details.rank()),`
			`ScoreDetails::Proximity(details) => Some(*details),`
			`ScoreDetails::Fid(details) => Some(*details),`
			`ScoreDetails::Position(details) => Some(*details),`
			`ScoreDetails::ExactAttribute(details) => Some(details.rank()),`
Add missing exactness.matchingWords, exactness.maxMatchingWords 2023-07-04 15:53:38 +02:00			`ScoreDetails::ExactWords(details) => Some(details.rank()),`
Add score_details 2023-06-06 18:06:02 +02:00			`ScoreDetails::Sort(_) => None,`
			`ScoreDetails::GeoSort(_) => None,`
Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`ScoreDetails::Vector(_) => None,`
fix the search cutoff and add a test 2024-03-14 17:34:46 +01:00			`ScoreDetails::Skipped => Some(Rank { rank: 0, max_rank: 1 }),`
Add score_details 2023-06-06 18:06:02 +02:00			`}`
			`}`

Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`pub fn global_score<'a>(details: impl Iterator<Item = &'a Self> + 'a) -> f64 {`
			`Self::score_values(details)`
			`.find_map(\|x\| {`
			`let ScoreValue::Score(score) = x else {`
			`return None;`
			`};`
			`Some(score)`
			`})`
			`.unwrap_or(1.0f64)`
			`}`

			`pub fn score_values<'a>(`
			`details: impl Iterator<Item = &'a Self> + 'a,`
			`) -> impl Iterator<Item = ScoreValue<'a>> + 'a {`
			`details`
			`.map(ScoreDetails::rank_or_value)`
			`.coalesce(\|left, right\| match (left, right) {`
			`(RankOrValue::Rank(left), RankOrValue::Rank(right)) => {`
			`Ok(RankOrValue::Rank(Rank::merge(left, right)))`
			`}`
			`(left, right) => Err((left, right)),`
			`})`
			`.map(\|rank_or_value\| match rank_or_value {`
			`RankOrValue::Rank(r) => ScoreValue::Score(r.local_score()),`
			`RankOrValue::Sort(s) => ScoreValue::Sort(s),`
			`RankOrValue::GeoSort(g) => ScoreValue::GeoSort(g),`
			`RankOrValue::Score(s) => ScoreValue::Score(s),`
			`})`
			`}`

			`fn rank_or_value(&self) -> RankOrValue<'_> {`
			`match self {`
			`ScoreDetails::Words(w) => RankOrValue::Rank(w.rank()),`
			`ScoreDetails::Typo(t) => RankOrValue::Rank(t.rank()),`
			`ScoreDetails::Proximity(p) => RankOrValue::Rank(*p),`
			`ScoreDetails::Fid(f) => RankOrValue::Rank(*f),`
			`ScoreDetails::Position(p) => RankOrValue::Rank(*p),`
			`ScoreDetails::ExactAttribute(e) => RankOrValue::Rank(e.rank()),`
			`ScoreDetails::ExactWords(e) => RankOrValue::Rank(e.rank()),`
			`ScoreDetails::Sort(sort) => RankOrValue::Sort(sort),`
			`ScoreDetails::GeoSort(geosort) => RankOrValue::GeoSort(geosort),`
Breaking change: remove vector for score details 2024-03-27 15:36:49 +01:00			`ScoreDetails::Vector(vector) => {`
			`RankOrValue::Score(vector.similarity.as_ref().map(\|s\| *s as f64).unwrap_or(0.0f64))`
			`}`
return the order in the score details 2024-03-19 15:45:04 +01:00			`ScoreDetails::Skipped => RankOrValue::Rank(Rank { rank: 0, max_rank: 1 }),`
Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`}`
Add score_details 2023-06-06 18:06:02 +02:00			`}`

			`/// Panics`
			`///`
			`/// - If Position is not preceded by Fid`
			`/// - If Exactness is not preceded by ExactAttribute`
			`pub fn to_json_map<'a>(`
			`details: impl Iterator<Item = &'a Self>,`
			`) -> serde_json::Map<String, serde_json::Value> {`
			`let mut order = 0;`
			`let mut fid_details = None;`
			`let mut details_map = serde_json::Map::default();`
			`for details in details {`
			`match details {`
			`ScoreDetails::Words(words) => {`
			`let words_details = serde_json::json!({`
			`"order": order,`
			`"matchingWords": words.matching_words,`
			`"maxMatchingWords": words.max_matching_words,`
			`"score": words.rank().local_score(),`
			`});`
			`details_map.insert("words".into(), words_details);`
			`order += 1;`
			`}`
			`ScoreDetails::Typo(typo) => {`
			`let typo_details = serde_json::json!({`
			`"order": order,`
			`"typoCount": typo.typo_count,`
			`"maxTypoCount": typo.max_typo_count,`
			`"score": typo.rank().local_score(),`
			`});`
			`details_map.insert("typo".into(), typo_details);`
			`order += 1;`
			`}`
			`ScoreDetails::Proximity(proximity) => {`
			`let proximity_details = serde_json::json!({`
			`"order": order,`
			`"score": proximity.local_score(),`
			`});`
			`details_map.insert("proximity".into(), proximity_details);`
			`order += 1;`
			`}`
			`ScoreDetails::Fid(fid) => {`
			`// copy the rank for future use in Position.`
			`fid_details = Some(*fid);`
			`// For now, fid is a virtual rule always followed by the "position" rule`
			`let fid_details = serde_json::json!({`
			`"order": order,`
Fix score details casing 2023-07-25 15:49:33 +02:00			`"attributeRankingOrderScore": fid.local_score(),`
Add score_details 2023-06-06 18:06:02 +02:00			`});`
			`details_map.insert("attribute".into(), fid_details);`
			`order += 1;`
			`}`
			`ScoreDetails::Position(position) => {`
			`// For now, position is a virtual rule always preceded by the "fid" rule`
			`let attribute_details = details_map`
			`.get_mut("attribute")`
			`.expect("position not preceded by attribute");`
			`let attribute_details = attribute_details`
			`.as_object_mut()`
			`.expect("attribute details was not an object");`
Format let-else ❤️ 🎉 2023-07-03 10:20:28 +02:00			`let Some(fid_details) = fid_details else {`
Add score_details 2023-06-06 18:06:02 +02:00			`unimplemented!("position not preceded by attribute");`
			`};`

			`attribute_details`
Fix score details casing 2023-07-25 15:49:33 +02:00			`.insert("queryWordDistanceScore".into(), position.local_score().into());`
Add score_details 2023-06-06 18:06:02 +02:00			`let score = Rank::global_score([fid_details, *position].iter().copied());`
			`attribute_details.insert("score".into(), score.into());`

			`// do not update the order since this was already done by fid`
			`}`
			`ScoreDetails::ExactAttribute(exact_attribute) => {`
			`let exactness_details = serde_json::json!({`
			`"order": order,`
			`"matchType": exact_attribute,`
			`"score": exact_attribute.rank().local_score(),`
			`});`
			`details_map.insert("exactness".into(), exactness_details);`
			`order += 1;`
			`}`
Add missing exactness.matchingWords, exactness.maxMatchingWords 2023-07-04 15:53:38 +02:00			`ScoreDetails::ExactWords(details) => {`
Add score_details 2023-06-06 18:06:02 +02:00			`// For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule`
			`let exactness_details = details_map`
			`.get_mut("exactness")`
			`.expect("Exactness not preceded by exactAttribute");`
			`let exactness_details = exactness_details`
			`.as_object_mut()`
			`.expect("exactness details was not an object");`
			`if exactness_details.get("matchType").expect("missing 'matchType'")`
			`== &serde_json::json!(ExactAttribute::NoExactMatch)`
			`{`
			`let score = Rank::global_score(`
Add missing exactness.matchingWords, exactness.maxMatchingWords 2023-07-04 15:53:38 +02:00			`[ExactAttribute::NoExactMatch.rank(), details.rank()].iter().copied(),`
Add score_details 2023-06-06 18:06:02 +02:00			`);`
Add missing exactness.matchingWords, exactness.maxMatchingWords 2023-07-04 15:53:38 +02:00			`// tiny detail, but we want the score to be the last displayed field,`
			`// so we're removing it here, adding the other fields, then adding the new score`
			`exactness_details.remove("score");`
			`exactness_details`
			`.insert("matchingWords".into(), details.matching_words.into());`
			`exactness_details`
			`.insert("maxMatchingWords".into(), details.max_matching_words.into());`
			`exactness_details.insert("score".into(), score.into());`
Add score_details 2023-06-06 18:06:02 +02:00			`}`
			`// do not update the order since this was already done by exactAttribute`
			`}`
			`ScoreDetails::Sort(details) => {`
			`let sort = if details.redacted {`
			`format!("<hidden-rule-{order}>")`
			`} else {`
			`format!(`
			`"{}:{}",`
			`details.field_name,`
			`if details.ascending { "asc" } else { "desc" }`
			`)`
			`};`
			`let value =`
			`if details.redacted { "<hidden>".into() } else { details.value.clone() };`
			`let sort_details = serde_json::json!({`
			`"order": order,`
			`"value": value,`
			`});`
			`details_map.insert(sort, sort_details);`
			`order += 1;`
			`}`
			`ScoreDetails::GeoSort(details) => {`
			`let sort = format!(`
			`"_geoPoint({}, {}):{}",`
			`details.target_point[0],`
			`details.target_point[1],`
			`if details.ascending { "asc" } else { "desc" }`
			`);`
			`let point = if let Some(value) = details.value {`
			`serde_json::json!({ "lat": value[0], "lng": value[1]})`
			`} else {`
			`serde_json::Value::Null`
			`};`
			`let sort_details = serde_json::json!({`
			`"order": order,`
			`"value": point,`
			`"distance": details.distance(),`
			`});`
			`details_map.insert(sort, sort_details);`
			`order += 1;`
			`}`
Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`ScoreDetails::Vector(s) => {`
Breaking change: remove vector for score details 2024-03-27 15:36:49 +01:00			`let similarity = s.similarity.as_ref();`
Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00
			`let details = serde_json::json!({`
			`"order": order,`
			`"similarity": similarity,`
			`});`
Breaking change: remove vector for score details 2024-03-27 15:36:49 +01:00			`details_map.insert("vectorSort".into(), details);`
Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`order += 1;`
			`}`
fix the search cutoff and add a test 2024-03-14 17:34:46 +01:00			`ScoreDetails::Skipped => {`
return the order in the score details 2024-03-19 15:45:04 +01:00			`details_map`
			`.insert("skipped".to_string(), serde_json::json!({ "order": order }));`
fix the search cutoff and add a test 2024-03-14 17:34:46 +01:00			`order += 1;`
			`}`
Add score_details 2023-06-06 18:06:02 +02:00			`}`
			`}`
			`details_map`
			`}`
			`}`

			`/// The strategy to compute scores.`
			`///`
			`/// It makes sense to pass down this strategy to the internals of the search, because`
			`/// some optimizations (today, mainly skipping ranking rules for universes of a single document)`
			`/// are not correct to do when computing the scores.`
			`///`
			`/// This strategy could feasibly be extended to differentiate between the normalized score and the`
			`/// detailed scores, but it is not useful today as the normalized score is derived from the`
			`/// detailed scores.`
			`#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]`
			`pub enum ScoringStrategy {`
			`/// Don't compute scores`
			`#[default]`
			`Skip,`
			`/// Compute detailed scores`
			`Detailed,`
			`}`

			`#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]`
			`pub struct Words {`
			`pub matching_words: u32,`
			`pub max_matching_words: u32,`
			`}`

			`impl Words {`
			`pub fn rank(&self) -> Rank {`
			`Rank { rank: self.matching_words, max_rank: self.max_matching_words }`
			`}`

Add missing exactness.matchingWords, exactness.maxMatchingWords 2023-07-04 15:53:38 +02:00			`pub(crate) fn from_rank(rank: Rank) -> Self {`
			`Self { matching_words: rank.rank, max_matching_words: rank.max_rank }`
			`}`
			`}`

			/// Structure that is super similar to [`Words`], but whose semantics is a bit distinct.
			`///`
			`/// In exactness, the number of matching words can actually be 0 with a non-zero score,`
			`/// if no words from the query appear exactly in the document.`
			`#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]`
			`pub struct ExactWords {`
			`pub matching_words: u32,`
			`pub max_matching_words: u32,`
			`}`

			`impl ExactWords {`
			`pub fn rank(&self) -> Rank {`
			`// 0 matching words means last rank (1)`
			`Rank { rank: self.matching_words + 1, max_rank: self.max_matching_words + 1 }`
			`}`

			`pub(crate) fn from_rank(rank: Rank) -> Self {`
			`// last rank (1) means that 0 words from the query appear exactly in the document.`
			`// first rank (max_rank) means that (max_rank - 1) words from the query appear exactly in the document.`
			`Self {`
			`matching_words: rank.rank.saturating_sub(1),`
			`max_matching_words: rank.max_rank.saturating_sub(1),`
			`}`
Add score_details 2023-06-06 18:06:02 +02:00			`}`
			`}`

			`#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]`
			`pub struct Typo {`
			`pub typo_count: u32,`
			`pub max_typo_count: u32,`
			`}`

			`impl Typo {`
			`pub fn rank(&self) -> Rank {`
			`Rank {`
Add missing exactness.matchingWords, exactness.maxMatchingWords 2023-07-04 15:53:38 +02:00			`rank: (self.max_typo_count + 1).saturating_sub(self.typo_count),`
Add score_details 2023-06-06 18:06:02 +02:00			`max_rank: (self.max_typo_count + 1),`
			`}`
			`}`

			`// max_rank = max_typo + 1`
			`// max_typo = max_rank - 1`
			`//`
			`// rank = max_typo - typo + 1`
			`// rank = max_rank - 1 - typo + 1`
			`// rank + typo = max_rank`
			`// typo = max_rank - rank`
			`pub fn from_rank(rank: Rank) -> Typo {`
Add missing exactness.matchingWords, exactness.maxMatchingWords 2023-07-04 15:53:38 +02:00			`Typo {`
			`typo_count: rank.max_rank.saturating_sub(rank.rank),`
			`max_typo_count: rank.max_rank.saturating_sub(1),`
			`}`
Add score_details 2023-06-06 18:06:02 +02:00			`}`
			`}`

			`#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]`
			`pub struct Rank {`
			/// The ordinal rank, such that `max_rank` is the first rank, and 0 is the last rank.
			`///`
			`/// The higher the better. Documents with a rank of 0 have a score of 0 and are typically never returned`
			`/// (they don't match the query).`
			`pub rank: u32,`
			`/// The maximum possible rank. Documents with this rank have a score of 1.`
			`///`
			`/// The max rank should not be 0.`
			`pub max_rank: u32,`
			`}`

			`impl Rank {`
			`pub fn local_score(self) -> f64 {`
			`self.rank as f64 / self.max_rank as f64`
			`}`

			`pub fn global_score(details: impl Iterator<Item = Self>) -> f64 {`
			`let mut rank = Rank { rank: 1, max_rank: 1 };`
			`for inner_rank in details {`
Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`rank = Rank::merge(rank, inner_rank);`
Add score_details 2023-06-06 18:06:02 +02:00			`}`
			`rank.local_score()`
			`}`
Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00
			`pub fn merge(mut outer: Rank, inner: Rank) -> Rank {`
			`outer.rank = outer.rank.saturating_sub(1);`

			`outer.rank *= inner.max_rank;`
			`outer.max_rank *= inner.max_rank;`

			`outer.rank += inner.rank;`

			`outer`
			`}`
Add score_details 2023-06-06 18:06:02 +02:00			`}`

			`#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]`
			`#[serde(rename_all = "camelCase")]`
			`pub enum ExactAttribute {`
			`ExactMatch,`
			`MatchesStart,`
			`NoExactMatch,`
			`}`

			`impl ExactAttribute {`
			`pub fn rank(&self) -> Rank {`
			`let rank = match self {`
			`ExactAttribute::ExactMatch => 3,`
			`ExactAttribute::MatchesStart => 2,`
			`ExactAttribute::NoExactMatch => 1,`
			`};`
			`Rank { rank, max_rank: 3 }`
			`}`
			`}`

			`#[derive(Debug, Clone, PartialEq)]`
			`pub struct Sort {`
			`pub field_name: String,`
			`pub ascending: bool,`
			`pub redacted: bool,`
			`pub value: serde_json::Value,`
			`}`

Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`impl PartialOrd for Sort {`
			`fn partial_cmp(&self, other: &Self) -> Option<Ordering> {`
			`if self.ascending != other.ascending {`
			`return None;`
			`}`
			`match (&self.value, &other.value) {`
			`(serde_json::Value::Null, serde_json::Value::Null) => Some(Ordering::Equal),`
			`(serde_json::Value::Null, _) => Some(Ordering::Less),`
			`(_, serde_json::Value::Null) => Some(Ordering::Greater),`
			`// numbers are always before strings`
			`(serde_json::Value::Number(_), serde_json::Value::String(_)) => Some(Ordering::Greater),`
			`(serde_json::Value::String(_), serde_json::Value::Number(_)) => Some(Ordering::Less),`
			`(serde_json::Value::Number(left), serde_json::Value::Number(right)) => {`
			`// FIXME: unwrap permitted here?`
			`let order = left.as_f64().unwrap().partial_cmp(&right.as_f64().unwrap())?;`
			`// 12 < 42, and when ascending, we want to see 12 first, so the smallest.`
			`// Hence, when ascending, smaller is better`
			`Some(if self.ascending { order.reverse() } else { order })`
			`}`
			`(serde_json::Value::String(left), serde_json::Value::String(right)) => {`
			`let order = left.cmp(right);`
			`// Taking e.g. "a" and "z"`
			`// "a" < "z", and when ascending, we want to see "a" first, so the smallest.`
			`// Hence, when ascending, smaller is better`
			`Some(if self.ascending { order.reverse() } else { order })`
			`}`
			`_ => None,`
			`}`
			`}`
			`}`

			`#[derive(Debug, Clone, Copy, PartialEq)]`
Add score_details 2023-06-06 18:06:02 +02:00			`pub struct GeoSort {`
			`pub target_point: [f64; 2],`
			`pub ascending: bool,`
			`pub value: Option<[f64; 2]>,`
			`}`

Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`impl PartialOrd for GeoSort {`
			`fn partial_cmp(&self, other: &Self) -> Option<Ordering> {`
			`if self.ascending != other.ascending {`
			`return None;`
			`}`
			`Some(match (self.distance(), other.distance()) {`
			`(None, None) => Ordering::Equal,`
			`(None, Some(_)) => Ordering::Less,`
			`(Some(_), None) => Ordering::Greater,`
			`(Some(left), Some(right)) => {`
			`let order = left.partial_cmp(&right)?;`
			`if self.ascending {`
			`// when ascending, the one with the smallest distance has the best score`
			`order.reverse()`
			`} else {`
			`order`
			`}`
			`}`
			`})`
			`}`
			`}`

			`#[derive(Debug, Clone, PartialEq, PartialOrd)]`
			`pub struct Vector {`
Breaking change: remove vector for score details 2024-03-27 15:36:49 +01:00			`pub similarity: Option<f32>,`
Small commit to add hybrid search and autoembedding 2023-11-15 15:46:37 +01:00			`}`

Add score_details 2023-06-06 18:06:02 +02:00			`impl GeoSort {`
			`pub fn distance(&self) -> Option<f64> {`
			`self.value.map(\|value\| distance_between_two_points(&self.target_point, &value))`
			`}`
			`}`