mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-08 12:34:30 +01:00
Make the basic ranking rule boosting work
This commit is contained in:
parent
67dc0268c5
commit
4cfb48fbb6
@ -583,6 +583,8 @@ pub enum RankingRuleView {
|
|||||||
/// Sorted by decreasing number of matched query terms.
|
/// Sorted by decreasing number of matched query terms.
|
||||||
/// Query words at the front of an attribute is considered better than if it was at the back.
|
/// Query words at the front of an attribute is considered better than if it was at the back.
|
||||||
Words,
|
Words,
|
||||||
|
/// Sorted by documents matching the given filter and then documents not matching it.
|
||||||
|
Boost(String),
|
||||||
/// Sorted by increasing number of typos.
|
/// Sorted by increasing number of typos.
|
||||||
Typo,
|
Typo,
|
||||||
/// Sorted by increasing distance between matched query terms.
|
/// Sorted by increasing distance between matched query terms.
|
||||||
@ -648,6 +650,7 @@ impl From<RankingRule> for RankingRuleView {
|
|||||||
fn from(value: RankingRule) -> Self {
|
fn from(value: RankingRule) -> Self {
|
||||||
match value {
|
match value {
|
||||||
RankingRule::Words => RankingRuleView::Words,
|
RankingRule::Words => RankingRuleView::Words,
|
||||||
|
RankingRule::Boost(filter) => RankingRuleView::Boost(filter),
|
||||||
RankingRule::Typo => RankingRuleView::Typo,
|
RankingRule::Typo => RankingRuleView::Typo,
|
||||||
RankingRule::Proximity => RankingRuleView::Proximity,
|
RankingRule::Proximity => RankingRuleView::Proximity,
|
||||||
RankingRule::Attribute => RankingRuleView::Attribute,
|
RankingRule::Attribute => RankingRuleView::Attribute,
|
||||||
@ -662,6 +665,7 @@ impl From<RankingRuleView> for RankingRule {
|
|||||||
fn from(value: RankingRuleView) -> Self {
|
fn from(value: RankingRuleView) -> Self {
|
||||||
match value {
|
match value {
|
||||||
RankingRuleView::Words => RankingRule::Words,
|
RankingRuleView::Words => RankingRule::Words,
|
||||||
|
RankingRuleView::Boost(filter) => RankingRule::Boost(filter),
|
||||||
RankingRuleView::Typo => RankingRule::Typo,
|
RankingRuleView::Typo => RankingRule::Typo,
|
||||||
RankingRuleView::Proximity => RankingRule::Proximity,
|
RankingRuleView::Proximity => RankingRule::Proximity,
|
||||||
RankingRuleView::Attribute => RankingRule::Attribute,
|
RankingRuleView::Attribute => RankingRule::Attribute,
|
||||||
|
144
milli/src/boost.rs
Normal file
144
milli/src/boost.rs
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
//! This module provides the `Boost` type and defines all the errors related to this type.
|
||||||
|
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
use crate::RankingRuleError;
|
||||||
|
|
||||||
|
/// This error type is never supposed to be shown to the end user.
|
||||||
|
/// You must always cast it to a sort error or a criterion error.
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
pub enum BoostError {
|
||||||
|
#[error("Invalid syntax for the boost parameter: expected expression ending by `boost:`, found `{name}`.")]
|
||||||
|
InvalidSyntax { name: String },
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<BoostError> for RankingRuleError {
|
||||||
|
fn from(error: BoostError) -> Self {
|
||||||
|
match error {
|
||||||
|
BoostError::InvalidSyntax { name } => RankingRuleError::InvalidName { name },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
|
||||||
|
pub struct Boost(pub String);
|
||||||
|
|
||||||
|
impl Boost {
|
||||||
|
pub fn filter(&self) -> &str {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FromStr for Boost {
|
||||||
|
type Err = BoostError;
|
||||||
|
|
||||||
|
fn from_str(text: &str) -> Result<Boost, Self::Err> {
|
||||||
|
match text.split_once(':') {
|
||||||
|
Some(("boost", right)) => Ok(Boost(right.to_string())), // TODO check filter validity
|
||||||
|
_ => Err(BoostError::InvalidSyntax { name: text.to_string() }),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use big_s::S;
    use BoostError::*;

    use super::*;

    /// Checks which inputs are accepted as `boost:<filter>` and which are rejected.
    #[test]
    fn parse_boost() {
        let valid_req = [
            ("boost:doggo = 'bernese'", Boost(S("doggo = 'bernese'"))),
            ("boost:price > 10", Boost(S("price > 10"))),
            ("boost:price 10 TO 20", Boost(S("price 10 TO 20"))),
            ("boost:_geoRadius(42, 59, 100)", Boost(S("_geoRadius(42, 59, 100)"))),
            // Only the leading `boost:` is stripped, later colons belong to the filter.
            ("boost:a:b", Boost(S("a:b"))),
            ("boost:boost:a", Boost(S("boost:a"))),
        ];

        for (req, expected) in valid_req {
            let res = req.parse::<Boost>();
            assert!(
                res.is_ok(),
                "Failed to parse `{}`, was expecting `{:?}` but instead got `{:?}`",
                req,
                expected,
                res
            );
            assert_eq!(res.unwrap(), expected);
        }

        let invalid_req = [
            ("", InvalidSyntax { name: S("") }),
            ("boost", InvalidSyntax { name: S("boost") }),
            ("boostprice > 10", InvalidSyntax { name: S("boostprice > 10") }),
            // The prefix is matched exactly: no case folding, no trimming.
            ("Boost:price > 10", InvalidSyntax { name: S("Boost:price > 10") }),
            (" boost:price > 10", InvalidSyntax { name: S(" boost:price > 10") }),
            ("truc:machin", InvalidSyntax { name: S("truc:machin") }),
            ("price:asc", InvalidSyntax { name: S("price:asc") }),
        ];

        for (req, expected_error) in invalid_req {
            let res = req.parse::<Boost>();
            assert!(
                res.is_err(),
                "Should no be able to parse `{}`, was expecting an error but instead got: `{:?}`",
                req,
                res,
            );
            let res = res.unwrap_err();
            assert_eq!(
                res.to_string(),
                expected_error.to_string(),
                "Bad error for input {}: got `{:?}` instead of `{:?}`",
                req,
                res,
                expected_error
            );
        }
    }
}
|
@ -9,6 +9,7 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
|||||||
pub mod documents;
|
pub mod documents;
|
||||||
|
|
||||||
mod asc_desc;
|
mod asc_desc;
|
||||||
|
mod boost;
|
||||||
pub mod distance;
|
pub mod distance;
|
||||||
mod error;
|
mod error;
|
||||||
mod external_documents_ids;
|
mod external_documents_ids;
|
||||||
|
@ -4,10 +4,11 @@ use std::str::FromStr;
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use crate::{AscDesc, Member};
|
use crate::boost::{Boost, BoostError};
|
||||||
|
use crate::{AscDesc, AscDescError, Member};
|
||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum CriterionError {
|
pub enum RankingRuleError {
|
||||||
#[error("`{name}` ranking rule is invalid. Valid ranking rules are words, typo, sort, proximity, attribute, exactness and custom ranking rules.")]
|
#[error("`{name}` ranking rule is invalid. Valid ranking rules are words, typo, sort, proximity, attribute, exactness and custom ranking rules.")]
|
||||||
InvalidName { name: String },
|
InvalidName { name: String },
|
||||||
#[error("`{name}` is a reserved keyword and thus can't be used as a ranking rule")]
|
#[error("`{name}` is a reserved keyword and thus can't be used as a ranking rule")]
|
||||||
@ -25,10 +26,12 @@ pub enum CriterionError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||||
pub enum Criterion {
|
pub enum RankingRule {
|
||||||
/// Sorted by decreasing number of matched query terms.
|
/// Sorted by decreasing number of matched query terms.
|
||||||
/// Query words at the front of an attribute is considered better than if it was at the back.
|
/// Query words at the front of an attribute is considered better than if it was at the back.
|
||||||
Words,
|
Words,
|
||||||
|
/// Sorted by documents matching the given filter and then documents not matching it.
|
||||||
|
Boost(String),
|
||||||
/// Sorted by increasing number of typos.
|
/// Sorted by increasing number of typos.
|
||||||
Typo,
|
Typo,
|
||||||
/// Sorted by increasing distance between matched query terms.
|
/// Sorted by increasing distance between matched query terms.
|
||||||
@ -47,62 +50,76 @@ pub enum Criterion {
|
|||||||
Desc(String),
|
Desc(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Criterion {
|
impl RankingRule {
|
||||||
/// Returns the field name parameter of this criterion.
|
/// Returns the field name parameter of this criterion.
|
||||||
pub fn field_name(&self) -> Option<&str> {
|
pub fn field_name(&self) -> Option<&str> {
|
||||||
match self {
|
match self {
|
||||||
Criterion::Asc(name) | Criterion::Desc(name) => Some(name),
|
RankingRule::Asc(name) | RankingRule::Desc(name) => Some(name),
|
||||||
_otherwise => None,
|
_otherwise => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FromStr for Criterion {
|
impl FromStr for RankingRule {
|
||||||
type Err = CriterionError;
|
type Err = RankingRuleError;
|
||||||
|
|
||||||
fn from_str(text: &str) -> Result<Criterion, Self::Err> {
|
fn from_str(text: &str) -> Result<RankingRule, Self::Err> {
|
||||||
match text {
|
match text {
|
||||||
"words" => Ok(Criterion::Words),
|
"words" => Ok(RankingRule::Words),
|
||||||
"typo" => Ok(Criterion::Typo),
|
"typo" => Ok(RankingRule::Typo),
|
||||||
"proximity" => Ok(Criterion::Proximity),
|
"proximity" => Ok(RankingRule::Proximity),
|
||||||
"attribute" => Ok(Criterion::Attribute),
|
"attribute" => Ok(RankingRule::Attribute),
|
||||||
"sort" => Ok(Criterion::Sort),
|
"sort" => Ok(RankingRule::Sort),
|
||||||
"exactness" => Ok(Criterion::Exactness),
|
"exactness" => Ok(RankingRule::Exactness),
|
||||||
text => match AscDesc::from_str(text)? {
|
text => match (AscDesc::from_str(text), Boost::from_str(text)) {
|
||||||
AscDesc::Asc(Member::Field(field)) => Ok(Criterion::Asc(field)),
|
(Ok(asc_desc), _) => match asc_desc {
|
||||||
AscDesc::Desc(Member::Field(field)) => Ok(Criterion::Desc(field)),
|
AscDesc::Asc(Member::Field(field)) => Ok(RankingRule::Asc(field)),
|
||||||
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => {
|
AscDesc::Desc(Member::Field(field)) => Ok(RankingRule::Desc(field)),
|
||||||
Err(CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() })?
|
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => {
|
||||||
}
|
Err(RankingRuleError::ReservedNameForSort {
|
||||||
|
name: "_geoPoint".to_string(),
|
||||||
|
})?
|
||||||
|
}
|
||||||
|
},
|
||||||
|
(_, Ok(Boost(filter))) => Ok(RankingRule::Boost(filter)),
|
||||||
|
(
|
||||||
|
Err(AscDescError::InvalidSyntax { name: asc_desc_name }),
|
||||||
|
Err(BoostError::InvalidSyntax { name: boost_name }),
|
||||||
|
) => Err(RankingRuleError::InvalidName {
|
||||||
|
// TODO improve the error message quality
|
||||||
|
name: format!("{asc_desc_name} {boost_name}"),
|
||||||
|
}),
|
||||||
|
(Err(asc_desc_error), _) => Err(asc_desc_error.into()),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn default_criteria() -> Vec<Criterion> {
|
pub fn default_criteria() -> Vec<RankingRule> {
|
||||||
vec![
|
vec![
|
||||||
Criterion::Words,
|
RankingRule::Words,
|
||||||
Criterion::Typo,
|
RankingRule::Typo,
|
||||||
Criterion::Proximity,
|
RankingRule::Proximity,
|
||||||
Criterion::Attribute,
|
RankingRule::Attribute,
|
||||||
Criterion::Sort,
|
RankingRule::Sort,
|
||||||
Criterion::Exactness,
|
RankingRule::Exactness,
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for Criterion {
|
impl fmt::Display for RankingRule {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
use Criterion::*;
|
use RankingRule::*;
|
||||||
|
|
||||||
match self {
|
match self {
|
||||||
Words => f.write_str("words"),
|
Words => f.write_str("words"),
|
||||||
|
Boost(filter) => write!(f, "boost:{filter}"),
|
||||||
Typo => f.write_str("typo"),
|
Typo => f.write_str("typo"),
|
||||||
Proximity => f.write_str("proximity"),
|
Proximity => f.write_str("proximity"),
|
||||||
Attribute => f.write_str("attribute"),
|
Attribute => f.write_str("attribute"),
|
||||||
Sort => f.write_str("sort"),
|
Sort => f.write_str("sort"),
|
||||||
Exactness => f.write_str("exactness"),
|
Exactness => f.write_str("exactness"),
|
||||||
Asc(attr) => write!(f, "{}:asc", attr),
|
Asc(attr) => write!(f, "{attr}:asc"),
|
||||||
Desc(attr) => write!(f, "{}:desc", attr),
|
Desc(attr) => write!(f, "{attr}:desc"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -110,29 +127,29 @@ impl fmt::Display for Criterion {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
use CriterionError::*;
|
use RankingRuleError::*;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_criterion() {
|
fn parse_criterion() {
|
||||||
let valid_criteria = [
|
let valid_criteria = [
|
||||||
("words", Criterion::Words),
|
("words", RankingRule::Words),
|
||||||
("typo", Criterion::Typo),
|
("typo", RankingRule::Typo),
|
||||||
("proximity", Criterion::Proximity),
|
("proximity", RankingRule::Proximity),
|
||||||
("attribute", Criterion::Attribute),
|
("attribute", RankingRule::Attribute),
|
||||||
("sort", Criterion::Sort),
|
("sort", RankingRule::Sort),
|
||||||
("exactness", Criterion::Exactness),
|
("exactness", RankingRule::Exactness),
|
||||||
("price:asc", Criterion::Asc(S("price"))),
|
("price:asc", RankingRule::Asc(S("price"))),
|
||||||
("price:desc", Criterion::Desc(S("price"))),
|
("price:desc", RankingRule::Desc(S("price"))),
|
||||||
("price:asc:desc", Criterion::Desc(S("price:asc"))),
|
("price:asc:desc", RankingRule::Desc(S("price:asc"))),
|
||||||
("truc:machin:desc", Criterion::Desc(S("truc:machin"))),
|
("truc:machin:desc", RankingRule::Desc(S("truc:machin"))),
|
||||||
("hello-world!:desc", Criterion::Desc(S("hello-world!"))),
|
("hello-world!:desc", RankingRule::Desc(S("hello-world!"))),
|
||||||
("it's spacy over there:asc", Criterion::Asc(S("it's spacy over there"))),
|
("it's spacy over there:asc", RankingRule::Asc(S("it's spacy over there"))),
|
||||||
];
|
];
|
||||||
|
|
||||||
for (input, expected) in valid_criteria {
|
for (input, expected) in valid_criteria {
|
||||||
let res = input.parse::<Criterion>();
|
let res = input.parse::<RankingRule>();
|
||||||
assert!(
|
assert!(
|
||||||
res.is_ok(),
|
res.is_ok(),
|
||||||
"Failed to parse `{}`, was expecting `{:?}` but instead got `{:?}`",
|
"Failed to parse `{}`, was expecting `{:?}` but instead got `{:?}`",
|
||||||
@ -167,7 +184,7 @@ mod tests {
|
|||||||
];
|
];
|
||||||
|
|
||||||
for (input, expected) in invalid_criteria {
|
for (input, expected) in invalid_criteria {
|
||||||
let res = input.parse::<Criterion>();
|
let res = input.parse::<RankingRule>();
|
||||||
assert!(
|
assert!(
|
||||||
res.is_err(),
|
res.is_err(),
|
||||||
"Should no be able to parse `{}`, was expecting an error but instead got: `{:?}`",
|
"Should no be able to parse `{}`, was expecting an error but instead got: `{:?}`",
|
||||||
|
@ -5,6 +5,7 @@ use crate::distance_between_two_points;
|
|||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub enum ScoreDetails {
|
pub enum ScoreDetails {
|
||||||
Words(Words),
|
Words(Words),
|
||||||
|
Boost(Boost),
|
||||||
Typo(Typo),
|
Typo(Typo),
|
||||||
Proximity(Rank),
|
Proximity(Rank),
|
||||||
Fid(Rank),
|
Fid(Rank),
|
||||||
@ -23,6 +24,7 @@ impl ScoreDetails {
|
|||||||
pub fn rank(&self) -> Option<Rank> {
|
pub fn rank(&self) -> Option<Rank> {
|
||||||
match self {
|
match self {
|
||||||
ScoreDetails::Words(details) => Some(details.rank()),
|
ScoreDetails::Words(details) => Some(details.rank()),
|
||||||
|
ScoreDetails::Boost(_) => None,
|
||||||
ScoreDetails::Typo(details) => Some(details.rank()),
|
ScoreDetails::Typo(details) => Some(details.rank()),
|
||||||
ScoreDetails::Proximity(details) => Some(*details),
|
ScoreDetails::Proximity(details) => Some(*details),
|
||||||
ScoreDetails::Fid(details) => Some(*details),
|
ScoreDetails::Fid(details) => Some(*details),
|
||||||
@ -60,6 +62,14 @@ impl ScoreDetails {
|
|||||||
details_map.insert("words".into(), words_details);
|
details_map.insert("words".into(), words_details);
|
||||||
order += 1;
|
order += 1;
|
||||||
}
|
}
|
||||||
|
ScoreDetails::Boost(Boost { filter, matching }) => {
|
||||||
|
let sort = format!("boost:{}", filter);
|
||||||
|
let sort_details = serde_json::json!({
|
||||||
|
"value": matching,
|
||||||
|
});
|
||||||
|
details_map.insert(sort, sort_details);
|
||||||
|
order += 1;
|
||||||
|
}
|
||||||
ScoreDetails::Typo(typo) => {
|
ScoreDetails::Typo(typo) => {
|
||||||
let typo_details = serde_json::json!({
|
let typo_details = serde_json::json!({
|
||||||
"order": order,
|
"order": order,
|
||||||
@ -221,6 +231,12 @@ impl Words {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Score details attached to a bucket produced by a boost ranking rule.
// Field order matters: the derived `PartialOrd`/`Ord` compare `filter` first, then `matching`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Boost {
    /// The filter expression of the boost rule.
    pub filter: String,
    /// Whether the documents of the bucket match `filter`.
    pub matching: bool,
}
|
||||||
|
|
||||||
/// Structure that is super similar to [`Words`], but whose semantics is a bit distinct.
|
/// Structure that is super similar to [`Words`], but whose semantics is a bit distinct.
|
||||||
///
|
///
|
||||||
/// In exactness, the number of matching words can actually be 0 with a non-zero score,
|
/// In exactness, the number of matching words can actually be 0 with a non-zero score,
|
||||||
|
88
milli/src/search/new/boost.rs
Normal file
88
milli/src/search/new/boost.rs
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use super::logger::SearchLogger;
|
||||||
|
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
|
||||||
|
use crate::score_details::{self, ScoreDetails};
|
||||||
|
use crate::{Filter, Result};
|
||||||
|
|
||||||
|
pub struct Boost<Query> {
|
||||||
|
original_expression: String,
|
||||||
|
original_query: Option<Query>,
|
||||||
|
matching: Option<RankingRuleOutput<Query>>,
|
||||||
|
non_matching: Option<RankingRuleOutput<Query>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Query> Boost<Query> {
|
||||||
|
pub fn new(expression: String) -> Result<Self> {
|
||||||
|
Ok(Self {
|
||||||
|
original_expression: expression,
|
||||||
|
original_query: None,
|
||||||
|
matching: None,
|
||||||
|
non_matching: None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Boost<Query> {
|
||||||
|
fn id(&self) -> String {
|
||||||
|
// TODO improve this
|
||||||
|
let Self { original_expression, .. } = self;
|
||||||
|
format!("boost:{original_expression}")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_iteration(
|
||||||
|
&mut self,
|
||||||
|
ctx: &mut SearchContext<'ctx>,
|
||||||
|
_logger: &mut dyn SearchLogger<Query>,
|
||||||
|
parent_candidates: &RoaringBitmap,
|
||||||
|
parent_query: &Query,
|
||||||
|
) -> Result<()> {
|
||||||
|
let universe_matching = match Filter::from_str(&self.original_expression)? {
|
||||||
|
Some(filter) => filter.evaluate(ctx.txn, ctx.index)?,
|
||||||
|
None => RoaringBitmap::default(),
|
||||||
|
};
|
||||||
|
let matching = parent_candidates & universe_matching;
|
||||||
|
let non_matching = parent_candidates - &matching;
|
||||||
|
|
||||||
|
self.original_query = Some(parent_query.clone());
|
||||||
|
|
||||||
|
self.matching = Some(RankingRuleOutput {
|
||||||
|
query: parent_query.clone(),
|
||||||
|
candidates: matching,
|
||||||
|
score: ScoreDetails::Boost(score_details::Boost {
|
||||||
|
filter: self.original_expression.clone(),
|
||||||
|
matching: true,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
self.non_matching = Some(RankingRuleOutput {
|
||||||
|
query: parent_query.clone(),
|
||||||
|
candidates: non_matching,
|
||||||
|
score: ScoreDetails::Boost(score_details::Boost {
|
||||||
|
filter: self.original_expression.clone(),
|
||||||
|
matching: false,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_bucket(
|
||||||
|
&mut self,
|
||||||
|
_ctx: &mut SearchContext<'ctx>,
|
||||||
|
_logger: &mut dyn SearchLogger<Query>,
|
||||||
|
_universe: &RoaringBitmap,
|
||||||
|
) -> Result<Option<RankingRuleOutput<Query>>> {
|
||||||
|
Ok(self.matching.take().or_else(|| self.non_matching.take()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_iteration(
|
||||||
|
&mut self,
|
||||||
|
_ctx: &mut SearchContext<'ctx>,
|
||||||
|
_logger: &mut dyn SearchLogger<Query>,
|
||||||
|
) {
|
||||||
|
self.original_query = None;
|
||||||
|
self.matching = None;
|
||||||
|
self.non_matching = None;
|
||||||
|
}
|
||||||
|
}
|
@ -14,6 +14,7 @@ mod ranking_rules;
|
|||||||
mod resolve_query_graph;
|
mod resolve_query_graph;
|
||||||
mod small_bitmap;
|
mod small_bitmap;
|
||||||
|
|
||||||
|
mod boost;
|
||||||
mod exact_attribute;
|
mod exact_attribute;
|
||||||
mod sort;
|
mod sort;
|
||||||
|
|
||||||
@ -22,6 +23,7 @@ mod tests;
|
|||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
use boost::Boost;
|
||||||
use bucket_sort::{bucket_sort, BucketSortOutput};
|
use bucket_sort::{bucket_sort, BucketSortOutput};
|
||||||
use charabia::TokenizerBuilder;
|
use charabia::TokenizerBuilder;
|
||||||
use db_cache::DatabaseCache;
|
use db_cache::DatabaseCache;
|
||||||
@ -208,6 +210,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
|||||||
| crate::RankingRule::Attribute
|
| crate::RankingRule::Attribute
|
||||||
| crate::RankingRule::Proximity
|
| crate::RankingRule::Proximity
|
||||||
| crate::RankingRule::Exactness => continue,
|
| crate::RankingRule::Exactness => continue,
|
||||||
|
crate::RankingRule::Boost(filter) => ranking_rules.push(Box::new(Boost::new(filter)?)),
|
||||||
crate::RankingRule::Sort => {
|
crate::RankingRule::Sort => {
|
||||||
if sort {
|
if sort {
|
||||||
continue;
|
continue;
|
||||||
@ -287,6 +290,9 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
|||||||
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
|
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
|
||||||
words = true;
|
words = true;
|
||||||
}
|
}
|
||||||
|
crate::RankingRule::Boost(filter) => {
|
||||||
|
ranking_rules.push(Box::new(Boost::new(filter)?));
|
||||||
|
}
|
||||||
crate::RankingRule::Typo => {
|
crate::RankingRule::Typo => {
|
||||||
if typo {
|
if typo {
|
||||||
continue;
|
continue;
|
||||||
@ -332,6 +338,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
|||||||
exactness = true;
|
exactness = true;
|
||||||
}
|
}
|
||||||
crate::RankingRule::Asc(field_name) => {
|
crate::RankingRule::Asc(field_name) => {
|
||||||
|
// TODO Question: Why would it be invalid to sort price:asc, typo, price:desc?
|
||||||
if sorted_fields.contains(&field_name) {
|
if sorted_fields.contains(&field_name) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user