4257: Change proximity precision settings r=dureuill a=ManyTheFish

- [x] Add proximity_precision value into the analytics
- [x] Change the naming of `attributeScale` and `wordScale` into `byAttribute` and `byWord`
- [x] Remove proximityPrecision from the experimental feature

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2023-12-18 09:07:28 +00:00 committed by GitHub
commit 7c4ed07617
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 44 additions and 122 deletions

View File

@ -1354,9 +1354,6 @@ impl IndexScheduler {
if matches!(checked_settings.embedders, milli::update::Setting::Set(_)) {
self.features().check_vector("Passing `embedders` in settings")?
}
if checked_settings.proximity_precision.set().is_some() {
self.features.features().check_proximity_precision()?;
}
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
apply_settings_to_builder(&checked_settings, &mut builder);

View File

@ -81,19 +81,6 @@ impl RoFeatures {
.into())
}
}
pub fn check_proximity_precision(&self) -> Result<()> {
if self.runtime.proximity_precision {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Using `proximityPrecision` index setting",
feature: "proximity precision",
issue_link: "https://github.com/orgs/meilisearch/discussions/710",
}
.into())
}
}
}
impl FeatureData {

View File

@ -7,7 +7,6 @@ pub struct RuntimeTogglableFeatures {
pub vector_store: bool,
pub metrics: bool,
pub export_puffin_reports: bool,
pub proximity_precision: bool,
}
#[derive(Default, Debug, Clone, Copy)]

View File

@ -724,23 +724,23 @@ impl From<RankingRuleView> for Criterion {
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
pub enum ProximityPrecisionView {
WordScale,
AttributeScale,
ByWord,
ByAttribute,
}
impl From<ProximityPrecision> for ProximityPrecisionView {
fn from(value: ProximityPrecision) -> Self {
match value {
ProximityPrecision::WordScale => ProximityPrecisionView::WordScale,
ProximityPrecision::AttributeScale => ProximityPrecisionView::AttributeScale,
ProximityPrecision::ByWord => ProximityPrecisionView::ByWord,
ProximityPrecision::ByAttribute => ProximityPrecisionView::ByAttribute,
}
}
}
impl From<ProximityPrecisionView> for ProximityPrecision {
fn from(value: ProximityPrecisionView) -> Self {
match value {
ProximityPrecisionView::WordScale => ProximityPrecision::WordScale,
ProximityPrecisionView::AttributeScale => ProximityPrecision::AttributeScale,
ProximityPrecisionView::ByWord => ProximityPrecision::ByWord,
ProximityPrecisionView::ByAttribute => ProximityPrecision::ByAttribute,
}
}
}

View File

@ -48,8 +48,6 @@ pub struct RuntimeTogglableFeatures {
pub metrics: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>,
#[deserr(default)]
pub proximity_precision: Option<bool>,
}
async fn patch_features(
@ -72,10 +70,6 @@ async fn patch_features(
.0
.export_puffin_reports
.unwrap_or(old_features.export_puffin_reports),
proximity_precision: new_features
.0
.proximity_precision
.unwrap_or(old_features.proximity_precision),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
@ -86,7 +80,6 @@ async fn patch_features(
vector_store,
metrics,
export_puffin_reports,
proximity_precision,
} = new_features;
analytics.publish(
@ -96,7 +89,6 @@ async fn patch_features(
"vector_store": vector_store,
"metrics": metrics,
"export_puffin_reports": export_puffin_reports,
"proximity_precision": proximity_precision,
}),
Some(&req),
);

View File

@ -453,6 +453,7 @@ make_setting_route!(
json!({
"proximity_precision": {
"set": precision.is_some(),
"value": precision,
}
}),
Some(req),

View File

@ -1860,8 +1860,7 @@ async fn import_dump_v6_containing_experimental_features() {
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": false
"exportPuffinReports": false
}
"###);
@ -1890,7 +1889,7 @@ async fn import_dump_v6_containing_experimental_features() {
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "attributeScale",
"proximityPrecision": "byAttribute",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {

View File

@ -21,8 +21,7 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": false
"exportPuffinReports": false
}
"###);
@ -34,8 +33,7 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": false
"exportPuffinReports": false
}
"###);
@ -47,8 +45,7 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": false
"exportPuffinReports": false
}
"###);
@ -61,8 +58,7 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": false
"exportPuffinReports": false
}
"###);
@ -75,8 +71,7 @@ async fn experimental_features() {
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": false
"exportPuffinReports": false
}
"###);
}
@ -96,8 +91,7 @@ async fn experimental_feature_metrics() {
"scoreDetails": false,
"vectorStore": false,
"metrics": true,
"exportPuffinReports": false,
"proximityPrecision": false
"exportPuffinReports": false
}
"###);
@ -152,7 +146,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`, `proximityPrecision`",
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -16,8 +16,7 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
"scoreDetails": false,
"vectorStore": true,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": false
"exportPuffinReports": false
}
"###);

View File

@ -27,17 +27,6 @@ static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
#[actix_rt::test]
async fn attribute_scale_search() {
let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await;
@ -45,7 +34,7 @@ async fn attribute_scale_search() {
let (response, code) = index
.update_settings(json!({
"proximityPrecision": "attributeScale",
"proximityPrecision": "byAttribute",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
@ -111,17 +100,6 @@ async fn attribute_scale_search() {
#[actix_rt::test]
async fn attribute_scale_phrase_search() {
let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await;
@ -129,7 +107,7 @@ async fn attribute_scale_phrase_search() {
let (_response, _code) = index
.update_settings(json!({
"proximityPrecision": "attributeScale",
"proximityPrecision": "byAttribute",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
@ -190,17 +168,6 @@ async fn attribute_scale_phrase_search() {
#[actix_rt::test]
async fn word_scale_set_and_reset() {
let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await;
@ -209,7 +176,7 @@ async fn word_scale_set_and_reset() {
// Set and reset the setting ensuring the swap between the 2 settings is applied.
let (_response, _code) = index
.update_settings(json!({
"proximityPrecision": "attributeScale",
"proximityPrecision": "byAttribute",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
@ -217,7 +184,7 @@ async fn word_scale_set_and_reset() {
let (_response, _code) = index
.update_settings(json!({
"proximityPrecision": "wordScale",
"proximityPrecision": "byWord",
"rankingRules": ["words", "typo", "proximity"],
}))
.await;
@ -316,17 +283,6 @@ async fn word_scale_set_and_reset() {
#[actix_rt::test]
async fn attribute_scale_default_ranking_rules() {
let server = Server::new().await;
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"scoreDetails": false,
"vectorStore": false,
"metrics": false,
"exportPuffinReports": false,
"proximityPrecision": true
}
"###);
let index = server.index("test");
index.add_documents(DOCUMENTS.clone(), None).await;
@ -334,7 +290,7 @@ async fn attribute_scale_default_ranking_rules() {
let (response, code) = index
.update_settings(json!({
"proximityPrecision": "attributeScale"
"proximityPrecision": "byAttribute"
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);

View File

@ -32,6 +32,6 @@ pub fn path_proximity(path: &[Position]) -> u32 {
#[serde(rename_all = "camelCase")]
pub enum ProximityPrecision {
#[default]
WordScale,
AttributeScale,
ByWord,
ByAttribute,
}

View File

@ -299,9 +299,9 @@ impl<'ctx> SearchContext<'ctx> {
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
ProximityPrecision::AttributeScale => {
ProximityPrecision::ByAttribute => {
// Force proximity to 0 because:
// in AttributeScale, there are only 2 possible distances:
// in ByAttribute, there are only 2 possible distances:
// 1. words in same attribute: in that the DB contains (0, word1, word2)
// 2. words in different attributes: no DB entry for these two words.
let proximity = 0;
@ -345,19 +345,17 @@ impl<'ctx> SearchContext<'ctx> {
Ok(docids)
}
ProximityPrecision::WordScale => {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
)
}
ProximityPrecision::ByWord => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
),
}
}
@ -368,10 +366,10 @@ impl<'ctx> SearchContext<'ctx> {
proximity: u8,
) -> Result<Option<u64>> {
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
ProximityPrecision::AttributeScale => Ok(self
ProximityPrecision::ByAttribute => Ok(self
.get_db_word_pair_proximity_docids(word1, word2, proximity)?
.map(|d| d.len())),
ProximityPrecision::WordScale => {
ProximityPrecision::ByWord => {
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
self.txn,
(proximity, word1, word2),
@ -394,9 +392,9 @@ impl<'ctx> SearchContext<'ctx> {
mut proximity: u8,
) -> Result<Option<RoaringBitmap>> {
let proximity_precision = self.index.proximity_precision(self.txn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::AttributeScale {
if proximity_precision == ProximityPrecision::ByAttribute {
// Force proximity to 0 because:
// in AttributeScale, there are only 2 possible distances:
// in ByAttribute, there are only 2 possible distances:
// 1. words in same attribute: in that the DB contains (0, word1, word2)
// 2. words in different attributes: no DB entry for these two words.
proximity = 0;
@ -408,7 +406,7 @@ impl<'ctx> SearchContext<'ctx> {
docids.clone()
} else {
let prefix_docids = match proximity_precision {
ProximityPrecision::AttributeScale => {
ProximityPrecision::ByAttribute => {
// Compute the distance at the attribute level and store it in the cache.
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
fids
@ -429,7 +427,7 @@ impl<'ctx> SearchContext<'ctx> {
}
prefix_docids
}
ProximityPrecision::WordScale => {
ProximityPrecision::ByWord => {
// compute docids using prefix iter and store the result in the cache.
let key = U8StrStrCodec::bytes_encode(&(
proximity,

View File

@ -157,7 +157,7 @@ pub(crate) fn data_from_obkv_documents(
});
}
if proximity_precision == ProximityPrecision::WordScale {
if proximity_precision == ProximityPrecision::ByWord {
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks.clone(),
indexer,