mirror of https://github.com/meilisearch/MeiliSearch, synced 2024-12-22 20:50:04 +01:00
Merge #4257
4257: Change proximity precision settings r=dureuill a=ManyTheFish

- [x] Add proximity_precision value into the analytics
- [x] Change the naming of `attributeScale` and `wordScale` into `byAttribute` and `byWord`
- [x] Remove proximityPrecision from the experimental feature

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
This commit is contained in: commit 7c4ed07617
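For orientation, a minimal sketch (not part of this commit) of what the rename means for the setting values, assuming only the camelCase serialization and the ByWord/ByAttribute variants visible in the hunks below; the enum here is a stand-in, not the crate's actual type:

    // Hypothetical stand-in for the renamed setting: `wordScale` becomes `byWord`
    // and `attributeScale` becomes `byAttribute` on the wire.
    use serde::{Deserialize, Serialize};

    #[derive(Serialize, Deserialize, Debug, PartialEq)]
    #[serde(rename_all = "camelCase")]
    enum ProximityPrecision {
        ByWord,
        ByAttribute,
    }

    fn main() {
        // A settings payload now carries the new value names.
        let v: ProximityPrecision = serde_json::from_str("\"byAttribute\"").unwrap();
        assert_eq!(v, ProximityPrecision::ByAttribute);
        assert_eq!(serde_json::to_string(&ProximityPrecision::ByWord).unwrap(), "\"byWord\"");
    }

With the experimental feature gate removed (first two hunks below), the setting no longer requires enabling a runtime flag first.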
@@ -1354,9 +1354,6 @@ impl IndexScheduler {
                 if matches!(checked_settings.embedders, milli::update::Setting::Set(_)) {
                     self.features().check_vector("Passing `embedders` in settings")?
                 }
-                if checked_settings.proximity_precision.set().is_some() {
-                    self.features.features().check_proximity_precision()?;
-                }
                 task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
                 apply_settings_to_builder(&checked_settings, &mut builder);
@@ -81,19 +81,6 @@ impl RoFeatures {
             .into())
         }
     }
-
-    pub fn check_proximity_precision(&self) -> Result<()> {
-        if self.runtime.proximity_precision {
-            Ok(())
-        } else {
-            Err(FeatureNotEnabledError {
-                disabled_action: "Using `proximityPrecision` index setting",
-                feature: "proximity precision",
-                issue_link: "https://github.com/orgs/meilisearch/discussions/710",
-            }
-            .into())
-        }
-    }
 }

 impl FeatureData {
@@ -7,7 +7,6 @@ pub struct RuntimeTogglableFeatures {
     pub vector_store: bool,
     pub metrics: bool,
     pub export_puffin_reports: bool,
-    pub proximity_precision: bool,
 }

 #[derive(Default, Debug, Clone, Copy)]
@@ -724,23 +724,23 @@ impl From<RankingRuleView> for Criterion {
 #[serde(deny_unknown_fields, rename_all = "camelCase")]
 #[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
 pub enum ProximityPrecisionView {
-    WordScale,
-    AttributeScale,
+    ByWord,
+    ByAttribute,
 }

 impl From<ProximityPrecision> for ProximityPrecisionView {
     fn from(value: ProximityPrecision) -> Self {
         match value {
-            ProximityPrecision::WordScale => ProximityPrecisionView::WordScale,
-            ProximityPrecision::AttributeScale => ProximityPrecisionView::AttributeScale,
+            ProximityPrecision::ByWord => ProximityPrecisionView::ByWord,
+            ProximityPrecision::ByAttribute => ProximityPrecisionView::ByAttribute,
         }
     }
 }
 impl From<ProximityPrecisionView> for ProximityPrecision {
     fn from(value: ProximityPrecisionView) -> Self {
         match value {
-            ProximityPrecisionView::WordScale => ProximityPrecision::WordScale,
-            ProximityPrecisionView::AttributeScale => ProximityPrecision::AttributeScale,
+            ProximityPrecisionView::ByWord => ProximityPrecision::ByWord,
+            ProximityPrecisionView::ByAttribute => ProximityPrecision::ByAttribute,
         }
     }
 }
@@ -48,8 +48,6 @@ pub struct RuntimeTogglableFeatures {
     pub metrics: Option<bool>,
     #[deserr(default)]
     pub export_puffin_reports: Option<bool>,
-    #[deserr(default)]
-    pub proximity_precision: Option<bool>,
 }

 async fn patch_features(
@@ -72,10 +70,6 @@ async fn patch_features(
             .0
             .export_puffin_reports
             .unwrap_or(old_features.export_puffin_reports),
-        proximity_precision: new_features
-            .0
-            .proximity_precision
-            .unwrap_or(old_features.proximity_precision),
     };

     // explicitly destructure for analytics rather than using the `Serialize` implementation, because
@@ -86,7 +80,6 @@ async fn patch_features(
         vector_store,
         metrics,
         export_puffin_reports,
-        proximity_precision,
     } = new_features;

     analytics.publish(
@@ -96,7 +89,6 @@ async fn patch_features(
             "vector_store": vector_store,
             "metrics": metrics,
             "export_puffin_reports": export_puffin_reports,
-            "proximity_precision": proximity_precision,
         }),
         Some(&req),
     );
@@ -453,6 +453,7 @@ make_setting_route!(
         json!({
             "proximity_precision": {
                 "set": precision.is_some(),
+                "value": precision,
             }
         }),
         Some(req),
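A short worked example (an illustration, not code from this commit) of the JSON shape the hunk above builds, assuming a request that sets the precision to byAttribute:

    // Hypothetical reconstruction of the analytics payload shape shown above.
    use serde_json::json;

    fn main() {
        let precision = Some("byAttribute");
        let payload = json!({
            "proximity_precision": {
                "set": precision.is_some(),
                "value": precision,
            }
        });
        // Prints: {"proximity_precision":{"set":true,"value":"byAttribute"}}
        println!("{payload}");
    }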
Binary file not shown.
@@ -1860,8 +1860,7 @@ async fn import_dump_v6_containing_experimental_features() {
       "scoreDetails": false,
       "vectorStore": false,
       "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": false
+      "exportPuffinReports": false
     }
     "###);
@@ -1890,7 +1889,7 @@ async fn import_dump_v6_containing_experimental_features() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": "attributeScale",
+      "proximityPrecision": "byAttribute",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -21,8 +21,7 @@ async fn experimental_features() {
       "scoreDetails": false,
       "vectorStore": false,
       "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": false
+      "exportPuffinReports": false
     }
     "###);
@@ -34,8 +33,7 @@ async fn experimental_features() {
       "scoreDetails": false,
       "vectorStore": true,
       "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": false
+      "exportPuffinReports": false
     }
     "###);
@@ -47,8 +45,7 @@ async fn experimental_features() {
      "scoreDetails": false,
      "vectorStore": true,
      "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": false
+      "exportPuffinReports": false
     }
     "###);
@@ -61,8 +58,7 @@ async fn experimental_features() {
       "scoreDetails": false,
       "vectorStore": true,
       "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": false
+      "exportPuffinReports": false
     }
     "###);
@@ -75,8 +71,7 @@ async fn experimental_features() {
       "scoreDetails": false,
       "vectorStore": true,
       "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": false
+      "exportPuffinReports": false
     }
     "###);
 }
@@ -96,8 +91,7 @@ async fn experimental_feature_metrics() {
       "scoreDetails": false,
       "vectorStore": false,
       "metrics": true,
-      "exportPuffinReports": false,
-      "proximityPrecision": false
+      "exportPuffinReports": false
     }
     "###);
@@ -152,7 +146,7 @@ async fn errors() {
     meili_snap::snapshot!(code, @"400 Bad Request");
     meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
     {
-      "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`, `proximityPrecision`",
+      "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
       "code": "bad_request",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#bad_request"
@@ -16,8 +16,7 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
       "scoreDetails": false,
       "vectorStore": true,
       "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": false
+      "exportPuffinReports": false
     }
     "###);
@@ -27,17 +27,6 @@ static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
 #[actix_rt::test]
 async fn attribute_scale_search() {
     let server = Server::new().await;
-    let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
-    meili_snap::snapshot!(code, @"200 OK");
-    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
-    {
-      "scoreDetails": false,
-      "vectorStore": false,
-      "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": true
-    }
-    "###);
     let index = server.index("test");

     index.add_documents(DOCUMENTS.clone(), None).await;
@@ -45,7 +34,7 @@ async fn attribute_scale_search() {

     let (response, code) = index
         .update_settings(json!({
-            "proximityPrecision": "attributeScale",
+            "proximityPrecision": "byAttribute",
             "rankingRules": ["words", "typo", "proximity"],
         }))
         .await;
@@ -111,17 +100,6 @@ async fn attribute_scale_search() {
 #[actix_rt::test]
 async fn attribute_scale_phrase_search() {
     let server = Server::new().await;
-    let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
-    meili_snap::snapshot!(code, @"200 OK");
-    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
-    {
-      "scoreDetails": false,
-      "vectorStore": false,
-      "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": true
-    }
-    "###);
     let index = server.index("test");

     index.add_documents(DOCUMENTS.clone(), None).await;
@@ -129,7 +107,7 @@ async fn attribute_scale_phrase_search() {

     let (_response, _code) = index
         .update_settings(json!({
-            "proximityPrecision": "attributeScale",
+            "proximityPrecision": "byAttribute",
             "rankingRules": ["words", "typo", "proximity"],
         }))
         .await;
@@ -190,17 +168,6 @@ async fn attribute_scale_phrase_search() {
 #[actix_rt::test]
 async fn word_scale_set_and_reset() {
     let server = Server::new().await;
-    let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
-    meili_snap::snapshot!(code, @"200 OK");
-    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
-    {
-      "scoreDetails": false,
-      "vectorStore": false,
-      "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": true
-    }
-    "###);
     let index = server.index("test");

     index.add_documents(DOCUMENTS.clone(), None).await;
@@ -209,7 +176,7 @@ async fn word_scale_set_and_reset() {
     // Set and reset the setting ensuring the swap between the 2 settings is applied.
     let (_response, _code) = index
         .update_settings(json!({
-            "proximityPrecision": "attributeScale",
+            "proximityPrecision": "byAttribute",
             "rankingRules": ["words", "typo", "proximity"],
         }))
         .await;
@@ -217,7 +184,7 @@ async fn word_scale_set_and_reset() {

     let (_response, _code) = index
         .update_settings(json!({
-            "proximityPrecision": "wordScale",
+            "proximityPrecision": "byWord",
             "rankingRules": ["words", "typo", "proximity"],
         }))
         .await;
@@ -316,17 +283,6 @@ async fn word_scale_set_and_reset() {
 #[actix_rt::test]
 async fn attribute_scale_default_ranking_rules() {
     let server = Server::new().await;
-    let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
-    meili_snap::snapshot!(code, @"200 OK");
-    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
-    {
-      "scoreDetails": false,
-      "vectorStore": false,
-      "metrics": false,
-      "exportPuffinReports": false,
-      "proximityPrecision": true
-    }
-    "###);
     let index = server.index("test");

     index.add_documents(DOCUMENTS.clone(), None).await;
@@ -334,7 +290,7 @@ async fn attribute_scale_default_ranking_rules() {

     let (response, code) = index
         .update_settings(json!({
-            "proximityPrecision": "attributeScale"
+            "proximityPrecision": "byAttribute"
         }))
         .await;
     assert_eq!("202", code.as_str(), "{:?}", response);
@@ -32,6 +32,6 @@ pub fn path_proximity(path: &[Position]) -> u32 {
 #[serde(rename_all = "camelCase")]
 pub enum ProximityPrecision {
     #[default]
-    WordScale,
-    AttributeScale,
+    ByWord,
+    ByAttribute,
 }
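Because the #[default] attribute stays on the first variant through the rename, the default precision is still the word-level one, now called ByWord; a minimal sketch of that assumption, using a stand-in enum rather than the crate's own type:

    // Hypothetical stand-in showing that ByWord remains the derived default.
    #[allow(dead_code)]
    #[derive(Debug, Default, PartialEq)]
    enum ProximityPrecision {
        #[default]
        ByWord,
        ByAttribute,
    }

    fn main() {
        assert_eq!(ProximityPrecision::default(), ProximityPrecision::ByWord);
    }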
@@ -299,9 +299,9 @@ impl<'ctx> SearchContext<'ctx> {
         proximity: u8,
     ) -> Result<Option<RoaringBitmap>> {
         match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
-            ProximityPrecision::AttributeScale => {
+            ProximityPrecision::ByAttribute => {
                 // Force proximity to 0 because:
-                // in AttributeScale, there are only 2 possible distances:
+                // in ByAttribute, there are only 2 possible distances:
                 // 1. words in same attribute: in that the DB contains (0, word1, word2)
                 // 2. words in different attributes: no DB entry for these two words.
                 let proximity = 0;
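The comment in the hunk above says that at attribute precision the pair database only distinguishes "same attribute" (a (0, word1, word2) entry exists) from "different attributes" (no entry), so the requested proximity is collapsed to 0 before the lookup. A hedged sketch of that key normalization, with a made-up helper rather than the crate's own code:

    // Hypothetical helper illustrating the proximity collapse described above:
    // at attribute precision, every stored pair lives under proximity 0, so the
    // requested distance is ignored when building the lookup key.
    fn pair_lookup_key<'a>(
        by_attribute: bool,
        requested_proximity: u8,
        word1: &'a str,
        word2: &'a str,
    ) -> (u8, &'a str, &'a str) {
        let proximity = if by_attribute { 0 } else { requested_proximity };
        (proximity, word1, word2)
    }

    fn main() {
        // With byAttribute, asking for proximity 3 still looks up (0, "hello", "world");
        // with byWord, the requested proximity stays part of the key.
        assert_eq!(pair_lookup_key(true, 3, "hello", "world"), (0, "hello", "world"));
        assert_eq!(pair_lookup_key(false, 3, "hello", "world"), (3, "hello", "world"));
    }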
@@ -345,19 +345,17 @@ impl<'ctx> SearchContext<'ctx> {

                 Ok(docids)
             }
-            ProximityPrecision::WordScale => {
-                DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
-                    self.txn,
-                    (proximity, word1, word2),
-                    &(
-                        proximity,
-                        self.word_interner.get(word1).as_str(),
-                        self.word_interner.get(word2).as_str(),
-                    ),
-                    &mut self.db_cache.word_pair_proximity_docids,
-                    self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
-                )
-            }
+            ProximityPrecision::ByWord => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
+                self.txn,
+                (proximity, word1, word2),
+                &(
+                    proximity,
+                    self.word_interner.get(word1).as_str(),
+                    self.word_interner.get(word2).as_str(),
+                ),
+                &mut self.db_cache.word_pair_proximity_docids,
+                self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
+            ),
         }
     }
@@ -368,10 +366,10 @@ impl<'ctx> SearchContext<'ctx> {
         proximity: u8,
     ) -> Result<Option<u64>> {
         match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
-            ProximityPrecision::AttributeScale => Ok(self
+            ProximityPrecision::ByAttribute => Ok(self
                 .get_db_word_pair_proximity_docids(word1, word2, proximity)?
                 .map(|d| d.len())),
-            ProximityPrecision::WordScale => {
+            ProximityPrecision::ByWord => {
                 DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
                     self.txn,
                     (proximity, word1, word2),
@@ -394,9 +392,9 @@ impl<'ctx> SearchContext<'ctx> {
         mut proximity: u8,
     ) -> Result<Option<RoaringBitmap>> {
         let proximity_precision = self.index.proximity_precision(self.txn)?.unwrap_or_default();
-        if proximity_precision == ProximityPrecision::AttributeScale {
+        if proximity_precision == ProximityPrecision::ByAttribute {
             // Force proximity to 0 because:
-            // in AttributeScale, there are only 2 possible distances:
+            // in ByAttribute, there are only 2 possible distances:
             // 1. words in same attribute: in that the DB contains (0, word1, word2)
             // 2. words in different attributes: no DB entry for these two words.
             proximity = 0;
@@ -408,7 +406,7 @@ impl<'ctx> SearchContext<'ctx> {
             docids.clone()
         } else {
             let prefix_docids = match proximity_precision {
-                ProximityPrecision::AttributeScale => {
+                ProximityPrecision::ByAttribute => {
                     // Compute the distance at the attribute level and store it in the cache.
                     let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
                         fids
@@ -429,7 +427,7 @@ impl<'ctx> SearchContext<'ctx> {
                     }
                     prefix_docids
                 }
-                ProximityPrecision::WordScale => {
+                ProximityPrecision::ByWord => {
                     // compute docids using prefix iter and store the result in the cache.
                     let key = U8StrStrCodec::bytes_encode(&(
                         proximity,
@@ -157,7 +157,7 @@ pub(crate) fn data_from_obkv_documents(
         });
     }

-    if proximity_precision == ProximityPrecision::WordScale {
+    if proximity_precision == ProximityPrecision::ByWord {
         spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
             docid_word_positions_chunks.clone(),
             indexer,