Make the changes necessary to use milli 0.31.1

This commit is contained in:
Kerollmops 2022-06-22 17:24:25 +02:00
parent 7feb15df28
commit dad86fc3d6
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
11 changed files with 51 additions and 61 deletions

View File

@ -318,7 +318,7 @@ make_setting_route!(
"Pagination Updated".to_string(),
json!({
"pagination": {
"limited_to": setting.as_ref().and_then(|s| s.limited_to.set()),
"max_total_hits": setting.as_ref().and_then(|s| s.max_total_hits.set()),
},
}),
Some(req),
@ -418,10 +418,10 @@ pub async fn update_all(
.and_then(|s| s.max_values_per_facet.as_ref().set()),
},
"pagination": {
"limited_to": settings.pagination
"max_total_hits": settings.pagination
.as_ref()
.set()
.and_then(|s| s.limited_to.as_ref().set()),
.and_then(|s| s.max_total_hits.as_ref().set()),
},
}),
Some(&req),

View File

@ -61,7 +61,7 @@ async fn import_dump_v2_movie_raw() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -125,7 +125,7 @@ async fn import_dump_v2_movie_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -189,7 +189,7 @@ async fn import_dump_v2_rubygems_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 }})
json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 }})
);
let (tasks, code) = index.list_tasks().await;
@ -253,7 +253,7 @@ async fn import_dump_v3_movie_raw() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -317,7 +317,7 @@ async fn import_dump_v3_movie_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -381,7 +381,7 @@ async fn import_dump_v3_rubygems_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -445,7 +445,7 @@ async fn import_dump_v4_movie_raw() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
json!({ "displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -509,7 +509,7 @@ async fn import_dump_v4_movie_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
@ -573,7 +573,7 @@ async fn import_dump_v4_rubygems_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
json!({ "displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } })
);
let (tasks, code) = index.list_tasks().await;

View File

@ -567,7 +567,7 @@ async fn placeholder_search_is_hard_limited() {
.await;
index
.update_settings(json!({ "pagination": { "limitedTo": 10_000 } }))
.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } }))
.await;
index.wait_task(1).await;
@ -636,7 +636,7 @@ async fn search_is_hard_limited() {
.await;
index
.update_settings(json!({ "pagination": { "limitedTo": 10_000 } }))
.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } }))
.await;
index.wait_task(1).await;

View File

@ -33,7 +33,7 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
map.insert(
"pagination",
json!({
"limitedTo": json!(1000),
"maxTotalHits": json!(1000),
}),
);
map
@ -82,7 +82,7 @@ async fn get_settings() {
assert_eq!(
settings["pagination"],
json!({
"limitedTo": 1000,
"maxTotalHits": 1000,
})
);
}

View File

@ -27,7 +27,7 @@ const DATA_FILE_NAME: &str = "documents.jsonl";
impl Index {
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
// acquire write txn make sure any ongoing write is finished before we start.
let txn = self.env.write_txn()?;
let txn = self.write_txn()?;
let path = path.as_ref().join(format!("indexes/{}", self.uuid));
create_dir_all(&path)?;

View File

@ -4,9 +4,10 @@ use std::marker::PhantomData;
use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;
use walkdir::WalkDir;
use fst::IntoStreamer;
use milli::heed::{EnvOpenOptions, RoTxn};
use milli::heed::{CompactionOption, EnvOpenOptions, RoTxn};
use milli::update::{IndexerConfig, Setting};
use milli::{obkv_to_json, FieldDistribution, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize};
@ -14,8 +15,7 @@ use serde_json::{Map, Value};
use time::OffsetDateTime;
use uuid::Uuid;
use crate::index::search::DEFAULT_PAGINATION_LIMITED_TO;
use crate::EnvSizer;
use crate::index::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use super::error::IndexError;
use super::error::Result;
@ -202,9 +202,9 @@ impl Index {
};
let pagination = PaginationSettings {
limited_to: Setting::Set(
self.pagination_limited_to(txn)?
.unwrap_or(DEFAULT_PAGINATION_LIMITED_TO),
max_total_hits: Setting::Set(
self.pagination_max_total_hits(txn)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
),
};
@ -245,7 +245,7 @@ impl Index {
let fields_ids_map = self.fields_ids_map(&txn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);
let iter = self.all_documents(&txn)?.skip(offset).take(limit);
let mut documents = Vec::new();
@ -302,7 +302,12 @@ impl Index {
}
pub fn size(&self) -> u64 {
self.env.size()
WalkDir::new(self.inner.path())
.into_iter()
.filter_map(|entry| entry.ok())
.filter_map(|entry| entry.metadata().ok())
.filter(|metadata| metadata.is_file())
.fold(0, |acc, m| acc + m.len())
}
pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
@ -310,9 +315,7 @@ impl Index {
create_dir_all(&dst)?;
dst.push("data.mdb");
let _txn = self.write_txn()?;
self.inner
.env
.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?;
self.inner.copy_to_path(dst, CompactionOption::Enabled)?;
Ok(())
}
}

View File

@ -29,7 +29,7 @@ pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
/// The maximimum number of results that the engine
/// will be able to return in one search call.
pub const DEFAULT_PAGINATION_LIMITED_TO: usize = 1000;
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
#[derive(Deserialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
@ -91,14 +91,14 @@ impl Index {
search.query(query);
}
let pagination_limited_to = self
.pagination_limited_to(&rtxn)?
.unwrap_or(DEFAULT_PAGINATION_LIMITED_TO);
let max_total_hits = self
.pagination_max_total_hits(&rtxn)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
// Make sure that a user can't get more documents than the hard limit,
// we align that on the offset too.
let offset = min(query.offset.unwrap_or(0), pagination_limited_to);
let limit = min(query.limit, pagination_limited_to.saturating_sub(offset));
let offset = min(query.offset.unwrap_or(0), max_total_hits);
let limit = min(query.limit, max_total_hits.saturating_sub(offset));
search.offset(offset);
search.limit(limit);

View File

@ -86,7 +86,7 @@ pub struct FacetingSettings {
pub struct PaginationSettings {
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub limited_to: Setting<usize>,
pub max_total_hits: Setting<usize>,
}
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
@ -474,12 +474,12 @@ pub fn apply_settings_to_builder(
}
match settings.pagination {
Setting::Set(ref value) => match value.limited_to {
Setting::Set(val) => builder.set_pagination_limited_to(val),
Setting::Reset => builder.reset_pagination_limited_to(),
Setting::Set(ref value) => match value.max_total_hits {
Setting::Set(val) => builder.set_pagination_max_total_hits(val),
Setting::Reset => builder.reset_pagination_max_total_hits(),
Setting::NotSet => (),
},
Setting::Reset => builder.reset_pagination_limited_to(),
Setting::Reset => builder.reset_pagination_max_total_hits(),
Setting::NotSet => (),
}
}

View File

@ -3,6 +3,7 @@ use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use walkdir::WalkDir;
use milli::heed::types::{SerdeBincode, Str};
use milli::heed::{CompactionOption, Database, Env};
@ -11,7 +12,6 @@ use uuid::Uuid;
use super::error::{IndexResolverError, Result};
use crate::tasks::task::TaskId;
use crate::EnvSizer;
#[derive(Serialize, Deserialize)]
pub struct DumpEntry {
@ -131,7 +131,12 @@ impl HeedMetaStore {
}
fn get_size(&self) -> Result<u64> {
Ok(self.env.size())
Ok(WalkDir::new(self.env.path())
.into_iter()
.filter_map(|entry| entry.ok())
.filter_map(|entry| entry.metadata().ok())
.filter(|metadata| metadata.is_file())
.fold(0, |acc, m| acc + m.len()))
}
pub fn dump(&self, path: PathBuf) -> Result<()> {

View File

@ -20,23 +20,6 @@ pub use milli::heed;
mod compression;
pub mod document_formats;
use walkdir::WalkDir;
pub trait EnvSizer {
fn size(&self) -> u64;
}
impl EnvSizer for milli::heed::Env {
fn size(&self) -> u64 {
WalkDir::new(self.path())
.into_iter()
.filter_map(|entry| entry.ok())
.filter_map(|entry| entry.metadata().ok())
.filter(|metadata| metadata.is_file())
.fold(0, |acc, m| acc + m.len())
}
}
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
/// We consider a database as non empty when it's a non empty directory.

View File

@ -7,6 +7,7 @@ use anyhow::bail;
use fs_extra::dir::{self, CopyOptions};
use log::{info, trace};
use meilisearch_auth::open_auth_store_env;
use milli::heed::CompactionOption;
use tokio::sync::RwLock;
use tokio::time::sleep;
use walkdir::WalkDir;
@ -181,9 +182,7 @@ impl SnapshotJob {
let mut options = milli::heed::EnvOpenOptions::new();
options.map_size(self.index_size);
let index = milli::Index::new(options, entry.path())?;
index
.env
.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?;
index.copy_to_path(dst, CompactionOption::Enabled)?;
}
Ok(())