Add a new songs benchmark to test multi batch indexing

This commit is contained in:
Kerollmops 2022-02-21 16:30:13 +01:00
parent acd9535588
commit ab5247dc64
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
2 changed files with 86 additions and 1 deletions

View file

@ -11,10 +11,23 @@ use reqwest::IntoUrl;
const BASE_URL: &str = "https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets";
const DATASET_SONGS: (&str, &str) = ("smol-songs", "csv");
const DATASET_SONGS_1_2: (&str, &str) = ("smol-songs-1_2", "csv");
const DATASET_SONGS_3_4: (&str, &str) = ("smol-songs-3_4", "csv");
const DATASET_SONGS_4_4: (&str, &str) = ("smol-songs-4_4", "csv");
const DATASET_WIKI: (&str, &str) = ("smol-wiki-articles", "csv");
const DATASET_MOVIES: (&str, &str) = ("movies", "json");
const DATASET_GEO: (&str, &str) = ("smol-all-countries", "jsonl");
const ALL_DATASETS: &[(&str, &str)] = &[
DATASET_SONGS,
DATASET_SONGS_1_2,
DATASET_SONGS_3_4,
DATASET_SONGS_4_4,
DATASET_WIKI,
DATASET_MOVIES,
DATASET_GEO,
];
/// The name of the environment variable used to select the path
/// of the directory containing the datasets
const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH";
@ -33,7 +46,7 @@ fn main() -> anyhow::Result<()> {
)?;
writeln!(manifest_paths_file)?;
for (dataset, extension) in [DATASET_SONGS, DATASET_WIKI, DATASET_MOVIES, DATASET_GEO] {
for (dataset, extension) in ALL_DATASETS {
let out_path = out_dir.join(dataset);
let out_file = out_path.with_extension(extension);