mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
move the benchmarks to another crate so we can download the datasets automatically without adding overhead to the build of milli
This commit is contained in:
parent
3c84075d2d
commit
06c414a753
10 changed files with 154 additions and 55 deletions
29
benchmarks/Cargo.toml
Normal file
29
benchmarks/Cargo.toml
Normal file
|
@ -0,0 +1,29 @@
|
|||
[package]
|
||||
name = "benchmarks"
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
milli = { path = "../milli" }
|
||||
|
||||
[dev-dependencies]
|
||||
heed = "*" # we want to use the version milli uses
|
||||
criterion = "0.3.4"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0"
|
||||
bytes = "1.0"
|
||||
flate2 = "1.0.20"
|
||||
convert_case = "0.4"
|
||||
reqwest = { version = "0.11.3", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[[bench]]
|
||||
name = "songs"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "wiki"
|
||||
harness = false
|
30
benchmarks/README.md
Normal file
30
benchmarks/README.md
Normal file
|
@ -0,0 +1,30 @@
|
|||
Benchmarks
|
||||
==========
|
||||
|
||||
For our benchmark we are using a small subset of the dataset `songs.csv`. It was generated with this command:
|
||||
```
|
||||
xsv sample --seed 42 1000000 songs.csv -o smol-songs.csv
|
||||
```
|
||||
You can download it [here](https://meili-datasets.s3.fr-par.scw.cloud/benchmarks/smol-songs.csv.gz)
|
||||
And the original `songs.csv` dataset is available [here](https://meili-datasets.s3.fr-par.scw.cloud/songs.csv.gz).
|
||||
|
||||
We also use a subset of `wikipedia-articles.csv` that was generated with the following command:
|
||||
```
|
||||
xsv sample --seed 42 500000 wikipedia-articles.csv -o smol-wikipedia-articles.csv
|
||||
```
|
||||
You can download the original [here](https://meili-datasets.s3.fr-par.scw.cloud/wikipedia-articles.csv.gz) and the subset [here](https://meili-datasets.s3.fr-par.scw.cloud/benchmarks/smol-wikipedia-articles.csv.gz).
|
||||
|
||||
-----
|
||||
|
||||
- To run all the benchmarks we recommand using `cargo bench`, this should takes around ~4h
|
||||
- You can also run the benchmarks on the `songs` dataset with `cargo bench --bench songs`, it should takes around 1h
|
||||
- And on the `wiki` dataset with `cargo bench --bench wiki`, it should takes around 3h
|
||||
|
||||
By default the benchmarks will be downloaded and uncompressed automatically in the target directory.
|
||||
If you don't want to download the datasets everytime you updates something on the code you can specify a custom directory with the env variable `MILLI_BENCH_DATASETS_PATH`:
|
||||
```
|
||||
mkdir ~/datasets
|
||||
MILLI_BENCH_DATASETS_PATH=~/datasets cargo bench --bench songs # the two datasets are downloaded
|
||||
touch build.rs
|
||||
MILLI_BENCH_DATASETS_PATH=~/datasets cargo bench --bench songs # the code is compiled again but the datasets are not downloaded
|
||||
```
|
210
benchmarks/benches/songs.rs
Normal file
210
benchmarks/benches/songs.rs
Normal file
|
@ -0,0 +1,210 @@
|
|||
mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
fn base_conf(builder: &mut Settings) {
|
||||
let displayed_fields = [
|
||||
"id", "title", "album", "artist", "genre", "country", "released", "duration",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "album", "artist"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields = [
|
||||
("released-timestamp", "number"),
|
||||
("duration-float", "number"),
|
||||
("genre", "string"),
|
||||
("country", "string"),
|
||||
("artist", "string"),
|
||||
]
|
||||
.iter()
|
||||
.map(|(a, b)| (a.to_string(), b.to_string()))
|
||||
.collect();
|
||||
builder.set_faceted_fields(faceted_fields);
|
||||
}
|
||||
|
||||
const BASE_CONF: Conf = Conf {
|
||||
dataset: datasets_paths::SMOL_SONGS,
|
||||
queries: &[
|
||||
"john ", // 9097
|
||||
"david ", // 4794
|
||||
"charles ", // 1957
|
||||
"david bowie ", // 1200
|
||||
"michael jackson ", // 600
|
||||
"thelonious monk ", // 303
|
||||
"charles mingus ", // 142
|
||||
"marcus miller ", // 60
|
||||
"tamo ", // 13
|
||||
"Notstandskomitee ", // 4
|
||||
],
|
||||
configure: base_conf,
|
||||
..Conf::BASE
|
||||
};
|
||||
|
||||
fn bench_songs(c: &mut criterion::Criterion) {
|
||||
let default_criterion: Vec<String> = milli::default_criteria()
|
||||
.iter()
|
||||
.map(|criteria| criteria.to_string())
|
||||
.collect();
|
||||
let default_criterion = default_criterion.iter().map(|s| s.as_str());
|
||||
let asc_default: Vec<&str> = std::iter::once("asc(released-timestamp)")
|
||||
.chain(default_criterion.clone())
|
||||
.collect();
|
||||
let desc_default: Vec<&str> = std::iter::once("desc(released-timestamp)")
|
||||
.chain(default_criterion.clone())
|
||||
.collect();
|
||||
|
||||
let basic_with_quote: Vec<String> = BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| {
|
||||
s.trim()
|
||||
.split(' ')
|
||||
.map(|s| format!(r#""{}""#, s))
|
||||
.collect::<Vec<String>>()
|
||||
.join(" ")
|
||||
})
|
||||
.collect();
|
||||
let basic_with_quote: &[&str] = &basic_with_quote
|
||||
.iter()
|
||||
.map(|s| s.as_str())
|
||||
.collect::<Vec<&str>>();
|
||||
|
||||
let confs = &[
|
||||
/* first we bench each criterion alone */
|
||||
utils::Conf {
|
||||
group_name: "proximity",
|
||||
queries: &[
|
||||
"black saint sinner lady ",
|
||||
"les dangeureuses 1960 ",
|
||||
"The Disneyland Sing-Along Chorus ",
|
||||
"Under Great Northern Lights ",
|
||||
"7000 Danses Un Jour Dans Notre Vie ",
|
||||
],
|
||||
criterion: Some(&["proximity"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "typo",
|
||||
queries: &[
|
||||
"mongus ",
|
||||
"thelonius monk ",
|
||||
"Disnaylande ",
|
||||
"the white striper ",
|
||||
"indochie ",
|
||||
"indochien ",
|
||||
"klub des loopers ",
|
||||
"fear of the duck ",
|
||||
"michel depech ",
|
||||
"stromal ",
|
||||
"dire straights ",
|
||||
"Arethla Franklin ",
|
||||
],
|
||||
criterion: Some(&["typo"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "words",
|
||||
queries: &[
|
||||
"the black saint and the sinner lady and the good doggo ", // four words to pop
|
||||
"les liaisons dangeureuses 1793 ", // one word to pop
|
||||
"The Disneyland Children's Sing-Alone song ", // two words to pop
|
||||
"seven nation mummy ", // one word to pop
|
||||
"7000 Danses / Le Baiser / je me trompe de mots ", // four words to pop
|
||||
"Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop
|
||||
"whathavenotnsuchforth and a good amount of words to pop to match the first one ", // 13
|
||||
],
|
||||
criterion: Some(&["words"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "asc",
|
||||
criterion: Some(&["asc(released-timestamp)"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "desc",
|
||||
criterion: Some(&["desc(released-timestamp)"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
/* then we bench the asc and desc criterion on top of the default criterion */
|
||||
utils::Conf {
|
||||
group_name: "asc + default",
|
||||
criterion: Some(&asc_default[..]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "desc + default",
|
||||
criterion: Some(&desc_default[..]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
/* we bench the filters with the default request */
|
||||
utils::Conf {
|
||||
group_name: "basic filter: <=",
|
||||
facet_condition: Some("released-timestamp <= 946728000"), // year 2000
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic filter: TO",
|
||||
facet_condition: Some("released-timestamp 946728000 TO 1262347200"), // year 2000 to 2010
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "big filter",
|
||||
facet_condition: Some("released-timestamp != 1262347200 AND (NOT (released-timestamp = 946728000)) AND (duration-float = 1 OR (duration-float 1.1 TO 1.5 AND released-timestamp > 315576000))"),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
/* the we bench some global / normal search with all the default criterion in the default
|
||||
* order */
|
||||
utils::Conf {
|
||||
group_name: "basic placeholder",
|
||||
queries: &[""],
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic without quote",
|
||||
queries: &BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| s.trim()) // we remove the space at the end of each request
|
||||
.collect::<Vec<&str>>(),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic with quote",
|
||||
queries: basic_with_quote,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "prefix search",
|
||||
queries: &[
|
||||
"s", // 500k+ results
|
||||
"a", //
|
||||
"b", //
|
||||
"i", //
|
||||
"x", // only 7k results
|
||||
],
|
||||
..BASE_CONF
|
||||
},
|
||||
];
|
||||
|
||||
utils::run_benches(c, confs);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_songs);
|
||||
criterion_main!(benches);
|
114
benchmarks/benches/utils.rs
Normal file
114
benchmarks/benches/utils.rs
Normal file
|
@ -0,0 +1,114 @@
|
|||
use std::fs::{create_dir_all, remove_dir_all, File};
|
||||
|
||||
use criterion::BenchmarkId;
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::{
|
||||
update::{IndexDocumentsMethod, Settings, UpdateBuilder, UpdateFormat},
|
||||
FacetCondition, Index,
|
||||
};
|
||||
|
||||
pub struct Conf<'a> {
|
||||
/// where we are going to create our database.mmdb directory
|
||||
/// each benchmark will first try to delete it and then recreate it
|
||||
pub database_name: &'a str,
|
||||
/// the dataset to be used, it must be an uncompressed csv
|
||||
pub dataset: &'a str,
|
||||
pub group_name: &'a str,
|
||||
pub queries: &'a [&'a str],
|
||||
/// here you can change which criterion are used and in which order.
|
||||
/// - if you specify something all the base configuration will be thrown out
|
||||
/// - if you don't specify anything (None) the default configuration will be kept
|
||||
pub criterion: Option<&'a [&'a str]>,
|
||||
/// the last chance to configure your database as you want
|
||||
pub configure: fn(&mut Settings),
|
||||
pub facet_condition: Option<&'a str>,
|
||||
/// enable or disable the optional words on the query
|
||||
pub optional_words: bool,
|
||||
/// primary key, if there is None we'll auto-generate docids for every documents
|
||||
pub primary_key: Option<&'a str>,
|
||||
}
|
||||
|
||||
impl Conf<'_> {
|
||||
fn nop(_builder: &mut Settings) {}
|
||||
|
||||
pub const BASE: Self = Conf {
|
||||
database_name: "benches.mmdb",
|
||||
dataset: "",
|
||||
group_name: "",
|
||||
queries: &[],
|
||||
criterion: None,
|
||||
configure: Self::nop,
|
||||
facet_condition: None,
|
||||
optional_words: true,
|
||||
primary_key: None,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn base_setup(conf: &Conf) -> Index {
|
||||
match remove_dir_all(&conf.database_name) {
|
||||
Ok(_) => (),
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => (),
|
||||
Err(e) => panic!("{}", e),
|
||||
}
|
||||
create_dir_all(&conf.database_name).unwrap();
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(10);
|
||||
let index = Index::new(options, conf.database_name).unwrap();
|
||||
|
||||
let update_builder = UpdateBuilder::new(0);
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
||||
|
||||
if let Some(criterion) = conf.criterion {
|
||||
builder.reset_faceted_fields();
|
||||
builder.reset_criteria();
|
||||
builder.reset_stop_words();
|
||||
|
||||
let criterion = criterion.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_criteria(criterion);
|
||||
}
|
||||
|
||||
(conf.configure)(&mut builder);
|
||||
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let update_builder = UpdateBuilder::new(0);
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
||||
builder.update_format(UpdateFormat::Csv);
|
||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let reader = File::open(conf.dataset)
|
||||
.expect(&format!("could not find the dataset in: {}", conf.dataset));
|
||||
builder.execute(reader, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index
|
||||
}
|
||||
|
||||
pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
||||
for conf in confs {
|
||||
let index = base_setup(conf);
|
||||
|
||||
let mut group = c.benchmark_group(&format!("{}: {}", conf.dataset, conf.group_name));
|
||||
|
||||
for &query in conf.queries {
|
||||
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
|
||||
b.iter(|| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = index.search(&rtxn);
|
||||
search.query(query).optional_words(conf.optional_words);
|
||||
if let Some(facet_condition) = conf.facet_condition {
|
||||
let facet_condition =
|
||||
FacetCondition::from_str(&rtxn, &index, facet_condition).unwrap();
|
||||
search.facet_condition(facet_condition);
|
||||
}
|
||||
let _ids = search.execute().unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
}
|
133
benchmarks/benches/wiki.rs
Normal file
133
benchmarks/benches/wiki.rs
Normal file
|
@ -0,0 +1,133 @@
|
|||
mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
fn base_conf(builder: &mut Settings) {
|
||||
let displayed_fields = ["title", "body", "url"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
}
|
||||
|
||||
const BASE_CONF: Conf = Conf {
|
||||
dataset: datasets_paths::SMOL_WIKI_ARTICLES,
|
||||
queries: &[
|
||||
"mingus ", // 46 candidates
|
||||
"miles davis ", // 159
|
||||
"rock and roll ", // 1007
|
||||
"machine ", // 3448
|
||||
"spain ", // 7002
|
||||
"japan ", // 10.593
|
||||
"france ", // 17.616
|
||||
"film ", // 24.959
|
||||
],
|
||||
configure: base_conf,
|
||||
..Conf::BASE
|
||||
};
|
||||
|
||||
fn bench_songs(c: &mut criterion::Criterion) {
|
||||
let basic_with_quote: Vec<String> = BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| {
|
||||
s.trim()
|
||||
.split(' ')
|
||||
.map(|s| format!(r#""{}""#, s))
|
||||
.collect::<Vec<String>>()
|
||||
.join(" ")
|
||||
})
|
||||
.collect();
|
||||
let basic_with_quote: &[&str] = &basic_with_quote
|
||||
.iter()
|
||||
.map(|s| s.as_str())
|
||||
.collect::<Vec<&str>>();
|
||||
|
||||
let confs = &[
|
||||
/* first we bench each criterion alone */
|
||||
utils::Conf {
|
||||
group_name: "proximity",
|
||||
queries: &[
|
||||
"herald sings ",
|
||||
"april paris ",
|
||||
"tea two ",
|
||||
"diesel engine ",
|
||||
],
|
||||
criterion: Some(&["proximity"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "typo",
|
||||
queries: &[
|
||||
"migrosoft ",
|
||||
"linax ",
|
||||
"Disnaylande ",
|
||||
"phytogropher ",
|
||||
"nympalidea ",
|
||||
"aritmetric ",
|
||||
"the fronce ",
|
||||
"sisan ",
|
||||
],
|
||||
criterion: Some(&["typo"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "words",
|
||||
queries: &[
|
||||
"the black saint and the sinner lady and the good doggo ", // four words to pop, 27 results
|
||||
"Kameya Tokujirō mingus monk ", // two words to pop, 55
|
||||
"Ulrich Hensel meilisearch milli ", // two words to pop, 306
|
||||
"Idaho Bellevue pizza ", // one word to pop, 800
|
||||
"Abraham machin ", // one word to pop, 1141
|
||||
],
|
||||
criterion: Some(&["words"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
/* the we bench some global / normal search with all the default criterion in the default
|
||||
* order */
|
||||
utils::Conf {
|
||||
group_name: "basic placeholder",
|
||||
queries: &[""],
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic without quote",
|
||||
queries: &BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| s.trim()) // we remove the space at the end of each request
|
||||
.collect::<Vec<&str>>(),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic with quote",
|
||||
queries: basic_with_quote,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "prefix search",
|
||||
queries: &[
|
||||
"t", // 453k results
|
||||
"c", // 405k
|
||||
"g", // 318k
|
||||
"j", // 227k
|
||||
"q", // 71k
|
||||
"x", // 17k
|
||||
],
|
||||
..BASE_CONF
|
||||
},
|
||||
];
|
||||
|
||||
utils::run_benches(c, confs);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_songs);
|
||||
criterion_main!(benches);
|
80
benchmarks/build.rs
Normal file
80
benchmarks/build.rs
Normal file
|
@ -0,0 +1,80 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::{env, fs};
|
||||
use std::{
|
||||
fs::File,
|
||||
io::{Cursor, Read, Seek, Write},
|
||||
};
|
||||
|
||||
use bytes::Bytes;
|
||||
use convert_case::{Case, Casing};
|
||||
use flate2::read::GzDecoder;
|
||||
use reqwest::IntoUrl;
|
||||
|
||||
const BASE_URL: &str = "https://meili-datasets.s3.fr-par.scw.cloud/benchmarks";
|
||||
|
||||
const DATASET_SONGS: &str = "smol-songs";
|
||||
const DATASET_WIKI: &str = "smol-wiki-articles";
|
||||
|
||||
/// The name of the environment variable used to select the path
|
||||
/// of the directory containing the datasets
|
||||
const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH";
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let out_dir = PathBuf::from(env::var(BASE_DATASETS_PATH_KEY).unwrap_or(env::var("OUT_DIR")?));
|
||||
|
||||
let benches_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?).join("benches");
|
||||
let mut manifest_paths_file = File::create(benches_dir.join("datasets_paths.rs"))?;
|
||||
writeln!(
|
||||
manifest_paths_file,
|
||||
r#"//! This file is generated by the build script.
|
||||
//! Do not modify by hand, use the build.rs file.
|
||||
#![allow(dead_code)]
|
||||
"#
|
||||
)?;
|
||||
writeln!(manifest_paths_file)?;
|
||||
|
||||
for dataset in &[DATASET_SONGS, DATASET_WIKI] {
|
||||
let out_path = out_dir.join(dataset);
|
||||
let out_file = out_path.with_extension("csv");
|
||||
|
||||
writeln!(
|
||||
&mut manifest_paths_file,
|
||||
r#"pub const {}: &str = {:?};"#,
|
||||
dataset.to_case(Case::ScreamingSnake),
|
||||
out_file.display(),
|
||||
)?;
|
||||
|
||||
if out_file.exists() {
|
||||
eprintln!("The dataset {} already exists on the file system and will not be downloaded again", dataset);
|
||||
continue;
|
||||
}
|
||||
let url = format!("{}/{}.csv.gz", BASE_URL, dataset);
|
||||
eprintln!("downloading: {}", url);
|
||||
let bytes = download_dataset(url.clone())?;
|
||||
eprintln!("{} downloaded successfully", url);
|
||||
eprintln!("uncompressing in {}", out_path.display());
|
||||
uncompress_in_file(bytes, &out_file)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn download_dataset<U: IntoUrl>(url: U) -> anyhow::Result<Cursor<Bytes>> {
|
||||
let bytes = reqwest::blocking::Client::builder()
|
||||
.timeout(None)
|
||||
.build()?
|
||||
.get(url)
|
||||
.send()?
|
||||
.bytes()?;
|
||||
Ok(Cursor::new(bytes))
|
||||
}
|
||||
|
||||
fn uncompress_in_file<R: Read + Seek, P: AsRef<Path>>(bytes: R, path: P) -> anyhow::Result<()> {
|
||||
let path = path.as_ref();
|
||||
let mut gz = GzDecoder::new(bytes);
|
||||
let mut dataset = Vec::new();
|
||||
gz.read_to_end(&mut dataset)?;
|
||||
|
||||
fs::write(path, dataset)?;
|
||||
Ok(())
|
||||
}
|
5
benchmarks/src/lib.rs
Normal file
5
benchmarks/src/lib.rs
Normal file
|
@ -0,0 +1,5 @@
|
|||
//! This library is only used to isolate the benchmarks
|
||||
//! from the original milli library.
|
||||
//!
|
||||
//! It does not include interesting functions for milli library
|
||||
//! users only for milli contributors.
|
Loading…
Add table
Add a link
Reference in a new issue