mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Merge #364
364: Fix all the benchmarks r=Kerollmops a=irevoire #324 broke all benchmarks. I fixed everything and noticed that `cargo check --all` was insufficient to check the bench in multiple workspaces, so I also updated the CI to use `cargo check --workspace --all-targets`. Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
ad3befaaf5
2
.github/workflows/rust.yml
vendored
2
.github/workflows/rust.yml
vendored
@ -33,7 +33,7 @@ jobs:
|
|||||||
uses: actions-rs/cargo@v1
|
uses: actions-rs/cargo@v1
|
||||||
with:
|
with:
|
||||||
command: check
|
command: check
|
||||||
args: --all
|
args: --workspace --all-targets
|
||||||
- name: Run cargo test
|
- name: Run cargo test
|
||||||
uses: actions-rs/cargo@v1
|
uses: actions-rs/cargo@v1
|
||||||
with:
|
with:
|
||||||
|
@ -6,6 +6,9 @@ publish = false
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
milli = { path = "../milli" }
|
milli = { path = "../milli" }
|
||||||
|
anyhow = "1.0"
|
||||||
|
serde_json = { version = "1.0.62", features = ["preserve_order"] }
|
||||||
|
csv = "1.1.6"
|
||||||
|
|
||||||
[target.'cfg(target_os = "linux")'.dependencies]
|
[target.'cfg(target_os = "linux")'.dependencies]
|
||||||
jemallocator = "0.3.2"
|
jemallocator = "0.3.2"
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
mod datasets_paths;
|
mod datasets_paths;
|
||||||
|
mod utils;
|
||||||
|
|
||||||
use std::fs::{create_dir_all, remove_dir_all, File};
|
use std::fs::{create_dir_all, remove_dir_all};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use criterion::{criterion_group, criterion_main, Criterion};
|
use criterion::{criterion_group, criterion_main, Criterion};
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
|
use milli::update::UpdateBuilder;
|
||||||
use milli::Index;
|
use milli::Index;
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
@ -67,15 +68,10 @@ fn indexing_songs_default(c: &mut Criterion) {
|
|||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new(0);
|
let update_builder = UpdateBuilder::new(0);
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
let builder = update_builder.index_documents(&mut wtxn, &index);
|
||||||
|
|
||||||
builder.update_format(UpdateFormat::Csv);
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
builder.execute(documents, |_, _| ()).unwrap();
|
||||||
let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!(
|
|
||||||
"could not find the dataset in: {}",
|
|
||||||
datasets_paths::SMOL_SONGS
|
|
||||||
));
|
|
||||||
builder.execute(reader, |_, _| ()).unwrap();
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
@ -118,15 +114,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
|||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new(0);
|
let update_builder = UpdateBuilder::new(0);
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
let builder = update_builder.index_documents(&mut wtxn, &index);
|
||||||
|
|
||||||
builder.update_format(UpdateFormat::Csv);
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
builder.execute(documents, |_, _| ()).unwrap();
|
||||||
let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!(
|
|
||||||
"could not find the dataset in: {}",
|
|
||||||
datasets_paths::SMOL_SONGS
|
|
||||||
));
|
|
||||||
builder.execute(reader, |_, _| ()).unwrap();
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
@ -165,15 +156,10 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
|||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new(0);
|
let update_builder = UpdateBuilder::new(0);
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
let builder = update_builder.index_documents(&mut wtxn, &index);
|
||||||
|
|
||||||
builder.update_format(UpdateFormat::Csv);
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
builder.execute(documents, |_, _| ()).unwrap();
|
||||||
let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!(
|
|
||||||
"could not find the dataset in: {}",
|
|
||||||
datasets_paths::SMOL_SONGS
|
|
||||||
));
|
|
||||||
builder.execute(reader, |_, _| ()).unwrap();
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
@ -211,15 +197,10 @@ fn indexing_wiki(c: &mut Criterion) {
|
|||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new(0);
|
let update_builder = UpdateBuilder::new(0);
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
let builder = update_builder.index_documents(&mut wtxn, &index);
|
||||||
|
|
||||||
builder.update_format(UpdateFormat::Csv);
|
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
builder.execute(documents, |_, _| ()).unwrap();
|
||||||
let reader = File::open(datasets_paths::SMOL_WIKI_ARTICLES).expect(&format!(
|
|
||||||
"could not find the dataset in: {}",
|
|
||||||
datasets_paths::SMOL_SONGS
|
|
||||||
));
|
|
||||||
builder.execute(reader, |_, _| ()).unwrap();
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
@ -262,13 +243,10 @@ fn indexing_movies_default(c: &mut Criterion) {
|
|||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new(0);
|
let update_builder = UpdateBuilder::new(0);
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
let builder = update_builder.index_documents(&mut wtxn, &index);
|
||||||
|
|
||||||
builder.update_format(UpdateFormat::Json);
|
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
builder.execute(documents, |_, _| ()).unwrap();
|
||||||
let reader = File::open(datasets_paths::MOVIES)
|
|
||||||
.expect(&format!("could not find the dataset in: {}", datasets_paths::MOVIES));
|
|
||||||
builder.execute(reader, |_, _| ()).unwrap();
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
@ -316,15 +294,11 @@ fn indexing_geo(c: &mut Criterion) {
|
|||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new(0);
|
let update_builder = UpdateBuilder::new(0);
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
let builder = update_builder.index_documents(&mut wtxn, &index);
|
||||||
|
|
||||||
|
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||||
|
builder.execute(documents, |_, _| ()).unwrap();
|
||||||
|
|
||||||
builder.update_format(UpdateFormat::JsonStream);
|
|
||||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
|
||||||
let reader = File::open(datasets_paths::SMOL_ALL_COUNTRIES).expect(&format!(
|
|
||||||
"could not find the dataset in: {}",
|
|
||||||
datasets_paths::SMOL_ALL_COUNTRIES
|
|
||||||
));
|
|
||||||
builder.execute(reader, |_, _| ()).unwrap();
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
|
@ -2,7 +2,7 @@ mod datasets_paths;
|
|||||||
mod utils;
|
mod utils;
|
||||||
|
|
||||||
use criterion::{criterion_group, criterion_main};
|
use criterion::{criterion_group, criterion_main};
|
||||||
use milli::update::{Settings, UpdateFormat};
|
use milli::update::Settings;
|
||||||
use utils::Conf;
|
use utils::Conf;
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
@ -33,7 +33,7 @@ fn base_conf(builder: &mut Settings) {
|
|||||||
#[rustfmt::skip]
|
#[rustfmt::skip]
|
||||||
const BASE_CONF: Conf = Conf {
|
const BASE_CONF: Conf = Conf {
|
||||||
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
|
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
|
||||||
dataset_format: UpdateFormat::JsonStream,
|
dataset_format: "jsonl",
|
||||||
queries: &[
|
queries: &[
|
||||||
"",
|
"",
|
||||||
],
|
],
|
||||||
|
@ -1,10 +1,15 @@
|
|||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
use std::fs::{create_dir_all, remove_dir_all, File};
|
use std::fs::{create_dir_all, remove_dir_all, File};
|
||||||
|
use std::io::{self, Cursor, Read, Seek};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use criterion::BenchmarkId;
|
use criterion::BenchmarkId;
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::update::{IndexDocumentsMethod, Settings, UpdateBuilder, UpdateFormat};
|
use milli::documents::DocumentBatchReader;
|
||||||
|
use milli::update::{IndexDocumentsMethod, Settings, UpdateBuilder};
|
||||||
use milli::{FilterCondition, Index};
|
use milli::{FilterCondition, Index};
|
||||||
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
pub struct Conf<'a> {
|
pub struct Conf<'a> {
|
||||||
/// where we are going to create our database.mmdb directory
|
/// where we are going to create our database.mmdb directory
|
||||||
@ -13,7 +18,7 @@ pub struct Conf<'a> {
|
|||||||
/// the dataset to be used, it must be an uncompressed csv
|
/// the dataset to be used, it must be an uncompressed csv
|
||||||
pub dataset: &'a str,
|
pub dataset: &'a str,
|
||||||
/// The format of the dataset
|
/// The format of the dataset
|
||||||
pub dataset_format: UpdateFormat,
|
pub dataset_format: &'a str,
|
||||||
pub group_name: &'a str,
|
pub group_name: &'a str,
|
||||||
pub queries: &'a [&'a str],
|
pub queries: &'a [&'a str],
|
||||||
/// here you can change which criterion are used and in which order.
|
/// here you can change which criterion are used and in which order.
|
||||||
@ -33,7 +38,7 @@ pub struct Conf<'a> {
|
|||||||
impl Conf<'_> {
|
impl Conf<'_> {
|
||||||
pub const BASE: Self = Conf {
|
pub const BASE: Self = Conf {
|
||||||
database_name: "benches.mmdb",
|
database_name: "benches.mmdb",
|
||||||
dataset_format: UpdateFormat::Csv,
|
dataset_format: "csv",
|
||||||
dataset: "",
|
dataset: "",
|
||||||
group_name: "",
|
group_name: "",
|
||||||
queries: &[],
|
queries: &[],
|
||||||
@ -87,11 +92,10 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||||||
if let None = conf.primary_key {
|
if let None = conf.primary_key {
|
||||||
builder.enable_autogenerate_docids();
|
builder.enable_autogenerate_docids();
|
||||||
}
|
}
|
||||||
builder.update_format(conf.dataset_format);
|
let documents = documents_from(conf.dataset, conf.dataset_format);
|
||||||
|
|
||||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
||||||
let reader = File::open(conf.dataset)
|
builder.execute(documents, |_, _| ()).unwrap();
|
||||||
.expect(&format!("could not find the dataset in: {}", conf.dataset));
|
|
||||||
builder.execute(reader, |_, _| ()).unwrap();
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index
|
index
|
||||||
@ -128,3 +132,58 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
|||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl Read + Seek> {
|
||||||
|
let reader =
|
||||||
|
File::open(filename).expect(&format!("could not find the dataset in: {}", filename));
|
||||||
|
let documents = match filetype {
|
||||||
|
"csv" => documents_from_csv(reader).unwrap(),
|
||||||
|
"json" => documents_from_json(reader).unwrap(),
|
||||||
|
"jsonl" => documents_from_jsonl(reader).unwrap(),
|
||||||
|
otherwise => panic!("invalid update format {:?}", otherwise),
|
||||||
|
};
|
||||||
|
DocumentBatchReader::from_reader(Cursor::new(documents)).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn documents_from_jsonl(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
|
||||||
|
let mut writer = Cursor::new(Vec::new());
|
||||||
|
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
||||||
|
|
||||||
|
let values = serde_json::Deserializer::from_reader(reader)
|
||||||
|
.into_iter::<serde_json::Map<String, serde_json::Value>>();
|
||||||
|
for document in values {
|
||||||
|
let document = document?;
|
||||||
|
documents.add_documents(document)?;
|
||||||
|
}
|
||||||
|
documents.finish()?;
|
||||||
|
|
||||||
|
Ok(writer.into_inner())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn documents_from_json(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
|
||||||
|
let mut writer = Cursor::new(Vec::new());
|
||||||
|
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
||||||
|
|
||||||
|
let json: serde_json::Value = serde_json::from_reader(reader)?;
|
||||||
|
documents.add_documents(json)?;
|
||||||
|
documents.finish()?;
|
||||||
|
|
||||||
|
Ok(writer.into_inner())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn documents_from_csv(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
|
||||||
|
let mut writer = Cursor::new(Vec::new());
|
||||||
|
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
||||||
|
|
||||||
|
let mut records = csv::Reader::from_reader(reader);
|
||||||
|
let iter = records.deserialize::<Map<String, Value>>();
|
||||||
|
|
||||||
|
for doc in iter {
|
||||||
|
let doc = doc?;
|
||||||
|
documents.add_documents(doc)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
documents.finish()?;
|
||||||
|
|
||||||
|
Ok(writer.into_inner())
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user