mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Fix all benchmarks and add compile-time checking of the benchmarks in the CI
This commit is contained in:
parent
fe9f380993
commit
176160d32f
5 changed files with 94 additions and 58 deletions
|
@ -1,10 +1,15 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
use std::fs::{create_dir_all, remove_dir_all, File};
|
||||
use std::io::{self, Cursor, Read, Seek};
|
||||
use std::path::Path;
|
||||
|
||||
use criterion::BenchmarkId;
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::update::{IndexDocumentsMethod, Settings, UpdateBuilder, UpdateFormat};
|
||||
use milli::documents::DocumentBatchReader;
|
||||
use milli::update::{IndexDocumentsMethod, Settings, UpdateBuilder};
|
||||
use milli::{FilterCondition, Index};
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
pub struct Conf<'a> {
|
||||
/// where we are going to create our database.mmdb directory
|
||||
|
@ -13,7 +18,7 @@ pub struct Conf<'a> {
|
|||
/// the dataset to be used, it must be an uncompressed csv
|
||||
pub dataset: &'a str,
|
||||
/// The format of the dataset
|
||||
pub dataset_format: UpdateFormat,
|
||||
pub dataset_format: &'a str,
|
||||
pub group_name: &'a str,
|
||||
pub queries: &'a [&'a str],
|
||||
/// here you can change which criterion are used and in which order.
|
||||
|
@ -33,7 +38,7 @@ pub struct Conf<'a> {
|
|||
impl Conf<'_> {
|
||||
pub const BASE: Self = Conf {
|
||||
database_name: "benches.mmdb",
|
||||
dataset_format: UpdateFormat::Csv,
|
||||
dataset_format: "csv",
|
||||
dataset: "",
|
||||
group_name: "",
|
||||
queries: &[],
|
||||
|
@ -87,11 +92,10 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||
if let None = conf.primary_key {
|
||||
builder.enable_autogenerate_docids();
|
||||
}
|
||||
builder.update_format(conf.dataset_format);
|
||||
let documents = documents_from(conf.dataset, conf.dataset_format);
|
||||
|
||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let reader = File::open(conf.dataset)
|
||||
.expect(&format!("could not find the dataset in: {}", conf.dataset));
|
||||
builder.execute(reader, |_, _| ()).unwrap();
|
||||
builder.execute(documents, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index
|
||||
|
@ -128,3 +132,58 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
|||
index.prepare_for_closing().wait();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl Read + Seek> {
|
||||
let reader =
|
||||
File::open(filename).expect(&format!("could not find the dataset in: {}", filename));
|
||||
let documents = match filetype {
|
||||
"csv" => documents_from_csv(reader).unwrap(),
|
||||
"json" => documents_from_json(reader).unwrap(),
|
||||
"jsonl" => documents_from_jsonl(reader).unwrap(),
|
||||
otherwise => panic!("invalid update format {:?}", otherwise),
|
||||
};
|
||||
DocumentBatchReader::from_reader(Cursor::new(documents)).unwrap()
|
||||
}
|
||||
|
||||
fn documents_from_jsonl(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
||||
|
||||
let values = serde_json::Deserializer::from_reader(reader)
|
||||
.into_iter::<serde_json::Map<String, serde_json::Value>>();
|
||||
for document in values {
|
||||
let document = document?;
|
||||
documents.add_documents(document)?;
|
||||
}
|
||||
documents.finish()?;
|
||||
|
||||
Ok(writer.into_inner())
|
||||
}
|
||||
|
||||
fn documents_from_json(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
||||
|
||||
let json: serde_json::Value = serde_json::from_reader(reader)?;
|
||||
documents.add_documents(json)?;
|
||||
documents.finish()?;
|
||||
|
||||
Ok(writer.into_inner())
|
||||
}
|
||||
|
||||
fn documents_from_csv(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
||||
|
||||
let mut records = csv::Reader::from_reader(reader);
|
||||
let iter = records.deserialize::<Map<String, Value>>();
|
||||
|
||||
for doc in iter {
|
||||
let doc = doc?;
|
||||
documents.add_documents(doc)?;
|
||||
}
|
||||
|
||||
documents.finish()?;
|
||||
|
||||
Ok(writer.into_inner())
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue