mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Merge #583
583: Use BufReader to read datasets in benchmarks r=ManyTheFish a=loiclec

## What does this PR do?

Ensure that the datasets used by the benchmarks are read efficiently by using a `BufReader`.

## Why?

Using a `BufReader` is more representative of how `meilisearch` works. It will also make performance comparisons between different branches of `milli` more accurate.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
This commit is contained in:
commit
ce90fc628a
@ -140,9 +140,10 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl Read + Seek> {
|
pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl BufRead + Seek> {
|
||||||
let reader =
|
let reader =
|
||||||
File::open(filename).expect(&format!("could not find the dataset in: {}", filename));
|
File::open(filename).expect(&format!("could not find the dataset in: {}", filename));
|
||||||
|
let reader = BufReader::new(reader);
|
||||||
let documents = match filetype {
|
let documents = match filetype {
|
||||||
"csv" => documents_from_csv(reader).unwrap(),
|
"csv" => documents_from_csv(reader).unwrap(),
|
||||||
"json" => documents_from_json(reader).unwrap(),
|
"json" => documents_from_json(reader).unwrap(),
|
||||||
@ -152,12 +153,11 @@ pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<imp
|
|||||||
DocumentBatchReader::from_reader(Cursor::new(documents)).unwrap()
|
DocumentBatchReader::from_reader(Cursor::new(documents)).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn documents_from_jsonl(reader: impl Read) -> anyhow::Result<Vec<u8>> {
|
fn documents_from_jsonl(mut reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
|
||||||
let mut writer = Cursor::new(Vec::new());
|
let mut writer = Cursor::new(Vec::new());
|
||||||
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
||||||
|
|
||||||
let mut buf = String::new();
|
let mut buf = String::new();
|
||||||
let mut reader = BufReader::new(reader);
|
|
||||||
|
|
||||||
while reader.read_line(&mut buf)? > 0 {
|
while reader.read_line(&mut buf)? > 0 {
|
||||||
documents.extend_from_json(&mut buf.as_bytes())?;
|
documents.extend_from_json(&mut buf.as_bytes())?;
|
||||||
@ -168,7 +168,7 @@ fn documents_from_jsonl(reader: impl Read) -> anyhow::Result<Vec<u8>> {
|
|||||||
Ok(writer.into_inner())
|
Ok(writer.into_inner())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn documents_from_json(reader: impl Read) -> anyhow::Result<Vec<u8>> {
|
fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
|
||||||
let mut writer = Cursor::new(Vec::new());
|
let mut writer = Cursor::new(Vec::new());
|
||||||
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
||||||
|
|
||||||
@ -178,7 +178,7 @@ fn documents_from_json(reader: impl Read) -> anyhow::Result<Vec<u8>> {
|
|||||||
Ok(writer.into_inner())
|
Ok(writer.into_inner())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn documents_from_csv(reader: impl Read) -> anyhow::Result<Vec<u8>> {
|
fn documents_from_csv(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
|
||||||
let mut writer = Cursor::new(Vec::new());
|
let mut writer = Cursor::new(Vec::new());
|
||||||
milli::documents::DocumentBatchBuilder::from_csv(reader, &mut writer)?.finish()?;
|
milli::documents::DocumentBatchBuilder::from_csv(reader, &mut writer)?.finish()?;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user