From 448114cc1c1edd1781a830590f2c888dcdab775d Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 12 Jul 2022 15:22:09 +0200 Subject: [PATCH] Fix the benchmarks with the new indexation API --- benchmarks/benches/indexing.rs | 40 +++++++++++++--------- benchmarks/benches/utils.rs | 4 +-- milli/src/update/index_documents/enrich.rs | 6 ++-- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/benchmarks/benches/indexing.rs b/benchmarks/benches/indexing.rs index 80c7ba0ed..81b21b5ea 100644 --- a/benchmarks/benches/indexing.rs +++ b/benchmarks/benches/indexing.rs @@ -170,12 +170,13 @@ fn reindexing_songs_default(c: &mut Criterion) { let config = IndexerConfig::default(); let indexing_config = IndexDocumentsConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = + let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) .unwrap(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - builder.add_documents(documents).unwrap(); + let (builder, user_error) = builder.add_documents(documents).unwrap(); + user_error.unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); @@ -185,12 +186,13 @@ fn reindexing_songs_default(c: &mut Criterion) { let config = IndexerConfig::default(); let indexing_config = IndexDocumentsConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = + let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) .unwrap(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - builder.add_documents(documents).unwrap(); + let (builder, user_error) = builder.add_documents(documents).unwrap(); + user_error.unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); @@ -460,12 +462,13 @@ fn reindexing_wiki(c: &mut Criterion) { let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; let mut wtxn = index.write_txn().unwrap(); - let mut builder = + let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) .unwrap(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); - builder.add_documents(documents).unwrap(); + let (builder, user_error) = builder.add_documents(documents).unwrap(); + user_error.unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); @@ -476,12 +479,13 @@ fn reindexing_wiki(c: &mut Criterion) { let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; let mut wtxn = index.write_txn().unwrap(); - let mut builder = + let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) .unwrap(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); - builder.add_documents(documents).unwrap(); + let (builder, user_error) = builder.add_documents(documents).unwrap(); + user_error.unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); @@ -680,12 +684,13 @@ fn reindexing_movies_default(c: &mut Criterion) { let config = IndexerConfig::default(); let indexing_config = IndexDocumentsConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = + let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) .unwrap(); let documents = utils::documents_from(datasets_paths::MOVIES, "json"); - builder.add_documents(documents).unwrap(); + let (builder, user_error) = builder.add_documents(documents).unwrap(); + user_error.unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); @@ -695,12 +700,13 @@ fn reindexing_movies_default(c: &mut Criterion) { let config = IndexerConfig::default(); let indexing_config = IndexDocumentsConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = + let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) .unwrap(); let documents = utils::documents_from(datasets_paths::MOVIES, "json"); - builder.add_documents(documents).unwrap(); + let (builder, user_error) = builder.add_documents(documents).unwrap(); + user_error.unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); @@ -1079,12 +1085,13 @@ fn reindexing_geo(c: &mut Criterion) { let config = IndexerConfig::default(); let indexing_config = IndexDocumentsConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = + let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) .unwrap(); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); - builder.add_documents(documents).unwrap(); + let (builder, user_error) = builder.add_documents(documents).unwrap(); + user_error.unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); @@ -1095,12 +1102,13 @@ fn reindexing_geo(c: &mut Criterion) { let config = IndexerConfig::default(); let indexing_config = IndexDocumentsConfig::default(); let mut wtxn = index.write_txn().unwrap(); - let mut builder = + let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) .unwrap(); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); - builder.add_documents(documents).unwrap(); + let (builder, user_error) = builder.add_documents(documents).unwrap(); + user_error.unwrap(); builder.execute().unwrap(); wtxn.commit().unwrap(); diff --git a/benchmarks/benches/utils.rs b/benchmarks/benches/utils.rs index 51178b43b..fba05edbe 100644 --- a/benchmarks/benches/utils.rs +++ b/benchmarks/benches/utils.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] use std::fs::{create_dir_all, remove_dir_all, File}; -use std::io::{self, BufReader, Cursor, Read, Seek}; +use std::io::{self, BufRead, BufReader, Cursor, Read, Seek}; use std::num::ParseFloatError; use std::path::Path; @@ -138,7 +138,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { } } -pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader { +pub fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader { let reader = File::open(filename).expect(&format!("could not find the dataset in: {}", filename)); let reader = BufReader::new(reader); diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs index 56f8fa4c0..51495c598 100644 --- a/milli/src/update/index_documents/enrich.rs +++ b/milli/src/update/index_documents/enrich.rs @@ -30,7 +30,7 @@ pub fn enrich_documents_batch( let mut cursor = reader.into_cursor(); let mut documents_batch_index = cursor.documents_batch_index().clone(); let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?; - let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH]; + let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH]; // The primary key *field id* that has already been set for this index or the one // we will guess by searching for the first key that contains "id" as a substring. @@ -119,7 +119,7 @@ fn fetch_or_generate_document_id( documents_batch_index: &DocumentsBatchIndex, primary_key: PrimaryKey, autogenerate_docids: bool, - uuid_buffer: &mut [u8; uuid::adapter::Hyphenated::LENGTH], + uuid_buffer: &mut [u8; uuid::fmt::Hyphenated::LENGTH], count: u32, ) -> Result> { match primary_key { @@ -134,7 +134,7 @@ fn fetch_or_generate_document_id( } } None if autogenerate_docids => { - let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(uuid_buffer); + let uuid = uuid::Uuid::new_v4().as_hyphenated().encode_lower(uuid_buffer); Ok(Ok(DocumentId::generated(uuid.to_string(), count))) } None => Ok(Err(UserError::MissingDocumentId {