Fix the benchmarks with the new indexation API

This commit is contained in:
Kerollmops 2022-07-12 15:22:09 +02:00
parent 25e768f31c
commit 448114cc1c
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 29 additions and 21 deletions

View File

@ -170,12 +170,13 @@ fn reindexing_songs_default(c: &mut Criterion) {
let config = IndexerConfig::default(); let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default(); let indexing_config = IndexDocumentsConfig::default();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = let builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
.unwrap(); .unwrap();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
builder.add_documents(documents).unwrap(); let (builder, user_error) = builder.add_documents(documents).unwrap();
user_error.unwrap();
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -185,12 +186,13 @@ fn reindexing_songs_default(c: &mut Criterion) {
let config = IndexerConfig::default(); let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default(); let indexing_config = IndexDocumentsConfig::default();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = let builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
.unwrap(); .unwrap();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
builder.add_documents(documents).unwrap(); let (builder, user_error) = builder.add_documents(documents).unwrap();
user_error.unwrap();
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -460,12 +462,13 @@ fn reindexing_wiki(c: &mut Criterion) {
let indexing_config = let indexing_config =
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = let builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
.unwrap(); .unwrap();
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
builder.add_documents(documents).unwrap(); let (builder, user_error) = builder.add_documents(documents).unwrap();
user_error.unwrap();
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -476,12 +479,13 @@ fn reindexing_wiki(c: &mut Criterion) {
let indexing_config = let indexing_config =
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = let builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
.unwrap(); .unwrap();
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
builder.add_documents(documents).unwrap(); let (builder, user_error) = builder.add_documents(documents).unwrap();
user_error.unwrap();
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -680,12 +684,13 @@ fn reindexing_movies_default(c: &mut Criterion) {
let config = IndexerConfig::default(); let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default(); let indexing_config = IndexDocumentsConfig::default();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = let builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
.unwrap(); .unwrap();
let documents = utils::documents_from(datasets_paths::MOVIES, "json"); let documents = utils::documents_from(datasets_paths::MOVIES, "json");
builder.add_documents(documents).unwrap(); let (builder, user_error) = builder.add_documents(documents).unwrap();
user_error.unwrap();
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -695,12 +700,13 @@ fn reindexing_movies_default(c: &mut Criterion) {
let config = IndexerConfig::default(); let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default(); let indexing_config = IndexDocumentsConfig::default();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = let builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
.unwrap(); .unwrap();
let documents = utils::documents_from(datasets_paths::MOVIES, "json"); let documents = utils::documents_from(datasets_paths::MOVIES, "json");
builder.add_documents(documents).unwrap(); let (builder, user_error) = builder.add_documents(documents).unwrap();
user_error.unwrap();
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -1079,12 +1085,13 @@ fn reindexing_geo(c: &mut Criterion) {
let config = IndexerConfig::default(); let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default(); let indexing_config = IndexDocumentsConfig::default();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = let builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
.unwrap(); .unwrap();
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
builder.add_documents(documents).unwrap(); let (builder, user_error) = builder.add_documents(documents).unwrap();
user_error.unwrap();
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -1095,12 +1102,13 @@ fn reindexing_geo(c: &mut Criterion) {
let config = IndexerConfig::default(); let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default(); let indexing_config = IndexDocumentsConfig::default();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = let builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
.unwrap(); .unwrap();
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
builder.add_documents(documents).unwrap(); let (builder, user_error) = builder.add_documents(documents).unwrap();
user_error.unwrap();
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();

View File

@ -1,7 +1,7 @@
#![allow(dead_code)] #![allow(dead_code)]
use std::fs::{create_dir_all, remove_dir_all, File}; use std::fs::{create_dir_all, remove_dir_all, File};
use std::io::{self, BufReader, Cursor, Read, Seek}; use std::io::{self, BufRead, BufReader, Cursor, Read, Seek};
use std::num::ParseFloatError; use std::num::ParseFloatError;
use std::path::Path; use std::path::Path;
@ -138,7 +138,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
} }
} }
pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl BufRead + Seek> { pub fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> {
let reader = let reader =
File::open(filename).expect(&format!("could not find the dataset in: {}", filename)); File::open(filename).expect(&format!("could not find the dataset in: {}", filename));
let reader = BufReader::new(reader); let reader = BufReader::new(reader);

View File

@ -30,7 +30,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
let mut cursor = reader.into_cursor(); let mut cursor = reader.into_cursor();
let mut documents_batch_index = cursor.documents_batch_index().clone(); let mut documents_batch_index = cursor.documents_batch_index().clone();
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?; let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH]; let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];
// The primary key *field id* that has already been set for this index or the one // The primary key *field id* that has already been set for this index or the one
// we will guess by searching for the first key that contains "id" as a substring. // we will guess by searching for the first key that contains "id" as a substring.
@ -119,7 +119,7 @@ fn fetch_or_generate_document_id(
documents_batch_index: &DocumentsBatchIndex, documents_batch_index: &DocumentsBatchIndex,
primary_key: PrimaryKey, primary_key: PrimaryKey,
autogenerate_docids: bool, autogenerate_docids: bool,
uuid_buffer: &mut [u8; uuid::adapter::Hyphenated::LENGTH], uuid_buffer: &mut [u8; uuid::fmt::Hyphenated::LENGTH],
count: u32, count: u32,
) -> Result<StdResult<DocumentId, UserError>> { ) -> Result<StdResult<DocumentId, UserError>> {
match primary_key { match primary_key {
@ -134,7 +134,7 @@ fn fetch_or_generate_document_id(
} }
} }
None if autogenerate_docids => { None if autogenerate_docids => {
let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(uuid_buffer); let uuid = uuid::Uuid::new_v4().as_hyphenated().encode_lower(uuid_buffer);
Ok(Ok(DocumentId::generated(uuid.to_string(), count))) Ok(Ok(DocumentId::generated(uuid.to_string(), count)))
} }
None => Ok(Err(UserError::MissingDocumentId { None => Ok(Err(UserError::MissingDocumentId {