mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-09 21:14:30 +01:00
Introduce an indexation abortion function when indexing documents
This commit is contained in:
parent
fad0de4581
commit
6603437cb1
@ -59,7 +59,7 @@ fn setup_settings<'t>(
|
|||||||
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
|
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
|
||||||
builder.set_sortable_fields(sortable_fields);
|
builder.set_sortable_fields(sortable_fields);
|
||||||
|
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn setup_index_with_settings<'t>(
|
fn setup_index_with_settings<'t>(
|
||||||
@ -131,9 +131,15 @@ fn indexing_songs_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -169,9 +175,15 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -185,9 +197,15 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -225,9 +243,15 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
user_error.unwrap();
|
user_error.unwrap();
|
||||||
@ -282,9 +306,15 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
user_error.unwrap();
|
user_error.unwrap();
|
||||||
@ -298,18 +328,30 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
user_error.unwrap();
|
user_error.unwrap();
|
||||||
builder.execute().unwrap();
|
builder.execute().unwrap();
|
||||||
|
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
user_error.unwrap();
|
user_error.unwrap();
|
||||||
@ -345,9 +387,15 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
|
|
||||||
@ -384,9 +432,15 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -423,9 +477,15 @@ fn indexing_wiki(c: &mut Criterion) {
|
|||||||
let indexing_config =
|
let indexing_config =
|
||||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -461,9 +521,15 @@ fn reindexing_wiki(c: &mut Criterion) {
|
|||||||
let indexing_config =
|
let indexing_config =
|
||||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -478,9 +544,15 @@ fn reindexing_wiki(c: &mut Criterion) {
|
|||||||
let indexing_config =
|
let indexing_config =
|
||||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -518,9 +590,15 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
|||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let indexing_config =
|
let indexing_config =
|
||||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
user_error.unwrap();
|
user_error.unwrap();
|
||||||
@ -576,9 +654,15 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config =
|
let indexing_config =
|
||||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
let documents =
|
let documents =
|
||||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv");
|
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -594,9 +678,15 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
let indexing_config =
|
let indexing_config =
|
||||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents =
|
let documents =
|
||||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv");
|
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv");
|
||||||
@ -606,9 +696,15 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
|
|
||||||
let indexing_config =
|
let indexing_config =
|
||||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents =
|
let documents =
|
||||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv");
|
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv");
|
||||||
@ -646,9 +742,15 @@ fn indexing_movies_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -683,9 +785,15 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -699,9 +807,15 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -738,9 +852,15 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
user_error.unwrap();
|
user_error.unwrap();
|
||||||
@ -794,9 +914,15 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
// as we don't care about the time it takes.
|
// as we don't care about the time it takes.
|
||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json");
|
let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -811,9 +937,15 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json");
|
let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -821,9 +953,15 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
builder.execute().unwrap();
|
builder.execute().unwrap();
|
||||||
|
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json");
|
let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -883,9 +1021,15 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -945,9 +1089,15 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
user_error.unwrap();
|
user_error.unwrap();
|
||||||
@ -1008,9 +1158,15 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -1046,9 +1202,15 @@ fn indexing_geo(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -1084,9 +1246,15 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -1101,9 +1269,15 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -1141,9 +1315,15 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let builder =
|
let builder = IndexDocuments::new(
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
&mut wtxn,
|
||||||
.unwrap();
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
user_error.unwrap();
|
user_error.unwrap();
|
||||||
|
@ -86,7 +86,7 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||||||
|
|
||||||
(conf.configure)(&mut builder);
|
(conf.configure)(&mut builder);
|
||||||
|
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
@ -96,7 +96,8 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
let builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
|
||||||
let documents = documents_from(conf.dataset, conf.dataset_format);
|
let documents = documents_from(conf.dataset, conf.dataset_format);
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
user_error.unwrap();
|
user_error.unwrap();
|
||||||
|
@ -239,7 +239,7 @@ impl Performer for DocumentAddition {
|
|||||||
if let Some(primary) = self.primary {
|
if let Some(primary) = self.primary {
|
||||||
let mut builder = update::Settings::new(&mut txn, &index, &config);
|
let mut builder = update::Settings::new(&mut txn, &index, &config);
|
||||||
builder.set_primary_key(primary);
|
builder.set_primary_key(primary);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
let indexing_config = IndexDocumentsConfig {
|
let indexing_config = IndexDocumentsConfig {
|
||||||
@ -260,6 +260,7 @@ impl Performer for DocumentAddition {
|
|||||||
&config,
|
&config,
|
||||||
indexing_config,
|
indexing_config,
|
||||||
|step| indexing_callback(step, &bars),
|
|step| indexing_callback(step, &bars),
|
||||||
|
|| false,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let (addition, user_error) = addition.add_documents(reader)?;
|
let (addition, user_error) = addition.add_documents(reader)?;
|
||||||
@ -517,7 +518,7 @@ impl Performer for SettingsUpdate {
|
|||||||
bars.push(bar);
|
bars.push(bar);
|
||||||
}
|
}
|
||||||
|
|
||||||
update.execute(|step| indexing_callback(step, &bars))?;
|
update.execute(|step| indexing_callback(step, &bars), || false)?;
|
||||||
|
|
||||||
txn.commit()?;
|
txn.commit()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -56,6 +56,8 @@ pub enum InternalError {
|
|||||||
Store(#[from] MdbError),
|
Store(#[from] MdbError),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
Utf8(#[from] str::Utf8Error),
|
Utf8(#[from] str::Utf8Error),
|
||||||
|
#[error("An indexation process was explicitly aborted.")]
|
||||||
|
AbortedIndexation,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
|
@ -1245,6 +1245,7 @@ pub(crate) mod tests {
|
|||||||
&self.indexer_config,
|
&self.indexer_config,
|
||||||
self.index_documents_config.clone(),
|
self.index_documents_config.clone(),
|
||||||
|_| (),
|
|_| (),
|
||||||
|
|| false,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
@ -1281,7 +1282,7 @@ pub(crate) mod tests {
|
|||||||
) -> Result<(), crate::error::Error> {
|
) -> Result<(), crate::error::Error> {
|
||||||
let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config);
|
let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config);
|
||||||
update(&mut builder);
|
update(&mut builder);
|
||||||
builder.execute(drop)?;
|
builder.execute(drop, || false)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -89,7 +89,7 @@ mod test {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let mut update = Settings::new(&mut txn, &index, &config);
|
let mut update = Settings::new(&mut txn, &index, &config);
|
||||||
update.set_distinct_field(distinct.to_string());
|
update.set_distinct_field(distinct.to_string());
|
||||||
update.execute(|_| ()).unwrap();
|
update.execute(|_| (), || false).unwrap();
|
||||||
|
|
||||||
// add documents to the index
|
// add documents to the index
|
||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
@ -98,7 +98,8 @@ mod test {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let addition =
|
let addition =
|
||||||
IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| ()).unwrap();
|
IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| (), || false)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let reader =
|
let reader =
|
||||||
crate::documents::DocumentsBatchReader::from_reader(Cursor::new(JSON.as_slice()))
|
crate::documents::DocumentsBatchReader::from_reader(Cursor::new(JSON.as_slice()))
|
||||||
|
@ -33,7 +33,7 @@ pub use self::helpers::{
|
|||||||
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
||||||
pub use self::transform::{Transform, TransformOutput};
|
pub use self::transform::{Transform, TransformOutput};
|
||||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||||
use crate::error::UserError;
|
use crate::error::{Error, InternalError, UserError};
|
||||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
|
self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
|
||||||
@ -71,13 +71,14 @@ impl Default for IndexDocumentsMethod {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct IndexDocuments<'t, 'u, 'i, 'a, F> {
|
pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
config: IndexDocumentsConfig,
|
config: IndexDocumentsConfig,
|
||||||
indexer_config: &'a IndexerConfig,
|
indexer_config: &'a IndexerConfig,
|
||||||
transform: Option<Transform<'a, 'i>>,
|
transform: Option<Transform<'a, 'i>>,
|
||||||
progress: F,
|
progress: FP,
|
||||||
|
should_abort: FA,
|
||||||
added_documents: u64,
|
added_documents: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,17 +94,19 @@ pub struct IndexDocumentsConfig {
|
|||||||
pub autogenerate_docids: bool,
|
pub autogenerate_docids: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'u, 'i, 'a, F> IndexDocuments<'t, 'u, 'i, 'a, F>
|
impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
FA: Fn() -> bool + Sync,
|
||||||
{
|
{
|
||||||
pub fn new(
|
pub fn new(
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
indexer_config: &'a IndexerConfig,
|
indexer_config: &'a IndexerConfig,
|
||||||
config: IndexDocumentsConfig,
|
config: IndexDocumentsConfig,
|
||||||
progress: F,
|
progress: FP,
|
||||||
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, F>> {
|
should_abort: FA,
|
||||||
|
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, FP, FA>> {
|
||||||
let transform = Some(Transform::new(
|
let transform = Some(Transform::new(
|
||||||
wtxn,
|
wtxn,
|
||||||
&index,
|
&index,
|
||||||
@ -117,6 +120,7 @@ where
|
|||||||
config,
|
config,
|
||||||
indexer_config,
|
indexer_config,
|
||||||
progress,
|
progress,
|
||||||
|
should_abort,
|
||||||
wtxn,
|
wtxn,
|
||||||
index,
|
index,
|
||||||
added_documents: 0,
|
added_documents: 0,
|
||||||
@ -151,12 +155,13 @@ where
|
|||||||
Err(user_error) => return Ok((self, Err(user_error))),
|
Err(user_error) => return Ok((self, Err(user_error))),
|
||||||
};
|
};
|
||||||
|
|
||||||
let indexed_documents = self
|
let indexed_documents =
|
||||||
.transform
|
self.transform.as_mut().expect("Invalid document addition state").read_documents(
|
||||||
.as_mut()
|
enriched_documents_reader,
|
||||||
.expect("Invalid document addition state")
|
self.wtxn,
|
||||||
.read_documents(enriched_documents_reader, self.wtxn, &self.progress)?
|
&self.progress,
|
||||||
as u64;
|
&self.should_abort,
|
||||||
|
)? as u64;
|
||||||
|
|
||||||
self.added_documents += indexed_documents;
|
self.added_documents += indexed_documents;
|
||||||
|
|
||||||
@ -200,7 +205,8 @@ where
|
|||||||
#[logging_timer::time("IndexDocuments::{}")]
|
#[logging_timer::time("IndexDocuments::{}")]
|
||||||
pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
|
pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
FA: Fn() -> bool + Sync,
|
||||||
{
|
{
|
||||||
let TransformOutput {
|
let TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
@ -355,6 +361,10 @@ where
|
|||||||
});
|
});
|
||||||
|
|
||||||
for result in lmdb_writer_rx {
|
for result in lmdb_writer_rx {
|
||||||
|
if (self.should_abort)() {
|
||||||
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
|
}
|
||||||
|
|
||||||
let typed_chunk = match result? {
|
let typed_chunk = match result? {
|
||||||
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
|
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
|
||||||
let cloneable_chunk = unsafe { as_cloneable_grenad(&word_docids_reader)? };
|
let cloneable_chunk = unsafe { as_cloneable_grenad(&word_docids_reader)? };
|
||||||
@ -431,11 +441,16 @@ where
|
|||||||
word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
FA: Fn() -> bool + Sync,
|
||||||
{
|
{
|
||||||
// Merged databases are already been indexed, we start from this count;
|
// Merged databases are already been indexed, we start from this count;
|
||||||
let mut databases_seen = MERGED_DATABASE_COUNT;
|
let mut databases_seen = MERGED_DATABASE_COUNT;
|
||||||
|
|
||||||
|
if (self.should_abort)() {
|
||||||
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
|
}
|
||||||
|
|
||||||
// Run the facets update operation.
|
// Run the facets update operation.
|
||||||
let mut builder = Facets::new(self.wtxn, self.index);
|
let mut builder = Facets::new(self.wtxn, self.index);
|
||||||
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
|
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
|
||||||
@ -454,6 +469,10 @@ where
|
|||||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (self.should_abort)() {
|
||||||
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
|
}
|
||||||
|
|
||||||
let previous_words_prefixes_fst =
|
let previous_words_prefixes_fst =
|
||||||
self.index.words_prefixes_fst(self.wtxn)?.map_data(|cow| cow.into_owned())?;
|
self.index.words_prefixes_fst(self.wtxn)?.map_data(|cow| cow.into_owned())?;
|
||||||
|
|
||||||
@ -467,6 +486,10 @@ where
|
|||||||
}
|
}
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
|
if (self.should_abort)() {
|
||||||
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
|
}
|
||||||
|
|
||||||
let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
||||||
|
|
||||||
// We retrieve the common words between the previous and new prefix word fst.
|
// We retrieve the common words between the previous and new prefix word fst.
|
||||||
@ -494,6 +517,10 @@ where
|
|||||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (self.should_abort)() {
|
||||||
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(word_docids) = word_docids {
|
if let Some(word_docids) = word_docids {
|
||||||
execute_word_prefix_docids(
|
execute_word_prefix_docids(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
@ -520,6 +547,10 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (self.should_abort)() {
|
||||||
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
|
}
|
||||||
|
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
databases_seen,
|
databases_seen,
|
||||||
@ -541,6 +572,10 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (self.should_abort)() {
|
||||||
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
|
}
|
||||||
|
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
databases_seen,
|
databases_seen,
|
||||||
@ -568,6 +603,10 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (self.should_abort)() {
|
||||||
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
|
}
|
||||||
|
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
databases_seen,
|
databases_seen,
|
||||||
|
@ -138,15 +138,17 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn read_documents<R, F>(
|
pub fn read_documents<R, FP, FA>(
|
||||||
&mut self,
|
&mut self,
|
||||||
reader: EnrichedDocumentsBatchReader<R>,
|
reader: EnrichedDocumentsBatchReader<R>,
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
progress_callback: F,
|
progress_callback: FP,
|
||||||
|
should_abort: FA,
|
||||||
) -> Result<usize>
|
) -> Result<usize>
|
||||||
where
|
where
|
||||||
R: Read + Seek,
|
R: Read + Seek,
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
FA: Fn() -> bool + Sync,
|
||||||
{
|
{
|
||||||
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
|
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
|
||||||
|
|
||||||
@ -165,6 +167,10 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
while let Some(enriched_document) = cursor.next_enriched_document()? {
|
while let Some(enriched_document) = cursor.next_enriched_document()? {
|
||||||
let EnrichedDocument { document, document_id } = enriched_document;
|
let EnrichedDocument { document, document_id } = enriched_document;
|
||||||
|
|
||||||
|
if should_abort() {
|
||||||
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
|
}
|
||||||
|
|
||||||
// drop_and_reuse is called instead of .clear() to communicate to the compiler that field_buffer
|
// drop_and_reuse is called instead of .clear() to communicate to the compiler that field_buffer
|
||||||
// does not keep references from the cursor between loop iterations
|
// does not keep references from the cursor between loop iterations
|
||||||
let mut field_buffer_cache = drop_and_reuse(field_buffer);
|
let mut field_buffer_cache = drop_and_reuse(field_buffer);
|
||||||
|
@ -266,9 +266,15 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.pagination_max_total_hits = Setting::Reset;
|
self.pagination_max_total_hits = Setting::Reset;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
fn reindex<FP, FA>(
|
||||||
|
&mut self,
|
||||||
|
progress_callback: &FP,
|
||||||
|
should_abort: &FA,
|
||||||
|
old_fields_ids_map: FieldsIdsMap,
|
||||||
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
FA: Fn() -> bool + Sync,
|
||||||
{
|
{
|
||||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
// if the settings are set before any document update, we don't need to do anything, and
|
// if the settings are set before any document update, we don't need to do anything, and
|
||||||
@ -305,7 +311,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.index,
|
self.index,
|
||||||
&self.indexer_config,
|
&self.indexer_config,
|
||||||
IndexDocumentsConfig::default(),
|
IndexDocumentsConfig::default(),
|
||||||
&cb,
|
&progress_callback,
|
||||||
|
&should_abort,
|
||||||
)?;
|
)?;
|
||||||
indexing_builder.execute_raw(output)?;
|
indexing_builder.execute_raw(output)?;
|
||||||
|
|
||||||
@ -660,9 +667,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
FA: Fn() -> bool + Sync,
|
||||||
{
|
{
|
||||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||||
|
|
||||||
@ -698,7 +706,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
|| searchable_updated
|
|| searchable_updated
|
||||||
|| exact_attributes_updated
|
|| exact_attributes_updated
|
||||||
{
|
{
|
||||||
self.reindex(&progress_callback, old_fields_ids_map)?;
|
self.reindex(&progress_callback, &should_abort, old_fields_ids_map)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -19,7 +19,7 @@ macro_rules! test_distinct {
|
|||||||
let config = milli::update::IndexerConfig::default();
|
let config = milli::update::IndexerConfig::default();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_distinct_field(S(stringify!($distinct)));
|
builder.set_distinct_field(S(stringify!($distinct)));
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
@ -23,13 +23,14 @@ fn test_facet_distribution_with_no_facet_values() {
|
|||||||
S("genres"),
|
S("genres"),
|
||||||
S("tags"),
|
S("tags"),
|
||||||
});
|
});
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
|
|
||||||
// index documents
|
// index documents
|
||||||
let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
|
let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
|
||||||
let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
let builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
|
||||||
let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
|
let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
let reader = Cursor::new(
|
let reader = Cursor::new(
|
||||||
r#"{
|
r#"{
|
||||||
|
@ -57,13 +57,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
S("america") => vec![S("the united states")],
|
S("america") => vec![S("the united states")],
|
||||||
});
|
});
|
||||||
builder.set_searchable_fields(vec![S("title"), S("description")]);
|
builder.set_searchable_fields(vec![S("title"), S("description")]);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
|
|
||||||
// index documents
|
// index documents
|
||||||
let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
|
let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
|
||||||
let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
let builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
|
||||||
let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
|
let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
let reader = Cursor::new(CONTENT.as_bytes());
|
let reader = Cursor::new(CONTENT.as_bytes());
|
||||||
|
|
||||||
|
@ -345,7 +345,7 @@ fn criteria_mixup() {
|
|||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_criteria(criteria.iter().map(ToString::to_string).collect());
|
builder.set_criteria(criteria.iter().map(ToString::to_string).collect());
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let mut rtxn = index.read_txn().unwrap();
|
let mut rtxn = index.read_txn().unwrap();
|
||||||
@ -385,12 +385,13 @@ fn criteria_ascdesc() {
|
|||||||
S("name"),
|
S("name"),
|
||||||
S("age"),
|
S("age"),
|
||||||
});
|
});
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
|
|
||||||
// index documents
|
// index documents
|
||||||
let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
|
let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
|
||||||
let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
let builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
|
||||||
|
|
||||||
let mut batch_builder = DocumentsBatchBuilder::new(Vec::new());
|
let mut batch_builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
|
|
||||||
@ -436,7 +437,7 @@ fn criteria_ascdesc() {
|
|||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_criteria(vec![criterion.to_string()]);
|
builder.set_criteria(vec![criterion.to_string()]);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let mut rtxn = index.read_txn().unwrap();
|
let mut rtxn = index.read_txn().unwrap();
|
||||||
|
@ -40,7 +40,7 @@ fn test_typo_tolerance_one_typo() {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let mut builder = Settings::new(&mut txn, &index, &config);
|
let mut builder = Settings::new(&mut txn, &index, &config);
|
||||||
builder.set_min_word_len_one_typo(4);
|
builder.set_min_word_len_one_typo(4);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
|
|
||||||
// typo is now supported for 4 letters words
|
// typo is now supported for 4 letters words
|
||||||
let mut search = Search::new(&txn, &index);
|
let mut search = Search::new(&txn, &index);
|
||||||
@ -86,7 +86,7 @@ fn test_typo_tolerance_two_typo() {
|
|||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let mut builder = Settings::new(&mut txn, &index, &config);
|
let mut builder = Settings::new(&mut txn, &index, &config);
|
||||||
builder.set_min_word_len_two_typos(7);
|
builder.set_min_word_len_two_typos(7);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
|
|
||||||
// typo is now supported for 4 letters words
|
// typo is now supported for 4 letters words
|
||||||
let mut search = Search::new(&txn, &index);
|
let mut search = Search::new(&txn, &index);
|
||||||
@ -127,7 +127,8 @@ fn test_typo_disabled_on_word() {
|
|||||||
let mut txn = index.write_txn().unwrap();
|
let mut txn = index.write_txn().unwrap();
|
||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let builder = IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| ()).unwrap();
|
let builder =
|
||||||
|
IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| (), || false).unwrap();
|
||||||
|
|
||||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||||
user_error.unwrap();
|
user_error.unwrap();
|
||||||
@ -156,7 +157,7 @@ fn test_typo_disabled_on_word() {
|
|||||||
// `zealand` doesn't allow typos anymore
|
// `zealand` doesn't allow typos anymore
|
||||||
exact_words.insert("zealand".to_string());
|
exact_words.insert("zealand".to_string());
|
||||||
builder.set_exact_words(exact_words);
|
builder.set_exact_words(exact_words);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
|
|
||||||
let mut search = Search::new(&txn, &index);
|
let mut search = Search::new(&txn, &index);
|
||||||
search.query("zealand");
|
search.query("zealand");
|
||||||
@ -194,7 +195,7 @@ fn test_disable_typo_on_attribute() {
|
|||||||
let mut builder = Settings::new(&mut txn, &index, &config);
|
let mut builder = Settings::new(&mut txn, &index, &config);
|
||||||
// disable typos on `description`
|
// disable typos on `description`
|
||||||
builder.set_exact_attributes(vec!["description".to_string()].into_iter().collect());
|
builder.set_exact_attributes(vec!["description".to_string()].into_iter().collect());
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
|
|
||||||
let mut search = Search::new(&txn, &index);
|
let mut search = Search::new(&txn, &index);
|
||||||
search.query("antebelum");
|
search.query("antebelum");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user