document batch support

reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt
This commit is contained in:
Marin Postma 2021-12-08 14:12:07 +01:00 committed by mpostma
parent 74962b2fd9
commit 0c84a40298
18 changed files with 912 additions and 803 deletions

View file

@ -7,13 +7,9 @@ use std::collections::HashSet;
use std::io::{Read, Seek};
use std::iter::FromIterator;
use std::num::{NonZeroU32, NonZeroUsize};
use std::time::Instant;
use chrono::Utc;
use crossbeam_channel::{Receiver, Sender};
use grenad::{self, CompressionType};
use log::{debug, info};
use rayon::ThreadPool;
use log::debug;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
@ -26,8 +22,8 @@ use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput};
use crate::documents::DocumentBatchReader;
use crate::update::{
Facets, UpdateBuilder, UpdateIndexingStep, WordPrefixDocids, WordPrefixPairProximityDocids,
WordPrefixPositionDocids, WordsPrefixesFst,
self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
WordPrefixPairProximityDocids, WordPrefixPositionDocids, WordsPrefixesFst,
};
use crate::{Index, Result};
@ -55,120 +51,116 @@ pub enum IndexDocumentsMethod {
UpdateDocuments,
}
impl Default for IndexDocumentsMethod {
fn default() -> Self {
Self::ReplaceDocuments
}
}
#[derive(Debug, Copy, Clone)]
pub enum WriteMethod {
Append,
GetMergePut,
}
pub struct IndexDocuments<'t, 'u, 'i, 'a> {
pub struct IndexDocuments<'t, 'u, 'i, 'a, F> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
pub(crate) log_every_n: Option<usize>,
pub(crate) documents_chunk_size: Option<usize>,
pub(crate) max_nb_chunks: Option<usize>,
pub(crate) max_memory: Option<usize>,
pub(crate) chunk_compression_type: CompressionType,
pub(crate) chunk_compression_level: Option<u32>,
pub(crate) thread_pool: Option<&'a ThreadPool>,
pub(crate) max_positions_per_attributes: Option<u32>,
facet_level_group_size: Option<NonZeroUsize>,
facet_min_level_size: Option<NonZeroUsize>,
words_prefix_threshold: Option<u32>,
max_prefix_length: Option<usize>,
words_positions_level_group_size: Option<NonZeroU32>,
words_positions_min_level_size: Option<NonZeroU32>,
update_method: IndexDocumentsMethod,
autogenerate_docids: bool,
config: IndexDocumentsConfig,
indexer_config: &'a IndexerConfig,
transform: Option<Transform<'a, 'i>>,
progress: F,
added_documents: u64,
}
impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
#[derive(Default, Debug, Clone)]
pub struct IndexDocumentsConfig {
pub facet_level_group_size: Option<NonZeroUsize>,
pub facet_min_level_size: Option<NonZeroUsize>,
pub words_prefix_threshold: Option<u32>,
pub max_prefix_length: Option<usize>,
pub words_positions_level_group_size: Option<NonZeroU32>,
pub words_positions_min_level_size: Option<NonZeroU32>,
pub update_method: IndexDocumentsMethod,
pub autogenerate_docids: bool,
}
impl<'t, 'u, 'i, 'a, F> IndexDocuments<'t, 'u, 'i, 'a, F>
where
F: Fn(UpdateIndexingStep) + Sync,
{
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
) -> IndexDocuments<'t, 'u, 'i, 'a> {
indexer_config: &'a IndexerConfig,
config: IndexDocumentsConfig,
progress: F,
) -> IndexDocuments<'t, 'u, 'i, 'a, F> {
let transform = Some(Transform::new(
&index,
indexer_config,
config.update_method,
config.autogenerate_docids,
));
IndexDocuments {
transform,
config,
indexer_config,
progress,
wtxn,
index,
log_every_n: None,
documents_chunk_size: None,
max_nb_chunks: None,
max_memory: None,
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
thread_pool: None,
facet_level_group_size: None,
facet_min_level_size: None,
words_prefix_threshold: None,
max_prefix_length: None,
words_positions_level_group_size: None,
words_positions_min_level_size: None,
update_method: IndexDocumentsMethod::ReplaceDocuments,
autogenerate_docids: false,
max_positions_per_attributes: None,
added_documents: 0,
}
}
pub fn log_every_n(&mut self, n: usize) {
self.log_every_n = Some(n);
}
pub fn index_documents_method(&mut self, method: IndexDocumentsMethod) {
self.update_method = method;
}
pub fn enable_autogenerate_docids(&mut self) {
self.autogenerate_docids = true;
}
pub fn disable_autogenerate_docids(&mut self) {
self.autogenerate_docids = false;
}
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute<R, F>(
self,
reader: DocumentBatchReader<R>,
progress_callback: F,
) -> Result<DocumentAdditionResult>
/// Adds a batch of documents to the current builder.
///
/// Since the documents are progressively added to the writer, a failure will cause a stale
/// builder, and the builder must be discarded.
///
/// Returns the number of documents added to the builder.
pub fn add_documents<R>(&mut self, reader: DocumentBatchReader<R>) -> Result<u64>
where
R: Read + Seek,
F: Fn(UpdateIndexingStep) + Sync,
{
// Early return when there is no document to add
if reader.is_empty() {
return Ok(DocumentAdditionResult {
indexed_documents: 0,
number_of_documents: self.index.number_of_documents(self.wtxn)?,
});
return Ok(0);
}
self.index.set_updated_at(self.wtxn, &Utc::now())?;
let before_transform = Instant::now();
let transform = Transform {
rtxn: &self.wtxn,
index: self.index,
log_every_n: self.log_every_n,
chunk_compression_type: self.chunk_compression_type,
chunk_compression_level: self.chunk_compression_level,
max_nb_chunks: self.max_nb_chunks,
max_memory: self.max_memory,
index_documents_method: self.update_method,
autogenerate_docids: self.autogenerate_docids,
};
let indexed_documents = self
.transform
.as_mut()
.expect("Invalid document addition state")
.read_documents(reader, self.wtxn, &self.progress)?
as u64;
let output = transform.read_documents(reader, &progress_callback)?;
self.added_documents += indexed_documents;
Ok(indexed_documents)
}
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
if self.added_documents == 0 {
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
}
let output = self
.transform
.take()
.expect("Invalid document addition state")
.output_from_sorter(self.wtxn, &self.progress)?;
let indexed_documents = output.documents_count as u64;
info!("Update transformed in {:.02?}", before_transform.elapsed());
let number_of_documents = self.execute_raw(output, progress_callback)?;
let number_of_documents = self.execute_raw(output)?;
Ok(DocumentAdditionResult { indexed_documents, number_of_documents })
}
/// Returns the total number of documents in the index after the update.
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute_raw<F>(self, output: TransformOutput, progress_callback: F) -> Result<u64>
pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
where
F: Fn(UpdateIndexingStep) + Sync,
{
@ -188,8 +180,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
let backup_pool;
let pool = match self.thread_pool {
Some(pool) => pool,
let pool = match self.indexer_config.thread_pool {
Some(ref pool) => pool,
#[cfg(not(test))]
None => {
// We initialize a bakcup pool with the default
@ -237,22 +229,21 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
};
let stop_words = self.index.stop_words(self.wtxn)?;
// let stop_words = stop_words.as_ref();
// Run extraction pipeline in parallel.
pool.install(|| {
let params = GrenadParameters {
chunk_compression_type: self.chunk_compression_type,
chunk_compression_level: self.chunk_compression_level,
max_memory: self.max_memory,
max_nb_chunks: self.max_nb_chunks, // default value, may be chosen.
chunk_compression_type: self.indexer_config.chunk_compression_type,
chunk_compression_level: self.indexer_config.chunk_compression_level,
max_memory: self.indexer_config.max_memory,
max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
};
// split obkv file into several chuncks
let chunk_iter = grenad_obkv_into_chunks(
documents_file,
params.clone(),
self.documents_chunk_size.unwrap_or(1024 * 1024 * 128), // 128MiB
self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 128), // 128MiB
);
let result = chunk_iter.map(|chunk_iter| {
@ -266,7 +257,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
primary_key_id,
geo_field_id,
stop_words,
self.max_positions_per_attributes,
self.indexer_config.max_positions_per_attributes,
)
});
@ -281,17 +272,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
// We delete the documents that this document addition replaces. This way we are
// able to simply insert all the documents even if they already exist in the database.
if !replaced_documents_ids.is_empty() {
let update_builder = UpdateBuilder {
log_every_n: self.log_every_n,
max_nb_chunks: self.max_nb_chunks,
max_memory: self.max_memory,
documents_chunk_size: self.documents_chunk_size,
chunk_compression_type: self.chunk_compression_type,
chunk_compression_level: self.chunk_compression_level,
thread_pool: self.thread_pool,
max_positions_per_attributes: self.max_positions_per_attributes,
};
let mut deletion_builder = update_builder.delete_documents(self.wtxn, self.index)?;
let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
debug!("documents to delete {:?}", replaced_documents_ids);
deletion_builder.delete_documents(&replaced_documents_ids);
let deleted_documents_count = deletion_builder.execute()?;
@ -303,7 +284,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
let mut final_documents_ids = RoaringBitmap::new();
let mut databases_seen = 0;
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen,
total_databases: TOTAL_POSTING_DATABASE_COUNT,
});
@ -314,7 +295,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
if !docids.is_empty() {
final_documents_ids |= docids;
let documents_seen_count = final_documents_ids.len();
progress_callback(UpdateIndexingStep::IndexDocuments {
(self.progress)(UpdateIndexingStep::IndexDocuments {
documents_seen: documents_seen_count as usize,
total_documents: documents_count,
});
@ -325,7 +306,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
}
if is_merged_database {
databases_seen += 1;
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen,
total_databases: TOTAL_POSTING_DATABASE_COUNT,
});
@ -344,98 +325,95 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
let all_documents_ids = index_documents_ids | new_documents_ids | replaced_documents_ids;
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
self.execute_prefix_databases(progress_callback)?;
self.execute_prefix_databases()?;
Ok(all_documents_ids.len())
}
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute_prefix_databases<F>(self, progress_callback: F) -> Result<()>
where
F: Fn(UpdateIndexingStep) + Sync,
{
pub fn execute_prefix_databases(self) -> Result<()> {
// Merged databases are already been indexed, we start from this count;
let mut databases_seen = MERGED_DATABASE_COUNT;
// Run the facets update operation.
let mut builder = Facets::new(self.wtxn, self.index);
builder.chunk_compression_type = self.chunk_compression_type;
builder.chunk_compression_level = self.chunk_compression_level;
if let Some(value) = self.facet_level_group_size {
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
if let Some(value) = self.config.facet_level_group_size {
builder.level_group_size(value);
}
if let Some(value) = self.facet_min_level_size {
if let Some(value) = self.config.facet_min_level_size {
builder.min_level_size(value);
}
builder.execute()?;
databases_seen += 1;
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen: databases_seen,
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen,
total_databases: TOTAL_POSTING_DATABASE_COUNT,
});
// Run the words prefixes update operation.
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
if let Some(value) = self.words_prefix_threshold {
if let Some(value) = self.config.words_prefix_threshold {
builder.threshold(value);
}
if let Some(value) = self.max_prefix_length {
if let Some(value) = self.config.max_prefix_length {
builder.max_prefix_length(value);
}
builder.execute()?;
databases_seen += 1;
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen: databases_seen,
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen,
total_databases: TOTAL_POSTING_DATABASE_COUNT,
});
// Run the word prefix docids update operation.
let mut builder = WordPrefixDocids::new(self.wtxn, self.index);
builder.chunk_compression_type = self.chunk_compression_type;
builder.chunk_compression_level = self.chunk_compression_level;
builder.max_nb_chunks = self.max_nb_chunks;
builder.max_memory = self.max_memory;
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
builder.max_memory = self.indexer_config.max_memory;
builder.execute()?;
databases_seen += 1;
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen: databases_seen,
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen,
total_databases: TOTAL_POSTING_DATABASE_COUNT,
});
// Run the word prefix pair proximity docids update operation.
let mut builder = WordPrefixPairProximityDocids::new(self.wtxn, self.index);
builder.chunk_compression_type = self.chunk_compression_type;
builder.chunk_compression_level = self.chunk_compression_level;
builder.max_nb_chunks = self.max_nb_chunks;
builder.max_memory = self.max_memory;
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
builder.max_memory = self.indexer_config.max_memory;
builder.execute()?;
databases_seen += 1;
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen: databases_seen,
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen,
total_databases: TOTAL_POSTING_DATABASE_COUNT,
});
// Run the words prefix position docids update operation.
let mut builder = WordPrefixPositionDocids::new(self.wtxn, self.index);
builder.chunk_compression_type = self.chunk_compression_type;
builder.chunk_compression_level = self.chunk_compression_level;
builder.max_nb_chunks = self.max_nb_chunks;
builder.max_memory = self.max_memory;
if let Some(value) = self.words_positions_level_group_size {
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
builder.max_memory = self.indexer_config.max_memory;
if let Some(value) = self.config.words_positions_level_group_size {
builder.level_group_size(value);
}
if let Some(value) = self.words_positions_min_level_size {
if let Some(value) = self.config.words_positions_min_level_size {
builder.min_level_size(value);
}
builder.execute()?;
databases_seen += 1;
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen: databases_seen,
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen,
total_databases: TOTAL_POSTING_DATABASE_COUNT,
});
@ -469,8 +447,13 @@ mod tests {
{ "id": 2, "name": "kevina" },
{ "id": 3, "name": "benoit" }
]);
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is 3 documents now.
@ -482,8 +465,10 @@ mod tests {
// Second we send 1 document with id 1, to erase the previous ones.
let mut wtxn = index.write_txn().unwrap();
let content = documents!([ { "id": 1, "name": "updated kevin" } ]);
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is **always** 3 documents.
@ -499,8 +484,8 @@ mod tests {
{ "id": 2, "name": "updated kevina" },
{ "id": 3, "name": "updated benoit" }
]);
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
builder.add_documents(content).unwrap();
wtxn.commit().unwrap();
// Check that there is **always** 3 documents.
@ -525,9 +510,15 @@ mod tests {
{ "id": 1, "name": "kevina" },
{ "id": 1, "name": "benoit" }
]);
let mut builder = IndexDocuments::new(&mut wtxn, &index);
builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::UpdateDocuments,
..Default::default()
};
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is only 1 document now.
@ -551,9 +542,9 @@ mod tests {
// Second we send 1 document with id 1, to force it to be merged with the previous one.
let mut wtxn = index.write_txn().unwrap();
let content = documents!([ { "id": 1, "age": 25 } ]);
let mut builder = IndexDocuments::new(&mut wtxn, &index);
builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments);
builder.execute(content, |_| ()).unwrap();
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is **always** 1 document.
@ -590,8 +581,10 @@ mod tests {
{ "name": "kevina" },
{ "name": "benoit" }
]);
let builder = IndexDocuments::new(&mut wtxn, &index);
assert!(builder.execute(content, |_| ()).is_err());
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
assert!(builder.add_documents(content).is_err());
wtxn.commit().unwrap();
// Check that there is no document.
@ -615,9 +608,13 @@ mod tests {
{ "name": "kevina" },
{ "name": "benoit" }
]);
let mut builder = IndexDocuments::new(&mut wtxn, &index);
builder.enable_autogenerate_docids();
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config =
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is 3 documents now.
@ -633,8 +630,9 @@ mod tests {
// Second we send 1 document with the generated uuid, to erase the previous ones.
let mut wtxn = index.write_txn().unwrap();
let content = documents!([ { "name": "updated kevin", "id": kevin_uuid } ]);
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is **always** 3 documents.
@ -670,8 +668,11 @@ mod tests {
{ "id": 2, "name": "kevina" },
{ "id": 3, "name": "benoit" }
]);
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is 3 documents now.
@ -683,9 +684,11 @@ mod tests {
// Second we send 1 document without specifying the id.
let mut wtxn = index.write_txn().unwrap();
let content = documents!([ { "name": "new kevin" } ]);
let mut builder = IndexDocuments::new(&mut wtxn, &index);
builder.enable_autogenerate_docids();
builder.execute(content, |_| ()).unwrap();
let indexing_config =
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is 4 documents now.
@ -705,8 +708,11 @@ mod tests {
// First we send 0 documents and only headers.
let mut wtxn = index.write_txn().unwrap();
let content = documents!([]);
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is no documents.
@ -727,16 +733,20 @@ mod tests {
let mut wtxn = index.write_txn().unwrap();
// There is a space in the document id.
let content = documents!([ { "id": "brume bleue", "name": "kevin" } ]);
let builder = IndexDocuments::new(&mut wtxn, &index);
assert!(builder.execute(content, |_| ()).is_err());
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
assert!(builder.add_documents(content).is_err());
wtxn.commit().unwrap();
// First we send 1 document with a valid id.
let mut wtxn = index.write_txn().unwrap();
// There is a space in the document id.
let content = documents!([ { "id": 32, "name": "kevin" } ]);
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is 1 document now.
@ -760,8 +770,11 @@ mod tests {
{ "id": 1, "name": "kevina", "array": ["I", "am", "fine"] },
{ "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] }
]);
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is 1 documents now.
@ -799,14 +812,22 @@ mod tests {
{ "id": 4, "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling", "genre": "fantasy" },
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", "_geo": { "lat": 35, "lng": 23 } }
]);
let mut builder = IndexDocuments::new(&mut wtxn, &index);
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
builder.execute(documents, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
builder.add_documents(documents).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
let mut wtxn = index.write_txn().unwrap();
let mut builder = IndexDocuments::new(&mut wtxn, &index);
builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments);
let indexing_config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::UpdateDocuments,
..Default::default()
};
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
let documents = documents!([
{
"id": 2,
@ -815,7 +836,8 @@ mod tests {
}
]);
builder.execute(documents, |_| ()).unwrap();
builder.add_documents(documents).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
}
@ -833,7 +855,12 @@ mod tests {
{ "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" },
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
]);
IndexDocuments::new(&mut wtxn, &index).execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));
@ -848,15 +875,22 @@ mod tests {
let content = documents!([
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
]);
IndexDocuments::new(&mut wtxn, &index).execute(content, |_| ()).unwrap();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
assert!(external_documents_ids.get("30").is_some());
let content = documents!([
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
]);
IndexDocuments::new(&mut wtxn, &index).execute(content, |_| ()).unwrap();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
}
@ -886,8 +920,12 @@ mod tests {
cursor.set_position(0);
let content = DocumentBatchReader::from_reader(cursor).unwrap();
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
}
@ -916,8 +954,12 @@ mod tests {
cursor.set_position(0);
let content = DocumentBatchReader::from_reader(cursor).unwrap();
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
@ -969,8 +1011,12 @@ mod tests {
},
]);
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
}
@ -990,8 +1036,12 @@ mod tests {
]);
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is 4 document now.
@ -1002,8 +1052,12 @@ mod tests {
let content = documents!([]);
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is 4 document now.
@ -1019,8 +1073,12 @@ mod tests {
]);
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// Check that there is 4 document now.
@ -1042,8 +1100,12 @@ mod tests {
]);
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(&mut wtxn, &index);
builder.execute(content, |_| ()).unwrap();
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
let rtxn = index.read_txn().unwrap();