mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
document batch support
reusable transform rework update api add indexer config fix tests review changes Co-authored-by: Clément Renault <clement@meilisearch.com> fmt
This commit is contained in:
parent
74962b2fd9
commit
0c84a40298
@ -6,7 +6,7 @@ use std::path::Path;
|
|||||||
|
|
||||||
use criterion::{criterion_group, criterion_main, Criterion};
|
use criterion::{criterion_group, criterion_main, Criterion};
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::update::UpdateBuilder;
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::Index;
|
use milli::Index;
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
@ -39,9 +39,9 @@ fn indexing_songs_default(c: &mut Criterion) {
|
|||||||
move || {
|
move || {
|
||||||
let index = setup_index();
|
let index = setup_index();
|
||||||
|
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
builder.set_primary_key("id".to_owned());
|
builder.set_primary_key("id".to_owned());
|
||||||
let displayed_fields =
|
let displayed_fields =
|
||||||
@ -66,12 +66,15 @@ fn indexing_songs_default(c: &mut Criterion) {
|
|||||||
index
|
index
|
||||||
},
|
},
|
||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = update_builder.index_documents(&mut wtxn, &index);
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
builder.execute(documents, |_| ()).unwrap();
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
@ -88,9 +91,9 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
|||||||
move || {
|
move || {
|
||||||
let index = setup_index();
|
let index = setup_index();
|
||||||
|
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
builder.set_primary_key("id".to_owned());
|
builder.set_primary_key("id".to_owned());
|
||||||
let displayed_fields =
|
let displayed_fields =
|
||||||
@ -112,12 +115,16 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
|||||||
index
|
index
|
||||||
},
|
},
|
||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = update_builder.index_documents(&mut wtxn, &index);
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
builder.execute(documents, |_| ()).unwrap();
|
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
@ -134,9 +141,9 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
|||||||
move || {
|
move || {
|
||||||
let index = setup_index();
|
let index = setup_index();
|
||||||
|
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
builder.set_primary_key("id".to_owned());
|
builder.set_primary_key("id".to_owned());
|
||||||
let displayed_fields =
|
let displayed_fields =
|
||||||
@ -154,12 +161,15 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
|||||||
index
|
index
|
||||||
},
|
},
|
||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = update_builder.index_documents(&mut wtxn, &index);
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||||
builder.execute(documents, |_| ()).unwrap();
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
@ -176,9 +186,9 @@ fn indexing_wiki(c: &mut Criterion) {
|
|||||||
move || {
|
move || {
|
||||||
let index = setup_index();
|
let index = setup_index();
|
||||||
|
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
builder.set_primary_key("id".to_owned());
|
builder.set_primary_key("id".to_owned());
|
||||||
let displayed_fields =
|
let displayed_fields =
|
||||||
@ -195,13 +205,16 @@ fn indexing_wiki(c: &mut Criterion) {
|
|||||||
index
|
index
|
||||||
},
|
},
|
||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config =
|
||||||
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
let mut builder =
|
||||||
builder.enable_autogenerate_docids();
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||||
builder.execute(documents, |_| ()).unwrap();
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
@ -218,9 +231,9 @@ fn indexing_movies_default(c: &mut Criterion) {
|
|||||||
move || {
|
move || {
|
||||||
let index = setup_index();
|
let index = setup_index();
|
||||||
|
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
builder.set_primary_key("id".to_owned());
|
builder.set_primary_key("id".to_owned());
|
||||||
let displayed_fields = ["title", "poster", "overview", "release_date", "genres"]
|
let displayed_fields = ["title", "poster", "overview", "release_date", "genres"]
|
||||||
@ -242,12 +255,15 @@ fn indexing_movies_default(c: &mut Criterion) {
|
|||||||
index
|
index
|
||||||
},
|
},
|
||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = update_builder.index_documents(&mut wtxn, &index);
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||||
builder.execute(documents, |_| ()).unwrap();
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
@ -264,9 +280,9 @@ fn indexing_geo(c: &mut Criterion) {
|
|||||||
move || {
|
move || {
|
||||||
let index = setup_index();
|
let index = setup_index();
|
||||||
|
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
builder.set_primary_key("geonameid".to_owned());
|
builder.set_primary_key("geonameid".to_owned());
|
||||||
let displayed_fields =
|
let displayed_fields =
|
||||||
@ -293,12 +309,15 @@ fn indexing_geo(c: &mut Criterion) {
|
|||||||
index
|
index
|
||||||
},
|
},
|
||||||
move |index| {
|
move |index| {
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = update_builder.index_documents(&mut wtxn, &index);
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||||
builder.execute(documents, |_| ()).unwrap();
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
@ -8,7 +8,9 @@ use std::path::Path;
|
|||||||
use criterion::BenchmarkId;
|
use criterion::BenchmarkId;
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::documents::DocumentBatchReader;
|
use milli::documents::DocumentBatchReader;
|
||||||
use milli::update::{IndexDocumentsMethod, Settings, UpdateBuilder};
|
use milli::update::{
|
||||||
|
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
|
||||||
|
};
|
||||||
use milli::{Filter, Index};
|
use milli::{Filter, Index};
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
@ -65,9 +67,9 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||||||
options.max_readers(10);
|
options.max_readers(10);
|
||||||
let index = Index::new(options, conf.database_name).unwrap();
|
let index = Index::new(options, conf.database_name).unwrap();
|
||||||
|
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
if let Some(primary_key) = conf.primary_key {
|
if let Some(primary_key) = conf.primary_key {
|
||||||
builder.set_primary_key(primary_key.to_string());
|
builder.set_primary_key(primary_key.to_string());
|
||||||
@ -87,16 +89,19 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let update_builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
let indexing_config = IndexDocumentsConfig {
|
||||||
if let None = conf.primary_key {
|
autogenerate_docids: conf.primary_key.is_none(),
|
||||||
builder.enable_autogenerate_docids();
|
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||||
}
|
..Default::default()
|
||||||
|
};
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
let documents = documents_from(conf.dataset, conf.dataset_format);
|
let documents = documents_from(conf.dataset, conf.dataset_format);
|
||||||
|
|
||||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
builder.add_documents(documents).unwrap();
|
||||||
builder.execute(documents, |_| ()).unwrap();
|
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
index
|
index
|
||||||
|
@ -9,6 +9,7 @@ use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
|
|||||||
use milli::update::UpdateIndexingStep::{
|
use milli::update::UpdateIndexingStep::{
|
||||||
ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
|
ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
|
||||||
};
|
};
|
||||||
|
use milli::update::{IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig};
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
@ -122,18 +123,18 @@ impl DocumentAddition {
|
|||||||
println!("Adding {} documents to the index.", reader.len());
|
println!("Adding {} documents to the index.", reader.len());
|
||||||
|
|
||||||
let mut txn = index.env.write_txn()?;
|
let mut txn = index.env.write_txn()?;
|
||||||
let mut addition = milli::update::IndexDocuments::new(&mut txn, &index);
|
let config = milli::update::IndexerConfig { log_every_n: Some(100), ..Default::default() };
|
||||||
|
let update_method = if self.update_documents {
|
||||||
if self.update_documents {
|
IndexDocumentsMethod::UpdateDocuments
|
||||||
addition.index_documents_method(milli::update::IndexDocumentsMethod::UpdateDocuments);
|
} else {
|
||||||
}
|
IndexDocumentsMethod::ReplaceDocuments
|
||||||
|
};
|
||||||
addition.log_every_n(100);
|
|
||||||
|
|
||||||
if self.autogen_docids {
|
|
||||||
addition.enable_autogenerate_docids()
|
|
||||||
}
|
|
||||||
|
|
||||||
|
let indexing_config = IndexDocumentsConfig {
|
||||||
|
update_method,
|
||||||
|
autogenerate_docids: self.autogen_docids,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
let mut bars = Vec::new();
|
let mut bars = Vec::new();
|
||||||
let progesses = MultiProgress::new();
|
let progesses = MultiProgress::new();
|
||||||
for _ in 0..4 {
|
for _ in 0..4 {
|
||||||
@ -141,12 +142,20 @@ impl DocumentAddition {
|
|||||||
let bar = progesses.add(bar);
|
let bar = progesses.add(bar);
|
||||||
bars.push(bar);
|
bars.push(bar);
|
||||||
}
|
}
|
||||||
|
let mut addition = milli::update::IndexDocuments::new(
|
||||||
|
&mut txn,
|
||||||
|
&index,
|
||||||
|
&config,
|
||||||
|
indexing_config,
|
||||||
|
|step| indexing_callback(step, &bars),
|
||||||
|
);
|
||||||
|
addition.add_documents(reader)?;
|
||||||
|
|
||||||
std::thread::spawn(move || {
|
std::thread::spawn(move || {
|
||||||
progesses.join().unwrap();
|
progesses.join().unwrap();
|
||||||
});
|
});
|
||||||
|
|
||||||
let result = addition.execute(reader, |step| indexing_callback(step, &bars))?;
|
let result = addition.execute()?;
|
||||||
|
|
||||||
txn.commit()?;
|
txn.commit()?;
|
||||||
|
|
||||||
@ -293,8 +302,9 @@ impl SettingsUpdate {
|
|||||||
fn perform(&self, index: milli::Index) -> Result<()> {
|
fn perform(&self, index: milli::Index) -> Result<()> {
|
||||||
let mut txn = index.env.write_txn()?;
|
let mut txn = index.env.write_txn()?;
|
||||||
|
|
||||||
let mut update = milli::update::Settings::new(&mut txn, &index);
|
let config = IndexerConfig { log_every_n: Some(100), ..Default::default() };
|
||||||
update.log_every_n(100);
|
|
||||||
|
let mut update = milli::update::Settings::new(&mut txn, &index, &config);
|
||||||
|
|
||||||
if let Some(ref filterable_attributes) = self.filterable_attributes {
|
if let Some(ref filterable_attributes) = self.filterable_attributes {
|
||||||
if !filterable_attributes.is_empty() {
|
if !filterable_attributes.is_empty() {
|
||||||
|
@ -21,13 +21,14 @@ use heed::EnvOpenOptions;
|
|||||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
||||||
use milli::documents::DocumentBatchReader;
|
use milli::documents::DocumentBatchReader;
|
||||||
use milli::update::UpdateIndexingStep::*;
|
use milli::update::UpdateIndexingStep::*;
|
||||||
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder};
|
use milli::update::{
|
||||||
|
ClearDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting,
|
||||||
|
};
|
||||||
use milli::{
|
use milli::{
|
||||||
obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatchingWords,
|
obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatchingWords,
|
||||||
SearchResult, SortError,
|
SearchResult, SortError,
|
||||||
};
|
};
|
||||||
use once_cell::sync::OnceCell;
|
use once_cell::sync::OnceCell;
|
||||||
use rayon::ThreadPool;
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
@ -44,7 +45,7 @@ use self::update_store::UpdateStore;
|
|||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||||
|
|
||||||
static GLOBAL_THREAD_POOL: OnceCell<ThreadPool> = OnceCell::new();
|
static GLOBAL_CONFIG: OnceCell<IndexerConfig> = OnceCell::new();
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
/// The HTTP main server of the milli project.
|
/// The HTTP main server of the milli project.
|
||||||
@ -327,7 +328,19 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
// Setup the global thread pool
|
// Setup the global thread pool
|
||||||
let jobs = opt.indexer.indexing_jobs.unwrap_or(0);
|
let jobs = opt.indexer.indexing_jobs.unwrap_or(0);
|
||||||
let pool = rayon::ThreadPoolBuilder::new().num_threads(jobs).build()?;
|
let pool = rayon::ThreadPoolBuilder::new().num_threads(jobs).build()?;
|
||||||
GLOBAL_THREAD_POOL.set(pool).unwrap();
|
|
||||||
|
let config = IndexerConfig {
|
||||||
|
max_nb_chunks: opt.indexer.max_nb_chunks,
|
||||||
|
chunk_compression_level: opt.indexer.chunk_compression_level,
|
||||||
|
max_positions_per_attributes: opt.indexer.max_positions_per_attributes,
|
||||||
|
thread_pool: Some(pool),
|
||||||
|
log_every_n: Some(opt.indexer.log_every_n),
|
||||||
|
max_memory: Some(opt.indexer.max_memory.get_bytes() as usize),
|
||||||
|
chunk_compression_type: opt.indexer.chunk_compression_type.unwrap_or(CompressionType::None),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
GLOBAL_CONFIG.set(config).unwrap();
|
||||||
|
|
||||||
// Open the LMDB database.
|
// Open the LMDB database.
|
||||||
let index = Index::new(options, &opt.database)?;
|
let index = Index::new(options, &opt.database)?;
|
||||||
@ -342,64 +355,31 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
let (update_status_sender, _) = broadcast::channel(100);
|
let (update_status_sender, _) = broadcast::channel(100);
|
||||||
let update_status_sender_cloned = update_status_sender.clone();
|
let update_status_sender_cloned = update_status_sender.clone();
|
||||||
let index_cloned = index.clone();
|
let index_cloned = index.clone();
|
||||||
let indexer_opt_cloned = opt.indexer.clone();
|
|
||||||
let update_store = UpdateStore::open(
|
let update_store = UpdateStore::open(
|
||||||
update_store_options,
|
update_store_options,
|
||||||
update_store_path,
|
update_store_path,
|
||||||
// the type hint is necessary: https://github.com/rust-lang/rust/issues/32600
|
// the type hint is necessary: https://github.com/rust-lang/rust/issues/32600
|
||||||
move |update_id, meta, content: &_| {
|
move |update_id, meta, content: &_| {
|
||||||
// We prepare the update by using the update builder.
|
// We prepare the update by using the update builder.
|
||||||
let mut update_builder = UpdateBuilder::new();
|
|
||||||
if let Some(max_nb_chunks) = indexer_opt_cloned.max_nb_chunks {
|
|
||||||
update_builder.max_nb_chunks(max_nb_chunks);
|
|
||||||
}
|
|
||||||
if let Some(chunk_compression_level) = indexer_opt_cloned.chunk_compression_level {
|
|
||||||
update_builder.chunk_compression_level(chunk_compression_level);
|
|
||||||
}
|
|
||||||
if let Some(max_pos_per_attributes) = indexer_opt_cloned.max_positions_per_attributes {
|
|
||||||
update_builder.max_positions_per_attributes(max_pos_per_attributes);
|
|
||||||
}
|
|
||||||
update_builder.thread_pool(GLOBAL_THREAD_POOL.get().unwrap());
|
|
||||||
update_builder.log_every_n(indexer_opt_cloned.log_every_n);
|
|
||||||
update_builder.max_memory(indexer_opt_cloned.max_memory.get_bytes() as usize);
|
|
||||||
update_builder.chunk_compression_type(
|
|
||||||
indexer_opt_cloned.chunk_compression_type.unwrap_or(CompressionType::None),
|
|
||||||
);
|
|
||||||
|
|
||||||
let before_update = Instant::now();
|
let before_update = Instant::now();
|
||||||
// we extract the update type and execute the update itself.
|
// we extract the update type and execute the update itself.
|
||||||
let result: anyhow::Result<()> =
|
let result: anyhow::Result<()> = (|| match meta {
|
||||||
(|| match meta {
|
|
||||||
UpdateMeta::DocumentsAddition { method, format, encoding } => {
|
UpdateMeta::DocumentsAddition { method, format, encoding } => {
|
||||||
// We must use the write transaction of the update here.
|
// We must use the write transaction of the update here.
|
||||||
let mut wtxn = index_cloned.write_txn()?;
|
let mut wtxn = index_cloned.write_txn()?;
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, &index_cloned);
|
let update_method = match method.as_str() {
|
||||||
builder.enable_autogenerate_docids();
|
"replace" => IndexDocumentsMethod::ReplaceDocuments,
|
||||||
|
"update" => IndexDocumentsMethod::UpdateDocuments,
|
||||||
match method.as_str() {
|
|
||||||
"replace" => builder
|
|
||||||
.index_documents_method(IndexDocumentsMethod::ReplaceDocuments),
|
|
||||||
"update" => builder
|
|
||||||
.index_documents_method(IndexDocumentsMethod::UpdateDocuments),
|
|
||||||
otherwise => panic!("invalid indexing method {:?}", otherwise),
|
otherwise => panic!("invalid indexing method {:?}", otherwise),
|
||||||
};
|
};
|
||||||
|
let indexing_config = IndexDocumentsConfig {
|
||||||
let reader = match encoding.as_deref() {
|
update_method,
|
||||||
Some("gzip") => Box::new(GzDecoder::new(content)),
|
autogenerate_docids: true,
|
||||||
None => Box::new(content) as Box<dyn io::Read>,
|
..Default::default()
|
||||||
otherwise => panic!("invalid encoding format {:?}", otherwise),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let documents = match format.as_str() {
|
let indexing_callback = |indexing_step| {
|
||||||
"csv" => documents_from_csv(reader)?,
|
|
||||||
"json" => documents_from_json(reader)?,
|
|
||||||
"jsonl" => documents_from_jsonl(reader)?,
|
|
||||||
otherwise => panic!("invalid update format {:?}", otherwise),
|
|
||||||
};
|
|
||||||
|
|
||||||
let documents = DocumentBatchReader::from_reader(Cursor::new(documents))?;
|
|
||||||
|
|
||||||
let result = builder.execute(documents, |indexing_step| {
|
|
||||||
let (current, total) = match indexing_step {
|
let (current, total) = match indexing_step {
|
||||||
RemapDocumentAddition { documents_seen } => (documents_seen, None),
|
RemapDocumentAddition { documents_seen } => (documents_seen, None),
|
||||||
ComputeIdsAndMergeDocuments { documents_seen, total_documents } => {
|
ComputeIdsAndMergeDocuments { documents_seen, total_documents } => {
|
||||||
@ -421,7 +401,34 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
total,
|
total,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
});
|
};
|
||||||
|
|
||||||
|
let mut builder = milli::update::IndexDocuments::new(
|
||||||
|
&mut wtxn,
|
||||||
|
&index_cloned,
|
||||||
|
GLOBAL_CONFIG.get().unwrap(),
|
||||||
|
indexing_config,
|
||||||
|
indexing_callback,
|
||||||
|
);
|
||||||
|
|
||||||
|
let reader = match encoding.as_deref() {
|
||||||
|
Some("gzip") => Box::new(GzDecoder::new(content)),
|
||||||
|
None => Box::new(content) as Box<dyn io::Read>,
|
||||||
|
otherwise => panic!("invalid encoding format {:?}", otherwise),
|
||||||
|
};
|
||||||
|
|
||||||
|
let documents = match format.as_str() {
|
||||||
|
"csv" => documents_from_csv(reader)?,
|
||||||
|
"json" => documents_from_json(reader)?,
|
||||||
|
"jsonl" => documents_from_jsonl(reader)?,
|
||||||
|
otherwise => panic!("invalid update format {:?}", otherwise),
|
||||||
|
};
|
||||||
|
|
||||||
|
let documents = DocumentBatchReader::from_reader(Cursor::new(documents))?;
|
||||||
|
|
||||||
|
builder.add_documents(documents)?;
|
||||||
|
|
||||||
|
let result = builder.execute();
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(_) => wtxn.commit().map_err(Into::into),
|
Ok(_) => wtxn.commit().map_err(Into::into),
|
||||||
@ -431,7 +438,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
UpdateMeta::ClearDocuments => {
|
UpdateMeta::ClearDocuments => {
|
||||||
// We must use the write transaction of the update here.
|
// We must use the write transaction of the update here.
|
||||||
let mut wtxn = index_cloned.write_txn()?;
|
let mut wtxn = index_cloned.write_txn()?;
|
||||||
let builder = update_builder.clear_documents(&mut wtxn, &index_cloned);
|
let builder = ClearDocuments::new(&mut wtxn, &index_cloned);
|
||||||
|
|
||||||
match builder.execute() {
|
match builder.execute() {
|
||||||
Ok(_count) => wtxn.commit().map_err(Into::into),
|
Ok(_count) => wtxn.commit().map_err(Into::into),
|
||||||
@ -441,7 +448,11 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
UpdateMeta::Settings(settings) => {
|
UpdateMeta::Settings(settings) => {
|
||||||
// We must use the write transaction of the update here.
|
// We must use the write transaction of the update here.
|
||||||
let mut wtxn = index_cloned.write_txn()?;
|
let mut wtxn = index_cloned.write_txn()?;
|
||||||
let mut builder = update_builder.settings(&mut wtxn, &index_cloned);
|
let mut builder = milli::update::Settings::new(
|
||||||
|
&mut wtxn,
|
||||||
|
&index_cloned,
|
||||||
|
GLOBAL_CONFIG.get().unwrap(),
|
||||||
|
);
|
||||||
|
|
||||||
// We transpose the settings JSON struct into a real setting update.
|
// We transpose the settings JSON struct into a real setting update.
|
||||||
match settings.searchable_attributes {
|
match settings.searchable_attributes {
|
||||||
@ -532,7 +543,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
UpdateMeta::Facets(levels) => {
|
UpdateMeta::Facets(levels) => {
|
||||||
// We must use the write transaction of the update here.
|
// We must use the write transaction of the update here.
|
||||||
let mut wtxn = index_cloned.write_txn()?;
|
let mut wtxn = index_cloned.write_txn()?;
|
||||||
let mut builder = update_builder.facets(&mut wtxn, &index_cloned);
|
let mut builder = milli::update::Facets::new(&mut wtxn, &index_cloned);
|
||||||
if let Some(value) = levels.level_group_size {
|
if let Some(value) = levels.level_group_size {
|
||||||
builder.level_group_size(value);
|
builder.level_group_size(value);
|
||||||
}
|
}
|
||||||
|
@ -867,7 +867,7 @@ pub(crate) mod tests {
|
|||||||
use maplit::btreemap;
|
use maplit::btreemap;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
|
|
||||||
use crate::update::IndexDocuments;
|
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
|
||||||
use crate::Index;
|
use crate::Index;
|
||||||
|
|
||||||
pub(crate) struct TempIndex {
|
pub(crate) struct TempIndex {
|
||||||
@ -908,8 +908,13 @@ pub(crate) mod tests {
|
|||||||
{ "id": 2, "name": "bob", "age": 20 },
|
{ "id": 2, "name": "bob", "age": 20 },
|
||||||
{ "id": 2, "name": "bob", "age": 20 }
|
{ "id": 2, "name": "bob", "age": 20 }
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
@ -927,13 +932,15 @@ pub(crate) mod tests {
|
|||||||
// we add all the documents a second time. we are supposed to get the same
|
// we add all the documents a second time. we are supposed to get the same
|
||||||
// field_distribution in the end
|
// field_distribution in the end
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
let content = documents!([
|
let content = documents!([
|
||||||
{ "id": 1, "name": "kevin" },
|
{ "id": 1, "name": "kevin" },
|
||||||
{ "id": 2, "name": "bob", "age": 20 },
|
{ "id": 2, "name": "bob", "age": 20 },
|
||||||
{ "id": 2, "name": "bob", "age": 20 }
|
{ "id": 2, "name": "bob", "age": 20 }
|
||||||
]);
|
]);
|
||||||
builder.execute(content, |_| ()).unwrap();
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
@ -955,8 +962,10 @@ pub(crate) mod tests {
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let mut builder =
|
||||||
builder.execute(content, |_| ()).unwrap();
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
@ -38,7 +38,9 @@ mod test {
|
|||||||
use crate::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
use crate::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
use crate::index::Index;
|
use crate::index::Index;
|
||||||
use crate::update::{IndexDocumentsMethod, UpdateBuilder};
|
use crate::update::{
|
||||||
|
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
|
||||||
|
};
|
||||||
use crate::{DocumentId, FieldId, BEU32};
|
use crate::{DocumentId, FieldId, BEU32};
|
||||||
|
|
||||||
static JSON: Lazy<Vec<u8>> = Lazy::new(generate_documents);
|
static JSON: Lazy<Vec<u8>> = Lazy::new(generate_documents);
|
||||||
@ -84,19 +86,24 @@ mod test {
|
|||||||
let mut txn = index.write_txn().unwrap();
|
let mut txn = index.write_txn().unwrap();
|
||||||
|
|
||||||
// set distinct and faceted attributes for the index.
|
// set distinct and faceted attributes for the index.
|
||||||
let builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
let mut update = builder.settings(&mut txn, &index);
|
let mut update = Settings::new(&mut txn, &index, &config);
|
||||||
update.set_distinct_field(distinct.to_string());
|
update.set_distinct_field(distinct.to_string());
|
||||||
update.execute(|_| ()).unwrap();
|
update.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
// add documents to the index
|
// add documents to the index
|
||||||
let builder = UpdateBuilder::new();
|
let config = IndexerConfig::default();
|
||||||
let mut addition = builder.index_documents(&mut txn, &index);
|
let indexing_config = IndexDocumentsConfig {
|
||||||
|
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let mut addition = IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| ());
|
||||||
|
|
||||||
addition.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
|
||||||
let reader =
|
let reader =
|
||||||
crate::documents::DocumentBatchReader::from_reader(Cursor::new(&*JSON)).unwrap();
|
crate::documents::DocumentBatchReader::from_reader(Cursor::new(&*JSON)).unwrap();
|
||||||
addition.execute(reader, |_| ()).unwrap();
|
|
||||||
|
addition.add_documents(reader).unwrap();
|
||||||
|
addition.execute().unwrap();
|
||||||
|
|
||||||
let fields_map = index.fields_ids_map(&txn).unwrap();
|
let fields_map = index.fields_ids_map(&txn).unwrap();
|
||||||
let fid = fields_map.id(&distinct).unwrap();
|
let fid = fields_map.id(&distinct).unwrap();
|
||||||
|
@ -450,7 +450,7 @@ mod tests {
|
|||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::update::Settings;
|
use crate::update::{IndexerConfig, Settings};
|
||||||
use crate::Index;
|
use crate::Index;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -461,8 +461,9 @@ mod tests {
|
|||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
// Set the filterable fields to be the channel.
|
// Set the filterable fields to be the channel.
|
||||||
|
let config = IndexerConfig::default();
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_searchable_fields(vec![S("PrIcE")]); // to keep the fields order
|
builder.set_searchable_fields(vec![S("PrIcE")]); // to keep the fields order
|
||||||
builder.set_filterable_fields(hashset! { S("PrIcE") });
|
builder.set_filterable_fields(hashset! { S("PrIcE") });
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
@ -563,9 +564,10 @@ mod tests {
|
|||||||
));
|
));
|
||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
|
|
||||||
|
let config = IndexerConfig::default();
|
||||||
// Set the filterable fields to be the channel.
|
// Set the filterable fields to be the channel.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_searchable_fields(vec![S("title")]);
|
builder.set_searchable_fields(vec![S("title")]);
|
||||||
builder.set_filterable_fields(hashset! { S("title") });
|
builder.set_filterable_fields(hashset! { S("title") });
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
@ -593,9 +595,10 @@ mod tests {
|
|||||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
let config = IndexerConfig::default();
|
||||||
// Set the filterable fields to be the channel.
|
// Set the filterable fields to be the channel.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order
|
builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order
|
||||||
builder.set_filterable_fields(hashset! { S("_geo"), S("price") });
|
builder.set_filterable_fields(hashset! { S("_geo"), S("price") });
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
|
@ -77,7 +77,7 @@ mod tests {
|
|||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::update::IndexDocuments;
|
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn clear_documents() {
|
fn clear_documents() {
|
||||||
@ -92,7 +92,11 @@ mod tests {
|
|||||||
{ "id": 1, "name": "kevina" },
|
{ "id": 1, "name": "kevina" },
|
||||||
{ "id": 2, "name": "benoit", "country": "France", "_geo": { "lng": 42, "lat": 35 } }
|
{ "id": 2, "name": "benoit", "country": "France", "_geo": { "lng": 42, "lat": 35 } }
|
||||||
]);
|
]);
|
||||||
IndexDocuments::new(&mut wtxn, &index).execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
// Clear all documents from the database.
|
// Clear all documents from the database.
|
||||||
let builder = ClearDocuments::new(&mut wtxn, &index);
|
let builder = ClearDocuments::new(&mut wtxn, &index);
|
||||||
|
@ -580,7 +580,7 @@ mod tests {
|
|||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::update::{IndexDocuments, Settings};
|
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use crate::Filter;
|
use crate::Filter;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -596,8 +596,11 @@ mod tests {
|
|||||||
{ "id": 1, "name": "kevina", "array": ["I", "am", "fine"] },
|
{ "id": 1, "name": "kevina", "array": ["I", "am", "fine"] },
|
||||||
{ "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] }
|
{ "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] }
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
// delete those documents, ids are synchronous therefore 0, 1, and 2.
|
// delete those documents, ids are synchronous therefore 0, 1, and 2.
|
||||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||||
@ -626,8 +629,12 @@ mod tests {
|
|||||||
{ "mysuperid": 1, "name": "kevina" },
|
{ "mysuperid": 1, "name": "kevina" },
|
||||||
{ "mysuperid": 2, "name": "benoit" }
|
{ "mysuperid": 2, "name": "benoit" }
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
// Delete not all of the documents but some of them.
|
// Delete not all of the documents but some of them.
|
||||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||||
@ -646,7 +653,8 @@ mod tests {
|
|||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_primary_key(S("docid"));
|
builder.set_primary_key(S("docid"));
|
||||||
builder.set_filterable_fields(hashset! { S("label") });
|
builder.set_filterable_fields(hashset! { S("label") });
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
@ -673,8 +681,12 @@ mod tests {
|
|||||||
{"docid":"1_68","label":"design"},
|
{"docid":"1_68","label":"design"},
|
||||||
{"docid":"1_69","label":"geometry"}
|
{"docid":"1_69","label":"geometry"}
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
// Delete not all of the documents but some of them.
|
// Delete not all of the documents but some of them.
|
||||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||||
@ -696,7 +708,8 @@ mod tests {
|
|||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_primary_key(S("id"));
|
builder.set_primary_key(S("id"));
|
||||||
builder.set_filterable_fields(hashset!(S("_geo")));
|
builder.set_filterable_fields(hashset!(S("_geo")));
|
||||||
builder.set_sortable_fields(hashset!(S("_geo")));
|
builder.set_sortable_fields(hashset!(S("_geo")));
|
||||||
@ -726,7 +739,11 @@ mod tests {
|
|||||||
]);
|
]);
|
||||||
let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"];
|
let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"];
|
||||||
|
|
||||||
IndexDocuments::new(&mut wtxn, &index).execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
let external_document_ids = index.external_documents_ids(&wtxn).unwrap();
|
let external_document_ids = index.external_documents_ids(&wtxn).unwrap();
|
||||||
let ids_to_delete: Vec<u32> = external_ids_to_delete
|
let ids_to_delete: Vec<u32> = external_ids_to_delete
|
||||||
|
@ -7,13 +7,9 @@ use std::collections::HashSet;
|
|||||||
use std::io::{Read, Seek};
|
use std::io::{Read, Seek};
|
||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
use std::num::{NonZeroU32, NonZeroUsize};
|
use std::num::{NonZeroU32, NonZeroUsize};
|
||||||
use std::time::Instant;
|
|
||||||
|
|
||||||
use chrono::Utc;
|
|
||||||
use crossbeam_channel::{Receiver, Sender};
|
use crossbeam_channel::{Receiver, Sender};
|
||||||
use grenad::{self, CompressionType};
|
use log::debug;
|
||||||
use log::{debug, info};
|
|
||||||
use rayon::ThreadPool;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
|
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
|
||||||
@ -26,8 +22,8 @@ use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
|||||||
pub use self::transform::{Transform, TransformOutput};
|
pub use self::transform::{Transform, TransformOutput};
|
||||||
use crate::documents::DocumentBatchReader;
|
use crate::documents::DocumentBatchReader;
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
Facets, UpdateBuilder, UpdateIndexingStep, WordPrefixDocids, WordPrefixPairProximityDocids,
|
self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
|
||||||
WordPrefixPositionDocids, WordsPrefixesFst,
|
WordPrefixPairProximityDocids, WordPrefixPositionDocids, WordsPrefixesFst,
|
||||||
};
|
};
|
||||||
use crate::{Index, Result};
|
use crate::{Index, Result};
|
||||||
|
|
||||||
@ -55,120 +51,116 @@ pub enum IndexDocumentsMethod {
|
|||||||
UpdateDocuments,
|
UpdateDocuments,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Default for IndexDocumentsMethod {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::ReplaceDocuments
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone)]
|
#[derive(Debug, Copy, Clone)]
|
||||||
pub enum WriteMethod {
|
pub enum WriteMethod {
|
||||||
Append,
|
Append,
|
||||||
GetMergePut,
|
GetMergePut,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct IndexDocuments<'t, 'u, 'i, 'a> {
|
pub struct IndexDocuments<'t, 'u, 'i, 'a, F> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
pub(crate) log_every_n: Option<usize>,
|
config: IndexDocumentsConfig,
|
||||||
pub(crate) documents_chunk_size: Option<usize>,
|
indexer_config: &'a IndexerConfig,
|
||||||
pub(crate) max_nb_chunks: Option<usize>,
|
transform: Option<Transform<'a, 'i>>,
|
||||||
pub(crate) max_memory: Option<usize>,
|
progress: F,
|
||||||
pub(crate) chunk_compression_type: CompressionType,
|
added_documents: u64,
|
||||||
pub(crate) chunk_compression_level: Option<u32>,
|
|
||||||
pub(crate) thread_pool: Option<&'a ThreadPool>,
|
|
||||||
pub(crate) max_positions_per_attributes: Option<u32>,
|
|
||||||
facet_level_group_size: Option<NonZeroUsize>,
|
|
||||||
facet_min_level_size: Option<NonZeroUsize>,
|
|
||||||
words_prefix_threshold: Option<u32>,
|
|
||||||
max_prefix_length: Option<usize>,
|
|
||||||
words_positions_level_group_size: Option<NonZeroU32>,
|
|
||||||
words_positions_min_level_size: Option<NonZeroU32>,
|
|
||||||
update_method: IndexDocumentsMethod,
|
|
||||||
autogenerate_docids: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
#[derive(Default, Debug, Clone)]
|
||||||
|
pub struct IndexDocumentsConfig {
|
||||||
|
pub facet_level_group_size: Option<NonZeroUsize>,
|
||||||
|
pub facet_min_level_size: Option<NonZeroUsize>,
|
||||||
|
pub words_prefix_threshold: Option<u32>,
|
||||||
|
pub max_prefix_length: Option<usize>,
|
||||||
|
pub words_positions_level_group_size: Option<NonZeroU32>,
|
||||||
|
pub words_positions_min_level_size: Option<NonZeroU32>,
|
||||||
|
pub update_method: IndexDocumentsMethod,
|
||||||
|
pub autogenerate_docids: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t, 'u, 'i, 'a, F> IndexDocuments<'t, 'u, 'i, 'a, F>
|
||||||
|
where
|
||||||
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
{
|
||||||
pub fn new(
|
pub fn new(
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
) -> IndexDocuments<'t, 'u, 'i, 'a> {
|
indexer_config: &'a IndexerConfig,
|
||||||
|
config: IndexDocumentsConfig,
|
||||||
|
progress: F,
|
||||||
|
) -> IndexDocuments<'t, 'u, 'i, 'a, F> {
|
||||||
|
let transform = Some(Transform::new(
|
||||||
|
&index,
|
||||||
|
indexer_config,
|
||||||
|
config.update_method,
|
||||||
|
config.autogenerate_docids,
|
||||||
|
));
|
||||||
|
|
||||||
IndexDocuments {
|
IndexDocuments {
|
||||||
|
transform,
|
||||||
|
config,
|
||||||
|
indexer_config,
|
||||||
|
progress,
|
||||||
wtxn,
|
wtxn,
|
||||||
index,
|
index,
|
||||||
log_every_n: None,
|
added_documents: 0,
|
||||||
documents_chunk_size: None,
|
|
||||||
max_nb_chunks: None,
|
|
||||||
max_memory: None,
|
|
||||||
chunk_compression_type: CompressionType::None,
|
|
||||||
chunk_compression_level: None,
|
|
||||||
thread_pool: None,
|
|
||||||
facet_level_group_size: None,
|
|
||||||
facet_min_level_size: None,
|
|
||||||
words_prefix_threshold: None,
|
|
||||||
max_prefix_length: None,
|
|
||||||
words_positions_level_group_size: None,
|
|
||||||
words_positions_min_level_size: None,
|
|
||||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
|
||||||
autogenerate_docids: false,
|
|
||||||
max_positions_per_attributes: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn log_every_n(&mut self, n: usize) {
|
/// Adds a batch of documents to the current builder.
|
||||||
self.log_every_n = Some(n);
|
///
|
||||||
}
|
/// Since the documents are progressively added to the writer, a failure will cause a stale
|
||||||
|
/// builder, and the builder must be discarded.
|
||||||
pub fn index_documents_method(&mut self, method: IndexDocumentsMethod) {
|
///
|
||||||
self.update_method = method;
|
/// Returns the number of documents added to the builder.
|
||||||
}
|
pub fn add_documents<R>(&mut self, reader: DocumentBatchReader<R>) -> Result<u64>
|
||||||
|
|
||||||
pub fn enable_autogenerate_docids(&mut self) {
|
|
||||||
self.autogenerate_docids = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn disable_autogenerate_docids(&mut self) {
|
|
||||||
self.autogenerate_docids = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
#[logging_timer::time("IndexDocuments::{}")]
|
|
||||||
pub fn execute<R, F>(
|
|
||||||
self,
|
|
||||||
reader: DocumentBatchReader<R>,
|
|
||||||
progress_callback: F,
|
|
||||||
) -> Result<DocumentAdditionResult>
|
|
||||||
where
|
where
|
||||||
R: Read + Seek,
|
R: Read + Seek,
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
|
||||||
{
|
{
|
||||||
// Early return when there is no document to add
|
// Early return when there is no document to add
|
||||||
if reader.is_empty() {
|
if reader.is_empty() {
|
||||||
return Ok(DocumentAdditionResult {
|
return Ok(0);
|
||||||
indexed_documents: 0,
|
|
||||||
number_of_documents: self.index.number_of_documents(self.wtxn)?,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
let indexed_documents = self
|
||||||
let before_transform = Instant::now();
|
.transform
|
||||||
let transform = Transform {
|
.as_mut()
|
||||||
rtxn: &self.wtxn,
|
.expect("Invalid document addition state")
|
||||||
index: self.index,
|
.read_documents(reader, self.wtxn, &self.progress)?
|
||||||
log_every_n: self.log_every_n,
|
as u64;
|
||||||
chunk_compression_type: self.chunk_compression_type,
|
|
||||||
chunk_compression_level: self.chunk_compression_level,
|
|
||||||
max_nb_chunks: self.max_nb_chunks,
|
|
||||||
max_memory: self.max_memory,
|
|
||||||
index_documents_method: self.update_method,
|
|
||||||
autogenerate_docids: self.autogenerate_docids,
|
|
||||||
};
|
|
||||||
|
|
||||||
let output = transform.read_documents(reader, &progress_callback)?;
|
self.added_documents += indexed_documents;
|
||||||
|
|
||||||
|
Ok(indexed_documents)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[logging_timer::time("IndexDocuments::{}")]
|
||||||
|
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
|
||||||
|
if self.added_documents == 0 {
|
||||||
|
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||||
|
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
|
||||||
|
}
|
||||||
|
let output = self
|
||||||
|
.transform
|
||||||
|
.take()
|
||||||
|
.expect("Invalid document addition state")
|
||||||
|
.output_from_sorter(self.wtxn, &self.progress)?;
|
||||||
let indexed_documents = output.documents_count as u64;
|
let indexed_documents = output.documents_count as u64;
|
||||||
|
let number_of_documents = self.execute_raw(output)?;
|
||||||
info!("Update transformed in {:.02?}", before_transform.elapsed());
|
|
||||||
|
|
||||||
let number_of_documents = self.execute_raw(output, progress_callback)?;
|
|
||||||
|
|
||||||
Ok(DocumentAdditionResult { indexed_documents, number_of_documents })
|
Ok(DocumentAdditionResult { indexed_documents, number_of_documents })
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the total number of documents in the index after the update.
|
/// Returns the total number of documents in the index after the update.
|
||||||
#[logging_timer::time("IndexDocuments::{}")]
|
#[logging_timer::time("IndexDocuments::{}")]
|
||||||
pub fn execute_raw<F>(self, output: TransformOutput, progress_callback: F) -> Result<u64>
|
pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
{
|
{
|
||||||
@ -188,8 +180,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||||
|
|
||||||
let backup_pool;
|
let backup_pool;
|
||||||
let pool = match self.thread_pool {
|
let pool = match self.indexer_config.thread_pool {
|
||||||
Some(pool) => pool,
|
Some(ref pool) => pool,
|
||||||
#[cfg(not(test))]
|
#[cfg(not(test))]
|
||||||
None => {
|
None => {
|
||||||
// We initialize a backup pool with the default
|
// We initialize a backup pool with the default
|
||||||
@ -237,22 +229,21 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let stop_words = self.index.stop_words(self.wtxn)?;
|
let stop_words = self.index.stop_words(self.wtxn)?;
|
||||||
// let stop_words = stop_words.as_ref();
|
|
||||||
|
|
||||||
// Run extraction pipeline in parallel.
|
// Run extraction pipeline in parallel.
|
||||||
pool.install(|| {
|
pool.install(|| {
|
||||||
let params = GrenadParameters {
|
let params = GrenadParameters {
|
||||||
chunk_compression_type: self.chunk_compression_type,
|
chunk_compression_type: self.indexer_config.chunk_compression_type,
|
||||||
chunk_compression_level: self.chunk_compression_level,
|
chunk_compression_level: self.indexer_config.chunk_compression_level,
|
||||||
max_memory: self.max_memory,
|
max_memory: self.indexer_config.max_memory,
|
||||||
max_nb_chunks: self.max_nb_chunks, // default value, may be chosen.
|
max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
|
||||||
};
|
};
|
||||||
|
|
||||||
// split obkv file into several chunks
|
// split obkv file into several chunks
|
||||||
let chunk_iter = grenad_obkv_into_chunks(
|
let chunk_iter = grenad_obkv_into_chunks(
|
||||||
documents_file,
|
documents_file,
|
||||||
params.clone(),
|
params.clone(),
|
||||||
self.documents_chunk_size.unwrap_or(1024 * 1024 * 128), // 128MiB
|
self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 128), // 128MiB
|
||||||
);
|
);
|
||||||
|
|
||||||
let result = chunk_iter.map(|chunk_iter| {
|
let result = chunk_iter.map(|chunk_iter| {
|
||||||
@ -266,7 +257,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
primary_key_id,
|
primary_key_id,
|
||||||
geo_field_id,
|
geo_field_id,
|
||||||
stop_words,
|
stop_words,
|
||||||
self.max_positions_per_attributes,
|
self.indexer_config.max_positions_per_attributes,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -281,17 +272,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
// We delete the documents that this document addition replaces. This way we are
|
// We delete the documents that this document addition replaces. This way we are
|
||||||
// able to simply insert all the documents even if they already exist in the database.
|
// able to simply insert all the documents even if they already exist in the database.
|
||||||
if !replaced_documents_ids.is_empty() {
|
if !replaced_documents_ids.is_empty() {
|
||||||
let update_builder = UpdateBuilder {
|
let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
|
||||||
log_every_n: self.log_every_n,
|
|
||||||
max_nb_chunks: self.max_nb_chunks,
|
|
||||||
max_memory: self.max_memory,
|
|
||||||
documents_chunk_size: self.documents_chunk_size,
|
|
||||||
chunk_compression_type: self.chunk_compression_type,
|
|
||||||
chunk_compression_level: self.chunk_compression_level,
|
|
||||||
thread_pool: self.thread_pool,
|
|
||||||
max_positions_per_attributes: self.max_positions_per_attributes,
|
|
||||||
};
|
|
||||||
let mut deletion_builder = update_builder.delete_documents(self.wtxn, self.index)?;
|
|
||||||
debug!("documents to delete {:?}", replaced_documents_ids);
|
debug!("documents to delete {:?}", replaced_documents_ids);
|
||||||
deletion_builder.delete_documents(&replaced_documents_ids);
|
deletion_builder.delete_documents(&replaced_documents_ids);
|
||||||
let deleted_documents_count = deletion_builder.execute()?;
|
let deleted_documents_count = deletion_builder.execute()?;
|
||||||
@ -303,7 +284,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
let mut final_documents_ids = RoaringBitmap::new();
|
let mut final_documents_ids = RoaringBitmap::new();
|
||||||
|
|
||||||
let mut databases_seen = 0;
|
let mut databases_seen = 0;
|
||||||
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
databases_seen,
|
databases_seen,
|
||||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
});
|
});
|
||||||
@ -314,7 +295,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
final_documents_ids |= docids;
|
final_documents_ids |= docids;
|
||||||
let documents_seen_count = final_documents_ids.len();
|
let documents_seen_count = final_documents_ids.len();
|
||||||
progress_callback(UpdateIndexingStep::IndexDocuments {
|
(self.progress)(UpdateIndexingStep::IndexDocuments {
|
||||||
documents_seen: documents_seen_count as usize,
|
documents_seen: documents_seen_count as usize,
|
||||||
total_documents: documents_count,
|
total_documents: documents_count,
|
||||||
});
|
});
|
||||||
@ -325,7 +306,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
}
|
}
|
||||||
if is_merged_database {
|
if is_merged_database {
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
databases_seen,
|
databases_seen,
|
||||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
});
|
});
|
||||||
@ -344,98 +325,95 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
let all_documents_ids = index_documents_ids | new_documents_ids | replaced_documents_ids;
|
let all_documents_ids = index_documents_ids | new_documents_ids | replaced_documents_ids;
|
||||||
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
|
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
|
||||||
|
|
||||||
self.execute_prefix_databases(progress_callback)?;
|
self.execute_prefix_databases()?;
|
||||||
|
|
||||||
Ok(all_documents_ids.len())
|
Ok(all_documents_ids.len())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[logging_timer::time("IndexDocuments::{}")]
|
#[logging_timer::time("IndexDocuments::{}")]
|
||||||
pub fn execute_prefix_databases<F>(self, progress_callback: F) -> Result<()>
|
pub fn execute_prefix_databases(self) -> Result<()> {
|
||||||
where
|
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
|
||||||
{
|
|
||||||
// Merged databases have already been indexed, so we start from this count.
|
// Merged databases have already been indexed, so we start from this count.
|
||||||
let mut databases_seen = MERGED_DATABASE_COUNT;
|
let mut databases_seen = MERGED_DATABASE_COUNT;
|
||||||
|
|
||||||
// Run the facets update operation.
|
// Run the facets update operation.
|
||||||
let mut builder = Facets::new(self.wtxn, self.index);
|
let mut builder = Facets::new(self.wtxn, self.index);
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
|
||||||
builder.chunk_compression_level = self.chunk_compression_level;
|
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
|
||||||
if let Some(value) = self.facet_level_group_size {
|
if let Some(value) = self.config.facet_level_group_size {
|
||||||
builder.level_group_size(value);
|
builder.level_group_size(value);
|
||||||
}
|
}
|
||||||
if let Some(value) = self.facet_min_level_size {
|
if let Some(value) = self.config.facet_min_level_size {
|
||||||
builder.min_level_size(value);
|
builder.min_level_size(value);
|
||||||
}
|
}
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
databases_seen: databases_seen,
|
databases_seen,
|
||||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Run the words prefixes update operation.
|
// Run the words prefixes update operation.
|
||||||
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
|
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
|
||||||
if let Some(value) = self.words_prefix_threshold {
|
if let Some(value) = self.config.words_prefix_threshold {
|
||||||
builder.threshold(value);
|
builder.threshold(value);
|
||||||
}
|
}
|
||||||
if let Some(value) = self.max_prefix_length {
|
if let Some(value) = self.config.max_prefix_length {
|
||||||
builder.max_prefix_length(value);
|
builder.max_prefix_length(value);
|
||||||
}
|
}
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
databases_seen: databases_seen,
|
databases_seen,
|
||||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Run the word prefix docids update operation.
|
// Run the word prefix docids update operation.
|
||||||
let mut builder = WordPrefixDocids::new(self.wtxn, self.index);
|
let mut builder = WordPrefixDocids::new(self.wtxn, self.index);
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
|
||||||
builder.chunk_compression_level = self.chunk_compression_level;
|
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
|
||||||
builder.max_nb_chunks = self.max_nb_chunks;
|
builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
|
||||||
builder.max_memory = self.max_memory;
|
builder.max_memory = self.indexer_config.max_memory;
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
databases_seen: databases_seen,
|
databases_seen,
|
||||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Run the word prefix pair proximity docids update operation.
|
// Run the word prefix pair proximity docids update operation.
|
||||||
let mut builder = WordPrefixPairProximityDocids::new(self.wtxn, self.index);
|
let mut builder = WordPrefixPairProximityDocids::new(self.wtxn, self.index);
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
|
||||||
builder.chunk_compression_level = self.chunk_compression_level;
|
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
|
||||||
builder.max_nb_chunks = self.max_nb_chunks;
|
builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
|
||||||
builder.max_memory = self.max_memory;
|
builder.max_memory = self.indexer_config.max_memory;
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
databases_seen: databases_seen,
|
databases_seen,
|
||||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Run the words prefix position docids update operation.
|
// Run the words prefix position docids update operation.
|
||||||
let mut builder = WordPrefixPositionDocids::new(self.wtxn, self.index);
|
let mut builder = WordPrefixPositionDocids::new(self.wtxn, self.index);
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
|
||||||
builder.chunk_compression_level = self.chunk_compression_level;
|
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
|
||||||
builder.max_nb_chunks = self.max_nb_chunks;
|
builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
|
||||||
builder.max_memory = self.max_memory;
|
builder.max_memory = self.indexer_config.max_memory;
|
||||||
if let Some(value) = self.words_positions_level_group_size {
|
if let Some(value) = self.config.words_positions_level_group_size {
|
||||||
builder.level_group_size(value);
|
builder.level_group_size(value);
|
||||||
}
|
}
|
||||||
if let Some(value) = self.words_positions_min_level_size {
|
if let Some(value) = self.config.words_positions_min_level_size {
|
||||||
builder.min_level_size(value);
|
builder.min_level_size(value);
|
||||||
}
|
}
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
databases_seen: databases_seen,
|
databases_seen,
|
||||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -469,8 +447,13 @@ mod tests {
|
|||||||
{ "id": 2, "name": "kevina" },
|
{ "id": 2, "name": "kevina" },
|
||||||
{ "id": 3, "name": "benoit" }
|
{ "id": 3, "name": "benoit" }
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is 3 documents now.
|
// Check that there is 3 documents now.
|
||||||
@ -482,8 +465,10 @@ mod tests {
|
|||||||
// Second we send 1 document with id 1, to erase the previous ones.
|
// Second we send 1 document with id 1, to erase the previous ones.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let content = documents!([ { "id": 1, "name": "updated kevin" } ]);
|
let content = documents!([ { "id": 1, "name": "updated kevin" } ]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let mut builder =
|
||||||
builder.execute(content, |_| ()).unwrap();
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is **always** 3 documents.
|
// Check that there is **always** 3 documents.
|
||||||
@ -499,8 +484,8 @@ mod tests {
|
|||||||
{ "id": 2, "name": "updated kevina" },
|
{ "id": 2, "name": "updated kevina" },
|
||||||
{ "id": 3, "name": "updated benoit" }
|
{ "id": 3, "name": "updated benoit" }
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
builder.execute(content, |_| ()).unwrap();
|
builder.add_documents(content).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is **always** 3 documents.
|
// Check that there is **always** 3 documents.
|
||||||
@ -525,9 +510,15 @@ mod tests {
|
|||||||
{ "id": 1, "name": "kevina" },
|
{ "id": 1, "name": "kevina" },
|
||||||
{ "id": 1, "name": "benoit" }
|
{ "id": 1, "name": "benoit" }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments);
|
let indexing_config = IndexDocumentsConfig {
|
||||||
builder.execute(content, |_| ()).unwrap();
|
update_method: IndexDocumentsMethod::UpdateDocuments,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is only 1 document now.
|
// Check that there is only 1 document now.
|
||||||
@ -551,9 +542,9 @@ mod tests {
|
|||||||
// Second we send 1 document with id 1, to force it to be merged with the previous one.
|
// Second we send 1 document with id 1, to force it to be merged with the previous one.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let content = documents!([ { "id": 1, "age": 25 } ]);
|
let content = documents!([ { "id": 1, "age": 25 } ]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments);
|
builder.add_documents(content).unwrap();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is **always** 1 document.
|
// Check that there is **always** 1 document.
|
||||||
@ -590,8 +581,10 @@ mod tests {
|
|||||||
{ "name": "kevina" },
|
{ "name": "kevina" },
|
||||||
{ "name": "benoit" }
|
{ "name": "benoit" }
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
assert!(builder.execute(content, |_| ()).is_err());
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
assert!(builder.add_documents(content).is_err());
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is no document.
|
// Check that there is no document.
|
||||||
@ -615,9 +608,13 @@ mod tests {
|
|||||||
{ "name": "kevina" },
|
{ "name": "kevina" },
|
||||||
{ "name": "benoit" }
|
{ "name": "benoit" }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.enable_autogenerate_docids();
|
let indexing_config =
|
||||||
builder.execute(content, |_| ()).unwrap();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is 3 documents now.
|
// Check that there is 3 documents now.
|
||||||
@ -633,8 +630,9 @@ mod tests {
|
|||||||
// Second we send 1 document with the generated uuid, to erase the previous ones.
|
// Second we send 1 document with the generated uuid, to erase the previous ones.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let content = documents!([ { "name": "updated kevin", "id": kevin_uuid } ]);
|
let content = documents!([ { "name": "updated kevin", "id": kevin_uuid } ]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
builder.execute(content, |_| ()).unwrap();
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is **always** 3 documents.
|
// Check that there is **always** 3 documents.
|
||||||
@ -670,8 +668,11 @@ mod tests {
|
|||||||
{ "id": 2, "name": "kevina" },
|
{ "id": 2, "name": "kevina" },
|
||||||
{ "id": 3, "name": "benoit" }
|
{ "id": 3, "name": "benoit" }
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is 3 documents now.
|
// Check that there is 3 documents now.
|
||||||
@ -683,9 +684,11 @@ mod tests {
|
|||||||
// Second we send 1 document without specifying the id.
|
// Second we send 1 document without specifying the id.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let content = documents!([ { "name": "new kevin" } ]);
|
let content = documents!([ { "name": "new kevin" } ]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let indexing_config =
|
||||||
builder.enable_autogenerate_docids();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is 4 documents now.
|
// Check that there is 4 documents now.
|
||||||
@ -705,8 +708,11 @@ mod tests {
|
|||||||
// First we send 0 documents and only headers.
|
// First we send 0 documents and only headers.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let content = documents!([]);
|
let content = documents!([]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is no documents.
|
// Check that there is no documents.
|
||||||
@ -727,16 +733,20 @@ mod tests {
|
|||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
// There is a space in the document id.
|
// There is a space in the document id.
|
||||||
let content = documents!([ { "id": "brume bleue", "name": "kevin" } ]);
|
let content = documents!([ { "id": "brume bleue", "name": "kevin" } ]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
assert!(builder.execute(content, |_| ()).is_err());
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
assert!(builder.add_documents(content).is_err());
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// First we send 1 document with a valid id.
|
// First we send 1 document with a valid id.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
// There is a space in the document id.
|
// There is a space in the document id.
|
||||||
let content = documents!([ { "id": 32, "name": "kevin" } ]);
|
let content = documents!([ { "id": 32, "name": "kevin" } ]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
builder.execute(content, |_| ()).unwrap();
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is 1 document now.
|
// Check that there is 1 document now.
|
||||||
@ -760,8 +770,11 @@ mod tests {
|
|||||||
{ "id": 1, "name": "kevina", "array": ["I", "am", "fine"] },
|
{ "id": 1, "name": "kevina", "array": ["I", "am", "fine"] },
|
||||||
{ "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] }
|
{ "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] }
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is 1 documents now.
|
// Check that there is 1 documents now.
|
||||||
@ -799,14 +812,22 @@ mod tests {
|
|||||||
{ "id": 4, "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling", "genre": "fantasy" },
|
{ "id": 4, "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling", "genre": "fantasy" },
|
||||||
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", "_geo": { "lat": 35, "lng": 23 } }
|
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", "_geo": { "lat": 35, "lng": 23 } }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
let indexing_config = IndexDocumentsConfig {
|
||||||
builder.execute(documents, |_| ()).unwrap();
|
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let indexing_config = IndexDocumentsConfig {
|
||||||
builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments);
|
update_method: IndexDocumentsMethod::UpdateDocuments,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
let documents = documents!([
|
let documents = documents!([
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
@ -815,7 +836,8 @@ mod tests {
|
|||||||
}
|
}
|
||||||
]);
|
]);
|
||||||
|
|
||||||
builder.execute(documents, |_| ()).unwrap();
|
builder.add_documents(documents).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -833,7 +855,12 @@ mod tests {
|
|||||||
{ "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" },
|
{ "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" },
|
||||||
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
|
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
|
||||||
]);
|
]);
|
||||||
IndexDocuments::new(&mut wtxn, &index).execute(content, |_| ()).unwrap();
|
let config = IndexerConfig::default();
|
||||||
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));
|
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));
|
||||||
|
|
||||||
@ -848,15 +875,22 @@ mod tests {
|
|||||||
let content = documents!([
|
let content = documents!([
|
||||||
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
|
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
|
||||||
]);
|
]);
|
||||||
IndexDocuments::new(&mut wtxn, &index).execute(content, |_| ()).unwrap();
|
|
||||||
|
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
|
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
|
||||||
assert!(external_documents_ids.get("30").is_some());
|
assert!(external_documents_ids.get("30").is_some());
|
||||||
|
|
||||||
let content = documents!([
|
let content = documents!([
|
||||||
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
|
{ "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
|
||||||
]);
|
]);
|
||||||
IndexDocuments::new(&mut wtxn, &index).execute(content, |_| ()).unwrap();
|
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
}
|
}
|
||||||
@ -886,8 +920,12 @@ mod tests {
|
|||||||
cursor.set_position(0);
|
cursor.set_position(0);
|
||||||
let content = DocumentBatchReader::from_reader(cursor).unwrap();
|
let content = DocumentBatchReader::from_reader(cursor).unwrap();
|
||||||
|
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
}
|
}
|
||||||
@ -916,8 +954,12 @@ mod tests {
|
|||||||
cursor.set_position(0);
|
cursor.set_position(0);
|
||||||
let content = DocumentBatchReader::from_reader(cursor).unwrap();
|
let content = DocumentBatchReader::from_reader(cursor).unwrap();
|
||||||
|
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
@ -969,8 +1011,12 @@ mod tests {
|
|||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
}
|
}
|
||||||
@ -990,8 +1036,12 @@ mod tests {
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is 4 document now.
|
// Check that there is 4 document now.
|
||||||
@ -1002,8 +1052,12 @@ mod tests {
|
|||||||
let content = documents!([]);
|
let content = documents!([]);
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is 4 document now.
|
// Check that there is 4 document now.
|
||||||
@ -1019,8 +1073,12 @@ mod tests {
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that there is 4 document now.
|
// Check that there is 4 document now.
|
||||||
@ -1042,8 +1100,12 @@ mod tests {
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
@ -5,7 +5,6 @@ use std::fs::File;
|
|||||||
use std::io::{Read, Seek, SeekFrom};
|
use std::io::{Read, Seek, SeekFrom};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use grenad::CompressionType;
|
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use log::info;
|
use log::info;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -14,7 +13,7 @@ use serde_json::{Map, Value};
|
|||||||
use super::helpers::{
|
use super::helpers::{
|
||||||
create_sorter, create_writer, keep_latest_obkv, merge_obkvs, merge_two_obkvs, MergeFn,
|
create_sorter, create_writer, keep_latest_obkv, merge_obkvs, merge_two_obkvs, MergeFn,
|
||||||
};
|
};
|
||||||
use super::IndexDocumentsMethod;
|
use super::{IndexDocumentsMethod, IndexerConfig};
|
||||||
use crate::documents::{DocumentBatchReader, DocumentsBatchIndex};
|
use crate::documents::{DocumentBatchReader, DocumentsBatchIndex};
|
||||||
use crate::error::{Error, InternalError, UserError};
|
use crate::error::{Error, InternalError, UserError};
|
||||||
use crate::index::db_name;
|
use crate::index::db_name;
|
||||||
@ -40,16 +39,14 @@ pub struct TransformOutput {
|
|||||||
/// Outputs the new `FieldsIdsMap`, the new `UsersIdsDocumentsIds` map, the new documents ids,
|
/// Outputs the new `FieldsIdsMap`, the new `UsersIdsDocumentsIds` map, the new documents ids,
|
||||||
/// the replaced documents ids, the number of documents in this update and the file
|
/// the replaced documents ids, the number of documents in this update and the file
|
||||||
/// containing all those documents.
|
/// containing all those documents.
|
||||||
pub struct Transform<'t, 'i> {
|
pub struct Transform<'a, 'i> {
|
||||||
pub rtxn: &'t heed::RoTxn<'i>,
|
|
||||||
pub index: &'i Index,
|
pub index: &'i Index,
|
||||||
pub log_every_n: Option<usize>,
|
indexer_settings: &'a IndexerConfig,
|
||||||
pub chunk_compression_type: CompressionType,
|
|
||||||
pub chunk_compression_level: Option<u32>,
|
|
||||||
pub max_nb_chunks: Option<usize>,
|
|
||||||
pub max_memory: Option<usize>,
|
|
||||||
pub index_documents_method: IndexDocumentsMethod,
|
|
||||||
pub autogenerate_docids: bool,
|
pub autogenerate_docids: bool,
|
||||||
|
pub index_documents_method: IndexDocumentsMethod,
|
||||||
|
|
||||||
|
sorter: grenad::Sorter<MergeFn>,
|
||||||
|
documents_count: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a mapping between the field ids found in the document batch and the one that were
|
/// Create a mapping between the field ids found in the document batch and the one that were
|
||||||
@ -84,56 +81,73 @@ fn find_primary_key(index: &DocumentsBatchIndex) -> Option<&str> {
|
|||||||
.map(String::as_str)
|
.map(String::as_str)
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Transform<'_, '_> {
|
impl<'a, 'i> Transform<'a, 'i> {
|
||||||
pub fn read_documents<R, F>(
|
pub fn new(
|
||||||
self,
|
index: &'i Index,
|
||||||
mut reader: DocumentBatchReader<R>,
|
indexer_settings: &'a IndexerConfig,
|
||||||
progress_callback: F,
|
index_documents_method: IndexDocumentsMethod,
|
||||||
) -> Result<TransformOutput>
|
autogenerate_docids: bool,
|
||||||
where
|
) -> Self {
|
||||||
R: Read + Seek,
|
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
|
||||||
{
|
|
||||||
let fields_index = reader.index();
|
|
||||||
let mut fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
|
||||||
let mapping = create_fields_mapping(&mut fields_ids_map, fields_index)?;
|
|
||||||
|
|
||||||
let alternative_name = self
|
|
||||||
.index
|
|
||||||
.primary_key(self.rtxn)?
|
|
||||||
.or_else(|| find_primary_key(fields_index))
|
|
||||||
.map(String::from);
|
|
||||||
|
|
||||||
let (primary_key_id, primary_key_name) = compute_primary_key_pair(
|
|
||||||
self.index.primary_key(self.rtxn)?,
|
|
||||||
&mut fields_ids_map,
|
|
||||||
alternative_name,
|
|
||||||
self.autogenerate_docids,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// We must choose the appropriate merge function for when two or more documents
|
// We must choose the appropriate merge function for when two or more documents
|
||||||
// with the same user id must be merged or fully replaced in the same batch.
|
// with the same user id must be merged or fully replaced in the same batch.
|
||||||
let merge_function = match self.index_documents_method {
|
let merge_function = match index_documents_method {
|
||||||
IndexDocumentsMethod::ReplaceDocuments => keep_latest_obkv,
|
IndexDocumentsMethod::ReplaceDocuments => keep_latest_obkv,
|
||||||
IndexDocumentsMethod::UpdateDocuments => merge_obkvs,
|
IndexDocumentsMethod::UpdateDocuments => merge_obkvs,
|
||||||
};
|
};
|
||||||
|
|
||||||
// We initialize the sorter with the user indexing settings.
|
// We initialize the sorter with the user indexing settings.
|
||||||
let mut sorter = create_sorter(
|
let sorter = create_sorter(
|
||||||
merge_function,
|
merge_function,
|
||||||
self.chunk_compression_type,
|
indexer_settings.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
indexer_settings.chunk_compression_level,
|
||||||
self.max_nb_chunks,
|
indexer_settings.max_nb_chunks,
|
||||||
self.max_memory,
|
indexer_settings.max_memory,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
Transform {
|
||||||
|
index,
|
||||||
|
indexer_settings,
|
||||||
|
autogenerate_docids,
|
||||||
|
sorter,
|
||||||
|
documents_count: 0,
|
||||||
|
index_documents_method,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn read_documents<R, F>(
|
||||||
|
&mut self,
|
||||||
|
mut reader: DocumentBatchReader<R>,
|
||||||
|
wtxn: &mut heed::RwTxn,
|
||||||
|
progress_callback: F,
|
||||||
|
) -> Result<usize>
|
||||||
|
where
|
||||||
|
R: Read + Seek,
|
||||||
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
{
|
||||||
|
let fields_index = reader.index();
|
||||||
|
let mut fields_ids_map = self.index.fields_ids_map(wtxn)?;
|
||||||
|
let mapping = create_fields_mapping(&mut fields_ids_map, fields_index)?;
|
||||||
|
|
||||||
|
let alternative_name = self
|
||||||
|
.index
|
||||||
|
.primary_key(wtxn)?
|
||||||
|
.or_else(|| find_primary_key(fields_index))
|
||||||
|
.map(String::from);
|
||||||
|
|
||||||
|
let (primary_key_id, primary_key_name) = compute_primary_key_pair(
|
||||||
|
self.index.primary_key(wtxn)?,
|
||||||
|
&mut fields_ids_map,
|
||||||
|
alternative_name,
|
||||||
|
self.autogenerate_docids,
|
||||||
|
)?;
|
||||||
|
|
||||||
let mut obkv_buffer = Vec::new();
|
let mut obkv_buffer = Vec::new();
|
||||||
let mut documents_count = 0;
|
let mut documents_count = 0;
|
||||||
let mut external_id_buffer = Vec::new();
|
let mut external_id_buffer = Vec::new();
|
||||||
let mut field_buffer: Vec<(u16, &[u8])> = Vec::new();
|
let mut field_buffer: Vec<(u16, &[u8])> = Vec::new();
|
||||||
while let Some((addition_index, document)) = reader.next_document_with_index()? {
|
while let Some((addition_index, document)) = reader.next_document_with_index()? {
|
||||||
let mut field_buffer_cache = drop_and_reuse(field_buffer);
|
let mut field_buffer_cache = drop_and_reuse(field_buffer);
|
||||||
if self.log_every_n.map_or(false, |len| documents_count % len == 0) {
|
if self.indexer_settings.log_every_n.map_or(false, |len| documents_count % len == 0) {
|
||||||
progress_callback(UpdateIndexingStep::RemapDocumentAddition {
|
progress_callback(UpdateIndexingStep::RemapDocumentAddition {
|
||||||
documents_seen: documents_count,
|
documents_seen: documents_count,
|
||||||
});
|
});
|
||||||
@ -214,7 +228,7 @@ impl Transform<'_, '_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We use the extracted/generated user id as the key for this document.
|
// We use the extracted/generated user id as the key for this document.
|
||||||
sorter.insert(&external_id.as_ref().as_bytes(), &obkv_buffer)?;
|
self.sorter.insert(&external_id.as_ref().as_bytes(), &obkv_buffer)?;
|
||||||
documents_count += 1;
|
documents_count += 1;
|
||||||
|
|
||||||
progress_callback(UpdateIndexingStep::RemapDocumentAddition {
|
progress_callback(UpdateIndexingStep::RemapDocumentAddition {
|
||||||
@ -230,38 +244,40 @@ impl Transform<'_, '_> {
|
|||||||
documents_seen: documents_count,
|
documents_seen: documents_count,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
self.index.put_fields_ids_map(wtxn, &fields_ids_map)?;
|
||||||
|
self.index.put_primary_key(wtxn, &primary_key_name)?;
|
||||||
|
self.documents_count += documents_count;
|
||||||
// Now that we have a valid sorter that contains the user id and the obkv we
|
// Now that we have a valid sorter that contains the user id and the obkv we
|
||||||
// give it to the last transforming function which returns the TransformOutput.
|
// give it to the last transforming function which returns the TransformOutput.
|
||||||
self.output_from_sorter(
|
Ok(documents_count)
|
||||||
sorter,
|
|
||||||
primary_key_name,
|
|
||||||
fields_ids_map,
|
|
||||||
documents_count,
|
|
||||||
progress_callback,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generate the `TransformOutput` based on the given sorter that can be generated from any
|
/// Generate the `TransformOutput` based on the given sorter that can be generated from any
|
||||||
/// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
|
/// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
|
||||||
/// id for the user side and the value must be an obkv where keys are valid fields ids.
|
/// id for the user side and the value must be an obkv where keys are valid fields ids.
|
||||||
fn output_from_sorter<F>(
|
pub(crate) fn output_from_sorter<F>(
|
||||||
self,
|
self,
|
||||||
sorter: grenad::Sorter<MergeFn>,
|
wtxn: &mut heed::RwTxn,
|
||||||
primary_key: String,
|
|
||||||
fields_ids_map: FieldsIdsMap,
|
|
||||||
approximate_number_of_documents: usize,
|
|
||||||
progress_callback: F,
|
progress_callback: F,
|
||||||
) -> Result<TransformOutput>
|
) -> Result<TransformOutput>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
{
|
{
|
||||||
let mut external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap();
|
let primary_key = self
|
||||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
.index
|
||||||
let mut field_distribution = self.index.field_distribution(self.rtxn)?;
|
.primary_key(&wtxn)?
|
||||||
|
.ok_or(Error::UserError(UserError::MissingPrimaryKey))?
|
||||||
|
.to_string();
|
||||||
|
let fields_ids_map = self.index.fields_ids_map(wtxn)?;
|
||||||
|
let approximate_number_of_documents = self.documents_count;
|
||||||
|
|
||||||
|
let mut external_documents_ids = self.index.external_documents_ids(wtxn).unwrap();
|
||||||
|
let documents_ids = self.index.documents_ids(wtxn)?;
|
||||||
|
let mut field_distribution = self.index.field_distribution(wtxn)?;
|
||||||
let mut available_documents_ids = AvailableDocumentsIds::from_documents_ids(&documents_ids);
|
let mut available_documents_ids = AvailableDocumentsIds::from_documents_ids(&documents_ids);
|
||||||
|
|
||||||
// consume sorter, in order to free the internal allocation, before creating a new one.
|
// consume sorter, in order to free the internal allocation, before creating a new one.
|
||||||
let mut iter = sorter.into_merger_iter()?;
|
let mut iter = self.sorter.into_merger_iter()?;
|
||||||
|
|
||||||
// Once we have sort and deduplicated the documents we write them into a final file.
|
// Once we have sort and deduplicated the documents we write them into a final file.
|
||||||
let mut final_sorter = create_sorter(
|
let mut final_sorter = create_sorter(
|
||||||
@ -272,10 +288,10 @@ impl Transform<'_, '_> {
|
|||||||
Err(InternalError::IndexingMergingKeys { process: "documents" }.into())
|
Err(InternalError::IndexingMergingKeys { process: "documents" }.into())
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
self.chunk_compression_type,
|
self.indexer_settings.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.indexer_settings.chunk_compression_level,
|
||||||
self.max_nb_chunks,
|
self.indexer_settings.max_nb_chunks,
|
||||||
self.max_memory,
|
self.indexer_settings.max_memory,
|
||||||
);
|
);
|
||||||
let mut new_external_documents_ids_builder = fst::MapBuilder::memory();
|
let mut new_external_documents_ids_builder = fst::MapBuilder::memory();
|
||||||
let mut replaced_documents_ids = RoaringBitmap::new();
|
let mut replaced_documents_ids = RoaringBitmap::new();
|
||||||
@ -285,7 +301,7 @@ impl Transform<'_, '_> {
|
|||||||
// While we write into final file we get or generate the internal documents ids.
|
// While we write into final file we get or generate the internal documents ids.
|
||||||
let mut documents_count = 0;
|
let mut documents_count = 0;
|
||||||
while let Some((external_id, update_obkv)) = iter.next()? {
|
while let Some((external_id, update_obkv)) = iter.next()? {
|
||||||
if self.log_every_n.map_or(false, |len| documents_count % len == 0) {
|
if self.indexer_settings.log_every_n.map_or(false, |len| documents_count % len == 0) {
|
||||||
progress_callback(UpdateIndexingStep::ComputeIdsAndMergeDocuments {
|
progress_callback(UpdateIndexingStep::ComputeIdsAndMergeDocuments {
|
||||||
documents_seen: documents_count,
|
documents_seen: documents_count,
|
||||||
total_documents: approximate_number_of_documents,
|
total_documents: approximate_number_of_documents,
|
||||||
@ -299,7 +315,7 @@ impl Transform<'_, '_> {
|
|||||||
replaced_documents_ids.insert(docid);
|
replaced_documents_ids.insert(docid);
|
||||||
|
|
||||||
let key = BEU32::new(docid);
|
let key = BEU32::new(docid);
|
||||||
let base_obkv = self.index.documents.get(&self.rtxn, &key)?.ok_or(
|
let base_obkv = self.index.documents.get(wtxn, &key)?.ok_or(
|
||||||
InternalError::DatabaseMissingEntry {
|
InternalError::DatabaseMissingEntry {
|
||||||
db_name: db_name::DOCUMENTS,
|
db_name: db_name::DOCUMENTS,
|
||||||
key: None,
|
key: None,
|
||||||
@ -359,8 +375,11 @@ impl Transform<'_, '_> {
|
|||||||
|
|
||||||
// We create a final writer to write the new documents in order from the sorter.
|
// We create a final writer to write the new documents in order from the sorter.
|
||||||
let file = tempfile::tempfile()?;
|
let file = tempfile::tempfile()?;
|
||||||
let mut writer =
|
let mut writer = create_writer(
|
||||||
create_writer(self.chunk_compression_type, self.chunk_compression_level, file)?;
|
self.indexer_settings.chunk_compression_type,
|
||||||
|
self.indexer_settings.chunk_compression_level,
|
||||||
|
file,
|
||||||
|
)?;
|
||||||
|
|
||||||
// Once we have written all the documents into the final sorter, we write the documents
|
// Once we have written all the documents into the final sorter, we write the documents
|
||||||
// into this writer, extract the file and reset the seek to be able to read it again.
|
// into this writer, extract the file and reset the seek to be able to read it again.
|
||||||
@ -392,22 +411,28 @@ impl Transform<'_, '_> {
|
|||||||
// TODO this can be done in parallel by using the rayon `ThreadPool`.
|
// TODO this can be done in parallel by using the rayon `ThreadPool`.
|
||||||
pub fn remap_index_documents(
|
pub fn remap_index_documents(
|
||||||
self,
|
self,
|
||||||
primary_key: String,
|
wtxn: &mut heed::RwTxn,
|
||||||
old_fields_ids_map: FieldsIdsMap,
|
old_fields_ids_map: FieldsIdsMap,
|
||||||
new_fields_ids_map: FieldsIdsMap,
|
new_fields_ids_map: FieldsIdsMap,
|
||||||
) -> Result<TransformOutput> {
|
) -> Result<TransformOutput> {
|
||||||
let field_distribution = self.index.field_distribution(self.rtxn)?;
|
// There already has been a document addition, the primary key should be set by now.
|
||||||
let external_documents_ids = self.index.external_documents_ids(self.rtxn)?;
|
let primary_key =
|
||||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
self.index.primary_key(wtxn)?.ok_or(UserError::MissingPrimaryKey)?.to_string();
|
||||||
|
let field_distribution = self.index.field_distribution(wtxn)?;
|
||||||
|
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
||||||
|
let documents_ids = self.index.documents_ids(wtxn)?;
|
||||||
let documents_count = documents_ids.len() as usize;
|
let documents_count = documents_ids.len() as usize;
|
||||||
|
|
||||||
// We create a final writer to write the new documents in order from the sorter.
|
// We create a final writer to write the new documents in order from the sorter.
|
||||||
let file = tempfile::tempfile()?;
|
let file = tempfile::tempfile()?;
|
||||||
let mut writer =
|
let mut writer = create_writer(
|
||||||
create_writer(self.chunk_compression_type, self.chunk_compression_level, file)?;
|
self.indexer_settings.chunk_compression_type,
|
||||||
|
self.indexer_settings.chunk_compression_level,
|
||||||
|
file,
|
||||||
|
)?;
|
||||||
|
|
||||||
let mut obkv_buffer = Vec::new();
|
let mut obkv_buffer = Vec::new();
|
||||||
for result in self.index.documents.iter(self.rtxn)? {
|
for result in self.index.documents.iter(wtxn)? {
|
||||||
let (docid, obkv) = result?;
|
let (docid, obkv) = result?;
|
||||||
let docid = docid.get();
|
let docid = docid.get();
|
||||||
|
|
||||||
|
29
milli/src/update/indexer_config.rs
Normal file
29
milli/src/update/indexer_config.rs
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
use grenad::CompressionType;
|
||||||
|
use rayon::ThreadPool;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct IndexerConfig {
|
||||||
|
pub log_every_n: Option<usize>,
|
||||||
|
pub max_nb_chunks: Option<usize>,
|
||||||
|
pub documents_chunk_size: Option<usize>,
|
||||||
|
pub max_memory: Option<usize>,
|
||||||
|
pub chunk_compression_type: CompressionType,
|
||||||
|
pub chunk_compression_level: Option<u32>,
|
||||||
|
pub thread_pool: Option<ThreadPool>,
|
||||||
|
pub max_positions_per_attributes: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for IndexerConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
log_every_n: None,
|
||||||
|
max_nb_chunks: None,
|
||||||
|
documents_chunk_size: None,
|
||||||
|
max_memory: None,
|
||||||
|
chunk_compression_type: CompressionType::None,
|
||||||
|
chunk_compression_level: None,
|
||||||
|
thread_pool: None,
|
||||||
|
max_positions_per_attributes: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -2,9 +2,11 @@ pub use self::available_documents_ids::AvailableDocumentsIds;
|
|||||||
pub use self::clear_documents::ClearDocuments;
|
pub use self::clear_documents::ClearDocuments;
|
||||||
pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult};
|
pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult};
|
||||||
pub use self::facets::Facets;
|
pub use self::facets::Facets;
|
||||||
pub use self::index_documents::{DocumentAdditionResult, IndexDocuments, IndexDocumentsMethod};
|
pub use self::index_documents::{
|
||||||
|
DocumentAdditionResult, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||||
|
};
|
||||||
|
pub use self::indexer_config::IndexerConfig;
|
||||||
pub use self::settings::{Setting, Settings};
|
pub use self::settings::{Setting, Settings};
|
||||||
pub use self::update_builder::UpdateBuilder;
|
|
||||||
pub use self::update_step::UpdateIndexingStep;
|
pub use self::update_step::UpdateIndexingStep;
|
||||||
pub use self::word_prefix_docids::WordPrefixDocids;
|
pub use self::word_prefix_docids::WordPrefixDocids;
|
||||||
pub use self::word_prefix_pair_proximity_docids::WordPrefixPairProximityDocids;
|
pub use self::word_prefix_pair_proximity_docids::WordPrefixPairProximityDocids;
|
||||||
@ -16,8 +18,8 @@ mod clear_documents;
|
|||||||
mod delete_documents;
|
mod delete_documents;
|
||||||
mod facets;
|
mod facets;
|
||||||
mod index_documents;
|
mod index_documents;
|
||||||
|
mod indexer_config;
|
||||||
mod settings;
|
mod settings;
|
||||||
mod update_builder;
|
|
||||||
mod update_step;
|
mod update_step;
|
||||||
mod word_prefix_docids;
|
mod word_prefix_docids;
|
||||||
mod word_prefix_pair_proximity_docids;
|
mod word_prefix_pair_proximity_docids;
|
||||||
|
@ -2,15 +2,15 @@ use std::collections::{BTreeSet, HashMap, HashSet};
|
|||||||
use std::result::Result as StdResult;
|
use std::result::Result as StdResult;
|
||||||
|
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use grenad::CompressionType;
|
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
||||||
use rayon::ThreadPool;
|
|
||||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
|
|
||||||
|
use super::index_documents::{IndexDocumentsConfig, Transform};
|
||||||
|
use super::IndexerConfig;
|
||||||
use crate::criterion::Criterion;
|
use crate::criterion::Criterion;
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::update::index_documents::{IndexDocumentsMethod, Transform};
|
use crate::update::index_documents::IndexDocumentsMethod;
|
||||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
||||||
use crate::{FieldsIdsMap, Index, Result};
|
use crate::{FieldsIdsMap, Index, Result};
|
||||||
|
|
||||||
@ -77,14 +77,8 @@ impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
|
|||||||
pub struct Settings<'a, 't, 'u, 'i> {
|
pub struct Settings<'a, 't, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
pub(crate) log_every_n: Option<usize>,
|
|
||||||
pub(crate) max_nb_chunks: Option<usize>,
|
indexer_config: &'a IndexerConfig,
|
||||||
pub(crate) max_memory: Option<usize>,
|
|
||||||
pub(crate) documents_chunk_size: Option<usize>,
|
|
||||||
pub(crate) chunk_compression_type: CompressionType,
|
|
||||||
pub(crate) chunk_compression_level: Option<u32>,
|
|
||||||
pub(crate) thread_pool: Option<&'a ThreadPool>,
|
|
||||||
pub(crate) max_positions_per_attributes: Option<u32>,
|
|
||||||
|
|
||||||
searchable_fields: Setting<Vec<String>>,
|
searchable_fields: Setting<Vec<String>>,
|
||||||
displayed_fields: Setting<Vec<String>>,
|
displayed_fields: Setting<Vec<String>>,
|
||||||
@ -98,17 +92,14 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||||
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> Settings<'a, 't, 'u, 'i> {
|
pub fn new(
|
||||||
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
|
index: &'i Index,
|
||||||
|
indexer_config: &'a IndexerConfig,
|
||||||
|
) -> Settings<'a, 't, 'u, 'i> {
|
||||||
Settings {
|
Settings {
|
||||||
wtxn,
|
wtxn,
|
||||||
index,
|
index,
|
||||||
log_every_n: None,
|
|
||||||
max_nb_chunks: None,
|
|
||||||
max_memory: None,
|
|
||||||
documents_chunk_size: None,
|
|
||||||
chunk_compression_type: CompressionType::None,
|
|
||||||
chunk_compression_level: None,
|
|
||||||
thread_pool: None,
|
|
||||||
searchable_fields: Setting::NotSet,
|
searchable_fields: Setting::NotSet,
|
||||||
displayed_fields: Setting::NotSet,
|
displayed_fields: Setting::NotSet,
|
||||||
filterable_fields: Setting::NotSet,
|
filterable_fields: Setting::NotSet,
|
||||||
@ -118,14 +109,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
distinct_field: Setting::NotSet,
|
distinct_field: Setting::NotSet,
|
||||||
synonyms: Setting::NotSet,
|
synonyms: Setting::NotSet,
|
||||||
primary_key: Setting::NotSet,
|
primary_key: Setting::NotSet,
|
||||||
max_positions_per_attributes: None,
|
indexer_config,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn log_every_n(&mut self, n: usize) {
|
|
||||||
self.log_every_n = Some(n);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn reset_searchable_fields(&mut self) {
|
pub fn reset_searchable_fields(&mut self) {
|
||||||
self.searchable_fields = Setting::Reset;
|
self.searchable_fields = Setting::Reset;
|
||||||
}
|
}
|
||||||
@ -210,25 +197,16 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let transform = Transform {
|
let transform = Transform::new(
|
||||||
rtxn: &self.wtxn,
|
&self.index,
|
||||||
index: self.index,
|
&self.indexer_config,
|
||||||
log_every_n: self.log_every_n,
|
IndexDocumentsMethod::ReplaceDocuments,
|
||||||
chunk_compression_type: self.chunk_compression_type,
|
false,
|
||||||
chunk_compression_level: self.chunk_compression_level,
|
);
|
||||||
max_nb_chunks: self.max_nb_chunks,
|
|
||||||
max_memory: self.max_memory,
|
|
||||||
index_documents_method: IndexDocumentsMethod::ReplaceDocuments,
|
|
||||||
autogenerate_docids: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
// There already has been a document addition, the primary key should be set by now.
|
|
||||||
let primary_key =
|
|
||||||
self.index.primary_key(&self.wtxn)?.ok_or(UserError::MissingPrimaryKey)?;
|
|
||||||
|
|
||||||
// We remap the documents fields based on the new `FieldsIdsMap`.
|
// We remap the documents fields based on the new `FieldsIdsMap`.
|
||||||
let output = transform.remap_index_documents(
|
let output = transform.remap_index_documents(
|
||||||
primary_key.to_string(),
|
self.wtxn,
|
||||||
old_fields_ids_map,
|
old_fields_ids_map,
|
||||||
fields_ids_map.clone(),
|
fields_ids_map.clone(),
|
||||||
)?;
|
)?;
|
||||||
@ -238,16 +216,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
|
|
||||||
// We index the generated `TransformOutput` which must contain
|
// We index the generated `TransformOutput` which must contain
|
||||||
// all the documents with fields in the newly defined searchable order.
|
// all the documents with fields in the newly defined searchable order.
|
||||||
let mut indexing_builder = IndexDocuments::new(self.wtxn, self.index);
|
let indexing_builder = IndexDocuments::new(
|
||||||
indexing_builder.log_every_n = self.log_every_n;
|
self.wtxn,
|
||||||
indexing_builder.max_nb_chunks = self.max_nb_chunks;
|
self.index,
|
||||||
indexing_builder.max_memory = self.max_memory;
|
&self.indexer_config,
|
||||||
indexing_builder.documents_chunk_size = self.documents_chunk_size;
|
IndexDocumentsConfig::default(),
|
||||||
indexing_builder.chunk_compression_type = self.chunk_compression_type;
|
&cb,
|
||||||
indexing_builder.chunk_compression_level = self.chunk_compression_level;
|
);
|
||||||
indexing_builder.thread_pool = self.thread_pool;
|
indexing_builder.execute_raw(output)?;
|
||||||
indexing_builder.max_positions_per_attributes = self.max_positions_per_attributes;
|
|
||||||
indexing_builder.execute_raw(output, &cb)?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -535,13 +511,17 @@ mod tests {
|
|||||||
{ "id": 2, "name": "kevina", "age": 21},
|
{ "id": 2, "name": "kevina", "age": 21},
|
||||||
{ "id": 3, "name": "benoit", "age": 34 }
|
{ "id": 3, "name": "benoit", "age": 34 }
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// We change the searchable fields to be the "name" field only.
|
// We change the searchable fields to be the "name" field only.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_searchable_fields(vec!["name".into()]);
|
builder.set_searchable_fields(vec!["name".into()]);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -563,7 +543,7 @@ mod tests {
|
|||||||
|
|
||||||
// We change the searchable fields to be the "name" field only.
|
// We change the searchable fields to be the "name" field only.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.reset_searchable_fields();
|
builder.reset_searchable_fields();
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -592,15 +572,19 @@ mod tests {
|
|||||||
{ "name": "kevina", "age": 21 },
|
{ "name": "kevina", "age": 21 },
|
||||||
{ "name": "benoit", "age": 34 }
|
{ "name": "benoit", "age": 34 }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.enable_autogenerate_docids();
|
let indexing_config =
|
||||||
builder.execute(content, |_| ()).unwrap();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// In the same transaction we change the displayed fields to be only the "age".
|
// In the same transaction we change the displayed fields to be only the "age".
|
||||||
// We also change the searchable fields to be the "name" field only.
|
// We also change the searchable fields to be the "name" field only.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_displayed_fields(vec!["age".into()]);
|
builder.set_displayed_fields(vec!["age".into()]);
|
||||||
builder.set_searchable_fields(vec!["name".into()]);
|
builder.set_searchable_fields(vec!["name".into()]);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
@ -614,7 +598,7 @@ mod tests {
|
|||||||
|
|
||||||
// We change the searchable fields to be the "name" field only.
|
// We change the searchable fields to be the "name" field only.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.reset_searchable_fields();
|
builder.reset_searchable_fields();
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -639,9 +623,13 @@ mod tests {
|
|||||||
{ "name": "kevina", "age": 21 },
|
{ "name": "kevina", "age": 21 },
|
||||||
{ "name": "benoit", "age": 34 }
|
{ "name": "benoit", "age": 34 }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.enable_autogenerate_docids();
|
let indexing_config =
|
||||||
builder.execute(content, |_| ()).unwrap();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that the displayed fields are correctly set to `None` (default value).
|
// Check that the displayed fields are correctly set to `None` (default value).
|
||||||
@ -664,12 +652,16 @@ mod tests {
|
|||||||
{ "name": "kevina", "age": 21 },
|
{ "name": "kevina", "age": 21 },
|
||||||
{ "name": "benoit", "age": 34 }
|
{ "name": "benoit", "age": 34 }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.enable_autogenerate_docids();
|
let indexing_config =
|
||||||
builder.execute(content, |_| ()).unwrap();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
// In the same transaction we change the displayed fields to be only the age.
|
// In the same transaction we change the displayed fields to be only the age.
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_displayed_fields(vec!["age".into()]);
|
builder.set_displayed_fields(vec!["age".into()]);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -682,7 +674,7 @@ mod tests {
|
|||||||
|
|
||||||
// We reset the fields ids to become `None`, the default value.
|
// We reset the fields ids to become `None`, the default value.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.reset_displayed_fields();
|
builder.reset_displayed_fields();
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -700,9 +692,11 @@ mod tests {
|
|||||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
// Set the filterable fields to be the age.
|
// Set the filterable fields to be the age.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_filterable_fields(hashset! { S("age") });
|
builder.set_filterable_fields(hashset! { S("age") });
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
@ -712,9 +706,12 @@ mod tests {
|
|||||||
{ "name": "kevina", "age": 21 },
|
{ "name": "kevina", "age": 21 },
|
||||||
{ "name": "benoit", "age": 34 }
|
{ "name": "benoit", "age": 34 }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let indexing_config =
|
||||||
builder.enable_autogenerate_docids();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Check that the displayed fields are correctly set.
|
// Check that the displayed fields are correctly set.
|
||||||
@ -749,9 +746,12 @@ mod tests {
|
|||||||
{ "name": "benoit", "age": 35 }
|
{ "name": "benoit", "age": 35 }
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let indexing_config =
|
||||||
builder.enable_autogenerate_docids();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
@ -771,10 +771,11 @@ mod tests {
|
|||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
// Set the filterable fields to be the age.
|
// Set the filterable fields to be the age.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
// Don't display the generated `id` field.
|
// Don't display the generated `id` field.
|
||||||
builder.set_displayed_fields(vec![S("name")]);
|
builder.set_displayed_fields(vec![S("name")]);
|
||||||
builder.set_criteria(vec![S("age:asc")]);
|
builder.set_criteria(vec![S("age:asc")]);
|
||||||
@ -786,9 +787,12 @@ mod tests {
|
|||||||
{ "name": "kevina", "age": 21 },
|
{ "name": "kevina", "age": 21 },
|
||||||
{ "name": "benoit", "age": 34 }
|
{ "name": "benoit", "age": 34 }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let indexing_config =
|
||||||
builder.enable_autogenerate_docids();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Run an empty query just to ensure that the search results are ordered.
|
// Run an empty query just to ensure that the search results are ordered.
|
||||||
@ -813,10 +817,11 @@ mod tests {
|
|||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
// Set the filterable fields to be the age.
|
// Set the filterable fields to be the age.
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
// Don't display the generated `id` field.
|
// Don't display the generated `id` field.
|
||||||
builder.set_displayed_fields(vec![S("name"), S("age")]);
|
builder.set_displayed_fields(vec![S("name"), S("age")]);
|
||||||
builder.set_distinct_field(S("age"));
|
builder.set_distinct_field(S("age"));
|
||||||
@ -832,9 +837,12 @@ mod tests {
|
|||||||
{ "name": "bernie", "age": 34 },
|
{ "name": "bernie", "age": 34 },
|
||||||
{ "name": "ben", "age": 34 }
|
{ "name": "ben", "age": 34 }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let indexing_config =
|
||||||
builder.enable_autogenerate_docids();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Run an empty query just to ensure that the search results are ordered.
|
// Run an empty query just to ensure that the search results are ordered.
|
||||||
@ -859,9 +867,13 @@ mod tests {
|
|||||||
{ "name": "kevina", "age": 21 },
|
{ "name": "kevina", "age": 21 },
|
||||||
{ "name": "benoit", "age": 34 }
|
{ "name": "benoit", "age": 34 }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.enable_autogenerate_docids();
|
let indexing_config =
|
||||||
builder.execute(content, |_| ()).unwrap();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// Ensure there is no stop_words by default
|
// Ensure there is no stop_words by default
|
||||||
@ -884,12 +896,16 @@ mod tests {
|
|||||||
{ "name": "kevina", "age": 21, "maxim": "Doggos are the best" },
|
{ "name": "kevina", "age": 21, "maxim": "Doggos are the best" },
|
||||||
{ "name": "benoit", "age": 34, "maxim": "The crepes are really good" },
|
{ "name": "benoit", "age": 34, "maxim": "The crepes are really good" },
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.enable_autogenerate_docids();
|
let indexing_config =
|
||||||
builder.execute(content, |_| ()).unwrap();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
// In the same transaction we provide some stop_words
|
// In the same transaction we provide some stop_words
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
let set = btreeset! { "i".to_string(), "the".to_string(), "are".to_string() };
|
let set = btreeset! { "i".to_string(), "the".to_string(), "are".to_string() };
|
||||||
builder.set_stop_words(set.clone());
|
builder.set_stop_words(set.clone());
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
@ -920,7 +936,7 @@ mod tests {
|
|||||||
|
|
||||||
// now we'll reset the stop_words and ensure it's None
|
// now we'll reset the stop_words and ensure it's None
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.reset_stop_words();
|
builder.reset_stop_words();
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -958,12 +974,16 @@ mod tests {
|
|||||||
{ "name": "kevina", "age": 21, "maxim": "Doggos are the best"},
|
{ "name": "kevina", "age": 21, "maxim": "Doggos are the best"},
|
||||||
{ "name": "benoit", "age": 34, "maxim": "The crepes are really good"},
|
{ "name": "benoit", "age": 34, "maxim": "The crepes are really good"},
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let config = IndexerConfig::default();
|
||||||
builder.enable_autogenerate_docids();
|
let indexing_config =
|
||||||
builder.execute(content, |_| ()).unwrap();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
// In the same transaction provide some synonyms
|
// In the same transaction provide some synonyms
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_synonyms(hashmap! {
|
builder.set_synonyms(hashmap! {
|
||||||
"blini".to_string() => vec!["crepes".to_string()],
|
"blini".to_string() => vec!["crepes".to_string()],
|
||||||
"super like".to_string() => vec!["love".to_string()],
|
"super like".to_string() => vec!["love".to_string()],
|
||||||
@ -987,7 +1007,7 @@ mod tests {
|
|||||||
|
|
||||||
// Reset the synonyms
|
// Reset the synonyms
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.reset_synonyms();
|
builder.reset_synonyms();
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -1012,10 +1032,11 @@ mod tests {
|
|||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
// Set all the settings except searchable
|
// Set all the settings except searchable
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_displayed_fields(vec!["hello".to_string()]);
|
builder.set_displayed_fields(vec!["hello".to_string()]);
|
||||||
builder.set_filterable_fields(hashset! { S("age"), S("toto") });
|
builder.set_filterable_fields(hashset! { S("age"), S("toto") });
|
||||||
builder.set_criteria(vec!["toto:asc".to_string()]);
|
builder.set_criteria(vec!["toto:asc".to_string()]);
|
||||||
@ -1032,7 +1053,7 @@ mod tests {
|
|||||||
|
|
||||||
// We set toto and age as searchable to force reordering of the fields
|
// We set toto and age as searchable to force reordering of the fields
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]);
|
builder.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -1049,10 +1070,11 @@ mod tests {
|
|||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
// Set all the settings except searchable
|
// Set all the settings except searchable
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_displayed_fields(vec!["hello".to_string()]);
|
builder.set_displayed_fields(vec!["hello".to_string()]);
|
||||||
// It is only Asc(toto), there is a facet database but it is denied to filter with toto.
|
// It is only Asc(toto), there is a facet database but it is denied to filter with toto.
|
||||||
builder.set_criteria(vec!["toto:asc".to_string()]);
|
builder.set_criteria(vec!["toto:asc".to_string()]);
|
||||||
@ -1070,10 +1092,11 @@ mod tests {
|
|||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
// Set the primary key settings
|
// Set the primary key settings
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_primary_key(S("mykey"));
|
builder.set_primary_key(S("mykey"));
|
||||||
|
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
@ -1089,14 +1112,17 @@ mod tests {
|
|||||||
{ "mykey": 6, "name": "bernie", "age": 34 },
|
{ "mykey": 6, "name": "bernie", "age": 34 },
|
||||||
{ "mykey": 7, "name": "ben", "age": 34 }
|
{ "mykey": 7, "name": "ben", "age": 34 }
|
||||||
]);
|
]);
|
||||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
let indexing_config =
|
||||||
builder.disable_autogenerate_docids();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
builder.execute(content, |_| ()).unwrap();
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// We now try to reset the primary key
|
// We now try to reset the primary key
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.reset_primary_key();
|
builder.reset_primary_key();
|
||||||
|
|
||||||
let err = builder.execute(|_| ()).unwrap_err();
|
let err = builder.execute(|_| ()).unwrap_err();
|
||||||
@ -1109,7 +1135,7 @@ mod tests {
|
|||||||
builder.execute().unwrap();
|
builder.execute().unwrap();
|
||||||
|
|
||||||
// ...we can change the primary key
|
// ...we can change the primary key
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_primary_key(S("myid"));
|
builder.set_primary_key(S("myid"));
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -1121,10 +1147,11 @@ mod tests {
|
|||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
// Set the genres setting
|
// Set the genres setting
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_filterable_fields(hashset! { S("genres") });
|
builder.set_filterable_fields(hashset! { S("genres") });
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
@ -1147,8 +1174,12 @@ mod tests {
|
|||||||
"release_date": 819676800
|
"release_date": 819676800
|
||||||
}
|
}
|
||||||
]);
|
]);
|
||||||
let builder = IndexDocuments::new(&mut wtxn, &index);
|
let indexing_config =
|
||||||
builder.execute(content, |_| ()).unwrap();
|
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
|
let mut builder =
|
||||||
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ());
|
||||||
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
// We now try to reset the primary key
|
// We now try to reset the primary key
|
||||||
|
@ -1,130 +0,0 @@
|
|||||||
use grenad::CompressionType;
|
|
||||||
use rayon::ThreadPool;
|
|
||||||
|
|
||||||
use super::{ClearDocuments, DeleteDocuments, Facets, IndexDocuments, Settings};
|
|
||||||
use crate::{Index, Result};
|
|
||||||
|
|
||||||
pub struct UpdateBuilder<'a> {
|
|
||||||
pub(crate) log_every_n: Option<usize>,
|
|
||||||
pub(crate) max_nb_chunks: Option<usize>,
|
|
||||||
pub(crate) documents_chunk_size: Option<usize>,
|
|
||||||
pub(crate) max_memory: Option<usize>,
|
|
||||||
pub(crate) chunk_compression_type: CompressionType,
|
|
||||||
pub(crate) chunk_compression_level: Option<u32>,
|
|
||||||
pub(crate) thread_pool: Option<&'a ThreadPool>,
|
|
||||||
pub(crate) max_positions_per_attributes: Option<u32>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> UpdateBuilder<'a> {
|
|
||||||
pub fn new() -> UpdateBuilder<'a> {
|
|
||||||
UpdateBuilder {
|
|
||||||
log_every_n: None,
|
|
||||||
max_nb_chunks: None,
|
|
||||||
documents_chunk_size: None,
|
|
||||||
max_memory: None,
|
|
||||||
chunk_compression_type: CompressionType::None,
|
|
||||||
chunk_compression_level: None,
|
|
||||||
thread_pool: None,
|
|
||||||
max_positions_per_attributes: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn log_every_n(&mut self, log_every_n: usize) {
|
|
||||||
self.log_every_n = Some(log_every_n);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn max_nb_chunks(&mut self, max_nb_chunks: usize) {
|
|
||||||
self.max_nb_chunks = Some(max_nb_chunks);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn max_memory(&mut self, max_memory: usize) {
|
|
||||||
self.max_memory = Some(max_memory);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn documents_chunk_size(&mut self, documents_chunk_size: usize) {
|
|
||||||
self.documents_chunk_size = Some(documents_chunk_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn chunk_compression_type(&mut self, chunk_compression_type: CompressionType) {
|
|
||||||
self.chunk_compression_type = chunk_compression_type;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn chunk_compression_level(&mut self, chunk_compression_level: u32) {
|
|
||||||
self.chunk_compression_level = Some(chunk_compression_level);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn thread_pool(&mut self, thread_pool: &'a ThreadPool) {
|
|
||||||
self.thread_pool = Some(thread_pool);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn max_positions_per_attributes(&mut self, max_positions_per_attributes: u32) {
|
|
||||||
self.max_positions_per_attributes = Some(max_positions_per_attributes);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn clear_documents<'t, 'u, 'i>(
|
|
||||||
self,
|
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
|
||||||
index: &'i Index,
|
|
||||||
) -> ClearDocuments<'t, 'u, 'i> {
|
|
||||||
ClearDocuments::new(wtxn, index)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn delete_documents<'t, 'u, 'i>(
|
|
||||||
self,
|
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
|
||||||
index: &'i Index,
|
|
||||||
) -> Result<DeleteDocuments<'t, 'u, 'i>> {
|
|
||||||
DeleteDocuments::new(wtxn, index)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn index_documents<'t, 'u, 'i>(
|
|
||||||
self,
|
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
|
||||||
index: &'i Index,
|
|
||||||
) -> IndexDocuments<'t, 'u, 'i, 'a> {
|
|
||||||
let mut builder = IndexDocuments::new(wtxn, index);
|
|
||||||
|
|
||||||
builder.log_every_n = self.log_every_n;
|
|
||||||
builder.max_nb_chunks = self.max_nb_chunks;
|
|
||||||
builder.max_memory = self.max_memory;
|
|
||||||
builder.documents_chunk_size = self.documents_chunk_size;
|
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
|
||||||
builder.chunk_compression_level = self.chunk_compression_level;
|
|
||||||
builder.thread_pool = self.thread_pool;
|
|
||||||
builder.max_positions_per_attributes = self.max_positions_per_attributes;
|
|
||||||
|
|
||||||
builder
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn settings<'t, 'u, 'i>(
|
|
||||||
self,
|
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
|
||||||
index: &'i Index,
|
|
||||||
) -> Settings<'a, 't, 'u, 'i> {
|
|
||||||
let mut builder = Settings::new(wtxn, index);
|
|
||||||
|
|
||||||
builder.log_every_n = self.log_every_n;
|
|
||||||
builder.max_nb_chunks = self.max_nb_chunks;
|
|
||||||
builder.max_memory = self.max_memory;
|
|
||||||
builder.documents_chunk_size = self.documents_chunk_size;
|
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
|
||||||
builder.chunk_compression_level = self.chunk_compression_level;
|
|
||||||
builder.thread_pool = self.thread_pool;
|
|
||||||
builder.max_positions_per_attributes = self.max_positions_per_attributes;
|
|
||||||
|
|
||||||
builder
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn facets<'t, 'u, 'i>(
|
|
||||||
self,
|
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
|
||||||
index: &'i Index,
|
|
||||||
) -> Facets<'t, 'u, 'i> {
|
|
||||||
let mut builder = Facets::new(wtxn, index);
|
|
||||||
|
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
|
||||||
builder.chunk_compression_level = self.chunk_compression_level;
|
|
||||||
|
|
||||||
builder
|
|
||||||
}
|
|
||||||
}
|
|
@ -16,7 +16,8 @@ macro_rules! test_distinct {
|
|||||||
|
|
||||||
// update distinct attribute
|
// update distinct attribute
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let config = milli::update::IndexerConfig::default();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_distinct_field(S(stringify!($distinct)));
|
builder.set_distinct_field(S(stringify!($distinct)));
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
@ -7,7 +7,7 @@ use either::{Either, Left, Right};
|
|||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use maplit::{hashmap, hashset};
|
use maplit::{hashmap, hashset};
|
||||||
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
||||||
use milli::update::{Settings, UpdateBuilder};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{AscDesc, Criterion, DocumentId, Index, Member};
|
use milli::{AscDesc, Criterion, DocumentId, Index, Member};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
@ -31,8 +31,9 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
let criteria = criteria.iter().map(|c| c.to_string()).collect();
|
let criteria = criteria.iter().map(|c| c.to_string()).collect();
|
||||||
builder.set_criteria(criteria);
|
builder.set_criteria(criteria);
|
||||||
@ -54,10 +55,10 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
// index documents
|
// index documents
|
||||||
let mut builder = UpdateBuilder::new();
|
let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
|
||||||
builder.max_memory(10 * 1024 * 1024); // 10MiB
|
let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let mut builder = builder.index_documents(&mut wtxn, &index);
|
|
||||||
builder.enable_autogenerate_docids();
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
let mut cursor = Cursor::new(Vec::new());
|
||||||
let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
||||||
let reader = Cursor::new(CONTENT.as_bytes());
|
let reader = Cursor::new(CONTENT.as_bytes());
|
||||||
@ -73,7 +74,8 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
|
|
||||||
// index documents
|
// index documents
|
||||||
let content = DocumentBatchReader::from_reader(cursor).unwrap();
|
let content = DocumentBatchReader::from_reader(cursor).unwrap();
|
||||||
builder.execute(content, |_| ()).unwrap();
|
builder.add_documents(content).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ use heed::EnvOpenOptions;
|
|||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
|
||||||
use milli::update::{Settings, UpdateBuilder};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult};
|
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult};
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
use Criterion::*;
|
use Criterion::*;
|
||||||
@ -337,11 +337,12 @@ fn criteria_mixup() {
|
|||||||
]
|
]
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let config = IndexerConfig::default();
|
||||||
for criteria in criteria_mix {
|
for criteria in criteria_mix {
|
||||||
eprintln!("Testing with criteria order: {:?}", &criteria);
|
eprintln!("Testing with criteria order: {:?}", &criteria);
|
||||||
//update criteria
|
//update criteria
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_criteria(criteria.iter().map(ToString::to_string).collect());
|
builder.set_criteria(criteria.iter().map(ToString::to_string).collect());
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -375,8 +376,9 @@ fn criteria_ascdesc() {
|
|||||||
let index = Index::new(options, &path).unwrap();
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
|
||||||
builder.set_sortable_fields(hashset! {
|
builder.set_sortable_fields(hashset! {
|
||||||
S("name"),
|
S("name"),
|
||||||
@ -385,10 +387,9 @@ fn criteria_ascdesc() {
|
|||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
// index documents
|
// index documents
|
||||||
let mut builder = UpdateBuilder::new();
|
let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
|
||||||
builder.max_memory(10 * 1024 * 1024); // 10MiB
|
let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||||
let mut builder = builder.index_documents(&mut wtxn, &index);
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ());
|
||||||
builder.enable_autogenerate_docids();
|
|
||||||
|
|
||||||
let mut cursor = Cursor::new(Vec::new());
|
let mut cursor = Cursor::new(Vec::new());
|
||||||
let mut batch_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
let mut batch_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
|
||||||
@ -419,7 +420,8 @@ fn criteria_ascdesc() {
|
|||||||
|
|
||||||
let reader = DocumentBatchReader::from_reader(cursor).unwrap();
|
let reader = DocumentBatchReader::from_reader(cursor).unwrap();
|
||||||
|
|
||||||
builder.execute(reader, |_| ()).unwrap();
|
builder.add_documents(reader).unwrap();
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
@ -430,7 +432,7 @@ fn criteria_ascdesc() {
|
|||||||
eprintln!("Testing with criterion: {:?}", &criterion);
|
eprintln!("Testing with criterion: {:?}", &criterion);
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut wtxn, &index);
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
builder.set_criteria(vec![criterion.to_string()]);
|
builder.set_criteria(vec![criterion.to_string()]);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
Loading…
Reference in New Issue
Block a user