mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Fix test and use progress callback
This commit is contained in:
parent
1d314328f0
commit
823da19745
@ -11,6 +11,7 @@ use std::collections::HashSet;
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
use crossbeam_channel::Sender;
|
use crossbeam_channel::Sender;
|
||||||
|
use log::debug;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use self::extract_docid_word_positions::extract_docid_word_positions;
|
use self::extract_docid_word_positions::extract_docid_word_positions;
|
||||||
@ -192,6 +193,7 @@ fn spawn_extraction_task<FE, FS>(
|
|||||||
.map(|chunk| extract_fn(chunk, indexer.clone()).unwrap())
|
.map(|chunk| extract_fn(chunk, indexer.clone()).unwrap())
|
||||||
.collect();
|
.collect();
|
||||||
rayon::spawn(move || {
|
rayon::spawn(move || {
|
||||||
|
debug!("merge {} database", name);
|
||||||
let reader = merge_readers(chunks, merge_fn, indexer).unwrap();
|
let reader = merge_readers(chunks, merge_fn, indexer).unwrap();
|
||||||
lmdb_writer_sx.send(serialize_fn(reader)).unwrap();
|
lmdb_writer_sx.send(serialize_fn(reader)).unwrap();
|
||||||
});
|
});
|
||||||
|
@ -31,6 +31,10 @@ use crate::update::{
|
|||||||
};
|
};
|
||||||
use crate::{Index, Result};
|
use crate::{Index, Result};
|
||||||
|
|
||||||
|
static MERGED_DATABASE_COUNT: usize = 7;
|
||||||
|
static PREFIX_DATABASE_COUNT: usize = 5;
|
||||||
|
static TOTAL_POSTING_DATABASE_COUNT: usize = MERGED_DATABASE_COUNT + PREFIX_DATABASE_COUNT;
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||||
pub struct DocumentAdditionResult {
|
pub struct DocumentAdditionResult {
|
||||||
pub nb_documents: usize,
|
pub nb_documents: usize,
|
||||||
@ -278,15 +282,34 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
let index_is_empty = index_documents_ids.len() == 0;
|
let index_is_empty = index_documents_ids.len() == 0;
|
||||||
let mut final_documents_ids = RoaringBitmap::new();
|
let mut final_documents_ids = RoaringBitmap::new();
|
||||||
|
|
||||||
|
let mut databases_seen = 0;
|
||||||
|
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
|
databases_seen,
|
||||||
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
|
});
|
||||||
|
|
||||||
for typed_chunk in lmdb_writer_rx {
|
for typed_chunk in lmdb_writer_rx {
|
||||||
let docids =
|
let (docids, is_merged_database) =
|
||||||
write_typed_chunk_into_index(typed_chunk, &self.index, self.wtxn, index_is_empty)?;
|
write_typed_chunk_into_index(typed_chunk, &self.index, self.wtxn, index_is_empty)?;
|
||||||
final_documents_ids |= docids;
|
if !docids.is_empty() {
|
||||||
debug!(
|
final_documents_ids |= docids;
|
||||||
"We have seen {} documents on {} total document so far",
|
let documents_seen_count = final_documents_ids.len();
|
||||||
final_documents_ids.len(),
|
progress_callback(UpdateIndexingStep::IndexDocuments {
|
||||||
documents_count
|
documents_seen: documents_seen_count as usize,
|
||||||
);
|
total_documents: documents_count,
|
||||||
|
});
|
||||||
|
debug!(
|
||||||
|
"We have seen {} documents on {} total document so far",
|
||||||
|
documents_seen_count, documents_count
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if is_merged_database {
|
||||||
|
databases_seen += 1;
|
||||||
|
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
|
databases_seen: databases_seen,
|
||||||
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We write the field distribution into the main database
|
// We write the field distribution into the main database
|
||||||
@ -298,20 +321,19 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
// We write the external documents ids into the main database.
|
// We write the external documents ids into the main database.
|
||||||
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
|
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
|
||||||
|
|
||||||
let all_documents_ids = index_documents_ids | new_documents_ids;
|
let all_documents_ids = index_documents_ids | new_documents_ids | replaced_documents_ids;
|
||||||
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
|
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
|
||||||
|
|
||||||
self.execute_prefix_databases(progress_callback)
|
self.execute_prefix_databases(progress_callback)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute_prefix_databases<F>(
|
pub fn execute_prefix_databases<F>(self, progress_callback: F) -> Result<()>
|
||||||
self,
|
|
||||||
// output: TransformOutput,
|
|
||||||
progress_callback: F,
|
|
||||||
) -> Result<()>
|
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
{
|
{
|
||||||
|
// Merged databases have already been indexed, so we start from this count.
|
||||||
|
let mut databases_seen = MERGED_DATABASE_COUNT;
|
||||||
|
|
||||||
// Run the facets update operation.
|
// Run the facets update operation.
|
||||||
let mut builder = Facets::new(self.wtxn, self.index, self.update_id);
|
let mut builder = Facets::new(self.wtxn, self.index, self.update_id);
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
builder.chunk_compression_type = self.chunk_compression_type;
|
||||||
@ -324,6 +346,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
}
|
}
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
|
databases_seen += 1;
|
||||||
|
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
|
databases_seen: databases_seen,
|
||||||
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
|
});
|
||||||
|
|
||||||
// Run the words prefixes update operation.
|
// Run the words prefixes update operation.
|
||||||
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index, self.update_id);
|
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index, self.update_id);
|
||||||
if let Some(value) = self.words_prefix_threshold {
|
if let Some(value) = self.words_prefix_threshold {
|
||||||
@ -334,6 +362,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
}
|
}
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
|
databases_seen += 1;
|
||||||
|
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
|
databases_seen: databases_seen,
|
||||||
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
|
});
|
||||||
|
|
||||||
// Run the word prefix docids update operation.
|
// Run the word prefix docids update operation.
|
||||||
let mut builder = WordPrefixDocids::new(self.wtxn, self.index);
|
let mut builder = WordPrefixDocids::new(self.wtxn, self.index);
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
builder.chunk_compression_type = self.chunk_compression_type;
|
||||||
@ -342,6 +376,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
builder.max_memory = self.max_memory;
|
builder.max_memory = self.max_memory;
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
|
databases_seen += 1;
|
||||||
|
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
|
databases_seen: databases_seen,
|
||||||
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
|
});
|
||||||
|
|
||||||
// Run the word prefix pair proximity docids update operation.
|
// Run the word prefix pair proximity docids update operation.
|
||||||
let mut builder = WordPrefixPairProximityDocids::new(self.wtxn, self.index);
|
let mut builder = WordPrefixPairProximityDocids::new(self.wtxn, self.index);
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
builder.chunk_compression_type = self.chunk_compression_type;
|
||||||
@ -350,6 +390,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
builder.max_memory = self.max_memory;
|
builder.max_memory = self.max_memory;
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
|
databases_seen += 1;
|
||||||
|
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
|
databases_seen: databases_seen,
|
||||||
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
|
});
|
||||||
|
|
||||||
// Run the words level positions update operation.
|
// Run the words level positions update operation.
|
||||||
let mut builder = WordsLevelPositions::new(self.wtxn, self.index);
|
let mut builder = WordsLevelPositions::new(self.wtxn, self.index);
|
||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
builder.chunk_compression_type = self.chunk_compression_type;
|
||||||
@ -362,6 +408,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
}
|
}
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
|
databases_seen += 1;
|
||||||
|
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
|
databases_seen: databases_seen,
|
||||||
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
|
});
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -32,7 +32,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index_is_empty: bool,
|
index_is_empty: bool,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<(RoaringBitmap, bool)> {
|
||||||
|
let mut is_merged_database = false;
|
||||||
match typed_chunk {
|
match typed_chunk {
|
||||||
TypedChunk::DocidWordPositions(docid_word_positions_iter) => {
|
TypedChunk::DocidWordPositions(docid_word_positions_iter) => {
|
||||||
write_entries_into_database(
|
write_entries_into_database(
|
||||||
@ -71,8 +72,11 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
|value, _buffer| Ok(value),
|
|value, _buffer| Ok(value),
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
)?;
|
)?;
|
||||||
|
is_merged_database = true;
|
||||||
|
}
|
||||||
|
TypedChunk::NewDocumentsIds(documents_ids) => {
|
||||||
|
return Ok((documents_ids, is_merged_database))
|
||||||
}
|
}
|
||||||
TypedChunk::NewDocumentsIds(documents_ids) => return Ok(documents_ids),
|
|
||||||
TypedChunk::WordDocids(word_docids_iter) => {
|
TypedChunk::WordDocids(word_docids_iter) => {
|
||||||
let mut word_docids_iter = unsafe { into_clonable_grenad(word_docids_iter) }?;
|
let mut word_docids_iter = unsafe { into_clonable_grenad(word_docids_iter) }?;
|
||||||
append_entries_into_database(
|
append_entries_into_database(
|
||||||
@ -100,6 +104,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
builder.extend_stream(union_stream)?;
|
builder.extend_stream(union_stream)?;
|
||||||
let fst = builder.into_set();
|
let fst = builder.into_set();
|
||||||
index.put_words_fst(wtxn, &fst)?;
|
index.put_words_fst(wtxn, &fst)?;
|
||||||
|
is_merged_database = true;
|
||||||
}
|
}
|
||||||
TypedChunk::WordLevelPositionDocids(word_level_position_docids_iter) => {
|
TypedChunk::WordLevelPositionDocids(word_level_position_docids_iter) => {
|
||||||
append_entries_into_database(
|
append_entries_into_database(
|
||||||
@ -110,6 +115,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
|value, _buffer| Ok(value),
|
|value, _buffer| Ok(value),
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
)?;
|
)?;
|
||||||
|
is_merged_database = true;
|
||||||
}
|
}
|
||||||
TypedChunk::FieldIdFacetNumberDocids(facet_id_f64_docids_iter) => {
|
TypedChunk::FieldIdFacetNumberDocids(facet_id_f64_docids_iter) => {
|
||||||
append_entries_into_database(
|
append_entries_into_database(
|
||||||
@ -120,6 +126,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
|value, _buffer| Ok(value),
|
|value, _buffer| Ok(value),
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
)?;
|
)?;
|
||||||
|
is_merged_database = true;
|
||||||
}
|
}
|
||||||
TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
|
TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
|
||||||
append_entries_into_database(
|
append_entries_into_database(
|
||||||
@ -130,6 +137,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
|value, _buffer| Ok(value),
|
|value, _buffer| Ok(value),
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
)?;
|
)?;
|
||||||
|
is_merged_database = true;
|
||||||
}
|
}
|
||||||
TypedChunk::FieldIdDocidFacetNumbers(mut fid_docid_facet_number) => {
|
TypedChunk::FieldIdDocidFacetNumbers(mut fid_docid_facet_number) => {
|
||||||
let index_fid_docid_facet_numbers =
|
let index_fid_docid_facet_numbers =
|
||||||
@ -166,10 +174,11 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
Ok(values.serialize_into(buffer)?)
|
Ok(values.serialize_into(buffer)?)
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
|
is_merged_database = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(RoaringBitmap::new())
|
Ok((RoaringBitmap::new(), is_merged_database))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge_roaring_bitmaps(new_value: &[u8], db_value: &[u8], buffer: &mut Vec<u8>) -> Result<()> {
|
fn merge_roaring_bitmaps(new_value: &[u8], db_value: &[u8], buffer: &mut Vec<u8>) -> Result<()> {
|
||||||
|
Loading…
Reference in New Issue
Block a user