Merge pull request #484 from meilisearch/fix-reindex-by-chunk

Stop reindexing by chunk during complete reindexing
Clément Renault 2020-02-28 18:29:25 +01:00 committed by GitHub
commit 72450c765d

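The change itself is small: the 100-document chunking (originally a workaround to keep the borrow checker happy, per the removed comment) is dropped, so the whole set of documents is indexed in a single pass. `write_documents_addition_index` now runs once after that pass instead of once per chunk, and `number_of_inserted_documents` is the size of the full set rather than of a single chunk. Below is a minimal sketch of the shape of this fix, assuming each write replaces the previously stored count; `Store` and `write_addition_index` are hypothetical stand-ins, not the real MeiliSearch types:

```rust
// Hypothetical stand-ins for the main store and
// `write_documents_addition_index`; not the real MeiliSearch API.
struct Store {
    last_written_count: usize,
}

impl Store {
    // Stand-in for persisting the indexer output together with the number
    // of inserted documents; assumes each call replaces the previous value.
    fn write_addition_index(&mut self, count: usize) {
        self.last_written_count = count;
    }
}

// Old shape: one write per 100-id chunk, so the count passed down is at
// most 100 and the last write reflects only the final chunk.
fn reindex_by_chunk(store: &mut Store, ids: &[u64]) {
    for chunk in ids.chunks(100) {
        store.write_addition_index(chunk.len());
    }
}

// New shape: index everything in one pass, then write once with the
// total number of documents.
fn reindex_all(store: &mut Store, ids: &[u64]) {
    store.write_addition_index(ids.len());
}

fn main() {
    let ids: Vec<u64> = (0..250).collect();

    let mut chunked = Store { last_written_count: 0 };
    reindex_by_chunk(&mut chunked, &ids);
    assert_eq!(chunked.last_written_count, 50); // only the last chunk survives

    let mut single = Store { last_written_count: 0 };
    reindex_all(&mut single, &ids);
    assert_eq!(single.last_written_count, 250); // the whole set
}
```

Under that assumption, the single write after the full pass records the true total, whereas the per-chunk variant leaves the store reflecting only the last chunk processed.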

@@ -279,49 +279,46 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
     index.postings_lists.clear(writer)?;
     index.docs_words.clear(writer)?;
 
-    // 3. re-index chunks of documents (otherwise we make the borrow checker unhappy)
-    for documents_ids in documents_ids_to_reindex.chunks(100) {
-        let stop_words = match index.main.stop_words_fst(writer)? {
-            Some(stop_words) => stop_words,
-            None => fst::Set::default(),
-        };
+    let stop_words = match index.main.stop_words_fst(writer)? {
+        Some(stop_words) => stop_words,
+        None => fst::Set::default(),
+    };
 
-        let number_of_inserted_documents = documents_ids.len();
-        let mut indexer = RawIndexer::new(stop_words);
-        let mut ram_store = HashMap::new();
+    let number_of_inserted_documents = documents_ids_to_reindex.len();
+    let mut indexer = RawIndexer::new(stop_words);
+    let mut ram_store = HashMap::new();
 
-        for document_id in documents_ids {
-            for result in index.documents_fields.document_fields(writer, *document_id)? {
-                let (field_id, bytes) = result?;
-                let value: serde_json::Value = serde_json::from_slice(bytes)?;
-                ram_store.insert((document_id, field_id), value);
-            }
+    for document_id in documents_ids_to_reindex {
+        for result in index.documents_fields.document_fields(writer, document_id)? {
+            let (field_id, bytes) = result?;
+            let value: serde_json::Value = serde_json::from_slice(bytes)?;
+            ram_store.insert((document_id, field_id), value);
+        }
 
-            for ((docid, field_id), value) in ram_store.drain() {
-                serialize_value_with_id(
-                    writer,
-                    field_id,
-                    &schema,
-                    *docid,
-                    index.documents_fields,
-                    index.documents_fields_counts,
-                    &mut indexer,
-                    &mut ranked_map,
-                    &value
-                )?;
-            }
-        }
+        for ((docid, field_id), value) in ram_store.drain() {
+            serialize_value_with_id(
+                writer,
+                field_id,
+                &schema,
+                docid,
+                index.documents_fields,
+                index.documents_fields_counts,
+                &mut indexer,
+                &mut ranked_map,
+                &value
+            )?;
+        }
+    }
 
-        // 4. write the new index in the main store
-        write_documents_addition_index(
-            writer,
-            index,
-            &ranked_map,
-            number_of_inserted_documents,
-            indexer,
-        )?;
-    }
+    // 4. write the new index in the main store
+    write_documents_addition_index(
+        writer,
+        index,
+        &ranked_map,
+        number_of_inserted_documents,
+        indexer,
+    )?;
 
     index.main.put_schema(writer, &schema)?;
 
     Ok(())