mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Fix an indexing process bug, where documents were not written in order
This commit is contained in:
parent
855a251489
commit
40993a0d25
@ -3,7 +3,7 @@ use std::convert::TryFrom;
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{Read, Seek, SeekFrom};
|
use std::io::{Read, Seek, SeekFrom};
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::{anyhow, Context};
|
||||||
use fst::{IntoStreamer, Streamer};
|
use fst::{IntoStreamer, Streamer};
|
||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -99,8 +99,14 @@ impl Transform<'_, '_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Once we have sort and deduplicated the documents we write them into a final file.
|
// Once we have sort and deduplicated the documents we write them into a final file.
|
||||||
let file = tempfile::tempfile()?;
|
let mut final_sorter = create_sorter(
|
||||||
let mut writer = create_writer(self.chunk_compression_type, self.chunk_compression_level, file)?;
|
|_docid, _obkvs| Err(anyhow!("cannot merge two documents")),
|
||||||
|
self.chunk_compression_type,
|
||||||
|
self.chunk_compression_level,
|
||||||
|
self.chunk_fusing_shrink_size,
|
||||||
|
self.max_nb_chunks,
|
||||||
|
self.max_memory,
|
||||||
|
);
|
||||||
let mut new_users_ids_documents_ids_builder = fst::MapBuilder::memory();
|
let mut new_users_ids_documents_ids_builder = fst::MapBuilder::memory();
|
||||||
let mut replaced_documents_ids = RoaringBitmap::new();
|
let mut replaced_documents_ids = RoaringBitmap::new();
|
||||||
let mut new_documents_ids = RoaringBitmap::new();
|
let mut new_documents_ids = RoaringBitmap::new();
|
||||||
@ -143,12 +149,17 @@ impl Transform<'_, '_> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// We insert the document under the documents ids map into the final file.
|
// We insert the document under the documents ids map into the final file.
|
||||||
writer.insert(docid.to_be_bytes(), obkv)?;
|
final_sorter.insert(docid.to_be_bytes(), obkv)?;
|
||||||
documents_count += 1;
|
documents_count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Once we have written all the documents into the final file, we extract it
|
// We create a final writer to write the new documents in order from the sorter.
|
||||||
// from the writer and reset the seek to be able to read it again.
|
let file = tempfile::tempfile()?;
|
||||||
|
let mut writer = create_writer(self.chunk_compression_type, self.chunk_compression_level, file)?;
|
||||||
|
|
||||||
|
// Once we have written all the documents into the final sorter, we write the documents
|
||||||
|
// into this writer, extract the file and reset the seek to be able to read it again.
|
||||||
|
final_sorter.write_into(&mut writer)?;
|
||||||
let mut documents_file = writer.into_inner()?;
|
let mut documents_file = writer.into_inner()?;
|
||||||
documents_file.seek(SeekFrom::Start(0))?;
|
documents_file.seek(SeekFrom::Start(0))?;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user