mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 04:37:32 +01:00
Make sure the first document is associated to the document id 0
This commit is contained in:
parent
0a44ff86ab
commit
21aafd603c
@@ -434,32 +434,34 @@ fn index_csv(
|
|||||||
let mut document_id: usize = 0;
|
let mut document_id: usize = 0;
|
||||||
let mut document = csv::StringRecord::new();
|
let mut document = csv::StringRecord::new();
|
||||||
while rdr.read_record(&mut document)? {
|
while rdr.read_record(&mut document)? {
|
||||||
document_id = document_id + 1;
|
|
||||||
|
|
||||||
// We skip documents that must not be indexed by this thread
|
// We skip documents that must not be indexed by this thread.
|
||||||
if document_id % num_threads != thread_index { continue }
|
if document_id % num_threads == thread_index {
|
||||||
|
if document_id % ONE_MILLION == 0 {
|
||||||
let document_id = DocumentId::try_from(document_id).context("generated id is too big")?;
|
debug!("We have seen {}m documents so far ({:.02?}).",
|
||||||
if document_id % (ONE_MILLION as u32) == 0 {
|
document_id / ONE_MILLION, before.elapsed());
|
||||||
debug!("We have seen {}m documents so far ({:.02?}).",
|
before = Instant::now();
|
||||||
document_id / ONE_MILLION as u32, before.elapsed());
|
|
||||||
before = Instant::now();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (attr, content) in document.iter().enumerate().take(MAX_ATTRIBUTES) {
|
|
||||||
for (pos, word) in lexer::break_string(&content).enumerate().take(MAX_POSITION) {
|
|
||||||
let word = word.cow_to_lowercase();
|
|
||||||
let position = (attr * MAX_POSITION + pos) as u32;
|
|
||||||
store.insert_word_position(&word, position)?;
|
|
||||||
store.insert_word_position_docid(&word, position, document_id)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let document_id = DocumentId::try_from(document_id).context("generated id is too big")?;
|
||||||
|
for (attr, content) in document.iter().enumerate().take(MAX_ATTRIBUTES) {
|
||||||
|
for (pos, word) in lexer::break_string(&content).enumerate().take(MAX_POSITION) {
|
||||||
|
let word = word.cow_to_lowercase();
|
||||||
|
let position = (attr * MAX_POSITION + pos) as u32;
|
||||||
|
store.insert_word_position(&word, position)?;
|
||||||
|
store.insert_word_position_docid(&word, position, document_id)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We write the document in the database.
|
||||||
|
let mut writer = csv::WriterBuilder::new().has_headers(false).from_writer(Vec::new());
|
||||||
|
writer.write_byte_record(document.as_byte_record())?;
|
||||||
|
let document = writer.into_inner()?;
|
||||||
|
store.write_document(document_id, &document)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We write the document in the database.
|
// Compute the document id of the next document.
|
||||||
let mut writer = csv::WriterBuilder::new().has_headers(false).from_writer(Vec::new());
|
document_id = document_id + 1;
|
||||||
writer.write_byte_record(document.as_byte_record())?;
|
|
||||||
let document = writer.into_inner()?;
|
|
||||||
store.write_document(document_id, &document)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let (reader, docs_reader) = store.finish()?;
|
let (reader, docs_reader) = store.finish()?;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user