Move the arroy building after the writing loop

This commit is contained in:
Clément Renault 2024-11-27 10:19:59 +01:00
parent 8442db8101
commit 2094ce8a9a
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F

View File

@ -76,7 +76,7 @@ where
MSP: Fn() -> bool + Sync, MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync, SP: Fn(Progress) + Sync,
{ {
/// TODO restrict memory and remove this memory from the extractors bum allocators /// TODO restrict memory and remove this memory from the extractors bump allocators
let bbbuffers: Vec<_> = (0..rayon::current_num_threads()) let bbbuffers: Vec<_> = (0..rayon::current_num_threads())
.map(|_| bbqueue::BBBuffer::new(100 * 1024 * 1024)) // 100 MiB by thread .map(|_| bbqueue::BBBuffer::new(100 * 1024 * 1024)) // 100 MiB by thread
.collect(); .collect();
@ -100,6 +100,7 @@ where
send_progress, send_progress,
}; };
let mut index_embeddings = index.embedding_configs(wtxn)?;
let mut field_distribution = index.field_distribution(wtxn)?; let mut field_distribution = index.field_distribution(wtxn)?;
let mut document_ids = index.documents_ids(wtxn)?; let mut document_ids = index.documents_ids(wtxn)?;
@ -296,7 +297,6 @@ where
'vectors: { 'vectors: {
let mut index_embeddings = index.embedding_configs(&rtxn)?;
if index_embeddings.is_empty() { if index_embeddings.is_empty() {
break 'vectors; break 'vectors;
} }
@ -322,8 +322,6 @@ where
} }
} }
} }
embedding_sender.finish(index_embeddings).unwrap();
} }
'geo: { 'geo: {
@ -461,25 +459,30 @@ where
writer.add_items(wtxn, docid, &embeddings)?; writer.add_items(wtxn, docid, &embeddings)?;
} }
ArroyOperation::SetVector { docid, embedder_id, embedding } => { ArroyOperation::SetVector { docid, embedder_id, embedding } => {
let (_, _, writer, dimensions) = arroy_writers let (_, _, writer, dimensions) =
.get(&embedder_id) arroy_writers.get(&embedder_id).expect("requested a missing embedder");
.expect("requested a missing embedder");
writer.del_items(wtxn, *dimensions, docid)?; writer.del_items(wtxn, *dimensions, docid)?;
writer.add_item(wtxn, docid, &embedding)?; writer.add_item(wtxn, docid, &embedding)?;
} }
ArroyOperation::Finish { configs } => { _otherwise => unreachable!(),
let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build"); },
}
}
'vectors: {
let span =
tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
let _entered = span.enter(); let _entered = span.enter();
if index_embeddings.is_empty() {
break 'vectors;
}
(indexing_context.send_progress)(Progress::from_step( (indexing_context.send_progress)(Progress::from_step(
Step::WritingEmbeddingsToDatabase, Step::WritingEmbeddingsToDatabase,
)); ));
for ( for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
_embedder_index,
(_embedder_name, _embedder, writer, dimensions),
) in &mut arroy_writers
{
let dimensions = *dimensions; let dimensions = *dimensions;
writer.build_and_quantize( writer.build_and_quantize(
wtxn, wtxn,
@ -490,11 +493,7 @@ where
)?; )?;
} }
index.put_embedding_configs(wtxn, configs)?; index.put_embedding_configs(wtxn, index_embeddings)?;
}
},
}
}
} }
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors)); (indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));