Add conditions reducing work

This commit is contained in:
ManyTheFish 2024-04-22 14:18:35 +02:00
parent aa0bbbb246
commit a1aa999026

View File

@ -198,11 +198,16 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
if document_is_kept { if document_is_kept {
// Don't give up if the old prompt was failing // Don't give up if the old prompt was failing
let old_prompt = prompt let old_prompt = Some(prompt)
.render(obkv, DelAdd::Deletion, old_fields_ids_map) // TODO: this filter works because we erase the vec database when an embedding setting changes.
.unwrap_or_default(); // Once the vector pipeline is optimized, this should be removed.
.filter(|_| !settings_diff.reindex_vectors())
.map(|p| {
p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default()
});
let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
if old_prompt != new_prompt { if old_prompt.as_ref() != Some(&new_prompt) {
let old_prompt = old_prompt.unwrap_or_default();
tracing::trace!( tracing::trace!(
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
); );
@ -224,6 +229,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
&mut manual_vectors_writer, &mut manual_vectors_writer,
&mut key_buffer, &mut key_buffer,
delta, delta,
settings_diff,
)?; )?;
} }
@ -264,10 +270,15 @@ fn push_vectors_diff(
manual_vectors_writer: &mut Writer<BufWriter<File>>, manual_vectors_writer: &mut Writer<BufWriter<File>>,
key_buffer: &mut Vec<u8>, key_buffer: &mut Vec<u8>,
delta: VectorStateDelta, delta: VectorStateDelta,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<()> { ) -> Result<()> {
puffin::profile_function!(); puffin::profile_function!();
let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values(); let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
if must_remove { if must_remove
// TODO: the below condition works because we erase the vec database when an embedding setting changes.
// Once the vector pipeline is optimized, this should be removed.
&& !settings_diff.reindex_vectors()
{
key_buffer.truncate(TRUNCATE_SIZE); key_buffer.truncate(TRUNCATE_SIZE);
remove_vectors_writer.insert(&key_buffer, [])?; remove_vectors_writer.insert(&key_buffer, [])?;
} }
@ -295,6 +306,9 @@ fn push_vectors_diff(
match eob { match eob {
EitherOrBoth::Both(_, _) => (), // no need to touch anything EitherOrBoth::Both(_, _) => (), // no need to touch anything
EitherOrBoth::Left(vector) => { EitherOrBoth::Left(vector) => {
// TODO: the below condition works because we erase the vec database when an embedding setting changes.
// Once the vector pipeline is optimized, this should be removed.
if !settings_diff.reindex_vectors() {
// We insert only the Del part of the Obkv to inform // We insert only the Del part of the Obkv to inform
// that we only want to remove all those vectors. // that we only want to remove all those vectors.
let mut obkv = KvWriterDelAdd::memory(); let mut obkv = KvWriterDelAdd::memory();
@ -302,6 +316,7 @@ fn push_vectors_diff(
let bytes = obkv.into_inner()?; let bytes = obkv.into_inner()?;
manual_vectors_writer.insert(&key_buffer, bytes)?; manual_vectors_writer.insert(&key_buffer, bytes)?;
} }
}
EitherOrBoth::Right(vector) => { EitherOrBoth::Right(vector) => {
// We insert only the Add part of the Obkv to inform // We insert only the Add part of the Obkv to inform
// that we only want to remove all those vectors. // that we only want to remove all those vectors.