mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
implements mor review comments
This commit is contained in:
parent
49fa41ce65
commit
b7349910d9
@ -5012,7 +5012,7 @@ mod tests {
|
||||
insta::assert_json_snapshot!(task.details);
|
||||
}
|
||||
|
||||
handle.advance_n_successful_batches(1);
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors");
|
||||
|
||||
{
|
||||
|
@ -35,8 +35,8 @@ pub struct ExtractedVectorPoints {
|
||||
// embedder
|
||||
pub embedder_name: String,
|
||||
pub embedder: Arc<Embedder>,
|
||||
pub user_defined: RoaringBitmap,
|
||||
pub remove_from_user_defined: RoaringBitmap,
|
||||
pub user_provided: RoaringBitmap,
|
||||
pub remove_from_user_provided: RoaringBitmap,
|
||||
}
|
||||
|
||||
enum VectorStateDelta {
|
||||
@ -82,9 +82,9 @@ struct EmbedderVectorExtractor {
|
||||
remove_vectors_writer: Writer<BufWriter<File>>,
|
||||
|
||||
// The docids of the documents that contains a user defined embedding
|
||||
user_defined: RoaringBitmap,
|
||||
user_provided: RoaringBitmap,
|
||||
// The docids of the documents that contains an auto-generated embedding
|
||||
remove_from_user_defined: RoaringBitmap,
|
||||
remove_from_user_provided: RoaringBitmap,
|
||||
}
|
||||
|
||||
/// Extracts the embedding vector contained in each document under the `_vectors` field.
|
||||
@ -140,8 +140,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
manual_vectors_writer,
|
||||
prompts_writer,
|
||||
remove_vectors_writer,
|
||||
user_defined: RoaringBitmap::new(),
|
||||
remove_from_user_defined: RoaringBitmap::new(),
|
||||
user_provided: RoaringBitmap::new(),
|
||||
remove_from_user_provided: RoaringBitmap::new(),
|
||||
});
|
||||
}
|
||||
|
||||
@ -179,8 +179,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
manual_vectors_writer,
|
||||
prompts_writer,
|
||||
remove_vectors_writer,
|
||||
user_defined,
|
||||
remove_from_user_defined,
|
||||
user_provided,
|
||||
remove_from_user_provided,
|
||||
} in extractors.iter_mut()
|
||||
{
|
||||
let delta = match parsed_vectors.remove(embedder_name) {
|
||||
@ -188,10 +188,10 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
match (old.map_or(true, |old| old.is_user_provided()), new.is_user_provided()) {
|
||||
(true, true) | (false, false) => (),
|
||||
(true, false) => {
|
||||
remove_from_user_defined.insert(docid);
|
||||
remove_from_user_provided.insert(docid);
|
||||
}
|
||||
(false, true) => {
|
||||
user_defined.insert(docid);
|
||||
user_provided.insert(docid);
|
||||
}
|
||||
}
|
||||
|
||||
@ -214,7 +214,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
|
||||
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
|
||||
if document_is_kept && old.is_some() {
|
||||
remove_from_user_defined.insert(docid);
|
||||
remove_from_user_provided.insert(docid);
|
||||
// becomes autogenerated
|
||||
VectorStateDelta::NowGenerated(prompt.render(
|
||||
obkv,
|
||||
@ -229,9 +229,9 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
}
|
||||
(None, Some(new)) => {
|
||||
if new.is_user_provided() {
|
||||
user_defined.insert(docid);
|
||||
user_provided.insert(docid);
|
||||
} else {
|
||||
remove_from_user_defined.insert(docid);
|
||||
remove_from_user_provided.insert(docid);
|
||||
}
|
||||
// was possibly autogenerated, remove all vectors for that document
|
||||
let add_vectors = new.into_array_of_vectors();
|
||||
@ -274,7 +274,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
VectorStateDelta::NoChange
|
||||
}
|
||||
} else {
|
||||
remove_from_user_defined.remove(docid);
|
||||
remove_from_user_provided.remove(docid);
|
||||
VectorStateDelta::NowRemoved
|
||||
}
|
||||
}
|
||||
@ -301,8 +301,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
manual_vectors_writer,
|
||||
prompts_writer,
|
||||
remove_vectors_writer,
|
||||
user_defined,
|
||||
remove_from_user_defined,
|
||||
user_provided,
|
||||
remove_from_user_provided,
|
||||
} in extractors
|
||||
{
|
||||
results.push(ExtractedVectorPoints {
|
||||
@ -311,8 +311,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
prompts: writer_into_reader(prompts_writer)?,
|
||||
embedder,
|
||||
embedder_name,
|
||||
user_defined,
|
||||
remove_from_user_defined,
|
||||
user_provided,
|
||||
remove_from_user_provided,
|
||||
})
|
||||
}
|
||||
|
||||
@ -347,9 +347,6 @@ fn push_vectors_diff(
|
||||
add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
|
||||
add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
|
||||
|
||||
// let merged_vectors_iter =
|
||||
// itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
|
||||
|
||||
// insert vectors into the writer
|
||||
for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) {
|
||||
// Generate the key by extending the unique index to it.
|
||||
|
@ -248,8 +248,8 @@ fn send_original_documents_data(
|
||||
prompts,
|
||||
embedder_name,
|
||||
embedder,
|
||||
user_defined,
|
||||
remove_from_user_defined: auto_generated,
|
||||
user_provided,
|
||||
remove_from_user_provided,
|
||||
} in extracted_vectors
|
||||
{
|
||||
let embeddings = match extract_embeddings(
|
||||
@ -274,8 +274,8 @@ fn send_original_documents_data(
|
||||
expected_dimension: embedder.dimensions(),
|
||||
manual_vectors,
|
||||
embedder_name,
|
||||
user_defined,
|
||||
remove_from_user_defined: auto_generated,
|
||||
user_provided,
|
||||
remove_from_user_provided,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
@ -503,8 +503,8 @@ where
|
||||
embeddings,
|
||||
manual_vectors,
|
||||
embedder_name,
|
||||
user_defined,
|
||||
remove_from_user_defined,
|
||||
user_provided,
|
||||
remove_from_user_provided,
|
||||
} => {
|
||||
dimension.insert(embedder_name.clone(), expected_dimension);
|
||||
TypedChunk::VectorPoints {
|
||||
@ -513,8 +513,8 @@ where
|
||||
expected_dimension,
|
||||
manual_vectors,
|
||||
embedder_name,
|
||||
user_defined,
|
||||
remove_from_user_defined,
|
||||
user_provided,
|
||||
remove_from_user_provided,
|
||||
}
|
||||
}
|
||||
otherwise => otherwise,
|
||||
|
@ -91,8 +91,8 @@ pub(crate) enum TypedChunk {
|
||||
expected_dimension: usize,
|
||||
manual_vectors: grenad::Reader<BufReader<File>>,
|
||||
embedder_name: String,
|
||||
user_defined: RoaringBitmap,
|
||||
remove_from_user_defined: RoaringBitmap,
|
||||
user_provided: RoaringBitmap,
|
||||
remove_from_user_provided: RoaringBitmap,
|
||||
},
|
||||
ScriptLanguageDocids(HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>),
|
||||
}
|
||||
@ -635,8 +635,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
embeddings,
|
||||
expected_dimension,
|
||||
embedder_name,
|
||||
user_defined: ud,
|
||||
remove_from_user_defined: rud,
|
||||
user_provided: ud,
|
||||
remove_from_user_provided: rud,
|
||||
} = typed_chunk
|
||||
else {
|
||||
unreachable!();
|
||||
|
@ -230,7 +230,6 @@ where
|
||||
input_value
|
||||
}
|
||||
[input] => {
|
||||
dbg!(&options);
|
||||
let mut body = options.query.clone();
|
||||
|
||||
body.as_object_mut()
|
||||
|
Loading…
x
Reference in New Issue
Block a user