mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
implements more review comments
This commit is contained in:
parent
49fa41ce65
commit
b7349910d9
@ -5012,7 +5012,7 @@ mod tests {
|
|||||||
insta::assert_json_snapshot!(task.details);
|
insta::assert_json_snapshot!(task.details);
|
||||||
}
|
}
|
||||||
|
|
||||||
handle.advance_n_successful_batches(1);
|
handle.advance_one_successful_batch();
|
||||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors");
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors");
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -35,8 +35,8 @@ pub struct ExtractedVectorPoints {
|
|||||||
// embedder
|
// embedder
|
||||||
pub embedder_name: String,
|
pub embedder_name: String,
|
||||||
pub embedder: Arc<Embedder>,
|
pub embedder: Arc<Embedder>,
|
||||||
pub user_defined: RoaringBitmap,
|
pub user_provided: RoaringBitmap,
|
||||||
pub remove_from_user_defined: RoaringBitmap,
|
pub remove_from_user_provided: RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum VectorStateDelta {
|
enum VectorStateDelta {
|
||||||
@ -82,9 +82,9 @@ struct EmbedderVectorExtractor {
|
|||||||
remove_vectors_writer: Writer<BufWriter<File>>,
|
remove_vectors_writer: Writer<BufWriter<File>>,
|
||||||
|
|
||||||
// The docids of the documents that contains a user defined embedding
|
// The docids of the documents that contains a user defined embedding
|
||||||
user_defined: RoaringBitmap,
|
user_provided: RoaringBitmap,
|
||||||
// The docids of the documents that contains an auto-generated embedding
|
// The docids of the documents that contains an auto-generated embedding
|
||||||
remove_from_user_defined: RoaringBitmap,
|
remove_from_user_provided: RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extracts the embedding vector contained in each document under the `_vectors` field.
|
/// Extracts the embedding vector contained in each document under the `_vectors` field.
|
||||||
@ -140,8 +140,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
manual_vectors_writer,
|
manual_vectors_writer,
|
||||||
prompts_writer,
|
prompts_writer,
|
||||||
remove_vectors_writer,
|
remove_vectors_writer,
|
||||||
user_defined: RoaringBitmap::new(),
|
user_provided: RoaringBitmap::new(),
|
||||||
remove_from_user_defined: RoaringBitmap::new(),
|
remove_from_user_provided: RoaringBitmap::new(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -179,8 +179,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
manual_vectors_writer,
|
manual_vectors_writer,
|
||||||
prompts_writer,
|
prompts_writer,
|
||||||
remove_vectors_writer,
|
remove_vectors_writer,
|
||||||
user_defined,
|
user_provided,
|
||||||
remove_from_user_defined,
|
remove_from_user_provided,
|
||||||
} in extractors.iter_mut()
|
} in extractors.iter_mut()
|
||||||
{
|
{
|
||||||
let delta = match parsed_vectors.remove(embedder_name) {
|
let delta = match parsed_vectors.remove(embedder_name) {
|
||||||
@ -188,10 +188,10 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
match (old.map_or(true, |old| old.is_user_provided()), new.is_user_provided()) {
|
match (old.map_or(true, |old| old.is_user_provided()), new.is_user_provided()) {
|
||||||
(true, true) | (false, false) => (),
|
(true, true) | (false, false) => (),
|
||||||
(true, false) => {
|
(true, false) => {
|
||||||
remove_from_user_defined.insert(docid);
|
remove_from_user_provided.insert(docid);
|
||||||
}
|
}
|
||||||
(false, true) => {
|
(false, true) => {
|
||||||
user_defined.insert(docid);
|
user_provided.insert(docid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -214,7 +214,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
|
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
|
||||||
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
|
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
|
||||||
if document_is_kept && old.is_some() {
|
if document_is_kept && old.is_some() {
|
||||||
remove_from_user_defined.insert(docid);
|
remove_from_user_provided.insert(docid);
|
||||||
// becomes autogenerated
|
// becomes autogenerated
|
||||||
VectorStateDelta::NowGenerated(prompt.render(
|
VectorStateDelta::NowGenerated(prompt.render(
|
||||||
obkv,
|
obkv,
|
||||||
@ -229,9 +229,9 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
}
|
}
|
||||||
(None, Some(new)) => {
|
(None, Some(new)) => {
|
||||||
if new.is_user_provided() {
|
if new.is_user_provided() {
|
||||||
user_defined.insert(docid);
|
user_provided.insert(docid);
|
||||||
} else {
|
} else {
|
||||||
remove_from_user_defined.insert(docid);
|
remove_from_user_provided.insert(docid);
|
||||||
}
|
}
|
||||||
// was possibly autogenerated, remove all vectors for that document
|
// was possibly autogenerated, remove all vectors for that document
|
||||||
let add_vectors = new.into_array_of_vectors();
|
let add_vectors = new.into_array_of_vectors();
|
||||||
@ -274,7 +274,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
VectorStateDelta::NoChange
|
VectorStateDelta::NoChange
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
remove_from_user_defined.remove(docid);
|
remove_from_user_provided.remove(docid);
|
||||||
VectorStateDelta::NowRemoved
|
VectorStateDelta::NowRemoved
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -301,8 +301,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
manual_vectors_writer,
|
manual_vectors_writer,
|
||||||
prompts_writer,
|
prompts_writer,
|
||||||
remove_vectors_writer,
|
remove_vectors_writer,
|
||||||
user_defined,
|
user_provided,
|
||||||
remove_from_user_defined,
|
remove_from_user_provided,
|
||||||
} in extractors
|
} in extractors
|
||||||
{
|
{
|
||||||
results.push(ExtractedVectorPoints {
|
results.push(ExtractedVectorPoints {
|
||||||
@ -311,8 +311,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
prompts: writer_into_reader(prompts_writer)?,
|
prompts: writer_into_reader(prompts_writer)?,
|
||||||
embedder,
|
embedder,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
user_defined,
|
user_provided,
|
||||||
remove_from_user_defined,
|
remove_from_user_provided,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -347,9 +347,6 @@ fn push_vectors_diff(
|
|||||||
add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
|
add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
|
||||||
add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
|
add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
|
||||||
|
|
||||||
// let merged_vectors_iter =
|
|
||||||
// itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
|
|
||||||
|
|
||||||
// insert vectors into the writer
|
// insert vectors into the writer
|
||||||
for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) {
|
for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) {
|
||||||
// Generate the key by extending the unique index to it.
|
// Generate the key by extending the unique index to it.
|
||||||
|
@ -248,8 +248,8 @@ fn send_original_documents_data(
|
|||||||
prompts,
|
prompts,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
embedder,
|
embedder,
|
||||||
user_defined,
|
user_provided,
|
||||||
remove_from_user_defined: auto_generated,
|
remove_from_user_provided,
|
||||||
} in extracted_vectors
|
} in extracted_vectors
|
||||||
{
|
{
|
||||||
let embeddings = match extract_embeddings(
|
let embeddings = match extract_embeddings(
|
||||||
@ -274,8 +274,8 @@ fn send_original_documents_data(
|
|||||||
expected_dimension: embedder.dimensions(),
|
expected_dimension: embedder.dimensions(),
|
||||||
manual_vectors,
|
manual_vectors,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
user_defined,
|
user_provided,
|
||||||
remove_from_user_defined: auto_generated,
|
remove_from_user_provided,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -503,8 +503,8 @@ where
|
|||||||
embeddings,
|
embeddings,
|
||||||
manual_vectors,
|
manual_vectors,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
user_defined,
|
user_provided,
|
||||||
remove_from_user_defined,
|
remove_from_user_provided,
|
||||||
} => {
|
} => {
|
||||||
dimension.insert(embedder_name.clone(), expected_dimension);
|
dimension.insert(embedder_name.clone(), expected_dimension);
|
||||||
TypedChunk::VectorPoints {
|
TypedChunk::VectorPoints {
|
||||||
@ -513,8 +513,8 @@ where
|
|||||||
expected_dimension,
|
expected_dimension,
|
||||||
manual_vectors,
|
manual_vectors,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
user_defined,
|
user_provided,
|
||||||
remove_from_user_defined,
|
remove_from_user_provided,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
otherwise => otherwise,
|
otherwise => otherwise,
|
||||||
|
@ -91,8 +91,8 @@ pub(crate) enum TypedChunk {
|
|||||||
expected_dimension: usize,
|
expected_dimension: usize,
|
||||||
manual_vectors: grenad::Reader<BufReader<File>>,
|
manual_vectors: grenad::Reader<BufReader<File>>,
|
||||||
embedder_name: String,
|
embedder_name: String,
|
||||||
user_defined: RoaringBitmap,
|
user_provided: RoaringBitmap,
|
||||||
remove_from_user_defined: RoaringBitmap,
|
remove_from_user_provided: RoaringBitmap,
|
||||||
},
|
},
|
||||||
ScriptLanguageDocids(HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>),
|
ScriptLanguageDocids(HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>),
|
||||||
}
|
}
|
||||||
@ -635,8 +635,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
embeddings,
|
embeddings,
|
||||||
expected_dimension,
|
expected_dimension,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
user_defined: ud,
|
user_provided: ud,
|
||||||
remove_from_user_defined: rud,
|
remove_from_user_provided: rud,
|
||||||
} = typed_chunk
|
} = typed_chunk
|
||||||
else {
|
else {
|
||||||
unreachable!();
|
unreachable!();
|
||||||
|
@ -230,7 +230,6 @@ where
|
|||||||
input_value
|
input_value
|
||||||
}
|
}
|
||||||
[input] => {
|
[input] => {
|
||||||
dbg!(&options);
|
|
||||||
let mut body = options.query.clone();
|
let mut body = options.query.clone();
|
||||||
|
|
||||||
body.as_object_mut()
|
body.as_object_mut()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user