mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
forward the embedding config to the extractors
This commit is contained in:
parent
9eb6f522ea
commit
a73ccc78a6
@ -14,6 +14,7 @@ use roaring::RoaringBitmap;
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
||||||
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::prompt::Prompt;
|
use crate::prompt::Prompt;
|
||||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||||
use crate::update::settings::InnerIndexSettingsDiff;
|
use crate::update::settings::InnerIndexSettingsDiff;
|
||||||
@ -96,6 +97,7 @@ struct EmbedderVectorExtractor {
|
|||||||
pub fn extract_vector_points<R: io::Read + io::Seek>(
|
pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||||
obkv_documents: grenad::Reader<R>,
|
obkv_documents: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
|
embedders_configs: &[IndexEmbeddingConfig],
|
||||||
settings_diff: &InnerIndexSettingsDiff,
|
settings_diff: &InnerIndexSettingsDiff,
|
||||||
) -> Result<Vec<ExtractedVectorPoints>> {
|
) -> Result<Vec<ExtractedVectorPoints>> {
|
||||||
let reindex_vectors = settings_diff.reindex_vectors();
|
let reindex_vectors = settings_diff.reindex_vectors();
|
||||||
|
@ -30,6 +30,7 @@ use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids
|
|||||||
use self::extract_word_position_docids::extract_word_position_docids;
|
use self::extract_word_position_docids::extract_word_position_docids;
|
||||||
use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
|
use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
|
||||||
use super::{helpers, TypedChunk};
|
use super::{helpers, TypedChunk};
|
||||||
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::update::settings::InnerIndexSettingsDiff;
|
use crate::update::settings::InnerIndexSettingsDiff;
|
||||||
use crate::{FieldId, Result, ThreadPoolNoAbortBuilder};
|
use crate::{FieldId, Result, ThreadPoolNoAbortBuilder};
|
||||||
|
|
||||||
@ -43,6 +44,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||||
primary_key_id: FieldId,
|
primary_key_id: FieldId,
|
||||||
|
embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
|
||||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||||
max_positions_per_attributes: Option<u32>,
|
max_positions_per_attributes: Option<u32>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
@ -55,6 +57,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
original_documents_chunk,
|
original_documents_chunk,
|
||||||
indexer,
|
indexer,
|
||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
|
embedders_configs.clone(),
|
||||||
settings_diff.clone(),
|
settings_diff.clone(),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
@ -210,6 +213,7 @@ fn send_original_documents_data(
|
|||||||
original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
|
original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||||
|
embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
|
||||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let original_documents_chunk =
|
let original_documents_chunk =
|
||||||
@ -226,11 +230,17 @@ fn send_original_documents_data(
|
|||||||
|
|
||||||
if index_vectors {
|
if index_vectors {
|
||||||
let settings_diff = settings_diff.clone();
|
let settings_diff = settings_diff.clone();
|
||||||
|
let embedders_configs = embedders_configs.clone();
|
||||||
|
|
||||||
let original_documents_chunk = original_documents_chunk.clone();
|
let original_documents_chunk = original_documents_chunk.clone();
|
||||||
let lmdb_writer_sx = lmdb_writer_sx.clone();
|
let lmdb_writer_sx = lmdb_writer_sx.clone();
|
||||||
rayon::spawn(move || {
|
rayon::spawn(move || {
|
||||||
match extract_vector_points(original_documents_chunk.clone(), indexer, &settings_diff) {
|
match extract_vector_points(
|
||||||
|
original_documents_chunk.clone(),
|
||||||
|
indexer,
|
||||||
|
&embedders_configs,
|
||||||
|
&settings_diff,
|
||||||
|
) {
|
||||||
Ok(extracted_vectors) => {
|
Ok(extracted_vectors) => {
|
||||||
for ExtractedVectorPoints {
|
for ExtractedVectorPoints {
|
||||||
manual_vectors,
|
manual_vectors,
|
||||||
|
@ -286,6 +286,7 @@ where
|
|||||||
settings_diff.new.recompute_searchables(self.wtxn, self.index)?;
|
settings_diff.new.recompute_searchables(self.wtxn, self.index)?;
|
||||||
|
|
||||||
let settings_diff = Arc::new(settings_diff);
|
let settings_diff = Arc::new(settings_diff);
|
||||||
|
let embedders_configs = Arc::new(self.index.embedding_configs(self.wtxn)?);
|
||||||
|
|
||||||
let backup_pool;
|
let backup_pool;
|
||||||
let pool = match self.indexer_config.thread_pool {
|
let pool = match self.indexer_config.thread_pool {
|
||||||
@ -399,6 +400,7 @@ where
|
|||||||
pool_params,
|
pool_params,
|
||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
primary_key_id,
|
primary_key_id,
|
||||||
|
embedders_configs.clone(),
|
||||||
settings_diff_cloned,
|
settings_diff_cloned,
|
||||||
max_positions_per_attributes,
|
max_positions_per_attributes,
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user