mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-02 03:18:30 +02:00
Remove options
This commit is contained in:
parent
695877043a
commit
d08e89ea3d
@ -684,12 +684,10 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
||||
embedder: Arc<Embedder>,
|
||||
embedder_name: &str,
|
||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
unused_vectors_distribution: &UnusedVectorsDistribution,
|
||||
request_threads: &ThreadPoolNoAbort,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
println!("Extract embedder stats {}:", embedder_stats.is_some());
|
||||
|
||||
let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
|
||||
let n_vectors_per_chunk = embedder.prompt_count_in_chunk_hint(); // number of vectors in a single chunk
|
||||
|
||||
@ -791,7 +789,7 @@ fn embed_chunks(
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
embedder_name: &str,
|
||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
unused_vectors_distribution: &UnusedVectorsDistribution,
|
||||
request_threads: &ThreadPoolNoAbort,
|
||||
) -> Result<Vec<Vec<Embedding>>> {
|
||||
|
@ -274,7 +274,7 @@ fn send_original_documents_data(
|
||||
embedder.clone(),
|
||||
&embedder_name,
|
||||
&possible_embedding_mistakes,
|
||||
Some(embedder_stats.clone()),
|
||||
embedder_stats.clone(),
|
||||
&unused_vectors_distribution,
|
||||
request_threads(),
|
||||
) {
|
||||
|
@ -23,7 +23,7 @@ pub struct EmbeddingExtractor<'a, 'b> {
|
||||
embedders: &'a EmbeddingConfigs,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
possible_embedding_mistakes: PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
}
|
||||
|
||||
@ -32,7 +32,7 @@ impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
|
||||
embedders: &'a EmbeddingConfigs,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
field_distribution: &'a FieldDistribution,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
) -> Self {
|
||||
let possible_embedding_mistakes = PossibleEmbeddingMistakes::new(field_distribution);
|
||||
@ -311,7 +311,7 @@ struct Chunks<'a, 'b, 'extractor> {
|
||||
dimensions: usize,
|
||||
prompt: &'a Prompt,
|
||||
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
@ -327,7 +327,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
prompt: &'a Prompt,
|
||||
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
||||
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
doc_alloc: &'a Bump,
|
||||
@ -416,7 +416,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
embedder_id: u8,
|
||||
embedder_name: &str,
|
||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
unused_vectors_distribution: &UnusedVectorsDistributionBump,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
|
@ -248,7 +248,7 @@ where
|
||||
embedders,
|
||||
embedding_sender,
|
||||
field_distribution,
|
||||
Some(embedder_stats),
|
||||
embedder_stats,
|
||||
request_threads(),
|
||||
);
|
||||
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||
|
@ -196,7 +196,7 @@ impl SubEmbedder {
|
||||
&self,
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
match self {
|
||||
SubEmbedder::HuggingFace(embedder) => embedder.embed_index(text_chunks),
|
||||
@ -218,7 +218,7 @@ impl SubEmbedder {
|
||||
&self,
|
||||
texts: &[&str],
|
||||
threads: &ThreadPoolNoAbort,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> std::result::Result<Vec<Embedding>, EmbedError> {
|
||||
match self {
|
||||
SubEmbedder::HuggingFace(embedder) => embedder.embed_index_ref(texts),
|
||||
|
@ -749,7 +749,7 @@ impl Embedder {
|
||||
&self,
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
match self {
|
||||
Embedder::HuggingFace(embedder) => embedder.embed_index(text_chunks),
|
||||
@ -772,7 +772,7 @@ impl Embedder {
|
||||
&self,
|
||||
texts: &[&str],
|
||||
threads: &ThreadPoolNoAbort,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> std::result::Result<Vec<Embedding>, EmbedError> {
|
||||
match self {
|
||||
Embedder::HuggingFace(embedder) => embedder.embed_index_ref(texts),
|
||||
|
@ -121,21 +121,21 @@ impl Embedder {
|
||||
&self,
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
text_chunks
|
||||
.into_iter()
|
||||
.map(move |chunk| self.embed(&chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats.clone())))
|
||||
.collect()
|
||||
} else {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks
|
||||
.into_par_iter()
|
||||
.map(move |chunk| self.embed(&chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats.clone())))
|
||||
.collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
@ -149,14 +149,14 @@ impl Embedder {
|
||||
&self,
|
||||
texts: &[&str],
|
||||
threads: &ThreadPoolNoAbort,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> Result<Vec<Vec<f32>>, EmbedError> {
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
@ -166,7 +166,7 @@ impl Embedder {
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
|
@ -262,21 +262,21 @@ impl Embedder {
|
||||
&self,
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
text_chunks
|
||||
.into_iter()
|
||||
.map(move |chunk| self.embed(&chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats.clone())))
|
||||
.collect()
|
||||
} else {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks
|
||||
.into_par_iter()
|
||||
.map(move |chunk| self.embed(&chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats.clone())))
|
||||
.collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
@ -290,14 +290,14 @@ impl Embedder {
|
||||
&self,
|
||||
texts: &[&str],
|
||||
threads: &ThreadPoolNoAbort,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> Result<Vec<Vec<f32>>, EmbedError> {
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
||||
.collect();
|
||||
let embeddings = embeddings?;
|
||||
Ok(embeddings.into_iter().flatten().collect())
|
||||
@ -306,7 +306,7 @@ impl Embedder {
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
|
@ -208,21 +208,21 @@ impl Embedder {
|
||||
&self,
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
text_chunks
|
||||
.into_iter()
|
||||
.map(move |chunk| self.embed(chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
||||
.collect()
|
||||
} else {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks
|
||||
.into_par_iter()
|
||||
.map(move |chunk| self.embed(chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
||||
.collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
@ -236,14 +236,14 @@ impl Embedder {
|
||||
&self,
|
||||
texts: &[&str],
|
||||
threads: &ThreadPoolNoAbort,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> Result<Vec<Embedding>, EmbedError> {
|
||||
// This condition helps reduce the number of active rayon jobs
|
||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed_ref(chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed_ref(chunk, None, Some(embedder_stats.clone())))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
@ -253,7 +253,7 @@ impl Embedder {
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed_ref(chunk, None, embedder_stats.clone()))
|
||||
.map(move |chunk| self.embed_ref(chunk, None, Some(embedder_stats.clone())))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
|
Loading…
x
Reference in New Issue
Block a user