mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-04 18:45:46 +01:00
Fix most issues with the lifetimes
This commit is contained in:
parent
6ac5b3b136
commit
70802eb7c7
@ -93,6 +93,7 @@ pub struct WriterBbqueueReceiver<'a> {
|
||||
}
|
||||
|
||||
/// The action to perform on the receiver/writer side.
|
||||
#[derive(Debug)]
|
||||
pub enum ReceiverAction {
|
||||
/// Wake up, you have frames to read for the BBQueue buffers.
|
||||
WakeUp,
|
||||
@ -599,6 +600,7 @@ impl DatabaseType for WordPositionDocids {
|
||||
const DATABASE: Database = Database::WordPositionDocids;
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct WordDocidsSender<'a, 'b, D> {
|
||||
sender: &'a ExtractorBbqueueSender<'b>,
|
||||
_marker: PhantomData<D>,
|
||||
@ -621,6 +623,7 @@ impl<D: DatabaseType> WordDocidsSender<'_, '_, D> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct FacetDocidsSender<'a, 'b> {
|
||||
sender: &'a ExtractorBbqueueSender<'b>,
|
||||
}
|
||||
@ -667,6 +670,7 @@ impl FacetDocidsSender<'_, '_> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct FieldIdDocidFacetSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);
|
||||
|
||||
impl FieldIdDocidFacetSender<'_, '_> {
|
||||
@ -691,6 +695,7 @@ impl FieldIdDocidFacetSender<'_, '_> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct DocumentsSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);
|
||||
|
||||
impl DocumentsSender<'_, '_> {
|
||||
@ -716,6 +721,7 @@ impl DocumentsSender<'_, '_> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct EmbeddingSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);
|
||||
|
||||
impl EmbeddingSender<'_, '_> {
|
||||
@ -741,6 +747,7 @@ impl EmbeddingSender<'_, '_> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct GeoSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);
|
||||
|
||||
impl GeoSender<'_, '_> {
|
||||
|
@ -25,14 +25,14 @@ use crate::update::new::DocumentChange;
|
||||
use crate::update::GrenadParameters;
|
||||
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
|
||||
|
||||
pub struct FacetedExtractorData<'a> {
|
||||
pub struct FacetedExtractorData<'a, 'b> {
|
||||
attributes_to_extract: &'a [&'a str],
|
||||
sender: &'a FieldIdDocidFacetSender<'a>,
|
||||
sender: &'a FieldIdDocidFacetSender<'a, 'b>,
|
||||
grenad_parameters: GrenadParameters,
|
||||
buckets: usize,
|
||||
}
|
||||
|
||||
impl<'a, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a> {
|
||||
impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
|
||||
type Data = RefCell<BalancedCaches<'extractor>>;
|
||||
|
||||
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
|
||||
|
@ -18,17 +18,17 @@ use crate::vector::error::{
|
||||
use crate::vector::{Embedder, Embedding, EmbeddingConfigs};
|
||||
use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};
|
||||
|
||||
pub struct EmbeddingExtractor<'a> {
|
||||
pub struct EmbeddingExtractor<'a, 'b> {
|
||||
embedders: &'a EmbeddingConfigs,
|
||||
sender: EmbeddingSender<'a>,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
possible_embedding_mistakes: PossibleEmbeddingMistakes,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
}
|
||||
|
||||
impl<'a> EmbeddingExtractor<'a> {
|
||||
impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
|
||||
pub fn new(
|
||||
embedders: &'a EmbeddingConfigs,
|
||||
sender: EmbeddingSender<'a>,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
field_distribution: &'a FieldDistribution,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
) -> Self {
|
||||
@ -43,7 +43,7 @@ pub struct EmbeddingExtractorData<'extractor>(
|
||||
|
||||
unsafe impl MostlySend for EmbeddingExtractorData<'_> {}
|
||||
|
||||
impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
||||
impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
|
||||
type Data = RefCell<EmbeddingExtractorData<'extractor>>;
|
||||
|
||||
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
|
||||
@ -76,7 +76,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
||||
context.data,
|
||||
&self.possible_embedding_mistakes,
|
||||
self.threads,
|
||||
&self.sender,
|
||||
self.sender,
|
||||
&context.doc_alloc,
|
||||
))
|
||||
}
|
||||
@ -259,7 +259,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
||||
// Currently this is the case as:
|
||||
// 1. BVec are inside of the bumaplo
|
||||
// 2. All other fields are either trivial (u8) or references.
|
||||
struct Chunks<'a, 'extractor> {
|
||||
struct Chunks<'a, 'b, 'extractor> {
|
||||
texts: BVec<'a, &'a str>,
|
||||
ids: BVec<'a, DocumentId>,
|
||||
|
||||
@ -270,11 +270,11 @@ struct Chunks<'a, 'extractor> {
|
||||
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
||||
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
sender: &'a EmbeddingSender<'a>,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
has_manual_generation: Option<&'a str>,
|
||||
}
|
||||
|
||||
impl<'a, 'extractor> Chunks<'a, 'extractor> {
|
||||
impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
embedder: &'a Embedder,
|
||||
@ -284,7 +284,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
|
||||
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
||||
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
sender: &'a EmbeddingSender<'a>,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
doc_alloc: &'a Bump,
|
||||
) -> Self {
|
||||
let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
|
||||
@ -368,7 +368,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
|
||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||
unused_vectors_distribution: &UnusedVectorsDistributionBump,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
sender: EmbeddingSender<'a>,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
has_manual_generation: Option<&'a str>,
|
||||
) -> Result<()> {
|
||||
if let Some(external_docid) = has_manual_generation {
|
||||
|
@ -80,7 +80,7 @@ where
|
||||
let bbbuffers: Vec<_> = (0..rayon::current_num_threads())
|
||||
.map(|_| bbqueue::BBBuffer::new(100 * 1024 * 1024)) // 100 MiB by thread
|
||||
.collect();
|
||||
let (extractor_sender, writer_receiver) = extractor_writer_bbqueue(&bbbuffers, 1000);
|
||||
let (extractor_sender, mut writer_receiver) = extractor_writer_bbqueue(&bbbuffers, 1000);
|
||||
let finished_extraction = AtomicBool::new(false);
|
||||
|
||||
let metadata_builder = MetadataBuilder::from_index(index, wtxn)?;
|
||||
@ -302,7 +302,7 @@ where
|
||||
}
|
||||
|
||||
let embedding_sender = extractor_sender.embeddings();
|
||||
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
|
||||
let extractor = EmbeddingExtractor::new(embedders, embedding_sender, field_distribution, request_threads());
|
||||
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
|
||||
@ -363,7 +363,6 @@ where
|
||||
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
|
||||
|
||||
let vector_arroy = index.vector_arroy;
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||
let indexer_span = tracing::Span::current();
|
||||
let arroy_writers: Result<HashMap<_, _>> = embedders
|
||||
.inner_as_ref()
|
||||
@ -490,6 +489,7 @@ where
|
||||
Step::WritingEmbeddingsToDatabase,
|
||||
));
|
||||
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||
for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
|
||||
let dimensions = *dimensions;
|
||||
writer.build_and_quantize(
|
||||
|
Loading…
Reference in New Issue
Block a user