mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-01 19:08:29 +02:00
Remove lots of Arcs
This commit is contained in:
parent
ef007d547d
commit
29f6eeff8f
@ -169,7 +169,7 @@ fn indexing_songs_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -236,7 +236,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -281,7 +281,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -350,7 +350,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -427,7 +427,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -472,7 +472,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -513,7 +513,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -581,7 +581,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -648,7 +648,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -715,7 +715,7 @@ fn indexing_wiki(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -781,7 +781,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -826,7 +826,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -894,7 +894,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -971,7 +971,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1017,7 +1017,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1059,7 +1059,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1126,7 +1126,7 @@ fn indexing_movies_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1192,7 +1192,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1237,7 +1237,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1305,7 +1305,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1354,7 +1354,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1419,7 +1419,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1464,7 +1464,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1505,7 +1505,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1595,7 +1595,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1686,7 +1686,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1769,7 +1769,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1836,7 +1836,7 @@ fn indexing_geo(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1902,7 +1902,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1947,7 +1947,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2015,7 +2015,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -128,7 +128,7 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -144,7 +144,7 @@ fn main() {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ impl IndexScheduler {
|
|||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
operation: IndexOperation,
|
operation: IndexOperation,
|
||||||
progress: &Progress,
|
progress: &Progress,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: Arc<EmbedderStats>, // Cant change
|
||||||
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
|
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
|
||||||
let indexer_alloc = Bump::new();
|
let indexer_alloc = Bump::new();
|
||||||
let started_processing_at = std::time::Instant::now();
|
let started_processing_at = std::time::Instant::now();
|
||||||
@ -180,7 +180,7 @@ impl IndexScheduler {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| must_stop_processing.get(),
|
&|| must_stop_processing.get(),
|
||||||
progress,
|
progress,
|
||||||
embedder_stats,
|
&embedder_stats,
|
||||||
)
|
)
|
||||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
|
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
|
||||||
);
|
);
|
||||||
@ -292,7 +292,7 @@ impl IndexScheduler {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| must_stop_processing.get(),
|
&|| must_stop_processing.get(),
|
||||||
progress,
|
progress,
|
||||||
embedder_stats,
|
&embedder_stats,
|
||||||
)
|
)
|
||||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||||
);
|
);
|
||||||
@ -441,7 +441,7 @@ impl IndexScheduler {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| must_stop_processing.get(),
|
&|| must_stop_processing.get(),
|
||||||
progress,
|
progress,
|
||||||
embedder_stats,
|
&embedder_stats,
|
||||||
)
|
)
|
||||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
|
||||||
);
|
);
|
||||||
@ -478,7 +478,7 @@ impl IndexScheduler {
|
|||||||
.execute(
|
.execute(
|
||||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||||
|| must_stop_processing.get(),
|
|| must_stop_processing.get(),
|
||||||
embedder_stats,
|
embedder_stats.clone(),
|
||||||
)
|
)
|
||||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||||
|
|
||||||
|
@ -95,7 +95,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -103,7 +103,7 @@ impl TempIndex {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap()?;
|
.unwrap()?;
|
||||||
@ -186,7 +186,7 @@ impl TempIndex {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap()?;
|
.unwrap()?;
|
||||||
@ -261,7 +261,7 @@ fn aborting_indexation() {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| should_abort.load(Relaxed),
|
&|| should_abort.load(Relaxed),
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
@ -684,7 +684,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
|||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
unused_vectors_distribution: &UnusedVectorsDistribution,
|
unused_vectors_distribution: &UnusedVectorsDistribution,
|
||||||
request_threads: &ThreadPoolNoAbort,
|
request_threads: &ThreadPoolNoAbort,
|
||||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||||
@ -727,7 +727,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
|||||||
std::mem::replace(&mut chunks, Vec::with_capacity(n_chunks)),
|
std::mem::replace(&mut chunks, Vec::with_capacity(n_chunks)),
|
||||||
embedder_name,
|
embedder_name,
|
||||||
possible_embedding_mistakes,
|
possible_embedding_mistakes,
|
||||||
embedder_stats.clone(),
|
embedder_stats,
|
||||||
unused_vectors_distribution,
|
unused_vectors_distribution,
|
||||||
request_threads,
|
request_threads,
|
||||||
)?;
|
)?;
|
||||||
@ -750,7 +750,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
|||||||
std::mem::take(&mut chunks),
|
std::mem::take(&mut chunks),
|
||||||
embedder_name,
|
embedder_name,
|
||||||
possible_embedding_mistakes,
|
possible_embedding_mistakes,
|
||||||
embedder_stats.clone(),
|
embedder_stats,
|
||||||
unused_vectors_distribution,
|
unused_vectors_distribution,
|
||||||
request_threads,
|
request_threads,
|
||||||
)?;
|
)?;
|
||||||
@ -789,7 +789,7 @@ fn embed_chunks(
|
|||||||
text_chunks: Vec<Vec<String>>,
|
text_chunks: Vec<Vec<String>>,
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
unused_vectors_distribution: &UnusedVectorsDistribution,
|
unused_vectors_distribution: &UnusedVectorsDistribution,
|
||||||
request_threads: &ThreadPoolNoAbort,
|
request_threads: &ThreadPoolNoAbort,
|
||||||
) -> Result<Vec<Vec<Embedding>>> {
|
) -> Result<Vec<Vec<Embedding>>> {
|
||||||
|
@ -50,7 +50,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||||
max_positions_per_attributes: Option<u32>,
|
max_positions_per_attributes: Option<u32>,
|
||||||
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
|
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: Arc<EmbedderStats>, // Cant change
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
|
let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
|
||||||
|| {
|
|| {
|
||||||
@ -234,7 +234,7 @@ fn send_original_documents_data(
|
|||||||
embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
|
embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
|
||||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||||
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
|
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: Arc<EmbedderStats>, // Cant change
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let original_documents_chunk =
|
let original_documents_chunk =
|
||||||
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
|
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
|
||||||
@ -274,7 +274,7 @@ fn send_original_documents_data(
|
|||||||
embedder.clone(),
|
embedder.clone(),
|
||||||
&embedder_name,
|
&embedder_name,
|
||||||
&possible_embedding_mistakes,
|
&possible_embedding_mistakes,
|
||||||
embedder_stats.clone(),
|
&embedder_stats,
|
||||||
&unused_vectors_distribution,
|
&unused_vectors_distribution,
|
||||||
request_threads(),
|
request_threads(),
|
||||||
) {
|
) {
|
||||||
|
@ -81,7 +81,7 @@ pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
|
|||||||
added_documents: u64,
|
added_documents: u64,
|
||||||
deleted_documents: u64,
|
deleted_documents: u64,
|
||||||
embedders: EmbeddingConfigs,
|
embedders: EmbeddingConfigs,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: Arc<EmbedderStats>, // Cant change
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Debug, Clone)]
|
#[derive(Default, Debug, Clone)]
|
||||||
@ -104,7 +104,7 @@ where
|
|||||||
config: IndexDocumentsConfig,
|
config: IndexDocumentsConfig,
|
||||||
progress: FP,
|
progress: FP,
|
||||||
should_abort: FA,
|
should_abort: FA,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: Arc<EmbedderStats>, // Cant change
|
||||||
) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> {
|
) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> {
|
||||||
let transform = Some(Transform::new(
|
let transform = Some(Transform::new(
|
||||||
wtxn,
|
wtxn,
|
||||||
@ -2030,7 +2030,7 @@ mod tests {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2118,7 +2118,7 @@ mod tests {
|
|||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2304,7 +2304,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2367,7 +2367,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2421,7 +2421,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2474,7 +2474,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2529,7 +2529,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2589,7 +2589,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2642,7 +2642,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2695,7 +2695,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2894,7 +2894,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2954,7 +2954,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -3011,7 +3011,7 @@ mod tests {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::{cell::RefCell, sync::Arc};
|
use std::cell::RefCell;
|
||||||
|
|
||||||
use bumpalo::collections::Vec as BVec;
|
use bumpalo::collections::Vec as BVec;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
@ -23,7 +23,7 @@ pub struct EmbeddingExtractor<'a, 'b> {
|
|||||||
embedders: &'a EmbeddingConfigs,
|
embedders: &'a EmbeddingConfigs,
|
||||||
sender: EmbeddingSender<'a, 'b>,
|
sender: EmbeddingSender<'a, 'b>,
|
||||||
possible_embedding_mistakes: PossibleEmbeddingMistakes,
|
possible_embedding_mistakes: PossibleEmbeddingMistakes,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &'a EmbedderStats,
|
||||||
threads: &'a ThreadPoolNoAbort,
|
threads: &'a ThreadPoolNoAbort,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -32,7 +32,7 @@ impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
|
|||||||
embedders: &'a EmbeddingConfigs,
|
embedders: &'a EmbeddingConfigs,
|
||||||
sender: EmbeddingSender<'a, 'b>,
|
sender: EmbeddingSender<'a, 'b>,
|
||||||
field_distribution: &'a FieldDistribution,
|
field_distribution: &'a FieldDistribution,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &'a EmbedderStats,
|
||||||
threads: &'a ThreadPoolNoAbort,
|
threads: &'a ThreadPoolNoAbort,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let possible_embedding_mistakes = PossibleEmbeddingMistakes::new(field_distribution);
|
let possible_embedding_mistakes = PossibleEmbeddingMistakes::new(field_distribution);
|
||||||
@ -78,7 +78,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
|||||||
prompt,
|
prompt,
|
||||||
context.data,
|
context.data,
|
||||||
&self.possible_embedding_mistakes,
|
&self.possible_embedding_mistakes,
|
||||||
self.embedder_stats.clone(),
|
self.embedder_stats,
|
||||||
self.threads,
|
self.threads,
|
||||||
self.sender,
|
self.sender,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
@ -311,7 +311,7 @@ struct Chunks<'a, 'b, 'extractor> {
|
|||||||
dimensions: usize,
|
dimensions: usize,
|
||||||
prompt: &'a Prompt,
|
prompt: &'a Prompt,
|
||||||
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &'a EmbedderStats,
|
||||||
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
||||||
threads: &'a ThreadPoolNoAbort,
|
threads: &'a ThreadPoolNoAbort,
|
||||||
sender: EmbeddingSender<'a, 'b>,
|
sender: EmbeddingSender<'a, 'b>,
|
||||||
@ -327,7 +327,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
|||||||
prompt: &'a Prompt,
|
prompt: &'a Prompt,
|
||||||
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
||||||
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &'a EmbedderStats,
|
||||||
threads: &'a ThreadPoolNoAbort,
|
threads: &'a ThreadPoolNoAbort,
|
||||||
sender: EmbeddingSender<'a, 'b>,
|
sender: EmbeddingSender<'a, 'b>,
|
||||||
doc_alloc: &'a Bump,
|
doc_alloc: &'a Bump,
|
||||||
@ -378,7 +378,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
|||||||
self.embedder_id,
|
self.embedder_id,
|
||||||
self.embedder_name,
|
self.embedder_name,
|
||||||
self.possible_embedding_mistakes,
|
self.possible_embedding_mistakes,
|
||||||
self.embedder_stats.clone(),
|
self.embedder_stats,
|
||||||
unused_vectors_distribution,
|
unused_vectors_distribution,
|
||||||
self.threads,
|
self.threads,
|
||||||
self.sender,
|
self.sender,
|
||||||
@ -397,7 +397,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
|||||||
self.embedder_id,
|
self.embedder_id,
|
||||||
self.embedder_name,
|
self.embedder_name,
|
||||||
self.possible_embedding_mistakes,
|
self.possible_embedding_mistakes,
|
||||||
self.embedder_stats.clone(),
|
self.embedder_stats,
|
||||||
unused_vectors_distribution,
|
unused_vectors_distribution,
|
||||||
self.threads,
|
self.threads,
|
||||||
self.sender,
|
self.sender,
|
||||||
@ -416,7 +416,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
|||||||
embedder_id: u8,
|
embedder_id: u8,
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
unused_vectors_distribution: &UnusedVectorsDistributionBump,
|
unused_vectors_distribution: &UnusedVectorsDistributionBump,
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
sender: EmbeddingSender<'a, 'b>,
|
sender: EmbeddingSender<'a, 'b>,
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::sync::atomic::AtomicBool;
|
use std::sync::atomic::AtomicBool;
|
||||||
use std::sync::Arc;
|
|
||||||
use std::sync::OnceLock;
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
@ -36,7 +35,7 @@ pub(super) fn extract_all<'pl, 'extractor, DC, MSP>(
|
|||||||
mut index_embeddings: Vec<IndexEmbeddingConfig>,
|
mut index_embeddings: Vec<IndexEmbeddingConfig>,
|
||||||
document_ids: &mut RoaringBitmap,
|
document_ids: &mut RoaringBitmap,
|
||||||
modified_docids: &mut RoaringBitmap,
|
modified_docids: &mut RoaringBitmap,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> Result<(FacetFieldIdsDelta, Vec<IndexEmbeddingConfig>)>
|
) -> Result<(FacetFieldIdsDelta, Vec<IndexEmbeddingConfig>)>
|
||||||
where
|
where
|
||||||
DC: DocumentChanges<'pl>,
|
DC: DocumentChanges<'pl>,
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
use std::sync::atomic::AtomicBool;
|
use std::sync::atomic::AtomicBool;
|
||||||
use std::sync::Arc;
|
|
||||||
use std::sync::{Once, RwLock};
|
use std::sync::{Once, RwLock};
|
||||||
use std::thread::{self, Builder};
|
use std::thread::{self, Builder};
|
||||||
|
|
||||||
@ -56,7 +55,7 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP>(
|
|||||||
embedders: EmbeddingConfigs,
|
embedders: EmbeddingConfigs,
|
||||||
must_stop_processing: &'indexer MSP,
|
must_stop_processing: &'indexer MSP,
|
||||||
progress: &'indexer Progress,
|
progress: &'indexer Progress,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &'indexer EmbedderStats,
|
||||||
) -> Result<ChannelCongestion>
|
) -> Result<ChannelCongestion>
|
||||||
where
|
where
|
||||||
DC: DocumentChanges<'pl>,
|
DC: DocumentChanges<'pl>,
|
||||||
|
@ -475,7 +475,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
progress_callback: &FP,
|
progress_callback: &FP,
|
||||||
should_abort: &FA,
|
should_abort: &FA,
|
||||||
settings_diff: InnerIndexSettingsDiff,
|
settings_diff: InnerIndexSettingsDiff,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: Arc<EmbedderStats>, // Cant change
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
FP: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -1362,7 +1362,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
mut self,
|
mut self,
|
||||||
progress_callback: FP,
|
progress_callback: FP,
|
||||||
should_abort: FA,
|
should_abort: FA,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: Arc<EmbedderStats>, // Cant change
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
FP: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
use std::sync::Arc;
|
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use arroy::Distance;
|
use arroy::Distance;
|
||||||
@ -154,7 +153,7 @@ impl SubEmbedder {
|
|||||||
&self,
|
&self,
|
||||||
texts: Vec<String>,
|
texts: Vec<String>,
|
||||||
deadline: Option<Instant>,
|
deadline: Option<Instant>,
|
||||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
embedder_stats: Option<&EmbedderStats>,
|
||||||
) -> std::result::Result<Vec<Embedding>, EmbedError> {
|
) -> std::result::Result<Vec<Embedding>, EmbedError> {
|
||||||
match self {
|
match self {
|
||||||
SubEmbedder::HuggingFace(embedder) => embedder.embed(texts),
|
SubEmbedder::HuggingFace(embedder) => embedder.embed(texts),
|
||||||
@ -169,7 +168,7 @@ impl SubEmbedder {
|
|||||||
&self,
|
&self,
|
||||||
text: &str,
|
text: &str,
|
||||||
deadline: Option<Instant>,
|
deadline: Option<Instant>,
|
||||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
embedder_stats: Option<&EmbedderStats>,
|
||||||
) -> std::result::Result<Embedding, EmbedError> {
|
) -> std::result::Result<Embedding, EmbedError> {
|
||||||
match self {
|
match self {
|
||||||
SubEmbedder::HuggingFace(embedder) => embedder.embed_one(text),
|
SubEmbedder::HuggingFace(embedder) => embedder.embed_one(text),
|
||||||
@ -196,7 +195,7 @@ impl SubEmbedder {
|
|||||||
&self,
|
&self,
|
||||||
text_chunks: Vec<Vec<String>>,
|
text_chunks: Vec<Vec<String>>,
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
|
) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||||
match self {
|
match self {
|
||||||
SubEmbedder::HuggingFace(embedder) => embedder.embed_index(text_chunks),
|
SubEmbedder::HuggingFace(embedder) => embedder.embed_index(text_chunks),
|
||||||
@ -218,7 +217,7 @@ impl SubEmbedder {
|
|||||||
&self,
|
&self,
|
||||||
texts: &[&str],
|
texts: &[&str],
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> std::result::Result<Vec<Embedding>, EmbedError> {
|
) -> std::result::Result<Vec<Embedding>, EmbedError> {
|
||||||
match self {
|
match self {
|
||||||
SubEmbedder::HuggingFace(embedder) => embedder.embed_index_ref(texts),
|
SubEmbedder::HuggingFace(embedder) => embedder.embed_index_ref(texts),
|
||||||
|
@ -749,7 +749,7 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
text_chunks: Vec<Vec<String>>,
|
text_chunks: Vec<Vec<String>>,
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
|
) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||||
match self {
|
match self {
|
||||||
Embedder::HuggingFace(embedder) => embedder.embed_index(text_chunks),
|
Embedder::HuggingFace(embedder) => embedder.embed_index(text_chunks),
|
||||||
@ -772,7 +772,7 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
texts: &[&str],
|
texts: &[&str],
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> std::result::Result<Vec<Embedding>, EmbedError> {
|
) -> std::result::Result<Vec<Embedding>, EmbedError> {
|
||||||
match self {
|
match self {
|
||||||
Embedder::HuggingFace(embedder) => embedder.embed_index_ref(texts),
|
Embedder::HuggingFace(embedder) => embedder.embed_index_ref(texts),
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
use std::sync::Arc;
|
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
|
use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
|
||||||
@ -106,7 +105,7 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
texts: &[S],
|
texts: &[S],
|
||||||
deadline: Option<Instant>,
|
deadline: Option<Instant>,
|
||||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
embedder_stats: Option<&EmbedderStats>,
|
||||||
) -> Result<Vec<Embedding>, EmbedError> {
|
) -> Result<Vec<Embedding>, EmbedError> {
|
||||||
match self.rest_embedder.embed_ref(texts, deadline, embedder_stats) {
|
match self.rest_embedder.embed_ref(texts, deadline, embedder_stats) {
|
||||||
Ok(embeddings) => Ok(embeddings),
|
Ok(embeddings) => Ok(embeddings),
|
||||||
@ -121,21 +120,21 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
text_chunks: Vec<Vec<String>>,
|
text_chunks: Vec<Vec<String>>,
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||||
// This condition helps reduce the number of active rayon jobs
|
// This condition helps reduce the number of active rayon jobs
|
||||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||||
text_chunks
|
text_chunks
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats)))
|
||||||
.collect()
|
.collect()
|
||||||
} else {
|
} else {
|
||||||
threads
|
threads
|
||||||
.install(move || {
|
.install(move || {
|
||||||
text_chunks
|
text_chunks
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats)))
|
||||||
.collect()
|
.collect()
|
||||||
})
|
})
|
||||||
.map_err(|error| EmbedError {
|
.map_err(|error| EmbedError {
|
||||||
@ -149,14 +148,14 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
texts: &[&str],
|
texts: &[&str],
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> Result<Vec<Vec<f32>>, EmbedError> {
|
) -> Result<Vec<Vec<f32>>, EmbedError> {
|
||||||
// This condition helps reduce the number of active rayon jobs
|
// This condition helps reduce the number of active rayon jobs
|
||||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||||
.chunks(self.prompt_count_in_chunk_hint())
|
.chunks(self.prompt_count_in_chunk_hint())
|
||||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let embeddings = embeddings?;
|
let embeddings = embeddings?;
|
||||||
@ -166,7 +165,7 @@ impl Embedder {
|
|||||||
.install(move || {
|
.install(move || {
|
||||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let embeddings = embeddings?;
|
let embeddings = embeddings?;
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
@ -217,7 +216,7 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
texts: &[S],
|
texts: &[S],
|
||||||
deadline: Option<Instant>,
|
deadline: Option<Instant>,
|
||||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
embedder_stats: Option<&EmbedderStats>,
|
||||||
) -> Result<Vec<Embedding>, EmbedError> {
|
) -> Result<Vec<Embedding>, EmbedError> {
|
||||||
match self.rest_embedder.embed_ref(texts, deadline, embedder_stats) {
|
match self.rest_embedder.embed_ref(texts, deadline, embedder_stats) {
|
||||||
Ok(embeddings) => Ok(embeddings),
|
Ok(embeddings) => Ok(embeddings),
|
||||||
@ -262,21 +261,21 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
text_chunks: Vec<Vec<String>>,
|
text_chunks: Vec<Vec<String>>,
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||||
// This condition helps reduce the number of active rayon jobs
|
// This condition helps reduce the number of active rayon jobs
|
||||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||||
text_chunks
|
text_chunks
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats)))
|
||||||
.collect()
|
.collect()
|
||||||
} else {
|
} else {
|
||||||
threads
|
threads
|
||||||
.install(move || {
|
.install(move || {
|
||||||
text_chunks
|
text_chunks
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed(&chunk, None, Some(embedder_stats)))
|
||||||
.collect()
|
.collect()
|
||||||
})
|
})
|
||||||
.map_err(|error| EmbedError {
|
.map_err(|error| EmbedError {
|
||||||
@ -290,14 +289,14 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
texts: &[&str],
|
texts: &[&str],
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> Result<Vec<Vec<f32>>, EmbedError> {
|
) -> Result<Vec<Vec<f32>>, EmbedError> {
|
||||||
// This condition helps reduce the number of active rayon jobs
|
// This condition helps reduce the number of active rayon jobs
|
||||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||||
.chunks(self.prompt_count_in_chunk_hint())
|
.chunks(self.prompt_count_in_chunk_hint())
|
||||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
|
||||||
.collect();
|
.collect();
|
||||||
let embeddings = embeddings?;
|
let embeddings = embeddings?;
|
||||||
Ok(embeddings.into_iter().flatten().collect())
|
Ok(embeddings.into_iter().flatten().collect())
|
||||||
@ -306,7 +305,7 @@ impl Embedder {
|
|||||||
.install(move || {
|
.install(move || {
|
||||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let embeddings = embeddings?;
|
let embeddings = embeddings?;
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
@ -170,7 +169,7 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
texts: Vec<String>,
|
texts: Vec<String>,
|
||||||
deadline: Option<Instant>,
|
deadline: Option<Instant>,
|
||||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
embedder_stats: Option<&EmbedderStats>,
|
||||||
) -> Result<Vec<Embedding>, EmbedError> {
|
) -> Result<Vec<Embedding>, EmbedError> {
|
||||||
embed(
|
embed(
|
||||||
&self.data,
|
&self.data,
|
||||||
@ -186,7 +185,7 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
texts: &[S],
|
texts: &[S],
|
||||||
deadline: Option<Instant>,
|
deadline: Option<Instant>,
|
||||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
embedder_stats: Option<&EmbedderStats>,
|
||||||
) -> Result<Vec<Embedding>, EmbedError>
|
) -> Result<Vec<Embedding>, EmbedError>
|
||||||
where
|
where
|
||||||
S: AsRef<str> + Serialize,
|
S: AsRef<str> + Serialize,
|
||||||
@ -208,21 +207,21 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
text_chunks: Vec<Vec<String>>,
|
text_chunks: Vec<Vec<String>>,
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||||
// This condition helps reduce the number of active rayon jobs
|
// This condition helps reduce the number of active rayon jobs
|
||||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||||
text_chunks
|
text_chunks
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
|
||||||
.collect()
|
.collect()
|
||||||
} else {
|
} else {
|
||||||
threads
|
threads
|
||||||
.install(move || {
|
.install(move || {
|
||||||
text_chunks
|
text_chunks
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed(chunk, None, Some(embedder_stats)))
|
||||||
.collect()
|
.collect()
|
||||||
})
|
})
|
||||||
.map_err(|error| EmbedError {
|
.map_err(|error| EmbedError {
|
||||||
@ -236,14 +235,14 @@ impl Embedder {
|
|||||||
&self,
|
&self,
|
||||||
texts: &[&str],
|
texts: &[&str],
|
||||||
threads: &ThreadPoolNoAbort,
|
threads: &ThreadPoolNoAbort,
|
||||||
embedder_stats: Arc<EmbedderStats>,
|
embedder_stats: &EmbedderStats,
|
||||||
) -> Result<Vec<Embedding>, EmbedError> {
|
) -> Result<Vec<Embedding>, EmbedError> {
|
||||||
// This condition helps reduce the number of active rayon jobs
|
// This condition helps reduce the number of active rayon jobs
|
||||||
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
// so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
|
||||||
if threads.active_operations() >= REQUEST_PARALLELISM {
|
if threads.active_operations() >= REQUEST_PARALLELISM {
|
||||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||||
.chunks(self.prompt_count_in_chunk_hint())
|
.chunks(self.prompt_count_in_chunk_hint())
|
||||||
.map(move |chunk| self.embed_ref(chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed_ref(chunk, None, Some(embedder_stats)))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let embeddings = embeddings?;
|
let embeddings = embeddings?;
|
||||||
@ -253,7 +252,7 @@ impl Embedder {
|
|||||||
.install(move || {
|
.install(move || {
|
||||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||||
.map(move |chunk| self.embed_ref(chunk, None, Some(embedder_stats.clone())))
|
.map(move |chunk| self.embed_ref(chunk, None, Some(embedder_stats)))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let embeddings = embeddings?;
|
let embeddings = embeddings?;
|
||||||
@ -303,7 +302,7 @@ fn embed<S>(
|
|||||||
expected_count: usize,
|
expected_count: usize,
|
||||||
expected_dimension: Option<usize>,
|
expected_dimension: Option<usize>,
|
||||||
deadline: Option<Instant>,
|
deadline: Option<Instant>,
|
||||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
embedder_stats: Option<&EmbedderStats>,
|
||||||
) -> Result<Vec<Embedding>, EmbedError>
|
) -> Result<Vec<Embedding>, EmbedError>
|
||||||
where
|
where
|
||||||
S: Serialize,
|
S: Serialize,
|
||||||
@ -323,7 +322,7 @@ where
|
|||||||
|
|
||||||
for attempt in 0..10 {
|
for attempt in 0..10 {
|
||||||
if let Some(embedder_stats) = &embedder_stats {
|
if let Some(embedder_stats) = &embedder_stats {
|
||||||
embedder_stats.as_ref().total_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
embedder_stats.total_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
let response = request.clone().send_json(&body);
|
let response = request.clone().send_json(&body);
|
||||||
let result = check_response(response, data.configuration_source).and_then(|response| {
|
let result = check_response(response, data.configuration_source).and_then(|response| {
|
||||||
@ -367,7 +366,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
if let Some(embedder_stats) = &embedder_stats {
|
if let Some(embedder_stats) = &embedder_stats {
|
||||||
embedder_stats.as_ref().total_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
embedder_stats.total_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
let response = request.send_json(&body);
|
let response = request.send_json(&body);
|
||||||
let result = check_response(response, data.configuration_source).and_then(|response| {
|
let result = check_response(response, data.configuration_source).and_then(|response| {
|
||||||
|
@ -74,7 +74,7 @@ fn test_facet_distribution_with_no_facet_values() {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -114,7 +114,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -344,7 +344,7 @@ fn criteria_ascdesc() {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -153,7 +153,7 @@ fn test_typo_disabled_on_word() {
|
|||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&Progress::default(),
|
&Progress::default(),
|
||||||
Default::default(),
|
&Default::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user