inital implementation of the progress

This commit is contained in:
Tamo 2024-12-10 16:30:48 +01:00
parent 8c19cb0a0b
commit df9b68f8ed
No known key found for this signature in database
GPG key ID: 20CD8020AFA88D69
29 changed files with 585 additions and 414 deletions

View file

@ -16,10 +16,10 @@ use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
use crate::update::new::extract::perm_json_p;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
@ -373,26 +373,16 @@ fn truncate_str(s: &str) -> &str {
impl FacetedDocidsExtractor {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
pub fn run_extraction<
'pl,
'fid,
'indexer,
'index,
'extractor,
DC: DocumentChanges<'pl>,
MSP,
SP,
>(
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
sender: &FieldIdDocidFacetSender,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;

View file

@ -15,23 +15,22 @@ pub use geo::*;
pub use searchable::*;
pub use vectors::EmbeddingExtractor;
use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress};
use super::steps::Step;
use super::indexer::document_changes::{DocumentChanges, IndexingContext};
use super::steps::IndexingStep;
use super::thread_local::{FullySend, ThreadLocal};
use crate::update::GrenadParameters;
use crate::Result;
pub trait DocidsExtractor {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync;
MSP: Fn() -> bool + Sync;
}
/// TODO move in permissive json pointer

View file

@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::extract::perm_json_p::contained_in;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
@ -239,25 +239,15 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
pub struct WordDocidsExtractors;
impl WordDocidsExtractors {
pub fn run_extraction<
'pl,
'fid,
'indexer,
'index,
'extractor,
DC: DocumentChanges<'pl>,
MSP,
SP,
>(
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<WordDocidsCaches<'extractor>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;

View file

@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::cache::BalancedCaches;
use super::DocidsExtractor;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
@ -56,16 +56,15 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
}
pub trait SearchableExtractor: Sized + Sync {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
@ -134,16 +133,15 @@ pub trait SearchableExtractor: Sized + Sync {
}
impl<T: SearchableExtractor> DocidsExtractor for T {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
Self::run_extraction(
grenad_parameters,