new extractor bugfixes:

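- fix old_has_fragments: the emptiness check was inverted (missing `!`),
  so it was true only when the old embedder had no fragments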
- new_is_user_provided is now always false when generating fragments,
  even if no fragment ever matches
This commit is contained in:
Louis Dureuil 2025-07-03 14:35:02 +02:00
parent dfe0c8664e
commit 90e6b6416f
No known key found for this signature in database

View file

@ -357,7 +357,7 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
chunks.is_user_provided_must_regenerate(document.docid()); chunks.is_user_provided_must_regenerate(document.docid());
let old_has_fragments = old_embedders let old_has_fragments = old_embedders
.get(embedder_name) .get(embedder_name)
.map(|embedder| embedder.fragments().is_empty()) .map(|embedder| !embedder.fragments().is_empty())
.unwrap_or_default(); .unwrap_or_default();
let new_has_fragments = chunks.has_fragments(); let new_has_fragments = chunks.has_fragments();
@ -628,9 +628,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
session.on_embed_mut().clear_vectors(docid); session.on_embed_mut().clear_vectors(docid);
} }
let mut extracted = false;
let extracted = &mut extracted;
settings_delta.try_for_each_fragment_diff( settings_delta.try_for_each_fragment_diff(
session.embedder_name(), session.embedder_name(),
|fragment_diff| { |fragment_diff| {
@ -660,7 +657,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
); );
} }
ExtractorDiff::Added(input) | ExtractorDiff::Updated(input) => { ExtractorDiff::Added(input) | ExtractorDiff::Updated(input) => {
*extracted = true;
session.request_embedding( session.request_embedding(
metadata, metadata,
input, input,
@ -673,13 +669,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
Result::Ok(()) Result::Ok(())
}, },
)?; )?;
self.set_status( self.set_status(docid, old_is_user_provided, true, false, true);
docid,
old_is_user_provided,
true,
old_is_user_provided & !*extracted,
true,
);
} }
ChunkType::DocumentTemplate { document_template, session } => { ChunkType::DocumentTemplate { document_template, session } => {
let doc_alloc = session.doc_alloc(); let doc_alloc = session.doc_alloc();
@ -732,7 +722,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
where where
'a: 'doc, 'a: 'doc,
{ {
let extracted = match &mut self.kind { match &mut self.kind {
ChunkType::DocumentTemplate { document_template, session } => { ChunkType::DocumentTemplate { document_template, session } => {
let doc_alloc = session.doc_alloc(); let doc_alloc = session.doc_alloc();
let ex = DocumentTemplateExtractor::new( let ex = DocumentTemplateExtractor::new(
@ -785,7 +775,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
docid, docid,
old_is_user_provided, old_is_user_provided,
old_must_regenerate, old_must_regenerate,
old_is_user_provided && !extracted, false,
new_must_regenerate, new_must_regenerate,
); );
@ -968,7 +958,7 @@ fn update_autogenerated<'doc, 'a: 'doc, 'b, E, OD, ND>(
old_must_regenerate: bool, old_must_regenerate: bool,
session: &mut EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, E::Input>, session: &mut EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, E::Input>,
unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>, unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
) -> Result<bool> ) -> Result<()>
where where
OD: Document<'doc> + Debug, OD: Document<'doc> + Debug,
ND: Document<'doc> + Debug, ND: Document<'doc> + Debug,
@ -976,7 +966,6 @@ where
E::Input: Input, E::Input: Input,
crate::Error: From<E::Error>, crate::Error: From<E::Error>,
{ {
let mut extracted = false;
for extractor in extractors { for extractor in extractors {
let new_rendered = extractor.extract(&new_document, meta)?; let new_rendered = extractor.extract(&new_document, meta)?;
let must_regenerate = if !old_must_regenerate { let must_regenerate = if !old_must_regenerate {
@ -995,7 +984,6 @@ where
}; };
if must_regenerate { if must_regenerate {
extracted = true;
let metadata = let metadata =
Metadata { docid, external_docid, extractor_id: extractor.extractor_id() }; Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
@ -1011,7 +999,7 @@ where
} }
} }
Ok(extracted) Ok(())
} }
fn insert_autogenerated<'a, 'b, E, D: Document<'a> + Debug>( fn insert_autogenerated<'a, 'b, E, D: Document<'a> + Debug>(