mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 12:27:13 +02:00
new extractor bugfixes:
- fix old_has_fragments
- new_is_user_provided is always false when generating fragments, even if no fragment ever matches
This commit is contained in:
parent
dfe0c8664e
commit
90e6b6416f
1 changed file with 6 additions and 18 deletions
|
@ -357,7 +357,7 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
|
||||||
chunks.is_user_provided_must_regenerate(document.docid());
|
chunks.is_user_provided_must_regenerate(document.docid());
|
||||||
let old_has_fragments = old_embedders
|
let old_has_fragments = old_embedders
|
||||||
.get(embedder_name)
|
.get(embedder_name)
|
||||||
.map(|embedder| embedder.fragments().is_empty())
|
.map(|embedder| !embedder.fragments().is_empty())
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
|
|
||||||
let new_has_fragments = chunks.has_fragments();
|
let new_has_fragments = chunks.has_fragments();
|
||||||
|
@ -628,9 +628,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||||
session.on_embed_mut().clear_vectors(docid);
|
session.on_embed_mut().clear_vectors(docid);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut extracted = false;
|
|
||||||
let extracted = &mut extracted;
|
|
||||||
|
|
||||||
settings_delta.try_for_each_fragment_diff(
|
settings_delta.try_for_each_fragment_diff(
|
||||||
session.embedder_name(),
|
session.embedder_name(),
|
||||||
|fragment_diff| {
|
|fragment_diff| {
|
||||||
|
@ -660,7 +657,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
ExtractorDiff::Added(input) | ExtractorDiff::Updated(input) => {
|
ExtractorDiff::Added(input) | ExtractorDiff::Updated(input) => {
|
||||||
*extracted = true;
|
|
||||||
session.request_embedding(
|
session.request_embedding(
|
||||||
metadata,
|
metadata,
|
||||||
input,
|
input,
|
||||||
|
@ -673,13 +669,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||||
Result::Ok(())
|
Result::Ok(())
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
self.set_status(
|
self.set_status(docid, old_is_user_provided, true, false, true);
|
||||||
docid,
|
|
||||||
old_is_user_provided,
|
|
||||||
true,
|
|
||||||
old_is_user_provided & !*extracted,
|
|
||||||
true,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
ChunkType::DocumentTemplate { document_template, session } => {
|
ChunkType::DocumentTemplate { document_template, session } => {
|
||||||
let doc_alloc = session.doc_alloc();
|
let doc_alloc = session.doc_alloc();
|
||||||
|
@ -732,7 +722,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||||
where
|
where
|
||||||
'a: 'doc,
|
'a: 'doc,
|
||||||
{
|
{
|
||||||
let extracted = match &mut self.kind {
|
match &mut self.kind {
|
||||||
ChunkType::DocumentTemplate { document_template, session } => {
|
ChunkType::DocumentTemplate { document_template, session } => {
|
||||||
let doc_alloc = session.doc_alloc();
|
let doc_alloc = session.doc_alloc();
|
||||||
let ex = DocumentTemplateExtractor::new(
|
let ex = DocumentTemplateExtractor::new(
|
||||||
|
@ -785,7 +775,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||||
docid,
|
docid,
|
||||||
old_is_user_provided,
|
old_is_user_provided,
|
||||||
old_must_regenerate,
|
old_must_regenerate,
|
||||||
old_is_user_provided && !extracted,
|
false,
|
||||||
new_must_regenerate,
|
new_must_regenerate,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -968,7 +958,7 @@ fn update_autogenerated<'doc, 'a: 'doc, 'b, E, OD, ND>(
|
||||||
old_must_regenerate: bool,
|
old_must_regenerate: bool,
|
||||||
session: &mut EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, E::Input>,
|
session: &mut EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, E::Input>,
|
||||||
unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
|
unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
|
||||||
) -> Result<bool>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
OD: Document<'doc> + Debug,
|
OD: Document<'doc> + Debug,
|
||||||
ND: Document<'doc> + Debug,
|
ND: Document<'doc> + Debug,
|
||||||
|
@ -976,7 +966,6 @@ where
|
||||||
E::Input: Input,
|
E::Input: Input,
|
||||||
crate::Error: From<E::Error>,
|
crate::Error: From<E::Error>,
|
||||||
{
|
{
|
||||||
let mut extracted = false;
|
|
||||||
for extractor in extractors {
|
for extractor in extractors {
|
||||||
let new_rendered = extractor.extract(&new_document, meta)?;
|
let new_rendered = extractor.extract(&new_document, meta)?;
|
||||||
let must_regenerate = if !old_must_regenerate {
|
let must_regenerate = if !old_must_regenerate {
|
||||||
|
@ -995,7 +984,6 @@ where
|
||||||
};
|
};
|
||||||
|
|
||||||
if must_regenerate {
|
if must_regenerate {
|
||||||
extracted = true;
|
|
||||||
let metadata =
|
let metadata =
|
||||||
Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
|
Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
|
||||||
|
|
||||||
|
@ -1011,7 +999,7 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(extracted)
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn insert_autogenerated<'a, 'b, E, D: Document<'a> + Debug>(
|
fn insert_autogenerated<'a, 'b, E, D: Document<'a> + Debug>(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue