mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 05:24:32 +01:00
Fetch the compression dictionary only once to decompress documents
This commit is contained in:
parent
c1dd489adc
commit
7d75988a53
@ -5,6 +5,7 @@ use bumparaw_collections::RawMap;
|
|||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use rustc_hash::FxBuildHasher;
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
use zstd::dict::DecoderDictionary;
|
||||||
|
|
||||||
use super::vector_document::VectorDocument;
|
use super::vector_document::VectorDocument;
|
||||||
use super::{KvReaderFieldId, KvWriterFieldId};
|
use super::{KvReaderFieldId, KvWriterFieldId};
|
||||||
@ -130,12 +131,12 @@ impl<'t, Mapper: FieldIdMapper> DocumentFromDb<'t, Mapper> {
|
|||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
db_fields_ids_map: &'t Mapper,
|
db_fields_ids_map: &'t Mapper,
|
||||||
|
db_document_decompression_dictionary: Option<&DecoderDictionary<'static>>,
|
||||||
doc_alloc: &'t Bump,
|
doc_alloc: &'t Bump,
|
||||||
) -> Result<Option<Self>> {
|
) -> Result<Option<Self>> {
|
||||||
match index.compressed_document(rtxn, docid)? {
|
match index.compressed_document(rtxn, docid)? {
|
||||||
Some(compressed) => {
|
Some(compressed) => {
|
||||||
/// TODO maybe give the dictionary as a parameter
|
let content = match db_document_decompression_dictionary {
|
||||||
let content = match index.document_decompression_dictionary(rtxn)? {
|
|
||||||
Some(dictionary) => compressed.decompress_into_bump(doc_alloc, &dictionary)?,
|
Some(dictionary) => compressed.decompress_into_bump(doc_alloc, &dictionary)?,
|
||||||
None => compressed.as_non_compressed(),
|
None => compressed.as_non_compressed(),
|
||||||
};
|
};
|
||||||
@ -206,10 +207,18 @@ impl<'a, 'doc, 't, Mapper: FieldIdMapper> MergedDocument<'a, 'doc, 't, Mapper> {
|
|||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
db_fields_ids_map: &'t Mapper,
|
db_fields_ids_map: &'t Mapper,
|
||||||
|
db_document_decompression_dictionary: Option<&'t DecoderDictionary<'static>>,
|
||||||
doc_alloc: &'t Bump,
|
doc_alloc: &'t Bump,
|
||||||
new_doc: DocumentFromVersions<'a, 'doc>,
|
new_doc: DocumentFromVersions<'a, 'doc>,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let db = DocumentFromDb::new(docid, rtxn, index, db_fields_ids_map, doc_alloc)?;
|
let db = DocumentFromDb::new(
|
||||||
|
docid,
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
db_fields_ids_map,
|
||||||
|
db_document_decompression_dictionary,
|
||||||
|
doc_alloc,
|
||||||
|
)?;
|
||||||
Ok(Self { new_doc, db })
|
Ok(Self { new_doc, db })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
use zstd::dict::DecoderDictionary;
|
||||||
|
|
||||||
use super::document::{
|
use super::document::{
|
||||||
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
|
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
|
||||||
@ -72,9 +73,10 @@ impl<'doc> Deletion<'doc> {
|
|||||||
rtxn: &'a RoTxn,
|
rtxn: &'a RoTxn,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
mapper: &'a Mapper,
|
mapper: &'a Mapper,
|
||||||
|
dictionary: Option<&'a DecoderDictionary<'static>>,
|
||||||
doc_alloc: &'a Bump,
|
doc_alloc: &'a Bump,
|
||||||
) -> Result<DocumentFromDb<'a, Mapper>> {
|
) -> Result<DocumentFromDb<'a, Mapper>> {
|
||||||
Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper, doc_alloc)?.ok_or(
|
Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper, dictionary, doc_alloc)?.ok_or(
|
||||||
crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
|
crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
|
||||||
)?)
|
)?)
|
||||||
}
|
}
|
||||||
@ -128,9 +130,10 @@ impl<'doc> Update<'doc> {
|
|||||||
rtxn: &'a RoTxn,
|
rtxn: &'a RoTxn,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
mapper: &'a Mapper,
|
mapper: &'a Mapper,
|
||||||
|
dictionary: Option<&'a DecoderDictionary<'static>>,
|
||||||
doc_alloc: &'a Bump,
|
doc_alloc: &'a Bump,
|
||||||
) -> Result<DocumentFromDb<'a, Mapper>> {
|
) -> Result<DocumentFromDb<'a, Mapper>> {
|
||||||
Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper, doc_alloc)?.ok_or(
|
Ok(DocumentFromDb::new(self.docid, rtxn, index, mapper, dictionary, doc_alloc)?.ok_or(
|
||||||
crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
|
crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
|
||||||
)?)
|
)?)
|
||||||
}
|
}
|
||||||
@ -140,11 +143,13 @@ impl<'doc> Update<'doc> {
|
|||||||
rtxn: &'a RoTxn,
|
rtxn: &'a RoTxn,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
mapper: &'a Mapper,
|
mapper: &'a Mapper,
|
||||||
|
dictionary: Option<&'a DecoderDictionary<'static>>,
|
||||||
doc_alloc: &'a Bump,
|
doc_alloc: &'a Bump,
|
||||||
) -> Result<VectorDocumentFromDb<'a>> {
|
) -> Result<VectorDocumentFromDb<'a>> {
|
||||||
Ok(VectorDocumentFromDb::new(self.docid, index, rtxn, mapper, doc_alloc)?.ok_or(
|
Ok(VectorDocumentFromDb::new(self.docid, index, rtxn, mapper, dictionary, doc_alloc)?
|
||||||
crate::error::UserError::UnknownInternalDocumentId { document_id: self.docid },
|
.ok_or(crate::error::UserError::UnknownInternalDocumentId {
|
||||||
)?)
|
document_id: self.docid,
|
||||||
|
})?)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn updated(&self) -> DocumentFromVersions<'_, 'doc> {
|
pub fn updated(&self) -> DocumentFromVersions<'_, 'doc> {
|
||||||
@ -156,6 +161,7 @@ impl<'doc> Update<'doc> {
|
|||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
mapper: &'t Mapper,
|
mapper: &'t Mapper,
|
||||||
|
dictionary: Option<&'t DecoderDictionary<'static>>,
|
||||||
doc_alloc: &'t Bump,
|
doc_alloc: &'t Bump,
|
||||||
) -> Result<MergedDocument<'_, 'doc, 't, Mapper>> {
|
) -> Result<MergedDocument<'_, 'doc, 't, Mapper>> {
|
||||||
if self.has_deletion {
|
if self.has_deletion {
|
||||||
@ -166,6 +172,7 @@ impl<'doc> Update<'doc> {
|
|||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
mapper,
|
mapper,
|
||||||
|
dictionary,
|
||||||
doc_alloc,
|
doc_alloc,
|
||||||
DocumentFromVersions::new(&self.new),
|
DocumentFromVersions::new(&self.new),
|
||||||
)
|
)
|
||||||
@ -182,6 +189,7 @@ impl<'doc> Update<'doc> {
|
|||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
mapper: &'t Mapper,
|
mapper: &'t Mapper,
|
||||||
|
dictionary: Option<&'t DecoderDictionary<'static>>,
|
||||||
doc_alloc: &'t Bump,
|
doc_alloc: &'t Bump,
|
||||||
) -> Result<bool> {
|
) -> Result<bool> {
|
||||||
let mut changed = false;
|
let mut changed = false;
|
||||||
@ -198,7 +206,7 @@ impl<'doc> Update<'doc> {
|
|||||||
updated_selected_field_count += 1;
|
updated_selected_field_count += 1;
|
||||||
let current = match cached_current {
|
let current = match cached_current {
|
||||||
Some(current) => current,
|
Some(current) => current,
|
||||||
None => self.current(rtxn, index, mapper, doc_alloc)?,
|
None => self.current(rtxn, index, mapper, dictionary, doc_alloc)?,
|
||||||
};
|
};
|
||||||
let current_value = current.top_level_field(key)?;
|
let current_value = current.top_level_field(key)?;
|
||||||
let Some(current_value) = current_value else {
|
let Some(current_value) = current_value else {
|
||||||
@ -228,7 +236,7 @@ impl<'doc> Update<'doc> {
|
|||||||
let has_deleted_fields = {
|
let has_deleted_fields = {
|
||||||
let current = match cached_current {
|
let current = match cached_current {
|
||||||
Some(current) => current,
|
Some(current) => current,
|
||||||
None => self.current(rtxn, index, mapper, doc_alloc)?,
|
None => self.current(rtxn, index, mapper, dictionary, doc_alloc)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut current_selected_field_count = 0;
|
let mut current_selected_field_count = 0;
|
||||||
@ -260,6 +268,7 @@ impl<'doc> Update<'doc> {
|
|||||||
rtxn: &'doc RoTxn,
|
rtxn: &'doc RoTxn,
|
||||||
index: &'doc Index,
|
index: &'doc Index,
|
||||||
mapper: &'doc Mapper,
|
mapper: &'doc Mapper,
|
||||||
|
dictionary: Option<&'doc DecoderDictionary<'static>>,
|
||||||
doc_alloc: &'doc Bump,
|
doc_alloc: &'doc Bump,
|
||||||
embedders: &'doc EmbeddingConfigs,
|
embedders: &'doc EmbeddingConfigs,
|
||||||
) -> Result<Option<MergedVectorDocument<'doc>>> {
|
) -> Result<Option<MergedVectorDocument<'doc>>> {
|
||||||
@ -277,6 +286,7 @@ impl<'doc> Update<'doc> {
|
|||||||
index,
|
index,
|
||||||
rtxn,
|
rtxn,
|
||||||
mapper,
|
mapper,
|
||||||
|
dictionary,
|
||||||
&self.new,
|
&self.new,
|
||||||
doc_alloc,
|
doc_alloc,
|
||||||
embedders,
|
embedders,
|
||||||
|
@ -82,6 +82,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> {
|
|||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
context.index,
|
context.index,
|
||||||
&context.db_fields_ids_map,
|
&context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
let geo_iter =
|
let geo_iter =
|
||||||
@ -103,6 +104,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> {
|
|||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
context.index,
|
context.index,
|
||||||
&context.db_fields_ids_map,
|
&context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
let geo_iter =
|
let geo_iter =
|
||||||
@ -131,12 +133,14 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> {
|
|||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
context.index,
|
context.index,
|
||||||
&context.db_fields_ids_map,
|
&context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
let vector_content = update.merged_vectors(
|
let vector_content = update.merged_vectors(
|
||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
context.index,
|
context.index,
|
||||||
&context.db_fields_ids_map,
|
&context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
self.embedders,
|
self.embedders,
|
||||||
)?;
|
)?;
|
||||||
|
@ -79,7 +79,13 @@ impl FacetedDocidsExtractor {
|
|||||||
let res = match document_change {
|
let res = match document_change {
|
||||||
DocumentChange::Deletion(inner) => extract_document_facets(
|
DocumentChange::Deletion(inner) => extract_document_facets(
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.current(rtxn, index, context.db_fields_ids_map, &context.doc_alloc)?,
|
inner.current(
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
|
&context.doc_alloc,
|
||||||
|
)?,
|
||||||
inner.external_document_id(),
|
inner.external_document_id(),
|
||||||
new_fields_ids_map.deref_mut(),
|
new_fields_ids_map.deref_mut(),
|
||||||
&mut |fid, depth, value| {
|
&mut |fid, depth, value| {
|
||||||
@ -102,6 +108,7 @@ impl FacetedDocidsExtractor {
|
|||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
context.db_fields_ids_map,
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)? {
|
)? {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
@ -109,7 +116,13 @@ impl FacetedDocidsExtractor {
|
|||||||
|
|
||||||
extract_document_facets(
|
extract_document_facets(
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.current(rtxn, index, context.db_fields_ids_map, &context.doc_alloc)?,
|
inner.current(
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
|
&context.doc_alloc,
|
||||||
|
)?,
|
||||||
inner.external_document_id(),
|
inner.external_document_id(),
|
||||||
new_fields_ids_map.deref_mut(),
|
new_fields_ids_map.deref_mut(),
|
||||||
&mut |fid, depth, value| {
|
&mut |fid, depth, value| {
|
||||||
@ -129,7 +142,13 @@ impl FacetedDocidsExtractor {
|
|||||||
|
|
||||||
extract_document_facets(
|
extract_document_facets(
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.merged(rtxn, index, context.db_fields_ids_map, &context.doc_alloc)?,
|
inner.merged(
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
|
&context.doc_alloc,
|
||||||
|
)?,
|
||||||
inner.external_document_id(),
|
inner.external_document_id(),
|
||||||
new_fields_ids_map.deref_mut(),
|
new_fields_ids_map.deref_mut(),
|
||||||
&mut |fid, depth, value| {
|
&mut |fid, depth, value| {
|
||||||
|
@ -158,6 +158,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
|
|||||||
let index = context.index;
|
let index = context.index;
|
||||||
let max_memory = self.grenad_parameters.max_memory_by_thread();
|
let max_memory = self.grenad_parameters.max_memory_by_thread();
|
||||||
let db_fields_ids_map = context.db_fields_ids_map;
|
let db_fields_ids_map = context.db_fields_ids_map;
|
||||||
|
let db_document_decompression_dictionary = context.db_document_decompression_dictionary;
|
||||||
let doc_alloc = &context.doc_alloc;
|
let doc_alloc = &context.doc_alloc;
|
||||||
let mut data_ref = context.data.borrow_mut_or_yield();
|
let mut data_ref = context.data.borrow_mut_or_yield();
|
||||||
|
|
||||||
@ -174,7 +175,13 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
|
|||||||
DocumentChange::Deletion(deletion) => {
|
DocumentChange::Deletion(deletion) => {
|
||||||
let docid = deletion.docid();
|
let docid = deletion.docid();
|
||||||
let external_id = deletion.external_document_id();
|
let external_id = deletion.external_document_id();
|
||||||
let current = deletion.current(rtxn, index, db_fields_ids_map, doc_alloc)?;
|
let current = deletion.current(
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
db_fields_ids_map,
|
||||||
|
db_document_decompression_dictionary,
|
||||||
|
doc_alloc,
|
||||||
|
)?;
|
||||||
let current_geo = current
|
let current_geo = current
|
||||||
.geo_field()?
|
.geo_field()?
|
||||||
.map(|geo| extract_geo_coordinates(external_id, geo))
|
.map(|geo| extract_geo_coordinates(external_id, geo))
|
||||||
@ -189,7 +196,13 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
DocumentChange::Update(update) => {
|
DocumentChange::Update(update) => {
|
||||||
let current = update.current(rtxn, index, db_fields_ids_map, doc_alloc)?;
|
let current = update.current(
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
db_fields_ids_map,
|
||||||
|
db_document_decompression_dictionary,
|
||||||
|
doc_alloc,
|
||||||
|
)?;
|
||||||
let external_id = update.external_document_id();
|
let external_id = update.external_document_id();
|
||||||
let docid = update.docid();
|
let docid = update.docid();
|
||||||
|
|
||||||
|
@ -339,7 +339,13 @@ impl WordDocidsExtractors {
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
document_tokenizer.tokenize_document(
|
document_tokenizer.tokenize_document(
|
||||||
inner.current(rtxn, index, context.db_fields_ids_map, &context.doc_alloc)?,
|
inner.current(
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
|
&context.doc_alloc,
|
||||||
|
)?,
|
||||||
new_fields_ids_map,
|
new_fields_ids_map,
|
||||||
&mut token_fn,
|
&mut token_fn,
|
||||||
)?;
|
)?;
|
||||||
@ -350,6 +356,7 @@ impl WordDocidsExtractors {
|
|||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
context.index,
|
context.index,
|
||||||
context.db_fields_ids_map,
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)? {
|
)? {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
@ -366,7 +373,13 @@ impl WordDocidsExtractors {
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
document_tokenizer.tokenize_document(
|
document_tokenizer.tokenize_document(
|
||||||
inner.current(rtxn, index, context.db_fields_ids_map, &context.doc_alloc)?,
|
inner.current(
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
|
&context.doc_alloc,
|
||||||
|
)?,
|
||||||
new_fields_ids_map,
|
new_fields_ids_map,
|
||||||
&mut token_fn,
|
&mut token_fn,
|
||||||
)?;
|
)?;
|
||||||
@ -382,7 +395,13 @@ impl WordDocidsExtractors {
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
document_tokenizer.tokenize_document(
|
document_tokenizer.tokenize_document(
|
||||||
inner.merged(rtxn, index, context.db_fields_ids_map, &context.doc_alloc)?,
|
inner.merged(
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
|
&context.doc_alloc,
|
||||||
|
)?,
|
||||||
new_fields_ids_map,
|
new_fields_ids_map,
|
||||||
&mut token_fn,
|
&mut token_fn,
|
||||||
)?;
|
)?;
|
||||||
|
@ -58,8 +58,13 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
let docid = document_change.docid();
|
let docid = document_change.docid();
|
||||||
match document_change {
|
match document_change {
|
||||||
DocumentChange::Deletion(inner) => {
|
DocumentChange::Deletion(inner) => {
|
||||||
let document =
|
let document = inner.current(
|
||||||
inner.current(rtxn, index, context.db_fields_ids_map, &context.doc_alloc)?;
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
|
&context.doc_alloc,
|
||||||
|
)?;
|
||||||
process_document_tokens(
|
process_document_tokens(
|
||||||
document,
|
document,
|
||||||
document_tokenizer,
|
document_tokenizer,
|
||||||
@ -76,13 +81,19 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
context.db_fields_ids_map,
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)? {
|
)? {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let document =
|
let document = inner.current(
|
||||||
inner.current(rtxn, index, context.db_fields_ids_map, &context.doc_alloc)?;
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
|
&context.doc_alloc,
|
||||||
|
)?;
|
||||||
process_document_tokens(
|
process_document_tokens(
|
||||||
document,
|
document,
|
||||||
document_tokenizer,
|
document_tokenizer,
|
||||||
@ -92,8 +103,13 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
del_word_pair_proximity.push(((w1, w2), prox));
|
del_word_pair_proximity.push(((w1, w2), prox));
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
let document =
|
let document = inner.merged(
|
||||||
inner.merged(rtxn, index, context.db_fields_ids_map, &context.doc_alloc)?;
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
|
&context.doc_alloc,
|
||||||
|
)?;
|
||||||
process_document_tokens(
|
process_document_tokens(
|
||||||
document,
|
document,
|
||||||
document_tokenizer,
|
document_tokenizer,
|
||||||
|
@ -97,6 +97,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
|
|||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
context.index,
|
context.index,
|
||||||
context.db_fields_ids_map,
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?;
|
)?;
|
||||||
let new_vectors = update.updated_vectors(&context.doc_alloc, self.embedders)?;
|
let new_vectors = update.updated_vectors(&context.doc_alloc, self.embedders)?;
|
||||||
@ -135,6 +136,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
|
|||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
context.index,
|
context.index,
|
||||||
context.db_fields_ids_map,
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?,
|
)?,
|
||||||
context.new_fields_ids_map,
|
context.new_fields_ids_map,
|
||||||
@ -146,6 +148,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
|
|||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
context.index,
|
context.index,
|
||||||
context.db_fields_ids_map,
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?,
|
)?,
|
||||||
context.new_fields_ids_map,
|
context.new_fields_ids_map,
|
||||||
@ -167,6 +170,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
|
|||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
context.index,
|
context.index,
|
||||||
context.db_fields_ids_map,
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?,
|
)?,
|
||||||
context.new_fields_ids_map,
|
context.new_fields_ids_map,
|
||||||
@ -178,6 +182,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
|
|||||||
&context.rtxn,
|
&context.rtxn,
|
||||||
context.index,
|
context.index,
|
||||||
context.db_fields_ids_map,
|
context.db_fields_ids_map,
|
||||||
|
context.db_document_decompression_dictionary,
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
)?,
|
)?,
|
||||||
context.new_fields_ids_map,
|
context.new_fields_ids_map,
|
||||||
|
@ -7,6 +7,7 @@ use heed::RoTxn;
|
|||||||
use rustc_hash::FxBuildHasher;
|
use rustc_hash::FxBuildHasher;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_json::value::RawValue;
|
use serde_json::value::RawValue;
|
||||||
|
use zstd::dict::DecoderDictionary;
|
||||||
|
|
||||||
use super::document::{Document, DocumentFromDb, DocumentFromVersions, Versions};
|
use super::document::{Document, DocumentFromDb, DocumentFromVersions, Versions};
|
||||||
use super::indexer::de::DeserrRawValue;
|
use super::indexer::de::DeserrRawValue;
|
||||||
@ -96,9 +97,17 @@ impl<'t> VectorDocumentFromDb<'t> {
|
|||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
rtxn: &'t RoTxn,
|
rtxn: &'t RoTxn,
|
||||||
db_fields_ids_map: &'t Mapper,
|
db_fields_ids_map: &'t Mapper,
|
||||||
|
db_document_decompression_dictionary: Option<&'t DecoderDictionary<'static>>,
|
||||||
doc_alloc: &'t Bump,
|
doc_alloc: &'t Bump,
|
||||||
) -> Result<Option<Self>> {
|
) -> Result<Option<Self>> {
|
||||||
let Some(document) = DocumentFromDb::new(docid, rtxn, index, db_fields_ids_map, doc_alloc)?
|
let Some(document) = DocumentFromDb::new(
|
||||||
|
docid,
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
db_fields_ids_map,
|
||||||
|
db_document_decompression_dictionary,
|
||||||
|
doc_alloc,
|
||||||
|
)?
|
||||||
else {
|
else {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
@ -283,11 +292,19 @@ impl<'doc> MergedVectorDocument<'doc> {
|
|||||||
index: &'doc Index,
|
index: &'doc Index,
|
||||||
rtxn: &'doc RoTxn,
|
rtxn: &'doc RoTxn,
|
||||||
db_fields_ids_map: &'doc Mapper,
|
db_fields_ids_map: &'doc Mapper,
|
||||||
|
db_document_decompression_dictionary: Option<&'doc DecoderDictionary<'static>>,
|
||||||
versions: &Versions<'doc>,
|
versions: &Versions<'doc>,
|
||||||
doc_alloc: &'doc Bump,
|
doc_alloc: &'doc Bump,
|
||||||
embedders: &'doc EmbeddingConfigs,
|
embedders: &'doc EmbeddingConfigs,
|
||||||
) -> Result<Option<Self>> {
|
) -> Result<Option<Self>> {
|
||||||
let db = VectorDocumentFromDb::new(docid, index, rtxn, db_fields_ids_map, doc_alloc)?;
|
let db = VectorDocumentFromDb::new(
|
||||||
|
docid,
|
||||||
|
index,
|
||||||
|
rtxn,
|
||||||
|
db_fields_ids_map,
|
||||||
|
db_document_decompression_dictionary,
|
||||||
|
doc_alloc,
|
||||||
|
)?;
|
||||||
let new_doc =
|
let new_doc =
|
||||||
VectorDocumentFromVersions::new(external_document_id, versions, doc_alloc, embedders)?;
|
VectorDocumentFromVersions::new(external_document_id, versions, doc_alloc, embedders)?;
|
||||||
Ok(if db.is_none() && new_doc.is_none() { None } else { Some(Self { new_doc, db }) })
|
Ok(if db.is_none() && new_doc.is_none() { None } else { Some(Self { new_doc, db }) })
|
||||||
|
Loading…
x
Reference in New Issue
Block a user