Format code

This commit is contained in:
f3r10 2022-10-17 06:51:04 -05:00 committed by ManyTheFish
parent 369c05732e
commit fd60a39f1c
5 changed files with 28 additions and 17 deletions

View File

@ -5,10 +5,10 @@ mod field_id_word_count_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
mod script_language_codec;
mod str_beu32_codec;
mod str_ref;
mod str_str_u8_codec;
mod script_language_codec;
pub use byte_slice_ref::ByteSliceRefCodec;
pub use str_ref::StrRefCodec;
@ -20,6 +20,7 @@ pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, Roar
pub use self::roaring_bitmap_length::{
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};
pub use self::script_language_codec::ScriptLanguageCodec;
pub use self::str_beu32_codec::StrBEU32Codec;
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
pub use self::script_language_codec::ScriptLanguageCodec;

View File

@ -15,12 +15,12 @@ use time::OffsetDateTime;
use crate::error::{InternalError, UserError};
use crate::facet::FacetType;
use crate::fields_ids_map::FieldsIdsMap;
use crate::heed_codec::ScriptLanguageCodec;
use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::StrRefCodec;
use crate::heed_codec::ScriptLanguageCodec;
use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
@ -125,7 +125,7 @@ pub struct Index {
/// Maps the position of a word prefix with all the docids where this prefix appears.
pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
/// Maps the script and language with all the docids that correspond to it.
/// Maps the script and language with all the docids that correspond to it.
pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>,
/// Maps the facet field id and the docids for which this field exists
@ -1198,7 +1198,11 @@ impl Index {
/* script language docids */
/// Retrieve all the document ids that correspond to the given (Script, Language) key, `None` if there is no entry for that key.
pub fn script_language_documents_ids(&self, rtxn: &RoTxn, key: &(Script, Language)) -> heed::Result<Option<RoaringBitmap>> {
pub fn script_language_documents_ids(
&self,
rtxn: &RoTxn,
key: &(Script, Language),
) -> heed::Result<Option<RoaringBitmap>> {
let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
let doc_ids = self.script_language_docids.get(rtxn, key)?;
Ok(doc_ids.map(|ids| ids - soft_deleted_documents))

View File

@ -95,7 +95,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
}
}
sorter_into_reader(docid_word_positions_sorter, indexer).map(|reader| (documents_ids, reader, script_language_pair))
sorter_into_reader(docid_word_positions_sorter, indexer)
.map(|reader| (documents_ids, reader, script_language_pair))
}
/// Transform a JSON value into a string that can be indexed.

View File

@ -257,13 +257,14 @@ fn send_and_extract_flattened_documents_data(
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join(
|| {
let (documents_ids, docid_word_positions_chunk, script_language_pair) = extract_docid_word_positions(
flattened_documents_chunk.clone(),
indexer,
searchable_fields,
stop_words.as_ref(),
max_positions_per_attributes,
)?;
let (documents_ids, docid_word_positions_chunk, script_language_pair) =
extract_docid_word_positions(
flattened_documents_chunk.clone(),
indexer.clone(),
searchable_fields,
stop_words.as_ref(),
max_positions_per_attributes,
)?;
// send documents_ids to DB writer
let _ = lmdb_writer_sx.send(Ok(TypedChunk::NewDocumentsIds(documents_ids)));
@ -274,7 +275,8 @@ fn send_and_extract_flattened_documents_data(
let _ = lmdb_writer_sx
.send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone())));
let _ = lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair)));
let _ =
lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair)));
Ok(docid_word_positions_chunk)
},

View File

@ -18,7 +18,10 @@ use super::{ClonableMmap, MergeFn};
use crate::facet::FacetType;
use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::as_cloneable_grenad;
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result, lat_lng_to_xyz};
use crate::{
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
Result,
};
pub(crate) enum TypedChunk {
DocidWordPositions(grenad::Reader<CursorClonableMmap>),
@ -37,7 +40,7 @@ pub(crate) enum TypedChunk {
FieldIdFacetNumberDocids(grenad::Reader<File>),
FieldIdFacetExistsDocids(grenad::Reader<File>),
GeoPoints(grenad::Reader<File>),
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>)
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
}
/// Write typed chunk in the corresponding LMDB database of the provided index.
@ -224,11 +227,11 @@ pub(crate) fn write_typed_chunk_into_index(
let merged_db_values = RoaringBitmap::deserialize_from(&buffer[..])?;
merged_db_values
}
None => value
None => value,
};
index.script_language_docids.put(wtxn, &key, &final_value)?;
}
}
}
}
Ok((RoaringBitmap::new(), is_merged_database))