From fd60a39f1c4d56941cb3cd3f58d8590096458545 Mon Sep 17 00:00:00 2001 From: f3r10 Date: Mon, 17 Oct 2022 06:51:04 -0500 Subject: [PATCH] Format code --- milli/src/heed_codec/mod.rs | 3 ++- milli/src/index.rs | 10 +++++++--- .../extract/extract_docid_word_positions.rs | 3 ++- .../src/update/index_documents/extract/mod.rs | 18 ++++++++++-------- .../src/update/index_documents/typed_chunk.rs | 11 +++++++---- 5 files changed, 28 insertions(+), 17 deletions(-) diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs index 2ac130f48..f3ca5f0d1 100644 --- a/milli/src/heed_codec/mod.rs +++ b/milli/src/heed_codec/mod.rs @@ -5,10 +5,10 @@ mod field_id_word_count_codec; mod obkv_codec; mod roaring_bitmap; mod roaring_bitmap_length; +mod script_language_codec; mod str_beu32_codec; mod str_ref; mod str_str_u8_codec; -mod script_language_codec; pub use byte_slice_ref::ByteSliceRefCodec; pub use str_ref::StrRefCodec; @@ -20,6 +20,7 @@ pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, Roar pub use self::roaring_bitmap_length::{ BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec, }; +pub use self::script_language_codec::ScriptLanguageCodec; pub use self::str_beu32_codec::StrBEU32Codec; pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; pub use self::script_language_codec::ScriptLanguageCodec; diff --git a/milli/src/index.rs b/milli/src/index.rs index dc9cb7994..ef26fc305 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -15,12 +15,12 @@ use time::OffsetDateTime; use crate::error::{InternalError, UserError}; use crate::facet::FacetType; use crate::fields_ids_map::FieldsIdsMap; -use crate::heed_codec::ScriptLanguageCodec; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec, OrderedF64Codec, }; use crate::heed_codec::StrRefCodec; +use crate::heed_codec::ScriptLanguageCodec; use crate::{ default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, @@ -125,7 +125,7 @@ pub struct Index { /// Maps the position of a word prefix with all the docids where this prefix appears. pub word_prefix_position_docids: Database, - /// Maps the script and language with all the docids that corresponds to it. + /// Maps the script and language with all the docids that corresponds to it. pub script_language_docids: Database, /// Maps the facet field id and the docids for which this field exists @@ -1198,7 +1198,11 @@ impl Index { /* script language docids */ /// Retrieve all the documents ids that correspond with (Script, Language) key, `None` if it is any. - pub fn script_language_documents_ids(&self, rtxn: &RoTxn, key: &(Script, Language)) -> heed::Result> { + pub fn script_language_documents_ids( + &self, + rtxn: &RoTxn, + key: &(Script, Language), + ) -> heed::Result> { let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?; let doc_ids = self.script_language_docids.get(rtxn, key)?; Ok(doc_ids.map(|ids| ids - soft_deleted_documents)) diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 66b2c768b..8a9f7e04f 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -95,7 +95,8 @@ pub fn extract_docid_word_positions( } } - sorter_into_reader(docid_word_positions_sorter, indexer).map(|reader| (documents_ids, reader, script_language_pair)) + sorter_into_reader(docid_word_positions_sorter, indexer) + .map(|reader| (documents_ids, reader, script_language_pair)) } /// Transform a JSON value into a string that can be indexed. diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 540b8993b..f38bdd497 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -257,13 +257,14 @@ fn send_and_extract_flattened_documents_data( let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) = rayon::join( || { - let (documents_ids, docid_word_positions_chunk, script_language_pair) = extract_docid_word_positions( - flattened_documents_chunk.clone(), - indexer, - searchable_fields, - stop_words.as_ref(), - max_positions_per_attributes, - )?; + let (documents_ids, docid_word_positions_chunk, script_language_pair) = + extract_docid_word_positions( + flattened_documents_chunk.clone(), + indexer.clone(), + searchable_fields, + stop_words.as_ref(), + max_positions_per_attributes, + )?; // send documents_ids to DB writer let _ = lmdb_writer_sx.send(Ok(TypedChunk::NewDocumentsIds(documents_ids))); @@ -274,7 +275,8 @@ fn send_and_extract_flattened_documents_data( let _ = lmdb_writer_sx .send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone()))); - let _ = lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair))); + let _ = + lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair))); Ok(docid_word_positions_chunk) }, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 920971eec..35f09c051 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -18,7 +18,10 @@ use super::{ClonableMmap, MergeFn}; use crate::facet::FacetType; use crate::update::facet::FacetsUpdate; use crate::update::index_documents::helpers::as_cloneable_grenad; -use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result, lat_lng_to_xyz}; +use crate::{ + lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, + Result, +}; pub(crate) enum TypedChunk { DocidWordPositions(grenad::Reader), @@ -37,7 +40,7 @@ pub(crate) enum TypedChunk { FieldIdFacetNumberDocids(grenad::Reader), FieldIdFacetExistsDocids(grenad::Reader), GeoPoints(grenad::Reader), - ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>) + ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>), } /// Write typed chunk in the corresponding LMDB database of the provided index. @@ -224,11 +227,11 @@ pub(crate) fn write_typed_chunk_into_index( let merged_db_values = RoaringBitmap::deserialize_from(&buffer[..])?; merged_db_values } - None => value + None => value, }; index.script_language_docids.put(wtxn, &key, &final_value)?; } - } + } } Ok((RoaringBitmap::new(), is_merged_database))