Move the Object type into the lib.rs file and use it everywhere

2025-07-04 04:17:10 +02:00 · 2022-06-15 15:36:27 +02:00 · 2022-06-15 15:36:27 +02:00 · fcfc4caf8c
commit fcfc4caf8c
parent 0146175fe6
11 changed files with 43 additions and 51 deletions
--- a/benchmarks/benches/utils.rs
+++ b/benchmarks/benches/utils.rs
@ -11,8 +11,8 @@ use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use milli::update::{
    IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
 };
-use milli::{Filter, Index};
-use serde_json::{Map, Value};
+use milli::{Filter, Index, Object};
+use serde_json::Value;

 pub struct Conf<'a> {
    /// where we are going to create our database.mmdb directory
@ -96,12 +96,10 @@ pub fn base_setup(conf: &Conf) -> Index {
        update_method: IndexDocumentsMethod::ReplaceDocuments,
        ..Default::default()
    };
-    let mut builder =
-        IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
+    let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
    let documents = documents_from(conf.dataset, conf.dataset_format);
-
-    builder.add_documents(documents).unwrap();
-
+    let (builder, user_error) = builder.add_documents(documents).unwrap();
+    user_error.unwrap();
    builder.execute().unwrap();
    wtxn.commit().unwrap();

@ -156,7 +154,7 @@ pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<imp
 fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());

-    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
+    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
        let object = result?;
        documents.append_json_object(&object)?;
    }
@ -166,7 +164,7 @@ fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {

 fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());
-    let list: Vec<Map<String, Value>> = serde_json::from_reader(reader)?;
+    let list: Vec<Object> = serde_json::from_reader(reader)?;

    for object in list {
        documents.append_json_object(&object)?;
@ -221,14 +219,14 @@ impl<R: Read> CSVDocumentDeserializer<R> {
 }

 impl<R: Read> Iterator for CSVDocumentDeserializer<R> {
-    type Item = anyhow::Result<Map<String, Value>>;
+    type Item = anyhow::Result<Object>;

    fn next(&mut self) -> Option<Self::Item> {
        let csv_document = self.documents.next()?;

        match csv_document {
            Ok(csv_document) => {
-                let mut document = Map::new();
+                let mut document = Object::new();

                for ((field_name, field_type), value) in
                    self.headers.iter().zip(csv_document.into_iter())
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@ -13,8 +13,7 @@ use milli::update::UpdateIndexingStep::{
    ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
 };
 use milli::update::{self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig};
-use milli::Index;
-use serde_json::{Map, Value};
+use milli::{Index, Object};
 use structopt::StructOpt;

 #[cfg(target_os = "linux")]
@ -325,7 +324,7 @@ fn documents_from_jsonl(reader: impl Read) -> Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());
    let reader = BufReader::new(reader);

-    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
+    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
        let object = result?;
        documents.append_json_object(&object)?;
    }
@ -335,7 +334,7 @@ fn documents_from_jsonl(reader: impl Read) -> Result<Vec<u8>> {

 fn documents_from_json(reader: impl Read) -> Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());
-    let list: Vec<Map<String, Value>> = serde_json::from_reader(reader)?;
+    let list: Vec<Object> = serde_json::from_reader(reader)?;

    for object in list {
        documents.append_json_object(&object)?;
@ -424,7 +423,7 @@ impl Search {
        filter: &Option<String>,
        offset: &Option<usize>,
        limit: &Option<usize>,
-    ) -> Result<Vec<Map<String, Value>>> {
+    ) -> Result<Vec<Object>> {
        let txn = index.read_txn()?;
        let mut search = index.search(&txn);

--- a/http-ui/src/main.rs
+++ b/http-ui/src/main.rs
@ -26,11 +26,11 @@ use milli::update::{
 };
 use milli::{
    obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, FormatOptions, Index,
-    MatcherBuilder, SearchResult, SortError,
+    MatcherBuilder, Object, SearchResult, SortError,
 };
 use once_cell::sync::OnceCell;
 use serde::{Deserialize, Serialize};
-use serde_json::{Map, Value};
+use serde_json::Value;
 use structopt::StructOpt;
 use tokio::fs::File as TFile;
 use tokio::io::AsyncWriteExt;
@ -169,11 +169,7 @@ impl<'s, A: AsRef<[u8]>> Highlighter<'s, A> {
        }
    }

-    fn highlight_record(
-        &self,
-        object: &mut Map<String, Value>,
-        attributes_to_highlight: &HashSet<String>,
-    ) {
+    fn highlight_record(&self, object: &mut Object, attributes_to_highlight: &HashSet<String>) {
        // TODO do we need to create a string for element that are not and needs to be highlight?
        for (key, value) in object.iter_mut() {
            if attributes_to_highlight.contains(key) {
@ -708,7 +704,7 @@ async fn main() -> anyhow::Result<()> {
    #[derive(Debug, Serialize)]
    #[serde(rename_all = "camelCase")]
    struct Answer {
-        documents: Vec<Map<String, Value>>,
+        documents: Vec<Object>,
        number_of_candidates: u64,
        facets: BTreeMap<String, BTreeMap<String, u64>>,
    }
@ -1036,7 +1032,7 @@ fn documents_from_jsonl(reader: impl Read) -> anyhow::Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());
    let reader = BufReader::new(reader);

-    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
+    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
        let object = result?;
        documents.append_json_object(&object)?;
    }
@ -1046,7 +1042,7 @@ fn documents_from_jsonl(reader: impl Read) -> anyhow::Result<Vec<u8>> {

 fn documents_from_json(reader: impl Read) -> anyhow::Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());
-    let list: Vec<Map<String, Value>> = serde_json::from_reader(reader)?;
+    let list: Vec<Object> = serde_json::from_reader(reader)?;

    for object in list {
        documents.append_json_object(&object)?;
--- a/milli/fuzz/fuzz_targets/indexing.rs
+++ b/milli/fuzz/fuzz_targets/indexing.rs
@ -21,7 +21,7 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
    let writer = BufWriter::new(writer);
    let mut builder = DocumentsBatchBuilder::new(writer);

-    let values: Vec<Map<String, Value>> = serde_json::from_reader(input)?;
+    let values: Vec<Object> = serde_json::from_reader(input)?;
    if builder.documents_count() == 0 {
        bail!("Empty payload");
    }
--- a/milli/src/documents/builder.rs
+++ b/milli/src/documents/builder.rs
@ -1,9 +1,10 @@
 use std::io::{self, Write};

 use grenad::{CompressionType, WriterBuilder};
-use serde_json::{to_writer, Map, Value};
+use serde_json::{to_writer, Value};

 use super::{DocumentsBatchIndex, Error, DOCUMENTS_BATCH_INDEX_KEY};
+use crate::Object;

 /// The `DocumentsBatchBuilder` provides a way to build a documents batch in the intermediary
 /// format used by milli.
@ -55,7 +56,7 @@ impl<W: Write> DocumentsBatchBuilder<W> {
    }

    /// Appends a new JSON object into the batch and updates the `DocumentsBatchIndex` accordingly.
-    pub fn append_json_object(&mut self, object: &Map<String, Value>) -> io::Result<()> {
+    pub fn append_json_object(&mut self, object: &Object) -> io::Result<()> {
        // Make sure that we insert the fields ids in order as the obkv writer has this requirement.
        let mut fields_ids: Vec<_> = object.keys().map(|k| self.fields_index.insert(&k)).collect();
        fields_ids.sort_unstable();
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@ -4,12 +4,10 @@ use std::{io, str};

 use heed::{Error as HeedError, MdbError};
 use rayon::ThreadPoolBuildError;
-use serde_json::{Map, Value};
+use serde_json::Value;
 use thiserror::Error;

-use crate::{CriterionError, DocumentId, FieldId, SortError};
-
-pub type Object = Map<String, Value>;
+use crate::{CriterionError, DocumentId, FieldId, Object, SortError};

 pub fn is_reserved_keyword(keyword: &str) -> bool {
    ["_geo", "_geoDistance", "_geoPoint", "_geoRadius"].contains(&keyword)
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@ -20,7 +20,7 @@ use std::hash::BuildHasherDefault;
 pub use filter_parser::{Condition, FilterCondition};
 use fxhash::{FxHasher32, FxHasher64};
 pub use grenad::CompressionType;
-use serde_json::{Map, Value};
+use serde_json::Value;
 pub use {charabia as tokenizer, heed};

 pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
@ -43,20 +43,21 @@ pub use self::search::{

 pub type Result<T> = std::result::Result<T, error::Error>;

+pub type Attribute = u32;
+pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
+pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
+pub type DocumentId = u32;
 pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
 pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
+pub type FieldDistribution = BTreeMap<String, u64>;
+pub type FieldId = u16;
+pub type Object = serde_json::Map<String, serde_json::Value>;
+pub type Position = u32;
+pub type RelativePosition = u16;
 pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
 pub type SmallVec16<T> = smallvec::SmallVec<[T; 16]>;
 pub type SmallVec32<T> = smallvec::SmallVec<[T; 32]>;
 pub type SmallVec8<T> = smallvec::SmallVec<[T; 8]>;
-pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
-pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
-pub type Attribute = u32;
-pub type DocumentId = u32;
-pub type FieldId = u16;
-pub type Position = u32;
-pub type RelativePosition = u16;
-pub type FieldDistribution = BTreeMap<String, u64>;

 /// A GeoPoint is a point in cartesian plan, called xyz_point in the code. Its metadata
 /// is a tuple composed of 1. the DocumentId of the associated document and 2. the original point
@ -82,7 +83,7 @@ pub fn obkv_to_json(
    displayed_fields: &[FieldId],
    fields_ids_map: &FieldsIdsMap,
    obkv: obkv::KvReaderU16,
-) -> Result<Map<String, Value>> {
+) -> Result<Object> {
    displayed_fields
        .iter()
        .copied()
--- a/milli/src/update/index_documents/extract/extract_geo_points.rs
+++ b/milli/src/update/index_documents/extract/extract_geo_points.rs
@ -1,6 +1,5 @@
 use std::fs::File;
 use std::io;
-use std::result::Result as StdResult;

 use concat_arrays::concat_arrays;
 use serde_json::Value;
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@ -13,7 +13,7 @@ use serde_json::{Map, Value};
 use smartstring::SmartString;

 use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
-use super::{validate_document_id, IndexDocumentsMethod, IndexerConfig};
+use super::{IndexDocumentsMethod, IndexerConfig};
 use crate::documents::{DocumentsBatchIndex, DocumentsBatchReader};
 use crate::error::{Error, InternalError, UserError};
 use crate::index::db_name;
--- a/milli/tests/search/facet_distribution.rs
+++ b/milli/tests/search/facet_distribution.rs
@ -5,8 +5,8 @@ use heed::EnvOpenOptions;
 use maplit::hashset;
 use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
-use milli::{FacetDistribution, Index};
-use serde_json::{Deserializer, Map, Value};
+use milli::{FacetDistribution, Index, Object};
+use serde_json::Deserializer;

 #[test]
 fn test_facet_distribution_with_no_facet_values() {
@ -46,7 +46,7 @@ fn test_facet_distribution_with_no_facet_values() {
        }"#,
    );

-    for result in Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
+    for result in Deserializer::from_reader(reader).into_iter::<Object>() {
        let object = result.unwrap();
        documents_builder.append_json_object(&object).unwrap();
    }
--- a/milli/tests/search/mod.rs
+++ b/milli/tests/search/mod.rs
@ -8,9 +8,9 @@ use heed::EnvOpenOptions;
 use maplit::{hashmap, hashset};
 use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
-use milli::{AscDesc, Criterion, DocumentId, Index, Member};
+use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object};
 use serde::Deserialize;
-use serde_json::{Deserializer, Map, Value};
+use serde_json::Deserializer;
 use slice_group_by::GroupBy;

 mod distinct;
@ -66,7 +66,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
    let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
    let reader = Cursor::new(CONTENT.as_bytes());

-    for result in Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
+    for result in Deserializer::from_reader(reader).into_iter::<Object>() {
        let object = result.unwrap();
        documents_builder.append_json_object(&object).unwrap();
    }