Move the Object type into the lib.rs file and use it everywhere

This commit is contained in:
Kerollmops 2022-06-15 15:36:27 +02:00
parent 0146175fe6
commit fcfc4caf8c
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
11 changed files with 43 additions and 51 deletions

View File

@ -11,8 +11,8 @@ use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
};
use milli::{Filter, Index};
use serde_json::{Map, Value};
use milli::{Filter, Index, Object};
use serde_json::Value;
pub struct Conf<'a> {
/// where we are going to create our database.mmdb directory
@ -96,12 +96,10 @@ pub fn base_setup(conf: &Conf) -> Index {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
let documents = documents_from(conf.dataset, conf.dataset_format);
builder.add_documents(documents).unwrap();
let (builder, user_error) = builder.add_documents(documents).unwrap();
user_error.unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
@ -156,7 +154,7 @@ pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<imp
fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let mut documents = DocumentsBatchBuilder::new(Vec::new());
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
let object = result?;
documents.append_json_object(&object)?;
}
@ -166,7 +164,7 @@ fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let mut documents = DocumentsBatchBuilder::new(Vec::new());
let list: Vec<Map<String, Value>> = serde_json::from_reader(reader)?;
let list: Vec<Object> = serde_json::from_reader(reader)?;
for object in list {
documents.append_json_object(&object)?;
@ -221,14 +219,14 @@ impl<R: Read> CSVDocumentDeserializer<R> {
}
impl<R: Read> Iterator for CSVDocumentDeserializer<R> {
type Item = anyhow::Result<Map<String, Value>>;
type Item = anyhow::Result<Object>;
fn next(&mut self) -> Option<Self::Item> {
let csv_document = self.documents.next()?;
match csv_document {
Ok(csv_document) => {
let mut document = Map::new();
let mut document = Object::new();
for ((field_name, field_type), value) in
self.headers.iter().zip(csv_document.into_iter())

View File

@ -13,8 +13,7 @@ use milli::update::UpdateIndexingStep::{
ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
};
use milli::update::{self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig};
use milli::Index;
use serde_json::{Map, Value};
use milli::{Index, Object};
use structopt::StructOpt;
#[cfg(target_os = "linux")]
@ -325,7 +324,7 @@ fn documents_from_jsonl(reader: impl Read) -> Result<Vec<u8>> {
let mut documents = DocumentsBatchBuilder::new(Vec::new());
let reader = BufReader::new(reader);
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
let object = result?;
documents.append_json_object(&object)?;
}
@ -335,7 +334,7 @@ fn documents_from_jsonl(reader: impl Read) -> Result<Vec<u8>> {
fn documents_from_json(reader: impl Read) -> Result<Vec<u8>> {
let mut documents = DocumentsBatchBuilder::new(Vec::new());
let list: Vec<Map<String, Value>> = serde_json::from_reader(reader)?;
let list: Vec<Object> = serde_json::from_reader(reader)?;
for object in list {
documents.append_json_object(&object)?;
@ -424,7 +423,7 @@ impl Search {
filter: &Option<String>,
offset: &Option<usize>,
limit: &Option<usize>,
) -> Result<Vec<Map<String, Value>>> {
) -> Result<Vec<Object>> {
let txn = index.read_txn()?;
let mut search = index.search(&txn);

View File

@ -26,11 +26,11 @@ use milli::update::{
};
use milli::{
obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, FormatOptions, Index,
MatcherBuilder, SearchResult, SortError,
MatcherBuilder, Object, SearchResult, SortError,
};
use once_cell::sync::OnceCell;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use serde_json::Value;
use structopt::StructOpt;
use tokio::fs::File as TFile;
use tokio::io::AsyncWriteExt;
@ -169,11 +169,7 @@ impl<'s, A: AsRef<[u8]>> Highlighter<'s, A> {
}
}
fn highlight_record(
&self,
object: &mut Map<String, Value>,
attributes_to_highlight: &HashSet<String>,
) {
fn highlight_record(&self, object: &mut Object, attributes_to_highlight: &HashSet<String>) {
// TODO do we need to create a string for elements that are not one and need to be highlighted?
for (key, value) in object.iter_mut() {
if attributes_to_highlight.contains(key) {
@ -708,7 +704,7 @@ async fn main() -> anyhow::Result<()> {
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct Answer {
documents: Vec<Map<String, Value>>,
documents: Vec<Object>,
number_of_candidates: u64,
facets: BTreeMap<String, BTreeMap<String, u64>>,
}
@ -1036,7 +1032,7 @@ fn documents_from_jsonl(reader: impl Read) -> anyhow::Result<Vec<u8>> {
let mut documents = DocumentsBatchBuilder::new(Vec::new());
let reader = BufReader::new(reader);
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
let object = result?;
documents.append_json_object(&object)?;
}
@ -1046,7 +1042,7 @@ fn documents_from_jsonl(reader: impl Read) -> anyhow::Result<Vec<u8>> {
fn documents_from_json(reader: impl Read) -> anyhow::Result<Vec<u8>> {
let mut documents = DocumentsBatchBuilder::new(Vec::new());
let list: Vec<Map<String, Value>> = serde_json::from_reader(reader)?;
let list: Vec<Object> = serde_json::from_reader(reader)?;
for object in list {
documents.append_json_object(&object)?;

View File

@ -21,7 +21,7 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
let writer = BufWriter::new(writer);
let mut builder = DocumentsBatchBuilder::new(writer);
let values: Vec<Map<String, Value>> = serde_json::from_reader(input)?;
let values: Vec<Object> = serde_json::from_reader(input)?;
if builder.documents_count() == 0 {
bail!("Empty payload");
}

View File

@ -1,9 +1,10 @@
use std::io::{self, Write};
use grenad::{CompressionType, WriterBuilder};
use serde_json::{to_writer, Map, Value};
use serde_json::{to_writer, Value};
use super::{DocumentsBatchIndex, Error, DOCUMENTS_BATCH_INDEX_KEY};
use crate::Object;
/// The `DocumentsBatchBuilder` provides a way to build a documents batch in the intermediary
/// format used by milli.
@ -55,7 +56,7 @@ impl<W: Write> DocumentsBatchBuilder<W> {
}
/// Appends a new JSON object into the batch and updates the `DocumentsBatchIndex` accordingly.
pub fn append_json_object(&mut self, object: &Map<String, Value>) -> io::Result<()> {
pub fn append_json_object(&mut self, object: &Object) -> io::Result<()> {
// Make sure that we insert the fields ids in order as the obkv writer has this requirement.
let mut fields_ids: Vec<_> = object.keys().map(|k| self.fields_index.insert(&k)).collect();
fields_ids.sort_unstable();

View File

@ -4,12 +4,10 @@ use std::{io, str};
use heed::{Error as HeedError, MdbError};
use rayon::ThreadPoolBuildError;
use serde_json::{Map, Value};
use serde_json::Value;
use thiserror::Error;
use crate::{CriterionError, DocumentId, FieldId, SortError};
pub type Object = Map<String, Value>;
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
pub fn is_reserved_keyword(keyword: &str) -> bool {
["_geo", "_geoDistance", "_geoPoint", "_geoRadius"].contains(&keyword)

View File

@ -20,7 +20,7 @@ use std::hash::BuildHasherDefault;
pub use filter_parser::{Condition, FilterCondition};
use fxhash::{FxHasher32, FxHasher64};
pub use grenad::CompressionType;
use serde_json::{Map, Value};
use serde_json::Value;
pub use {charabia as tokenizer, heed};
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
@ -43,20 +43,21 @@ pub use self::search::{
pub type Result<T> = std::result::Result<T, error::Error>;
pub type Attribute = u32;
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
pub type DocumentId = u32;
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
pub type FieldDistribution = BTreeMap<String, u64>;
pub type FieldId = u16;
pub type Object = serde_json::Map<String, serde_json::Value>;
pub type Position = u32;
pub type RelativePosition = u16;
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
pub type SmallVec16<T> = smallvec::SmallVec<[T; 16]>;
pub type SmallVec32<T> = smallvec::SmallVec<[T; 32]>;
pub type SmallVec8<T> = smallvec::SmallVec<[T; 8]>;
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
pub type Attribute = u32;
pub type DocumentId = u32;
pub type FieldId = u16;
pub type Position = u32;
pub type RelativePosition = u16;
pub type FieldDistribution = BTreeMap<String, u64>;
/// A GeoPoint is a point in the Cartesian plane, called xyz_point in the code. Its metadata
/// is a tuple composed of 1. the DocumentId of the associated document and 2. the original point
@ -82,7 +83,7 @@ pub fn obkv_to_json(
displayed_fields: &[FieldId],
fields_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16,
) -> Result<Map<String, Value>> {
) -> Result<Object> {
displayed_fields
.iter()
.copied()

View File

@ -1,6 +1,5 @@
use std::fs::File;
use std::io;
use std::result::Result as StdResult;
use concat_arrays::concat_arrays;
use serde_json::Value;

View File

@ -13,7 +13,7 @@ use serde_json::{Map, Value};
use smartstring::SmartString;
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
use super::{validate_document_id, IndexDocumentsMethod, IndexerConfig};
use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::index::db_name;

View File

@ -5,8 +5,8 @@ use heed::EnvOpenOptions;
use maplit::hashset;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{FacetDistribution, Index};
use serde_json::{Deserializer, Map, Value};
use milli::{FacetDistribution, Index, Object};
use serde_json::Deserializer;
#[test]
fn test_facet_distribution_with_no_facet_values() {
@ -46,7 +46,7 @@ fn test_facet_distribution_with_no_facet_values() {
}"#,
);
for result in Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
for result in Deserializer::from_reader(reader).into_iter::<Object>() {
let object = result.unwrap();
documents_builder.append_json_object(&object).unwrap();
}

View File

@ -8,9 +8,9 @@ use heed::EnvOpenOptions;
use maplit::{hashmap, hashset};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, DocumentId, Index, Member};
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object};
use serde::Deserialize;
use serde_json::{Deserializer, Map, Value};
use serde_json::Deserializer;
use slice_group_by::GroupBy;
mod distinct;
@ -66,7 +66,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
let reader = Cursor::new(CONTENT.as_bytes());
for result in Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
for result in Deserializer::from_reader(reader).into_iter::<Object>() {
let object = result.unwrap();
documents_builder.append_json_object(&object).unwrap();
}