merge with main

This commit is contained in:
Tamo 2021-11-06 16:34:30 +01:00
commit 6831c23449
No known key found for this signature in database
GPG key ID: 20CD8020AFA88D69
25 changed files with 824 additions and 953 deletions

View file

@ -1,5 +1,5 @@
use std::fs::File;
use std::io::{stdin, Cursor, Read};
use std::io::{stdin, BufRead, BufReader, Cursor, Read};
use std::path::PathBuf;
use std::str::FromStr;
@ -9,7 +9,6 @@ use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use milli::update::UpdateIndexingStep::{
ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
};
use serde_json::{Map, Value};
use structopt::StructOpt;
#[cfg(target_os = "linux")]
@ -202,11 +201,11 @@ fn documents_from_jsonl(reader: impl Read) -> Result<Vec<u8>> {
let mut writer = Cursor::new(Vec::new());
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
let values = serde_json::Deserializer::from_reader(reader)
.into_iter::<serde_json::Map<String, serde_json::Value>>();
for document in values {
let document = document?;
documents.add_documents(document)?;
let mut buf = String::new();
let mut reader = BufReader::new(reader);
while reader.read_line(&mut buf)? > 0 {
documents.extend_from_json(&mut buf.as_bytes())?;
}
documents.finish()?;
@ -217,8 +216,7 @@ fn documents_from_json(reader: impl Read) -> Result<Vec<u8>> {
let mut writer = Cursor::new(Vec::new());
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
let json: serde_json::Value = serde_json::from_reader(reader)?;
documents.add_documents(json)?;
documents.extend_from_json(reader)?;
documents.finish()?;
Ok(writer.into_inner())
@ -226,17 +224,7 @@ fn documents_from_json(reader: impl Read) -> Result<Vec<u8>> {
fn documents_from_csv(reader: impl Read) -> Result<Vec<u8>> {
let mut writer = Cursor::new(Vec::new());
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
let mut records = csv::Reader::from_reader(reader);
let iter = records.deserialize::<Map<String, Value>>();
for doc in iter {
let doc = doc?;
documents.add_documents(doc)?;
}
documents.finish()?;
milli::documents::DocumentBatchBuilder::from_csv(reader, &mut writer)?.finish()?;
Ok(writer.into_inner())
}