mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Simplify the code when array_each failed
This commit is contained in:
parent
1af590d3bc
commit
271685cceb
@ -4,10 +4,8 @@ use std::fs::File;
|
|||||||
use std::io::{self, Seek, Write};
|
use std::io::{self, Seek, Write};
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
use either::Either;
|
|
||||||
use memmap2::MmapOptions;
|
use memmap2::MmapOptions;
|
||||||
use milli::documents::{DocumentsBatchBuilder, Error};
|
use milli::documents::{DocumentsBatchBuilder, Error};
|
||||||
use milli::Object;
|
|
||||||
use serde::de::{SeqAccess, Visitor};
|
use serde::de::{SeqAccess, Visitor};
|
||||||
use serde::{Deserialize, Deserializer};
|
use serde::{Deserialize, Deserializer};
|
||||||
use serde_json::error::Category;
|
use serde_json::error::Category;
|
||||||
@ -125,7 +123,7 @@ pub fn read_ndjson(file: &File, writer: impl Write + Seek) -> Result<usize> {
|
|||||||
read_json_inner(file, writer, PayloadType::Ndjson)
|
read_json_inner(file, writer, PayloadType::Ndjson)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reads JSON from temporary file and write an obkv batch to writer.
|
/// Reads JSON from temporary file and write an obkv batch to writer.
|
||||||
fn read_json_inner(
|
fn read_json_inner(
|
||||||
file: &File,
|
file: &File,
|
||||||
writer: impl Write + Seek,
|
writer: impl Write + Seek,
|
||||||
@ -135,35 +133,26 @@ fn read_json_inner(
|
|||||||
let mmap = unsafe { MmapOptions::new().map(file)? };
|
let mmap = unsafe { MmapOptions::new().map(file)? };
|
||||||
let mut deserializer = serde_json::Deserializer::from_slice(&mmap);
|
let mut deserializer = serde_json::Deserializer::from_slice(&mmap);
|
||||||
|
|
||||||
match array_each(&mut deserializer, |obj: Object| builder.append_json_object(&obj)) {
|
match array_each(&mut deserializer, |obj| builder.append_json_object(&obj)) {
|
||||||
// The json data has been successfully deserialised and does not need to be processed again.
|
// The json data has been deserialized and does not need to be processed again.
|
||||||
// the data has been successfully transferred to the "update_file" during the deserialisation process.
|
// The data has been transferred to the writer during the deserialization process.
|
||||||
// count ==0 means an empty array
|
|
||||||
Ok(Ok(count)) => {
|
Ok(Ok(count)) => {
|
||||||
if count == 0 {
|
if count == 0 {
|
||||||
return Ok(count as usize);
|
return Ok(count as usize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(Err(e)) => return Err(DocumentFormatError::Internal(Box::new(e))),
|
Ok(Err(e)) => return Err(DocumentFormatError::Internal(Box::new(e))),
|
||||||
// Prefer deserialization as a json array. Failure to do deserialisation using the traditional method.
|
Err(_) => {
|
||||||
Err(_e) => {
|
// If we cannot deserialize the content as an array of object then
|
||||||
#[derive(Deserialize, Debug)]
|
// we try to deserialize it as a single JSON object.
|
||||||
#[serde(transparent)]
|
let object = serde_json::from_reader(file)
|
||||||
struct ArrayOrSingleObject {
|
|
||||||
#[serde(with = "either::serde_untagged")]
|
|
||||||
inner: Either<Vec<Object>, Object>,
|
|
||||||
}
|
|
||||||
|
|
||||||
let content: ArrayOrSingleObject = serde_json::from_reader(file)
|
|
||||||
.map_err(Error::Json)
|
.map_err(Error::Json)
|
||||||
.map_err(|e| (payload_type, e))?;
|
.map_err(|e| (payload_type, e))?;
|
||||||
|
|
||||||
for object in content.inner.map_right(|o| vec![o]).into_inner() {
|
builder
|
||||||
builder
|
.append_json_object(&object)
|
||||||
.append_json_object(&object)
|
.map_err(Into::into)
|
||||||
.map_err(Into::into)
|
.map_err(DocumentFormatError::Internal)?;
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -173,12 +162,12 @@ fn read_json_inner(
|
|||||||
Ok(count as usize)
|
Ok(count as usize)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/// The actual handling of the deserialization process in serde
|
||||||
* The actual handling of the deserialization process in the serde avoids storing the deserialized object in memory.
|
/// avoids storing the deserialized object in memory.
|
||||||
* Reference:
|
///
|
||||||
* https://serde.rs/stream-array.html
|
/// ## References
|
||||||
* https://github.com/serde-rs/json/issues/160
|
/// <https://serde.rs/stream-array.html>
|
||||||
*/
|
/// <https://github.com/serde-rs/json/issues/160>
|
||||||
fn array_each<'de, D, T, F>(deserializer: D, f: F) -> std::result::Result<io::Result<u64>, D::Error>
|
fn array_each<'de, D, T, F>(deserializer: D, f: F) -> std::result::Result<io::Result<u64>, D::Error>
|
||||||
where
|
where
|
||||||
D: Deserializer<'de>,
|
D: Deserializer<'de>,
|
||||||
|
Loading…
Reference in New Issue
Block a user