First batch of PR comment

This commit is contained in:
ManyTheFish 2024-11-21 16:37:55 +01:00
parent 32bcacefd5
commit 36962b943b
16 changed files with 18 additions and 531 deletions

View file

@ -24,7 +24,7 @@ flate2 = "1.0.30"
fst = "0.4.7"
memmap2 = "0.9.4"
milli = { path = "../milli" }
raw-collections = { git = "https://github.com/dureuill/raw-collections.git", version = "0.1.0" }
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
roaring = { version = "0.10.6", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde-cs = "0.2.4"

View file

@ -128,7 +128,6 @@ impl ErrorCode for DocumentFormatError {
}
}
// TODO remove that from the place I've borrowed it
#[derive(Debug)]
enum AllowedType {
String,
@ -213,7 +212,7 @@ pub fn read_csv(input: &File, output: impl io::Write, delimiter: u8) -> Result<u
/// Reads JSON from a file and writes it as NDJSON to `output`, checking it along the way.
pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserailize into a TopLevelMap<'pl> that
// We memory map to be able to deserialize into a TopLevelMap<'pl> that
// does not allocate when possible and only materializes the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut doc_alloc = Bump::with_capacity(1024 * 1024 * 1024); // 1GiB (1024^3 bytes). NOTE(review): this comment used to say 1MiB — confirm the intended capacity.
@ -254,7 +253,7 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
/// Reads NDJSON from a file and writes it as NDJSON to `output`, checking it along the way.
pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserailize into a TopLevelMap<'pl> that
// We memory map to be able to deserialize into a TopLevelMap<'pl> that
// does not allocate when possible and only materializes the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut output = BufWriter::new(output);