2021-08-16 13:36:30 +02:00
|
|
|
mod clonable_mmap;
|
|
|
|
mod grenad_helpers;
|
|
|
|
mod merge_functions;
|
|
|
|
|
2022-01-19 14:30:03 +01:00
|
|
|
use std::collections::HashSet;
|
2021-08-16 13:36:30 +02:00
|
|
|
use std::convert::{TryFrom, TryInto};
|
|
|
|
|
|
|
|
pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
|
2022-01-19 14:30:03 +01:00
|
|
|
use fst::{IntoStreamer, Streamer};
|
2021-08-16 13:36:30 +02:00
|
|
|
pub use grenad_helpers::{
|
2022-03-24 15:22:57 +01:00
|
|
|
as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks, merge_nothing,
|
2021-08-16 13:36:30 +02:00
|
|
|
sorter_into_lmdb_database, sorter_into_reader, write_into_lmdb_database, writer_into_reader,
|
2022-03-23 14:48:15 +01:00
|
|
|
GrenadParameters, MergeableReader,
|
2021-08-16 13:36:30 +02:00
|
|
|
};
|
|
|
|
pub use merge_functions::{
|
|
|
|
concat_u32s_array, keep_first, keep_first_prefix_value_merge_roaring_bitmaps, keep_latest_obkv,
|
|
|
|
merge_cbo_roaring_bitmaps, merge_obkvs, merge_roaring_bitmaps, merge_two_obkvs,
|
|
|
|
roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Tells whether `key` has an acceptable length to be used as an LMDB key.
///
/// A key is accepted when it is non-empty and at most 511 bytes long
/// (presumably LMDB's default maximum key size — confirm against the
/// LMDB build in use).
pub fn valid_lmdb_key(key: impl AsRef<[u8]>) -> bool {
    let len = key.as_ref().len();
    (1..=511).contains(&len)
}
|
|
|
|
|
|
|
|
/// Splits `slice` into `(&slice[..mid], &slice[mid..])` without panicking.
///
/// Returns `None` when `mid` is greater than the length of the slice;
/// `mid == slice.len()` is accepted and yields an empty tail.
pub fn try_split_at<T>(slice: &[T], mid: usize) -> Option<(&[T], &[T])> {
    // Both checked lookups fail exactly when `mid > slice.len()`.
    let head = slice.get(..mid)?;
    let tail = slice.get(mid..)?;
    Some((head, tail))
}
|
|
|
|
|
|
|
|
/// Divides one slice into an array and the tail at an index,
|
|
|
|
/// returns `None` if `N` is out of bounds.
|
|
|
|
pub fn try_split_array_at<T, const N: usize>(slice: &[T]) -> Option<([T; N], &[T])>
|
|
|
|
where
|
|
|
|
[T; N]: for<'a> TryFrom<&'a [T]>,
|
|
|
|
{
|
|
|
|
let (head, tail) = try_split_at(slice, N)?;
|
|
|
|
let head = head.try_into().ok()?;
|
|
|
|
Some((head, tail))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Decodes `bytes` as a sequence of native-endian `u32`s, lazily.
///
/// The input is consumed four bytes at a time; any trailing bytes that do
/// not form a complete group of four are ignored.
pub fn read_u32_ne_bytes(bytes: &[u8]) -> impl Iterator<Item = u32> + '_ {
    bytes.chunks_exact(4).map(|chunk| {
        // `chunks_exact(4)` only ever yields 4-byte chunks, so the copy cannot panic.
        let mut word = [0u8; 4];
        word.copy_from_slice(chunk);
        u32::from_ne_bytes(word)
    })
}
|
2022-01-19 14:30:03 +01:00
|
|
|
|
2022-01-20 17:55:52 +01:00
|
|
|
/// Converts an fst Stream into an HashSet of Strings.
|
2022-01-19 14:30:03 +01:00
|
|
|
pub fn fst_stream_into_hashset<'f, I, S>(stream: I) -> HashSet<Vec<u8>>
|
|
|
|
where
|
|
|
|
I: for<'a> IntoStreamer<'a, Into = S, Item = &'a [u8]>,
|
|
|
|
S: 'f + for<'a> Streamer<'a, Item = &'a [u8]>,
|
|
|
|
{
|
|
|
|
let mut hashset = HashSet::new();
|
|
|
|
let mut stream = stream.into_stream();
|
|
|
|
while let Some(value) = stream.next() {
|
|
|
|
hashset.insert(value.to_owned());
|
|
|
|
}
|
|
|
|
hashset
|
|
|
|
}
|
2022-01-20 17:55:52 +01:00
|
|
|
|
|
|
|
// Converts an fst Stream into a Vec of Strings.
|
|
|
|
pub fn fst_stream_into_vec<'f, I, S>(stream: I) -> Vec<String>
|
|
|
|
where
|
|
|
|
I: for<'a> IntoStreamer<'a, Into = S, Item = &'a [u8]>,
|
|
|
|
S: 'f + for<'a> Streamer<'a, Item = &'a [u8]>,
|
|
|
|
{
|
|
|
|
let mut strings = Vec::new();
|
|
|
|
let mut stream = stream.into_stream();
|
|
|
|
while let Some(word) = stream.next() {
|
|
|
|
let s = std::str::from_utf8(word).unwrap();
|
|
|
|
strings.push(s.to_owned());
|
|
|
|
}
|
|
|
|
strings
|
|
|
|
}
|