mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-27 07:00:05 +01:00
More efficiently merge MTBLs, more than two at a time
This commit is contained in:
parent
1df1f88fe1
commit
3a23dc242e
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -996,7 +996,7 @@ checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
|
||||
[[package]]
|
||||
name = "oxidized-mtbl"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/Kerollmops/oxidized-mtbl.git?rev=8918476#8918476f61f4430890d067db7b4a6cfb2d549c43"
|
||||
source = "git+https://github.com/Kerollmops/oxidized-mtbl.git?rev=6acef3d#6acef3d0fc7fec6a3701038860e51f8bbcee1ee6"
|
||||
dependencies = [
|
||||
"byteorder 1.3.4",
|
||||
"crc32c",
|
||||
|
@ -18,7 +18,7 @@ jemallocator = "0.3.2"
|
||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
memmap = "0.7.0"
|
||||
once_cell = "1.4.0"
|
||||
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "8918476" }
|
||||
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "6acef3d" }
|
||||
rayon = "1.3.0"
|
||||
roaring = "0.5.2"
|
||||
slice-group-by = "0.2.6"
|
||||
|
@ -100,36 +100,38 @@ impl MtblKvStore {
|
||||
Ok(MtblKvStore(Some(out)))
|
||||
}
|
||||
|
||||
fn merge(key: &[u8], left: &[u8], right: &[u8]) -> Option<Vec<u8>> {
|
||||
fn merge(key: &[u8], values: &[Vec<u8>]) -> Option<Vec<u8>> {
|
||||
if key == b"\0words-fst" {
|
||||
let left_fst = fst::Set::new(left).unwrap();
|
||||
let right_fst = fst::Set::new(right).unwrap();
|
||||
let fsts: Vec<_> = values.iter().map(|v| fst::Set::new(v).unwrap()).collect();
|
||||
|
||||
// Union of all the given FSTs
|
||||
let op = fst::set::OpBuilder::new()
|
||||
.add(left_fst.into_stream())
|
||||
.add(right_fst.into_stream())
|
||||
.r#union();
|
||||
let mut op = fst::set::OpBuilder::new();
|
||||
fsts.iter().for_each(|fst| op.push(fst.into_stream()));
|
||||
let op = op.r#union();
|
||||
|
||||
let mut build = fst::SetBuilder::memory();
|
||||
build.extend_stream(op.into_stream()).unwrap();
|
||||
Some(build.into_inner().unwrap())
|
||||
}
|
||||
else if key == b"\0headers" {
|
||||
assert_eq!(left, right);
|
||||
Some(left.to_vec())
|
||||
assert!(values.windows(2).all(|vs| vs[0] == vs[1]));
|
||||
Some(values[0].to_vec())
|
||||
}
|
||||
else if key.starts_with(&[1]) || key.starts_with(&[2]) {
|
||||
let mut left = RoaringBitmap::deserialize_from(left).unwrap();
|
||||
let right = RoaringBitmap::deserialize_from(right).unwrap();
|
||||
left.union_with(&right);
|
||||
let mut first = RoaringBitmap::deserialize_from(values[0].as_slice()).unwrap();
|
||||
|
||||
for value in &values[1..] {
|
||||
let bitmap = RoaringBitmap::deserialize_from(value.as_slice()).unwrap();
|
||||
first.union_with(&bitmap);
|
||||
}
|
||||
|
||||
let mut vec = Vec::new();
|
||||
left.serialize_into(&mut vec).unwrap();
|
||||
first.serialize_into(&mut vec).unwrap();
|
||||
Some(vec)
|
||||
}
|
||||
else if key.starts_with(&[3]) {
|
||||
assert_eq!(left, right);
|
||||
Some(left.to_vec())
|
||||
assert!(values.windows(2).all(|vs| vs[0] == vs[1]));
|
||||
Some(values[0].to_vec())
|
||||
}
|
||||
else {
|
||||
panic!("wut? {:?}", key)
|
||||
|
Loading…
x
Reference in New Issue
Block a user