Move the spin looping for BBQueue frames into a dedicated function

This commit is contained in:
Clément Renault 2024-12-02 10:33:49 +01:00
parent be7d2fbe63
commit 263c5a348e
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 49 additions and 44 deletions

13
Cargo.lock generated
View File

@ -1910,6 +1910,15 @@ dependencies = [
"serde_json", "serde_json",
] ]
[[package]]
name = "flume"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
dependencies = [
"spin",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -3623,6 +3632,7 @@ dependencies = [
"enum-iterator", "enum-iterator",
"filter-parser", "filter-parser",
"flatten-serde-json", "flatten-serde-json",
"flume",
"fst", "fst",
"fxhash", "fxhash",
"geoutils", "geoutils",
@ -5180,6 +5190,9 @@ name = "spin"
version = "0.9.8" version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
dependencies = [
"lock_api",
]
[[package]] [[package]]
name = "spm_precompiled" name = "spm_precompiled"

View File

@ -99,6 +99,7 @@ rustc-hash = "2.0.0"
uell = "0.1.0" uell = "0.1.0"
enum-iterator = "2.1.0" enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/kerollmops/bbqueue" } bbqueue = { git = "https://github.com/kerollmops/bbqueue" }
flume = { version = "0.11.1", default-features = false }
[dev-dependencies] [dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.43", default-features = false }

View File

@ -4,10 +4,10 @@ use std::marker::PhantomData;
use std::mem; use std::mem;
use std::num::NonZeroU16; use std::num::NonZeroU16;
use bbqueue::framed::{FrameGrantR, FrameProducer}; use bbqueue::framed::{FrameGrantR, FrameGrantW, FrameProducer};
use bbqueue::BBBuffer; use bbqueue::BBBuffer;
use bytemuck::{checked, CheckedBitPattern, NoUninit}; use bytemuck::{checked, CheckedBitPattern, NoUninit};
use crossbeam_channel::SendError; use flume::SendError;
use heed::types::Bytes; use heed::types::Bytes;
use heed::BytesDecode; use heed::BytesDecode;
use memmap2::{Mmap, MmapMut}; use memmap2::{Mmap, MmapMut};
@ -33,7 +33,7 @@ use crate::{CboRoaringBitmapCodec, DocumentId, Index};
/// ///
/// The `channel_capacity` parameter defines the number of /// The `channel_capacity` parameter defines the number of
/// too-large-to-fit-in-BBQueue entries that can be sent through /// too-large-to-fit-in-BBQueue entries that can be sent through
/// a crossbeam channel. This parameter must stay low to make /// a flume channel. This parameter must stay low to make
/// sure we do not use too much memory. /// sure we do not use too much memory.
/// ///
/// Note that the channel is also used to wake-up the receiver /// Note that the channel is also used to wake-up the receiver
@ -61,7 +61,7 @@ pub fn extractor_writer_bbqueue(
consumer consumer
}); });
let (sender, receiver) = crossbeam_channel::bounded(channel_capacity); let (sender, receiver) = flume::bounded(channel_capacity);
let sender = ExtractorBbqueueSender { sender, producers, capacity }; let sender = ExtractorBbqueueSender { sender, producers, capacity };
let receiver = WriterBbqueueReceiver { receiver, consumers }; let receiver = WriterBbqueueReceiver { receiver, consumers };
(sender, receiver) (sender, receiver)
@ -70,7 +70,7 @@ pub fn extractor_writer_bbqueue(
pub struct ExtractorBbqueueSender<'a> { pub struct ExtractorBbqueueSender<'a> {
/// This channel is used to wake-up the receiver and /// This channel is used to wake-up the receiver and
/// send large entries that cannot fit in the BBQueue. /// send large entries that cannot fit in the BBQueue.
sender: crossbeam_channel::Sender<ReceiverAction>, sender: flume::Sender<ReceiverAction>,
/// A memory buffer, one by thread, is used to serialize /// A memory buffer, one by thread, is used to serialize
/// the entries directly in this shared, lock-free space. /// the entries directly in this shared, lock-free space.
producers: ThreadLocal<FullySend<RefCell<FrameProducer<'a>>>>, producers: ThreadLocal<FullySend<RefCell<FrameProducer<'a>>>>,
@ -87,7 +87,7 @@ pub struct WriterBbqueueReceiver<'a> {
/// Used to wake up when new entries are available either in /// Used to wake up when new entries are available either in
/// any BBQueue buffer or directly sent throught this channel /// any BBQueue buffer or directly sent throught this channel
/// (still written to disk). /// (still written to disk).
receiver: crossbeam_channel::Receiver<ReceiverAction>, receiver: flume::Receiver<ReceiverAction>,
/// The BBQueue frames to read when waking-up. /// The BBQueue frames to read when waking-up.
consumers: Vec<bbqueue::framed::FrameConsumer<'a>>, consumers: Vec<bbqueue::framed::FrameConsumer<'a>>,
} }
@ -437,19 +437,9 @@ impl<'b> ExtractorBbqueueSender<'b> {
} }
// Spin loop to have a frame the size we requested. // Spin loop to have a frame the size we requested.
let mut grant = loop { let mut grant = reserve_grant(&mut producer, total_length, &self.sender);
match producer.grant(total_length) {
Ok(grant) => break grant,
Err(bbqueue::Error::InsufficientSize) => continue,
Err(e) => unreachable!("{e:?}"),
}
};
payload_header.serialize_into(&mut grant); payload_header.serialize_into(&mut grant);
// We could commit only the used memory.
grant.commit(total_length);
// We only send a wake up message when the channel is empty // We only send a wake up message when the channel is empty
// so that we don't fill the channel with too many WakeUps. // so that we don't fill the channel with too many WakeUps.
if self.sender.is_empty() { if self.sender.is_empty() {
@ -494,13 +484,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
} }
// Spin loop to have a frame the size we requested. // Spin loop to have a frame the size we requested.
let mut grant = loop { let mut grant = reserve_grant(&mut producer, total_length, &self.sender);
match producer.grant(total_length) {
Ok(grant) => break grant,
Err(bbqueue::Error::InsufficientSize) => continue,
Err(e) => unreachable!("{e:?}"),
}
};
let header_size = payload_header.header_size(); let header_size = payload_header.header_size();
let (header_bytes, remaining) = grant.split_at_mut(header_size); let (header_bytes, remaining) = grant.split_at_mut(header_size);
@ -571,13 +555,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
} }
// Spin loop to have a frame the size we requested. // Spin loop to have a frame the size we requested.
let mut grant = loop { let mut grant = reserve_grant(&mut producer, total_length, &self.sender);
match producer.grant(total_length) {
Ok(grant) => break grant,
Err(bbqueue::Error::InsufficientSize) => continue,
Err(e) => unreachable!("{e:?}"),
}
};
let header_size = payload_header.header_size(); let header_size = payload_header.header_size();
let (header_bytes, remaining) = grant.split_at_mut(header_size); let (header_bytes, remaining) = grant.split_at_mut(header_size);
@ -585,9 +563,6 @@ impl<'b> ExtractorBbqueueSender<'b> {
let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize); let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize);
key_value_writer(key_buffer, value_buffer)?; key_value_writer(key_buffer, value_buffer)?;
// We could commit only the used memory.
grant.commit(total_length);
// We only send a wake up message when the channel is empty // We only send a wake up message when the channel is empty
// so that we don't fill the channel with too many WakeUps. // so that we don't fill the channel with too many WakeUps.
if self.sender.is_empty() { if self.sender.is_empty() {
@ -628,22 +603,13 @@ impl<'b> ExtractorBbqueueSender<'b> {
} }
// Spin loop to have a frame the size we requested. // Spin loop to have a frame the size we requested.
let mut grant = loop { let mut grant = reserve_grant(&mut producer, total_length, &self.sender);
match producer.grant(total_length) {
Ok(grant) => break grant,
Err(bbqueue::Error::InsufficientSize) => continue,
Err(e) => unreachable!("{e:?}"),
}
};
let header_size = payload_header.header_size(); let header_size = payload_header.header_size();
let (header_bytes, remaining) = grant.split_at_mut(header_size); let (header_bytes, remaining) = grant.split_at_mut(header_size);
payload_header.serialize_into(header_bytes); payload_header.serialize_into(header_bytes);
key_writer(remaining)?; key_writer(remaining)?;
// We could commit only the used memory.
grant.commit(total_length);
// We only send a wake up message when the channel is empty // We only send a wake up message when the channel is empty
// so that we don't fill the channel with too many WakeUps. // so that we don't fill the channel with too many WakeUps.
if self.sender.is_empty() { if self.sender.is_empty() {
@ -654,6 +620,31 @@ impl<'b> ExtractorBbqueueSender<'b> {
} }
} }
/// Try to reserve a frame grant of `total_length` by spin looping
/// on the BBQueue buffer and panics if the receiver has been disconnected.
fn reserve_grant<'b>(
producer: &mut FrameProducer<'b>,
total_length: usize,
sender: &flume::Sender<ReceiverAction>,
) -> FrameGrantW<'b> {
loop {
for _ in 0..10_000 {
match producer.grant(total_length) {
Ok(mut grant) => {
// We could commit only the used memory.
grant.to_commit(total_length);
return grant;
}
Err(bbqueue::Error::InsufficientSize) => continue,
Err(e) => unreachable!("{e:?}"),
}
}
if sender.is_disconnected() {
panic!("channel is disconnected");
}
}
}
pub enum ExactWordDocids {} pub enum ExactWordDocids {}
pub enum FidWordCountDocids {} pub enum FidWordCountDocids {}
pub enum WordDocids {} pub enum WordDocids {}