use std::sync::atomic::{AtomicBool, Ordering};
/// Size in bytes of one IO buffer. Also used as that buffer's alignment and as
/// the tail margin `rand_io_offset` keeps free below the backing capacity.
pub(super) const IO_BLOCK_SIZE: usize = 4096;
/// Alignment in bytes applied to generated offsets and to stripe sizes.
pub(super) const IO_SECTOR_SIZE: u64 = 512;
/// Number of equal stripes `stripe_base` divides the backing capacity into.
pub(super) const IO_NUM_STRIPES: u64 = 64;
/// ioctl request used by `open_io_backing` to read a block device's size in
/// bytes into a `u64`.
/// NOTE(review): the numeric value is hard-coded; presumably it matches the
/// Linux `BLKGETSIZE64` encoding on the targeted 64-bit architectures — confirm.
pub(super) const BLKGETSIZE64: libc::c_ulong = 0x80081272;
/// Length in bytes (16 MiB) given to the tempfile fallback backing.
pub(super) const IO_TEMPFILE_CAPACITY: u64 = 16 * 1024 * 1024;
/// An opened IO target together with its capacity.
///
/// When `tempfile_path` is `Some`, the `Drop` impl removes that path
/// (best-effort) when the value is dropped.
pub(super) struct IoBacking {
/// Open handle that IO is issued against.
pub(super) file: std::fs::File,
/// Usable size of the target in bytes.
pub(super) capacity_bytes: u64,
/// Path of the fallback tempfile to delete on drop; `None` when the backing
/// is not a tempfile.
pub(super) tempfile_path: Option<String>,
}
impl Drop for IoBacking {
    /// Removes the fallback tempfile, if this backing owns one.
    ///
    /// Removal is best-effort: a failure to unlink is ignored because there is
    /// nothing useful to do about it while dropping.
    fn drop(&mut self) {
        // `take` leaves `None` behind, so the path is consumed exactly once.
        let Some(owned_path) = self.tempfile_path.take() else {
            return;
        };
        let _ = std::fs::remove_file(&owned_path);
    }
}
/// An open file paired with the path it lives at.
///
/// The `Drop` impl removes `path` (best-effort) when the value is dropped.
pub(super) struct PhaseIoTempfile {
/// Open handle to the file.
pub(super) file: std::fs::File,
/// Filesystem path that is removed on drop.
pub(super) path: String,
}
impl Drop for PhaseIoTempfile {
    /// Unlinks the file at `self.path`; any error from the removal is discarded.
    fn drop(&mut self) {
        let target: &str = &self.path;
        std::fs::remove_file(target).ok();
    }
}
/// Owner of a raw heap allocation described by `layout`.
///
/// The allocation is released by the `Drop` impl using the same `layout`.
pub(super) struct DirectIoBuf {
// Non-null pointer to the start of the allocation.
ptr: std::ptr::NonNull<u8>,
// Layout the allocation was requested with; needed again for deallocation.
layout: std::alloc::Layout,
}
impl DirectIoBuf {
    /// Allocates a zero-filled buffer of `IO_BLOCK_SIZE` bytes, aligned to
    /// `IO_BLOCK_SIZE`.
    ///
    /// Returns `None` when the allocator reports failure (null pointer).
    ///
    /// # Panics
    ///
    /// Panics if the size/alignment pair does not form a valid `Layout`.
    pub(super) fn alloc() -> Option<Self> {
        let block_layout = std::alloc::Layout::from_size_align(IO_BLOCK_SIZE, IO_BLOCK_SIZE)
            .expect("logical-block-aligned 4 KiB layout is valid");
        // SAFETY: `block_layout` has a non-zero size (`IO_BLOCK_SIZE`), which is
        // the precondition of `alloc_zeroed`.
        let raw = unsafe { std::alloc::alloc_zeroed(block_layout) };
        std::ptr::NonNull::new(raw).map(|ptr| Self {
            ptr,
            layout: block_layout,
        })
    }

    /// Returns the raw pointer to the start of the buffer.
    pub(super) fn as_ptr(&self) -> *mut u8 {
        let start = self.ptr;
        start.as_ptr()
    }
}
impl Drop for DirectIoBuf {
    /// Releases the buffer's allocation.
    fn drop(&mut self) {
        let raw = self.ptr.as_ptr();
        let block_layout = self.layout;
        // SAFETY: relies on `ptr` having been allocated by the global allocator
        // with exactly `layout`, as the `alloc` constructor visible in this file
        // does, and on the fields never being modified afterwards.
        unsafe { std::alloc::dealloc(raw, block_layout) }
    }
}
/// Opens the target that IO work is issued against.
///
/// * If `/dev/vda` exists, it is opened read-write with `extra_flags` OR-ed
///   into the open flags, and its size is read with the `BLKGETSIZE64` ioctl.
///   A failed open or ioctl yields `None`; there is no fallback in that case.
/// * Otherwise a file named `ktstr_iodev_{tid}` in the system temp directory
///   is created (or truncated) with `extra_flags` as custom flags and sized to
///   `IO_TEMPFILE_CAPACITY`. Its path is stored in `tempfile_path`, so
///   dropping the returned `IoBacking` removes the file. A warning is logged
///   the first time this fallback is taken.
///
/// Returns `None` when any step fails. A tempfile that was created but could
/// not be sized is removed before returning.
pub(super) fn open_io_backing(extra_flags: libc::c_int, tid: libc::pid_t) -> Option<IoBacking> {
    use std::os::unix::fs::OpenOptionsExt;
    use std::os::unix::io::FromRawFd;

    if std::path::Path::new("/dev/vda").exists() {
        let cstr = c"/dev/vda";
        // SAFETY: `cstr` is a NUL-terminated C string literal that outlives the call.
        let fd = unsafe { libc::open(cstr.as_ptr(), libc::O_RDWR | extra_flags) };
        if fd < 0 {
            return None;
        }
        // Take ownership of the descriptor immediately so that every exit path
        // below closes it when `file` is dropped.
        // SAFETY: `fd` was just returned by a successful `open` and has no
        // other owner.
        let file = unsafe { std::fs::File::from_raw_fd(fd) };

        let mut size_bytes: u64 = 0;
        // SAFETY: `fd` is kept open by `file`, and the pointer refers to a live
        // `u64` for the duration of the call.
        let rc = unsafe { libc::ioctl(fd, BLKGETSIZE64, &mut size_bytes as *mut u64) };
        if rc != 0 {
            return None;
        }
        return Some(IoBacking {
            file,
            capacity_bytes: size_bytes,
            tempfile_path: None,
        });
    }

    let path = std::env::temp_dir()
        .join(format!("ktstr_iodev_{tid}"))
        .to_string_lossy()
        .into_owned();

    // Log the fallback only once per process, however many callers hit it.
    static FALLBACK_WARNED: AtomicBool = AtomicBool::new(false);
    if !FALLBACK_WARNED.swap(true, Ordering::Relaxed) {
        tracing::warn!(
            path = %path,
            "virtio-blk /dev/vda absent; using tempfile fallback at {path}. \
             IO workload pathology may not reproduce."
        );
    }

    let file = std::fs::OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .truncate(true)
        .custom_flags(extra_flags)
        .open(&path)
        .ok()?;

    // Hand the path to `IoBacking` before sizing the file: if `set_len` fails,
    // the early return drops `backing`, whose `Drop` removes the tempfile
    // instead of leaving it behind on disk.
    let backing = IoBacking {
        file,
        capacity_bytes: IO_TEMPFILE_CAPACITY,
        tempfile_path: Some(path),
    };
    backing.file.set_len(IO_TEMPFILE_CAPACITY).ok()?;
    Some(backing)
}
/// Makes sure `io_disk` holds an opened backing, opening one on first use.
///
/// Returns `true` when `io_disk` is populated on return. When it was empty and
/// `open_io_backing` fails, returns `false`, leaves `io_disk` as `None`, and
/// logs an error the first time this happens in the process.
pub(super) fn ensure_io_disk(
    io_disk: &mut Option<IoBacking>,
    extra_flags: libc::c_int,
    tid: libc::pid_t,
) -> bool {
    if io_disk.is_none() {
        match open_io_backing(extra_flags, tid) {
            Some(backing) => *io_disk = Some(backing),
            None => {
                // One-shot latch so repeated failures do not flood the log.
                static OPEN_FAILED_WARNED: AtomicBool = AtomicBool::new(false);
                let already_reported = OPEN_FAILED_WARNED.swap(true, Ordering::Relaxed);
                if !already_reported {
                    tracing::error!("IO backing open failed; worker yielding without IO.");
                }
                return false;
            }
        }
    }
    true
}
/// Makes sure `io_buf` holds a buffer, allocating one on first use.
///
/// Returns `true` when `io_buf` is populated on return, and `false` when it
/// was empty and `DirectIoBuf::alloc` failed (in which case it stays `None`).
pub(super) fn ensure_io_buf(io_buf: &mut Option<DirectIoBuf>) -> bool {
    if io_buf.is_none() {
        // A failed allocation yields `None`, which leaves the slot empty.
        *io_buf = DirectIoBuf::alloc();
    }
    io_buf.is_some()
}
/// Performs one xorshift step on `state` using the shift triple (13, 7, 17)
/// and returns the new value.
///
/// A `state` of 0 maps to 0.
#[inline]
pub(super) fn xorshift64(state: u64) -> u64 {
    let after_left = state ^ (state << 13);
    let after_right = after_left ^ (after_left >> 7);
    after_right ^ (after_right << 17)
}
/// Advances `rng_state` by one xorshift step and derives a byte offset from it.
///
/// The returned offset is a multiple of `IO_SECTOR_SIZE` and is strictly less
/// than `capacity_bytes - IO_BLOCK_SIZE`, so a transfer of `IO_BLOCK_SIZE`
/// bytes starting there ends within `capacity_bytes`. Returns 0 when
/// `capacity_bytes` does not exceed `IO_BLOCK_SIZE`.
///
/// A zero `rng_state` is replaced with a fixed nonzero seed before stepping.
/// Nonzero states produce the same sequence as before this guard was added.
pub(super) fn rand_io_offset(rng_state: &mut u64, capacity_bytes: u64) -> u64 {
    // Zero is a fixed point of xorshift: without this guard a zero state would
    // stay zero and every call would return offset 0.
    const ZERO_STATE_RESEED: u64 = 0x9E37_79B9_7F4A_7C15;
    if *rng_state == 0 {
        *rng_state = ZERO_STATE_RESEED;
    }
    *rng_state = xorshift64(*rng_state);

    let max_offset = capacity_bytes.saturating_sub(IO_BLOCK_SIZE as u64);
    if max_offset == 0 {
        return 0;
    }
    // Reduce into range, then round down to a sector boundary.
    (*rng_state % max_offset) & !(IO_SECTOR_SIZE - 1)
}
/// Returns the starting byte offset of the stripe assigned to `tid`.
///
/// `capacity_bytes` is split into `IO_NUM_STRIPES` equal stripes, each rounded
/// down to a multiple of `IO_SECTOR_SIZE`; `tid` selects a stripe by its
/// remainder modulo `IO_NUM_STRIPES`.
pub(super) fn stripe_base(tid: libc::pid_t, capacity_bytes: u64) -> u64 {
    let sector_mask = !(IO_SECTOR_SIZE - 1);
    let bytes_per_stripe = (capacity_bytes / IO_NUM_STRIPES) & sector_mask;
    let slot = (tid as u64) % IO_NUM_STRIPES;
    bytes_per_stripe * slot
}