use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
use std::sync::Arc;
/// Snapshot of filesystem usage and guard counters, as produced by
/// `DiskSpaceGuard::stats`.
#[derive(Debug, Clone, Default)]
pub struct DiskStats {
/// Total size of the monitored filesystem in bytes (0 when the query fails).
pub total_bytes: u64,
/// Bytes currently available on the monitored filesystem (0 when the query fails).
pub available_bytes: u64,
/// `total_bytes - available_bytes`, saturating at zero.
pub used_bytes: u64,
/// Value of the guard's operation counter. It is incremented on every
/// `check()` call made while the guard is enabled, including calls that
/// were skipped by the check interval.
pub checks_performed: u64,
/// Number of full checks that found free space below the soft limit.
pub soft_limit_warnings: u64,
/// Whether a full check (`check_now`) has failed the hard limit since the
/// last `reset_stats`.
pub hard_limit_exceeded: bool,
/// Sum of all byte counts reported through `record_write`.
pub estimated_bytes_written: u64,
}
/// Tunable thresholds for `DiskSpaceGuard`. All sizes are in megabytes
/// (1 MB = 1024 * 1024 bytes in this module).
#[derive(Debug, Clone)]
pub struct DiskSpaceGuardConfig {
/// Minimum free space that must remain available. A value of 0 disables
/// every check performed by the guard.
pub hard_limit_mb: usize,
/// Free-space level below which a full check increments the soft-warning
/// counter (the check itself still succeeds).
pub soft_limit_mb: usize,
/// `check()` only queries the filesystem when its call counter is a
/// multiple of this value; other calls return `Ok(())` immediately.
pub check_interval: usize,
/// Extra headroom added on top of `hard_limit_mb` when computing the
/// required free space.
pub reserve_buffer_mb: usize,
/// Path whose filesystem is monitored; `None` means the current
/// directory (`"."`).
pub monitor_path: Option<PathBuf>,
}
impl Default for DiskSpaceGuardConfig {
fn default() -> Self {
Self {
hard_limit_mb: 100, soft_limit_mb: 500, check_interval: 500, reserve_buffer_mb: 50, monitor_path: None,
}
}
}
impl DiskSpaceGuardConfig {
    /// Creates a configuration with the given hard limit and a soft limit of
    /// five times that value; every other field keeps its default.
    ///
    /// The soft limit saturates at `usize::MAX` instead of overflowing for
    /// very large hard limits.
    pub fn with_min_free_mb(hard_limit_mb: usize) -> Self {
        Self {
            hard_limit_mb,
            soft_limit_mb: hard_limit_mb.saturating_mul(5),
            ..Default::default()
        }
    }

    /// Sets the path whose filesystem should be monitored and returns the
    /// updated configuration (builder style).
    pub fn with_path<P: AsRef<Path>>(mut self, path: P) -> Self {
        self.monitor_path = Some(path.as_ref().to_path_buf());
        self
    }

    /// Sets the reserve buffer (in MB) that is added on top of the hard
    /// limit and returns the updated configuration (builder style).
    pub fn with_reserve(mut self, reserve_mb: usize) -> Self {
        self.reserve_buffer_mb = reserve_mb;
        self
    }
}
/// Error returned by the guard when free disk space is below the required
/// amount.
#[derive(Debug, Clone)]
pub struct DiskSpaceExhausted {
/// Free space (in MB) observed when the check failed.
pub available_mb: usize,
/// Free space (in MB) the check required.
pub required_mb: usize,
/// Whether the failure concerns the soft limit. Every error constructed
/// in this file sets it to `false`.
pub is_soft_limit: bool,
/// Human-readable description; this is exactly what `Display` prints.
pub message: String,
}
impl std::fmt::Display for DiskSpaceExhausted {
    /// Emits the pre-formatted `message` verbatim; width, fill and precision
    /// flags on the outer formatter are not applied to it.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.message.as_str())
    }
}
// Marker impl so `DiskSpaceExhausted` can be used as a standard error type
// (e.g. boxed as `dyn Error` or propagated with `?`); it relies on the
// `Debug` derive and the `Display` impl and overrides no trait methods.
impl std::error::Error for DiskSpaceExhausted {}
/// Checks free disk space against configured limits and keeps running
/// counters. All mutable state is stored in atomics, so every method takes
/// `&self`.
#[derive(Debug)]
pub struct DiskSpaceGuard {
/// Thresholds and monitored path.
config: DiskSpaceGuardConfig,
/// Number of `check()` calls made while the guard was enabled; used to
/// decide which calls perform a real filesystem query.
operation_counter: AtomicU64,
/// Number of full checks that found free space below the soft limit.
soft_warnings_count: AtomicU64,
/// Set when `check_now` fails the hard limit; cleared only by `reset_stats`.
hard_limit_exceeded: AtomicBool,
/// Running total of bytes reported through `record_write`.
bytes_written_estimate: AtomicU64,
/// Free space (MB) seen by the most recent `check_now`. It is written
/// there but not read anywhere in this file.
last_available_mb: AtomicUsize,
}
impl DiskSpaceGuard {
    /// Bytes per megabyte, as used for every MB figure handled by the guard.
    const BYTES_PER_MB: u64 = 1024 * 1024;

    /// Creates a guard with the given configuration and all counters zeroed.
    pub fn new(config: DiskSpaceGuardConfig) -> Self {
        Self {
            config,
            operation_counter: AtomicU64::new(0),
            soft_warnings_count: AtomicU64::new(0),
            hard_limit_exceeded: AtomicBool::new(false),
            bytes_written_estimate: AtomicU64::new(0),
            last_available_mb: AtomicUsize::new(0),
        }
    }

    /// Creates a guard using `DiskSpaceGuardConfig::default()`.
    pub fn default_guard() -> Self {
        Self::new(DiskSpaceGuardConfig::default())
    }

    /// Creates a guard whose hard limit is `min_free_mb`
    /// (see `DiskSpaceGuardConfig::with_min_free_mb`).
    pub fn with_min_free(min_free_mb: usize) -> Self {
        Self::new(DiskSpaceGuardConfig::with_min_free_mb(min_free_mb))
    }

    /// Creates a guard wrapped in an `Arc` so it can be shared between owners.
    pub fn shared(config: DiskSpaceGuardConfig) -> Arc<Self> {
        Arc::new(Self::new(config))
    }

    /// Path whose filesystem is monitored: the configured path, or `"."`
    /// when none was set.
    fn monitored_path(&self) -> &Path {
        self.config
            .monitor_path
            .as_deref()
            .unwrap_or(Path::new("."))
    }

    /// Free space in MB on the monitored filesystem, or `usize::MAX` when it
    /// cannot be determined, so that an unavailable query never fails a check.
    fn available_mb_or_unlimited(&self) -> usize {
        get_available_space_mb(self.monitored_path()).unwrap_or(usize::MAX)
    }

    /// Cheap periodic check intended to be called once per operation.
    ///
    /// Every call is counted; the filesystem is only queried on the first
    /// call and then every `check_interval` calls. An interval of 0 is
    /// treated as 1 (check on every call) rather than silently disabling all
    /// checks after the first one.
    ///
    /// # Errors
    ///
    /// Returns `DiskSpaceExhausted` when a real check is performed and it
    /// fails (see [`Self::check_now`]).
    pub fn check(&self) -> Result<(), DiskSpaceExhausted> {
        if self.config.hard_limit_mb == 0 {
            return Ok(());
        }
        let count = self.operation_counter.fetch_add(1, Ordering::Relaxed);
        let interval = self.config.check_interval.max(1) as u64;
        if !count.is_multiple_of(interval) {
            return Ok(());
        }
        self.check_now()
    }

    /// Queries the filesystem immediately and compares the free space with
    /// `hard_limit_mb + reserve_buffer_mb`.
    ///
    /// Records the observed free space, sets the sticky hard-limit flag on
    /// failure and bumps the soft-warning counter when the free space is
    /// below `soft_limit_mb`. Does nothing when the hard limit is 0.
    ///
    /// # Errors
    ///
    /// Returns `DiskSpaceExhausted` when less than the required space is free.
    pub fn check_now(&self) -> Result<(), DiskSpaceExhausted> {
        if self.config.hard_limit_mb == 0 {
            return Ok(());
        }
        let available_mb = self.available_mb_or_unlimited();
        self.last_available_mb
            .store(available_mb, Ordering::Relaxed);
        // Saturate so that extreme limits cannot wrap around to a tiny requirement.
        let required_mb = self
            .config
            .hard_limit_mb
            .saturating_add(self.config.reserve_buffer_mb);
        if available_mb < required_mb {
            self.hard_limit_exceeded.store(true, Ordering::Relaxed);
            return Err(DiskSpaceExhausted {
                available_mb,
                required_mb,
                is_soft_limit: false,
                message: format!(
                    "Disk space exhausted: only {available_mb} MB available, need at least {required_mb} MB. \
                     Free up disk space or reduce output volume."
                ),
            });
        }
        if available_mb < self.config.soft_limit_mb {
            self.soft_warnings_count.fetch_add(1, Ordering::Relaxed);
        }
        Ok(())
    }

    /// Checks that a write of roughly `estimated_bytes` still leaves
    /// `hard_limit_mb + reserve_buffer_mb` free afterwards.
    ///
    /// The estimate is rounded *up* to whole megabytes so that partial
    /// megabytes are never ignored. Unlike [`Self::check_now`] this does not
    /// touch any counters or flags. Does nothing when the hard limit is 0.
    ///
    /// # Errors
    ///
    /// Returns `DiskSpaceExhausted` when the free space is insufficient.
    pub fn check_before_write(&self, estimated_bytes: u64) -> Result<(), DiskSpaceExhausted> {
        if self.config.hard_limit_mb == 0 {
            return Ok(());
        }
        let available_mb = self.available_mb_or_unlimited();
        let estimated_mb = usize::try_from(estimated_bytes.div_ceil(Self::BYTES_PER_MB))
            .unwrap_or(usize::MAX);
        let required_mb = self
            .config
            .hard_limit_mb
            .saturating_add(self.config.reserve_buffer_mb)
            .saturating_add(estimated_mb);
        if available_mb < required_mb {
            return Err(DiskSpaceExhausted {
                available_mb,
                required_mb,
                is_soft_limit: false,
                message: format!(
                    "Insufficient disk space for write: {} MB available, need {} MB \
                     (estimated write: {} MB, reserve: {} MB).",
                    available_mb, required_mb, estimated_mb, self.config.reserve_buffer_mb
                ),
            });
        }
        Ok(())
    }

    /// Adds `bytes` to the running estimate of bytes written.
    pub fn record_write(&self, bytes: u64) {
        self.bytes_written_estimate
            .fetch_add(bytes, Ordering::Relaxed);
    }

    /// Returns a snapshot of current filesystem usage and guard counters.
    ///
    /// Byte figures are 0 when the filesystem cannot be queried.
    /// `checks_performed` reports the number of counted `check()` calls, not
    /// the number of filesystem queries.
    pub fn stats(&self) -> DiskStats {
        let (total, available) = get_disk_space(self.monitored_path()).unwrap_or((0, 0));
        DiskStats {
            total_bytes: total,
            available_bytes: available,
            used_bytes: total.saturating_sub(available),
            checks_performed: self.operation_counter.load(Ordering::Relaxed),
            soft_limit_warnings: self.soft_warnings_count.load(Ordering::Relaxed),
            hard_limit_exceeded: self.hard_limit_exceeded.load(Ordering::Relaxed),
            estimated_bytes_written: self.bytes_written_estimate.load(Ordering::Relaxed),
        }
    }

    /// Free space in MB on the monitored filesystem, or 0 when it cannot be
    /// determined.
    pub fn available_space_mb(&self) -> usize {
        get_available_space_mb(self.monitored_path()).unwrap_or(0)
    }

    /// Whether free-space queries work at all, probed on the current directory.
    pub fn is_available() -> bool {
        get_available_space_mb(Path::new(".")).is_some()
    }

    /// Resets the operation counter, soft-warning counter, hard-limit flag
    /// and written-bytes estimate. The last observed free space is left
    /// untouched.
    pub fn reset_stats(&self) {
        self.operation_counter.store(0, Ordering::Relaxed);
        self.soft_warnings_count.store(0, Ordering::Relaxed);
        self.hard_limit_exceeded.store(false, Ordering::Relaxed);
        self.bytes_written_estimate.store(0, Ordering::Relaxed);
    }
}
impl Default for DiskSpaceGuard {
fn default() -> Self {
Self::default_guard()
}
}
/// Returns the free space, in megabytes, available to unprivileged users on
/// the filesystem containing `path` (unix implementation based on
/// `statvfs(3)`).
///
/// Returns `None` when `path` contains an interior NUL byte or when the
/// `statvfs` call fails (for example because the path does not exist).
#[cfg(unix)]
// The casts below are redundant on targets where these fields are already `u64`.
#[allow(clippy::unnecessary_cast)]
pub fn get_available_space_mb(path: &Path) -> Option<usize> {
    use std::ffi::CString;
    use std::mem::MaybeUninit;
    use std::os::unix::ffi::OsStrExt;

    const BYTES_PER_MB: u64 = 1024 * 1024;

    let path_cstr = CString::new(path.as_os_str().as_bytes()).ok()?;

    // Use the platform's real `statvfs` layout. A hand-written prefix struct
    // is only correct where the field order and total size happen to match,
    // and lets the kernel write past the buffer where they do not.
    let mut buf = MaybeUninit::<libc::statvfs>::zeroed();
    // SAFETY: `path_cstr` is a valid NUL-terminated string that outlives the
    // call, and `buf` is writable storage of exactly the size `statvfs` expects.
    let rc = unsafe { libc::statvfs(path_cstr.as_ptr(), buf.as_mut_ptr()) };
    if rc != 0 {
        return None;
    }
    // SAFETY: the buffer was zero-initialised and the successful call has
    // filled it in.
    let stat = unsafe { buf.assume_init() };

    // Block counts are expressed in units of `f_frsize`. Defensive: fall back
    // to `f_bsize` if a zero fragment size is reported, instead of computing
    // zero bytes of free space.
    let block_size = if stat.f_frsize != 0 {
        stat.f_frsize as u64
    } else {
        stat.f_bsize as u64
    };
    let available_bytes = (stat.f_bavail as u64).saturating_mul(block_size);
    // Clamp rather than truncate if the figure does not fit in `usize`.
    Some(usize::try_from(available_bytes / BYTES_PER_MB).unwrap_or(usize::MAX))
}
/// Returns `(total_bytes, available_bytes)` for the filesystem containing
/// `path` (unix implementation based on `statvfs(3)`). `available_bytes`
/// counts the space available to unprivileged users.
///
/// Returns `None` when `path` contains an interior NUL byte or when the
/// `statvfs` call fails (for example because the path does not exist).
#[cfg(unix)]
// The casts below are redundant on targets where these fields are already `u64`.
#[allow(clippy::unnecessary_cast)]
pub fn get_disk_space(path: &Path) -> Option<(u64, u64)> {
    use std::ffi::CString;
    use std::mem::MaybeUninit;
    use std::os::unix::ffi::OsStrExt;

    let path_cstr = CString::new(path.as_os_str().as_bytes()).ok()?;

    // Use the platform's real `statvfs` layout. A hand-written prefix struct
    // is only correct where the field order and total size happen to match,
    // and lets the kernel write past the buffer where they do not.
    let mut buf = MaybeUninit::<libc::statvfs>::zeroed();
    // SAFETY: `path_cstr` is a valid NUL-terminated string that outlives the
    // call, and `buf` is writable storage of exactly the size `statvfs` expects.
    let rc = unsafe { libc::statvfs(path_cstr.as_ptr(), buf.as_mut_ptr()) };
    if rc != 0 {
        return None;
    }
    // SAFETY: the buffer was zero-initialised and the successful call has
    // filled it in.
    let stat = unsafe { buf.assume_init() };

    // Block counts are expressed in units of `f_frsize`. Defensive: fall back
    // to `f_bsize` if a zero fragment size is reported, instead of computing
    // zero-sized totals.
    let block_size = if stat.f_frsize != 0 {
        stat.f_frsize as u64
    } else {
        stat.f_bsize as u64
    };
    let total = (stat.f_blocks as u64).saturating_mul(block_size);
    let available = (stat.f_bavail as u64).saturating_mul(block_size);
    Some((total, available))
}
/// Returns the free space, in megabytes, available to the calling user on
/// the volume containing `path` (Windows implementation based on
/// `GetDiskFreeSpaceExW`). Returns `None` when the call fails.
#[cfg(target_os = "windows")]
pub fn get_available_space_mb(path: &Path) -> Option<usize> {
    use std::os::windows::ffi::OsStrExt;

    #[link(name = "kernel32")]
    extern "system" {
        fn GetDiskFreeSpaceExW(
            lpDirectoryName: *const u16,
            lpFreeBytesAvailableToCaller: *mut u64,
            lpTotalNumberOfBytes: *mut u64,
            lpTotalNumberOfFreeBytes: *mut u64,
        ) -> i32;
    }

    const BYTES_PER_MB: u64 = 1024 * 1024;

    // The wide-character API expects a NUL-terminated UTF-16 string.
    let mut dir_utf16: Vec<u16> = path.as_os_str().encode_wide().collect();
    dir_utf16.push(0);

    let (mut caller_free, mut volume_total, mut volume_free) = (0u64, 0u64, 0u64);
    // SAFETY: `dir_utf16` is NUL-terminated and alive for the whole call, and
    // the three out-pointers refer to distinct, writable `u64` locals.
    let succeeded = unsafe {
        GetDiskFreeSpaceExW(
            dir_utf16.as_ptr(),
            &mut caller_free,
            &mut volume_total,
            &mut volume_free,
        )
    } != 0;

    if !succeeded {
        return None;
    }
    Some((caller_free / BYTES_PER_MB) as usize)
}
/// Returns `(total_bytes, available_bytes)` for the volume containing `path`
/// (Windows implementation based on `GetDiskFreeSpaceExW`), where
/// `available_bytes` is the space available to the calling user. Returns
/// `None` when the call fails.
#[cfg(target_os = "windows")]
pub fn get_disk_space(path: &Path) -> Option<(u64, u64)> {
    use std::os::windows::ffi::OsStrExt;

    #[link(name = "kernel32")]
    extern "system" {
        fn GetDiskFreeSpaceExW(
            lpDirectoryName: *const u16,
            lpFreeBytesAvailableToCaller: *mut u64,
            lpTotalNumberOfBytes: *mut u64,
            lpTotalNumberOfFreeBytes: *mut u64,
        ) -> i32;
    }

    // The wide-character API expects a NUL-terminated UTF-16 string.
    let mut dir_utf16: Vec<u16> = path.as_os_str().encode_wide().collect();
    dir_utf16.push(0);

    let (mut caller_free, mut volume_total, mut volume_free) = (0u64, 0u64, 0u64);
    // SAFETY: `dir_utf16` is NUL-terminated and alive for the whole call, and
    // the three out-pointers refer to distinct, writable `u64` locals.
    let succeeded = unsafe {
        GetDiskFreeSpaceExW(
            dir_utf16.as_ptr(),
            &mut caller_free,
            &mut volume_total,
            &mut volume_free,
        )
    } != 0;

    if !succeeded {
        return None;
    }
    Some((volume_total, caller_free))
}
/// Fallback for targets that are neither unix nor Windows: free space cannot
/// be queried, so this always returns `None`.
#[cfg(not(any(unix, target_os = "windows")))]
pub fn get_available_space_mb(_path: &Path) -> Option<usize> {
None
}
/// Fallback for targets that are neither unix nor Windows: disk totals
/// cannot be queried, so this always returns `None`.
#[cfg(not(any(unix, target_os = "windows")))]
pub fn get_disk_space(_path: &Path) -> Option<(u64, u64)> {
None
}
/// Estimates, in whole megabytes (rounded up), how much disk space writing
/// `num_entries` entries in each of `formats` will need.
///
/// Uses a fixed per-entry size for each format (CSV 400 bytes, JSON 800
/// bytes, Parquet 200 bytes), divides the total by 5 when `compression` is
/// enabled and then adds 30% overhead.
///
/// All intermediate arithmetic is done in saturating `u64`, so very large
/// entry counts (or 32-bit targets) yield a huge estimate instead of
/// overflowing.
pub fn estimate_output_size_mb(
    num_entries: usize,
    formats: &[OutputFormat],
    compression: bool,
) -> usize {
    const BYTES_PER_MB: u64 = 1024 * 1024;

    let bytes_per_entry = |format: &OutputFormat| -> u64 {
        match format {
            OutputFormat::Csv => 400,
            OutputFormat::Json => 800,
            OutputFormat::Parquet => 200,
        }
    };

    let entries = u64::try_from(num_entries).unwrap_or(u64::MAX);
    let raw_bytes = formats
        .iter()
        .map(|format| entries.saturating_mul(bytes_per_entry(format)))
        .fold(0u64, u64::saturating_add);

    let payload_bytes = if compression {
        raw_bytes / 5
    } else {
        raw_bytes
    };

    // Float-to-integer `as` casts saturate, so this cannot overflow either.
    let with_overhead = (payload_bytes as f64 * 1.3) as u64;
    usize::try_from(with_overhead.div_ceil(BYTES_PER_MB)).unwrap_or(usize::MAX)
}
/// Output file formats recognised by `estimate_output_size_mb`, which
/// assigns each one a fixed per-entry size for its estimate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
/// CSV output; estimated at 400 bytes per entry.
Csv,
/// JSON output; estimated at 800 bytes per entry.
Json,
/// Parquet output; estimated at 200 bytes per entry.
Parquet,
}
/// Pre-flight check: verifies that the filesystem containing `path` has room
/// for the estimated output plus `min_free_mb` of remaining free space.
///
/// The output size is estimated with `estimate_output_size_mb` from
/// `planned_entries`, `formats` and `compression`.
///
/// # Errors
///
/// Returns a human-readable message when the available space cannot be
/// determined, or when it is smaller than the estimate plus `min_free_mb`.
pub fn check_sufficient_disk_space(
    path: &Path,
    planned_entries: usize,
    formats: &[OutputFormat],
    compression: bool,
    min_free_mb: usize,
) -> Result<(), String> {
    let estimated = estimate_output_size_mb(planned_entries, formats, compression);
    let available = get_available_space_mb(path)
        .ok_or_else(|| "Unable to determine available disk space on this platform".to_string())?;
    // Saturate: a wrapped sum would look like a tiny requirement and let the
    // check pass when it must fail.
    let required = estimated.saturating_add(min_free_mb);
    if available < required {
        Err(format!(
            "Insufficient disk space: {available} MB available, need {required} MB \
             (estimated output: {estimated} MB, minimum free: {min_free_mb} MB). \
             Reduce output volume or free up disk space."
        ))
    } else {
        Ok(())
    }
}
// Unit tests for the disk-space guard. Several of them query the real
// filesystem of the current directory, so their outcome depends on the host.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
// `with_min_free` sets the hard limit and derives the soft limit as 5x.
#[test]
fn test_disk_guard_creation() {
let guard = DiskSpaceGuard::with_min_free(100);
assert_eq!(guard.config.hard_limit_mb, 100);
assert_eq!(guard.config.soft_limit_mb, 500);
}
// A hard limit of 0 disables the guard: every check succeeds.
#[test]
fn test_disk_guard_disabled() {
let config = DiskSpaceGuardConfig {
hard_limit_mb: 0,
..Default::default()
};
let guard = DiskSpaceGuard::new(config);
assert!(guard.check().is_ok());
assert!(guard.check_now().is_ok());
}
// 1000 CSV+JSON entries should need a small, non-zero number of MB, and
// enabling compression should shrink the estimate.
#[test]
fn test_output_size_estimation() {
let formats = vec![OutputFormat::Csv, OutputFormat::Json];
let est = estimate_output_size_mb(1000, &formats, false);
assert!(est > 0);
assert!(est < 10);
let est_compressed = estimate_output_size_mb(1000, &formats, true);
assert!(est_compressed < est); }
// Counters are updated by `check` and `record_write`. The result of each
// `check` is ignored because it depends on the host's free space.
#[test]
fn test_stats_tracking() {
let guard = DiskSpaceGuard::with_min_free(1);
for _ in 0..1000 {
let _ = guard.check();
}
guard.record_write(1024 * 1024);
let stats = guard.stats();
assert!(stats.checks_performed > 0);
assert_eq!(stats.estimated_bytes_written, 1024 * 1024);
}
// Free-space queries are expected to work on unix; on other targets this
// test asserts nothing.
#[test]
fn test_is_available() {
#[cfg(unix)]
assert!(DiskSpaceGuard::is_available());
}
// Host-dependent: passes only when the current directory's filesystem has
// at least the 1 MB hard limit plus the default 50 MB reserve (plus the
// estimated write) free, or when free space cannot be determined.
#[test]
fn test_check_before_write() {
let guard = DiskSpaceGuard::with_min_free(1);
let result = guard.check_before_write(1024);
assert!(result.is_ok());
}
}