1use std::path::{Path, PathBuf};
7use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
8use std::sync::Arc;
9
/// Snapshot of disk usage for the monitored path plus the guard's own counters.
///
/// Produced by `DiskSpaceGuard::stats`.
#[derive(Debug, Clone, Default)]
pub struct DiskStats {
    /// Total size of the monitored filesystem in bytes (0 when the query fails).
    pub total_bytes: u64,
    /// Bytes reported as available by the platform query (0 when the query fails).
    pub available_bytes: u64,
    /// `total_bytes - available_bytes`, saturating at zero.
    pub used_bytes: u64,
    /// Value of the guard's operation counter, which is bumped by every
    /// `DiskSpaceGuard::check` call made while the guard is enabled.
    pub checks_performed: u64,
    /// Number of checks that found the available space below the soft limit.
    pub soft_limit_warnings: u64,
    /// Whether a check has found the available space below the hard limit
    /// since the counters were last reset.
    pub hard_limit_exceeded: bool,
    /// Sum of all byte counts passed to `DiskSpaceGuard::record_write`.
    pub estimated_bytes_written: u64,
}
28
/// Tunable thresholds for a `DiskSpaceGuard`.
#[derive(Debug, Clone)]
pub struct DiskSpaceGuardConfig {
    /// Minimum free space in MB; a value of 0 disables the guard's checks.
    pub hard_limit_mb: usize,
    /// Free-space level in MB below which a check is counted as a soft warning.
    pub soft_limit_mb: usize,
    /// `DiskSpaceGuard::check` only queries the filesystem on calls whose
    /// running count is a multiple of this value.
    pub check_interval: usize,
    /// Extra head-room in MB that is added on top of `hard_limit_mb`.
    pub reserve_buffer_mb: usize,
    /// Path whose filesystem is queried; the guard falls back to `"."` when unset.
    pub monitor_path: Option<PathBuf>,
}

impl Default for DiskSpaceGuardConfig {
    /// 100 MB hard limit, 500 MB soft limit, a real check every 500 operations,
    /// a 50 MB reserve and no explicit monitor path.
    fn default() -> Self {
        Self {
            hard_limit_mb: 100,
            soft_limit_mb: 500,
            check_interval: 500,
            reserve_buffer_mb: 50,
            monitor_path: None,
        }
    }
}

impl DiskSpaceGuardConfig {
    /// Builds a configuration with the given hard limit and a soft limit of
    /// five times that value; every other field keeps its default.
    ///
    /// The multiplication saturates at `usize::MAX` rather than overflowing.
    pub fn with_min_free_mb(hard_limit_mb: usize) -> Self {
        Self {
            hard_limit_mb,
            soft_limit_mb: hard_limit_mb.saturating_mul(5),
            ..Default::default()
        }
    }

    /// Sets the path whose filesystem should be monitored.
    pub fn with_path<P: AsRef<Path>>(mut self, path: P) -> Self {
        self.monitor_path = Some(path.as_ref().to_path_buf());
        self
    }

    /// Sets the reserve buffer (in MB) that is added on top of the hard limit.
    pub fn with_reserve(mut self, reserve_mb: usize) -> Self {
        self.reserve_buffer_mb = reserve_mb;
        self
    }
}
78
/// Error describing a failed disk-space check.
#[derive(Debug, Clone)]
pub struct DiskSpaceExhausted {
    /// Free space in MB observed when the check failed.
    pub available_mb: usize,
    /// Free space in MB the check demanded.
    pub required_mb: usize,
    /// Marks a soft-limit violation; the guard's checks in this file always
    /// set it to `false`.
    pub is_soft_limit: bool,
    /// Human-readable description; this is exactly what `Display` prints.
    pub message: String,
}

impl std::fmt::Display for DiskSpaceExhausted {
    /// Writes `message` verbatim, ignoring any width/fill flags of the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

impl std::error::Error for DiskSpaceExhausted {}
95
96#[derive(Debug)]
98pub struct DiskSpaceGuard {
99 config: DiskSpaceGuardConfig,
100 operation_counter: AtomicU64,
101 soft_warnings_count: AtomicU64,
102 hard_limit_exceeded: AtomicBool,
103 bytes_written_estimate: AtomicU64,
104 last_available_mb: AtomicUsize,
105}
106
107impl DiskSpaceGuard {
108 pub fn new(config: DiskSpaceGuardConfig) -> Self {
110 Self {
111 config,
112 operation_counter: AtomicU64::new(0),
113 soft_warnings_count: AtomicU64::new(0),
114 hard_limit_exceeded: AtomicBool::new(false),
115 bytes_written_estimate: AtomicU64::new(0),
116 last_available_mb: AtomicUsize::new(0),
117 }
118 }
119
120 pub fn default_guard() -> Self {
122 Self::new(DiskSpaceGuardConfig::default())
123 }
124
125 pub fn with_min_free(min_free_mb: usize) -> Self {
127 Self::new(DiskSpaceGuardConfig::with_min_free_mb(min_free_mb))
128 }
129
130 pub fn shared(config: DiskSpaceGuardConfig) -> Arc<Self> {
132 Arc::new(Self::new(config))
133 }
134
135 pub fn check(&self) -> Result<(), DiskSpaceExhausted> {
141 if self.config.hard_limit_mb == 0 {
143 return Ok(());
144 }
145
146 let count = self.operation_counter.fetch_add(1, Ordering::Relaxed);
147
148 if !count.is_multiple_of(self.config.check_interval as u64) {
150 return Ok(());
151 }
152
153 self.check_now()
154 }
155
156 pub fn check_now(&self) -> Result<(), DiskSpaceExhausted> {
158 if self.config.hard_limit_mb == 0 {
159 return Ok(());
160 }
161
162 let path = self
163 .config
164 .monitor_path
165 .as_deref()
166 .unwrap_or(Path::new("."));
167
168 let available_mb = get_available_space_mb(path).unwrap_or(usize::MAX);
169 self.last_available_mb
170 .store(available_mb, Ordering::Relaxed);
171
172 let required_mb = self.config.hard_limit_mb + self.config.reserve_buffer_mb;
174 if available_mb < required_mb {
175 self.hard_limit_exceeded.store(true, Ordering::Relaxed);
176 return Err(DiskSpaceExhausted {
177 available_mb,
178 required_mb,
179 is_soft_limit: false,
180 message: format!(
181 "Disk space exhausted: only {available_mb} MB available, need at least {required_mb} MB. \
182 Free up disk space or reduce output volume."
183 ),
184 });
185 }
186
187 if available_mb < self.config.soft_limit_mb {
189 self.soft_warnings_count.fetch_add(1, Ordering::Relaxed);
190 }
192
193 Ok(())
194 }
195
196 pub fn check_before_write(&self, estimated_bytes: u64) -> Result<(), DiskSpaceExhausted> {
198 if self.config.hard_limit_mb == 0 {
199 return Ok(());
200 }
201
202 let path = self
203 .config
204 .monitor_path
205 .as_deref()
206 .unwrap_or(Path::new("."));
207
208 let available_mb = get_available_space_mb(path).unwrap_or(usize::MAX);
209 let estimated_mb = (estimated_bytes / (1024 * 1024)) as usize;
210 let required_mb = self.config.hard_limit_mb + self.config.reserve_buffer_mb + estimated_mb;
211
212 if available_mb < required_mb {
213 return Err(DiskSpaceExhausted {
214 available_mb,
215 required_mb,
216 is_soft_limit: false,
217 message: format!(
218 "Insufficient disk space for write: {} MB available, need {} MB \
219 (estimated write: {} MB, reserve: {} MB).",
220 available_mb, required_mb, estimated_mb, self.config.reserve_buffer_mb
221 ),
222 });
223 }
224
225 Ok(())
226 }
227
228 pub fn record_write(&self, bytes: u64) {
230 self.bytes_written_estimate
231 .fetch_add(bytes, Ordering::Relaxed);
232 }
233
234 pub fn stats(&self) -> DiskStats {
236 let path = self
237 .config
238 .monitor_path
239 .as_deref()
240 .unwrap_or(Path::new("."));
241
242 let (total, available) = get_disk_space(path).unwrap_or((0, 0));
243
244 DiskStats {
245 total_bytes: total,
246 available_bytes: available,
247 used_bytes: total.saturating_sub(available),
248 checks_performed: self.operation_counter.load(Ordering::Relaxed),
249 soft_limit_warnings: self.soft_warnings_count.load(Ordering::Relaxed),
250 hard_limit_exceeded: self.hard_limit_exceeded.load(Ordering::Relaxed),
251 estimated_bytes_written: self.bytes_written_estimate.load(Ordering::Relaxed),
252 }
253 }
254
255 pub fn available_space_mb(&self) -> usize {
257 let path = self
258 .config
259 .monitor_path
260 .as_deref()
261 .unwrap_or(Path::new("."));
262 get_available_space_mb(path).unwrap_or(0)
263 }
264
265 pub fn is_available() -> bool {
267 get_available_space_mb(Path::new(".")).is_some()
268 }
269
270 pub fn reset_stats(&self) {
272 self.operation_counter.store(0, Ordering::Relaxed);
273 self.soft_warnings_count.store(0, Ordering::Relaxed);
274 self.hard_limit_exceeded.store(false, Ordering::Relaxed);
275 self.bytes_written_estimate.store(0, Ordering::Relaxed);
276 }
277}
278
279impl Default for DiskSpaceGuard {
280 fn default() -> Self {
281 Self::default_guard()
282 }
283}
284
285#[cfg(unix)]
287#[allow(clippy::unnecessary_cast)] pub fn get_available_space_mb(path: &Path) -> Option<usize> {
289 use std::ffi::CString;
290 use std::os::unix::ffi::OsStrExt;
291
292 let path_cstr = CString::new(path.as_os_str().as_bytes()).ok()?;
293
294 #[repr(C)]
295 struct Statvfs {
296 f_bsize: libc::c_ulong,
297 f_frsize: libc::c_ulong,
298 f_blocks: libc::fsblkcnt_t,
299 f_bfree: libc::fsblkcnt_t,
300 f_bavail: libc::fsblkcnt_t,
301 _rest: [u8; 128],
303 }
304
305 let mut stat: Statvfs = unsafe { std::mem::zeroed() };
306
307 let result = unsafe { libc::statvfs(path_cstr.as_ptr(), &mut stat as *mut _ as *mut _) };
308
309 if result == 0 {
310 let block_size = stat.f_frsize as u64;
311 let available_blocks = stat.f_bavail as u64;
312 let available_bytes = available_blocks * block_size;
313 Some((available_bytes / (1024 * 1024)) as usize)
314 } else {
315 None
316 }
317}
318
319#[cfg(unix)]
321#[allow(clippy::unnecessary_cast)] pub fn get_disk_space(path: &Path) -> Option<(u64, u64)> {
323 use std::ffi::CString;
324 use std::os::unix::ffi::OsStrExt;
325
326 let path_cstr = CString::new(path.as_os_str().as_bytes()).ok()?;
327
328 #[repr(C)]
329 struct Statvfs {
330 f_bsize: libc::c_ulong,
331 f_frsize: libc::c_ulong,
332 f_blocks: libc::fsblkcnt_t,
333 f_bfree: libc::fsblkcnt_t,
334 f_bavail: libc::fsblkcnt_t,
335 _rest: [u8; 128],
336 }
337
338 let mut stat: Statvfs = unsafe { std::mem::zeroed() };
339
340 let result = unsafe { libc::statvfs(path_cstr.as_ptr(), &mut stat as *mut _ as *mut _) };
341
342 if result == 0 {
343 let block_size = stat.f_frsize as u64;
344 let total = stat.f_blocks as u64 * block_size;
345 let available = stat.f_bavail as u64 * block_size;
346 Some((total, available))
347 } else {
348 None
349 }
350}
351
/// Returns the free space available to the caller on the volume containing
/// `path`, in whole MiB (rounded down), or `None` when `GetDiskFreeSpaceExW`
/// reports failure.
#[cfg(target_os = "windows")]
pub fn get_available_space_mb(path: &Path) -> Option<usize> {
    use std::os::windows::ffi::OsStrExt;

    #[link(name = "kernel32")]
    extern "system" {
        fn GetDiskFreeSpaceExW(
            lpDirectoryName: *const u16,
            lpFreeBytesAvailableToCaller: *mut u64,
            lpTotalNumberOfBytes: *mut u64,
            lpTotalNumberOfFreeBytes: *mut u64,
        ) -> i32;
    }

    const BYTES_PER_MIB: u64 = 1024 * 1024;

    // The wide-character API expects a NUL-terminated UTF-16 string.
    let mut wide: Vec<u16> = path.as_os_str().encode_wide().collect();
    wide.push(0);

    let mut caller_free: u64 = 0;
    let mut volume_total: u64 = 0;
    let mut volume_free: u64 = 0;

    // SAFETY: `wide` is NUL-terminated and outlives the call; the three
    // out-pointers refer to live, writable `u64` locals.
    let status = unsafe {
        GetDiskFreeSpaceExW(
            wide.as_ptr(),
            &mut caller_free,
            &mut volume_total,
            &mut volume_free,
        )
    };

    // A zero return value signals failure.
    if status == 0 {
        return None;
    }

    Some((caller_free / BYTES_PER_MIB) as usize)
}
393
/// Returns `(total_bytes, bytes_available_to_caller)` for the volume
/// containing `path`, or `None` when `GetDiskFreeSpaceExW` reports failure.
#[cfg(target_os = "windows")]
pub fn get_disk_space(path: &Path) -> Option<(u64, u64)> {
    use std::os::windows::ffi::OsStrExt;

    #[link(name = "kernel32")]
    extern "system" {
        fn GetDiskFreeSpaceExW(
            lpDirectoryName: *const u16,
            lpFreeBytesAvailableToCaller: *mut u64,
            lpTotalNumberOfBytes: *mut u64,
            lpTotalNumberOfFreeBytes: *mut u64,
        ) -> i32;
    }

    // The wide-character API expects a NUL-terminated UTF-16 string.
    let mut wide: Vec<u16> = path.as_os_str().encode_wide().collect();
    wide.push(0);

    let mut caller_free: u64 = 0;
    let mut volume_total: u64 = 0;
    let mut volume_free: u64 = 0;

    // SAFETY: `wide` is NUL-terminated and outlives the call; the three
    // out-pointers refer to live, writable `u64` locals.
    let status = unsafe {
        GetDiskFreeSpaceExW(
            wide.as_ptr(),
            &mut caller_free,
            &mut volume_total,
            &mut volume_free,
        )
    };

    // A zero return value signals failure.
    if status == 0 {
        return None;
    }

    Some((volume_total, caller_free))
}
434
/// Fallback for targets that are neither Unix nor Windows: no free-space
/// query is implemented, so the result is always `None`.
#[cfg(not(any(unix, target_os = "windows")))]
pub fn get_available_space_mb(_path: &Path) -> Option<usize> {
    // No platform query available on this target.
    None
}
440
/// Fallback for targets that are neither Unix nor Windows: no disk-usage
/// query is implemented, so the result is always `None`.
#[cfg(not(any(unix, target_os = "windows")))]
pub fn get_disk_space(_path: &Path) -> Option<(u64, u64)> {
    // No platform query available on this target.
    None
}
445
/// Estimates the disk space, in MiB (rounded up), needed to write
/// `num_entries` entries in every one of `formats`.
///
/// Each format contributes a fixed per-entry byte budget. When `compression`
/// is set the total is divided by 5, and 30% overhead is then added. All
/// arithmetic is exact, saturating integer math, so very large entry counts
/// cannot overflow or wrap around to a small estimate.
pub fn estimate_output_size_mb(
    num_entries: usize,
    formats: &[OutputFormat],
    compression: bool,
) -> usize {
    const BYTES_PER_MIB: u128 = 1024 * 1024;

    // Rough per-entry footprint of each format, in bytes.
    let bytes_per_entry = |format: OutputFormat| -> u128 {
        match format {
            OutputFormat::Csv => 400,
            OutputFormat::Json => 800,
            OutputFormat::Parquet => 200,
        }
    };

    let entries = num_entries as u128;
    let raw_bytes = formats.iter().fold(0u128, |sum, &format| {
        sum.saturating_add(entries.saturating_mul(bytes_per_entry(format)))
    });

    let stored_bytes = if compression {
        raw_bytes / 5
    } else {
        raw_bytes
    };

    // +30% overhead, truncated to whole bytes.
    let with_overhead = stored_bytes.saturating_mul(13) / 10;

    // Clamp before narrowing so the result saturates instead of truncating.
    with_overhead
        .div_ceil(BYTES_PER_MIB)
        .min(usize::MAX as u128) as usize
}

/// Output formats known to the size estimator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Budgeted at 400 bytes per entry.
    Csv,
    /// Budgeted at 800 bytes per entry.
    Json,
    /// Budgeted at 200 bytes per entry.
    Parquet,
}
485
486pub fn check_sufficient_disk_space(
488 path: &Path,
489 planned_entries: usize,
490 formats: &[OutputFormat],
491 compression: bool,
492 min_free_mb: usize,
493) -> Result<(), String> {
494 let estimated = estimate_output_size_mb(planned_entries, formats, compression);
495 let available = get_available_space_mb(path)
496 .ok_or_else(|| "Unable to determine available disk space on this platform".to_string())?;
497
498 let required = estimated + min_free_mb;
499
500 if available < required {
501 Err(format!(
502 "Insufficient disk space: {available} MB available, need {required} MB \
503 (estimated output: {estimated} MB, minimum free: {min_free_mb} MB). \
504 Reduce output volume or free up disk space."
505 ))
506 } else {
507 Ok(())
508 }
509}
510
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// `with_min_free` keeps the hard limit and derives a 5x soft limit.
    #[test]
    fn test_disk_guard_creation() {
        let guard = DiskSpaceGuard::with_min_free(100);
        let limits = (guard.config.hard_limit_mb, guard.config.soft_limit_mb);
        assert_eq!(limits.0, 100);
        assert_eq!(limits.1, 500);
    }

    /// A hard limit of 0 disables both the periodic and the immediate check.
    #[test]
    fn test_disk_guard_disabled() {
        let guard = DiskSpaceGuard::new(DiskSpaceGuardConfig {
            hard_limit_mb: 0,
            ..DiskSpaceGuardConfig::default()
        });
        assert!(guard.check().is_ok());
        assert!(guard.check_now().is_ok());
    }

    /// Estimates are small but non-zero, and compression shrinks them.
    #[test]
    fn test_output_size_estimation() {
        let formats = [OutputFormat::Csv, OutputFormat::Json];

        let plain = estimate_output_size_mb(1000, &formats, false);
        assert!(plain > 0);
        assert!(plain < 10);

        let compressed = estimate_output_size_mb(1000, &formats, true);
        assert!(compressed < plain);
    }

    /// Counters reflect the checks and writes that were recorded.
    #[test]
    fn test_stats_tracking() {
        let guard = DiskSpaceGuard::with_min_free(1);

        (0..1000).for_each(|_| {
            let _ = guard.check();
        });
        guard.record_write(1024 * 1024);

        let snapshot = guard.stats();
        assert!(snapshot.checks_performed > 0);
        assert_eq!(snapshot.estimated_bytes_written, 1024 * 1024);
    }

    /// The free-space query is expected to work on Unix targets.
    #[test]
    fn test_is_available() {
        if cfg!(unix) {
            assert!(DiskSpaceGuard::is_available());
        }
    }

    /// A tiny write passes as long as the test machine has a little free space.
    #[test]
    fn test_check_before_write() {
        let guard = DiskSpaceGuard::with_min_free(1);
        let outcome = guard.check_before_write(1024);
        assert!(outcome.is_ok());
    }
}