1use std::path::{Path, PathBuf};
7use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
8use std::sync::Arc;
9
/// Snapshot of filesystem usage combined with the counters kept by a
/// [`DiskSpaceGuard`].
#[derive(Clone, Debug, Default)]
pub struct DiskStats {
    /// Total size of the monitored filesystem, in bytes.
    pub total_bytes: u64,
    /// Bytes currently available on the monitored filesystem.
    pub available_bytes: u64,
    /// `total_bytes` minus `available_bytes` (never below zero).
    pub used_bytes: u64,
    /// Value of the guard's operation counter when the snapshot was taken.
    pub checks_performed: u64,
    /// How many probes found free space below the soft limit.
    pub soft_limit_warnings: u64,
    /// Whether a probe has found free space below the hard limit.
    pub hard_limit_exceeded: bool,
    /// Sum of the byte counts reported through `record_write`.
    pub estimated_bytes_written: u64,
}
28
/// Thresholds and probe settings used by [`DiskSpaceGuard`].
#[derive(Debug, Clone)]
pub struct DiskSpaceGuardConfig {
    /// Minimum free space in MB; checks fail below it. `0` disables the guard.
    pub hard_limit_mb: usize,
    /// Free-space level in MB below which a probe counts a soft warning.
    pub soft_limit_mb: usize,
    /// Number of `check` calls between two real filesystem probes.
    pub check_interval: usize,
    /// Extra headroom in MB that is added on top of the hard limit.
    pub reserve_buffer_mb: usize,
    /// Path whose filesystem is probed; `None` falls back to `"."`.
    pub monitor_path: Option<PathBuf>,
}

impl Default for DiskSpaceGuardConfig {
    /// Defaults: 100 MB hard limit, 500 MB soft limit, a probe every 500
    /// checks, 50 MB reserve, and no explicit monitor path.
    fn default() -> Self {
        Self {
            hard_limit_mb: 100,
            soft_limit_mb: 500,
            check_interval: 500,
            reserve_buffer_mb: 50,
            monitor_path: None,
        }
    }
}

impl DiskSpaceGuardConfig {
    /// Builds a config with the given hard limit and a soft limit of five
    /// times that value; every other field keeps its default.
    pub fn with_min_free_mb(hard_limit_mb: usize) -> Self {
        Self {
            hard_limit_mb,
            // Saturate instead of overflowing (debug panic / release wrap)
            // when a very large hard limit is requested.
            soft_limit_mb: hard_limit_mb.saturating_mul(5),
            ..Default::default()
        }
    }

    /// Sets the path whose filesystem is monitored and returns the config.
    pub fn with_path<P: AsRef<Path>>(mut self, path: P) -> Self {
        self.monitor_path = Some(path.as_ref().to_path_buf());
        self
    }

    /// Sets the reserve buffer (in MB) and returns the config.
    pub fn with_reserve(mut self, reserve_mb: usize) -> Self {
        self.reserve_buffer_mb = reserve_mb;
        self
    }
}
78
/// Error describing a failed disk-space check.
#[derive(Clone, Debug)]
pub struct DiskSpaceExhausted {
    /// Free space, in MB, observed when the check failed.
    pub available_mb: usize,
    /// Free space, in MB, that the check required.
    pub required_mb: usize,
    /// Marks a soft-limit condition; hard-limit failures set this to `false`.
    pub is_soft_limit: bool,
    /// Human-readable description; this is what `Display` prints.
    pub message: String,
}

impl std::fmt::Display for DiskSpaceExhausted {
    /// Writes `message` verbatim, ignoring any width or fill in the format spec.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

// No underlying cause is tracked, so the default `Error` methods suffice.
impl std::error::Error for DiskSpaceExhausted {}
95
96#[derive(Debug)]
98pub struct DiskSpaceGuard {
99 config: DiskSpaceGuardConfig,
100 operation_counter: AtomicU64,
101 soft_warnings_count: AtomicU64,
102 hard_limit_exceeded: AtomicBool,
103 bytes_written_estimate: AtomicU64,
104 last_available_mb: AtomicUsize,
105}
106
107impl DiskSpaceGuard {
108 pub fn new(config: DiskSpaceGuardConfig) -> Self {
110 Self {
111 config,
112 operation_counter: AtomicU64::new(0),
113 soft_warnings_count: AtomicU64::new(0),
114 hard_limit_exceeded: AtomicBool::new(false),
115 bytes_written_estimate: AtomicU64::new(0),
116 last_available_mb: AtomicUsize::new(0),
117 }
118 }
119
120 pub fn default_guard() -> Self {
122 Self::new(DiskSpaceGuardConfig::default())
123 }
124
125 pub fn with_min_free(min_free_mb: usize) -> Self {
127 Self::new(DiskSpaceGuardConfig::with_min_free_mb(min_free_mb))
128 }
129
130 pub fn shared(config: DiskSpaceGuardConfig) -> Arc<Self> {
132 Arc::new(Self::new(config))
133 }
134
135 pub fn check(&self) -> Result<(), DiskSpaceExhausted> {
141 if self.config.hard_limit_mb == 0 {
143 return Ok(());
144 }
145
146 let count = self.operation_counter.fetch_add(1, Ordering::Relaxed);
147
148 if count % self.config.check_interval as u64 != 0 {
150 return Ok(());
151 }
152
153 self.check_now()
154 }
155
156 pub fn check_now(&self) -> Result<(), DiskSpaceExhausted> {
158 if self.config.hard_limit_mb == 0 {
159 return Ok(());
160 }
161
162 let path = self
163 .config
164 .monitor_path
165 .as_deref()
166 .unwrap_or(Path::new("."));
167
168 let available_mb = get_available_space_mb(path).unwrap_or(usize::MAX);
169 self.last_available_mb
170 .store(available_mb, Ordering::Relaxed);
171
172 let required_mb = self.config.hard_limit_mb + self.config.reserve_buffer_mb;
174 if available_mb < required_mb {
175 self.hard_limit_exceeded.store(true, Ordering::Relaxed);
176 return Err(DiskSpaceExhausted {
177 available_mb,
178 required_mb,
179 is_soft_limit: false,
180 message: format!(
181 "Disk space exhausted: only {} MB available, need at least {} MB. \
182 Free up disk space or reduce output volume.",
183 available_mb, required_mb
184 ),
185 });
186 }
187
188 if available_mb < self.config.soft_limit_mb {
190 self.soft_warnings_count.fetch_add(1, Ordering::Relaxed);
191 }
193
194 Ok(())
195 }
196
197 pub fn check_before_write(&self, estimated_bytes: u64) -> Result<(), DiskSpaceExhausted> {
199 if self.config.hard_limit_mb == 0 {
200 return Ok(());
201 }
202
203 let path = self
204 .config
205 .monitor_path
206 .as_deref()
207 .unwrap_or(Path::new("."));
208
209 let available_mb = get_available_space_mb(path).unwrap_or(usize::MAX);
210 let estimated_mb = (estimated_bytes / (1024 * 1024)) as usize;
211 let required_mb = self.config.hard_limit_mb + self.config.reserve_buffer_mb + estimated_mb;
212
213 if available_mb < required_mb {
214 return Err(DiskSpaceExhausted {
215 available_mb,
216 required_mb,
217 is_soft_limit: false,
218 message: format!(
219 "Insufficient disk space for write: {} MB available, need {} MB \
220 (estimated write: {} MB, reserve: {} MB).",
221 available_mb, required_mb, estimated_mb, self.config.reserve_buffer_mb
222 ),
223 });
224 }
225
226 Ok(())
227 }
228
229 pub fn record_write(&self, bytes: u64) {
231 self.bytes_written_estimate
232 .fetch_add(bytes, Ordering::Relaxed);
233 }
234
235 pub fn stats(&self) -> DiskStats {
237 let path = self
238 .config
239 .monitor_path
240 .as_deref()
241 .unwrap_or(Path::new("."));
242
243 let (total, available) = get_disk_space(path).unwrap_or((0, 0));
244
245 DiskStats {
246 total_bytes: total,
247 available_bytes: available,
248 used_bytes: total.saturating_sub(available),
249 checks_performed: self.operation_counter.load(Ordering::Relaxed),
250 soft_limit_warnings: self.soft_warnings_count.load(Ordering::Relaxed),
251 hard_limit_exceeded: self.hard_limit_exceeded.load(Ordering::Relaxed),
252 estimated_bytes_written: self.bytes_written_estimate.load(Ordering::Relaxed),
253 }
254 }
255
256 pub fn available_space_mb(&self) -> usize {
258 let path = self
259 .config
260 .monitor_path
261 .as_deref()
262 .unwrap_or(Path::new("."));
263 get_available_space_mb(path).unwrap_or(0)
264 }
265
266 pub fn is_available() -> bool {
268 get_available_space_mb(Path::new(".")).is_some()
269 }
270
271 pub fn reset_stats(&self) {
273 self.operation_counter.store(0, Ordering::Relaxed);
274 self.soft_warnings_count.store(0, Ordering::Relaxed);
275 self.hard_limit_exceeded.store(false, Ordering::Relaxed);
276 self.bytes_written_estimate.store(0, Ordering::Relaxed);
277 }
278}
279
280impl Default for DiskSpaceGuard {
281 fn default() -> Self {
282 Self::default_guard()
283 }
284}
285
286#[cfg(unix)]
288pub fn get_available_space_mb(path: &Path) -> Option<usize> {
289 use std::ffi::CString;
290 use std::os::unix::ffi::OsStrExt;
291
292 let path_cstr = CString::new(path.as_os_str().as_bytes()).ok()?;
293
294 #[repr(C)]
295 struct Statvfs {
296 f_bsize: libc::c_ulong,
297 f_frsize: libc::c_ulong,
298 f_blocks: libc::fsblkcnt_t,
299 f_bfree: libc::fsblkcnt_t,
300 f_bavail: libc::fsblkcnt_t,
301 _rest: [u8; 128],
303 }
304
305 let mut stat: Statvfs = unsafe { std::mem::zeroed() };
306
307 let result = unsafe { libc::statvfs(path_cstr.as_ptr(), &mut stat as *mut _ as *mut _) };
308
309 if result == 0 {
310 let block_size = stat.f_frsize as u64;
311 let available_blocks = stat.f_bavail as u64;
312 let available_bytes = available_blocks * block_size;
313 Some((available_bytes / (1024 * 1024)) as usize)
314 } else {
315 None
316 }
317}
318
319#[cfg(unix)]
321pub fn get_disk_space(path: &Path) -> Option<(u64, u64)> {
322 use std::ffi::CString;
323 use std::os::unix::ffi::OsStrExt;
324
325 let path_cstr = CString::new(path.as_os_str().as_bytes()).ok()?;
326
327 #[repr(C)]
328 struct Statvfs {
329 f_bsize: libc::c_ulong,
330 f_frsize: libc::c_ulong,
331 f_blocks: libc::fsblkcnt_t,
332 f_bfree: libc::fsblkcnt_t,
333 f_bavail: libc::fsblkcnt_t,
334 _rest: [u8; 128],
335 }
336
337 let mut stat: Statvfs = unsafe { std::mem::zeroed() };
338
339 let result = unsafe { libc::statvfs(path_cstr.as_ptr(), &mut stat as *mut _ as *mut _) };
340
341 if result == 0 {
342 let block_size = stat.f_frsize as u64;
343 let total = stat.f_blocks as u64 * block_size;
344 let available = stat.f_bavail as u64 * block_size;
345 Some((total, available))
346 } else {
347 None
348 }
349}
350
/// Returns the free space available to the caller, in MB (1024 * 1024
/// bytes), for the volume containing `path`, using `GetDiskFreeSpaceExW`.
///
/// Returns `None` when the Win32 call reports failure.
#[cfg(target_os = "windows")]
pub fn get_available_space_mb(path: &Path) -> Option<usize> {
    use std::os::windows::ffi::OsStrExt;

    #[link(name = "kernel32")]
    extern "system" {
        fn GetDiskFreeSpaceExW(
            lpDirectoryName: *const u16,
            lpFreeBytesAvailableToCaller: *mut u64,
            lpTotalNumberOfBytes: *mut u64,
            lpTotalNumberOfFreeBytes: *mut u64,
        ) -> i32;
    }

    // The API expects a NUL-terminated UTF-16 string.
    let mut path_utf16: Vec<u16> = path.as_os_str().encode_wide().collect();
    path_utf16.push(0);

    let mut caller_free: u64 = 0;
    let mut volume_total: u64 = 0;
    let mut volume_free: u64 = 0;

    // SAFETY: the path buffer is NUL-terminated and outlives the call; the
    // three out-pointers refer to live, writable `u64` locals.
    let ok = unsafe {
        GetDiskFreeSpaceExW(
            path_utf16.as_ptr(),
            &mut caller_free,
            &mut volume_total,
            &mut volume_free,
        )
    };

    // A zero return value signals failure.
    if ok == 0 {
        return None;
    }

    Some((caller_free / (1024 * 1024)) as usize)
}
392
/// Returns `(total_bytes, bytes_available_to_caller)` for the volume
/// containing `path`, using `GetDiskFreeSpaceExW`.
///
/// Returns `None` when the Win32 call reports failure.
#[cfg(target_os = "windows")]
pub fn get_disk_space(path: &Path) -> Option<(u64, u64)> {
    use std::os::windows::ffi::OsStrExt;

    #[link(name = "kernel32")]
    extern "system" {
        fn GetDiskFreeSpaceExW(
            lpDirectoryName: *const u16,
            lpFreeBytesAvailableToCaller: *mut u64,
            lpTotalNumberOfBytes: *mut u64,
            lpTotalNumberOfFreeBytes: *mut u64,
        ) -> i32;
    }

    // The API expects a NUL-terminated UTF-16 string.
    let mut path_utf16: Vec<u16> = path.as_os_str().encode_wide().collect();
    path_utf16.push(0);

    let mut caller_free: u64 = 0;
    let mut volume_total: u64 = 0;
    let mut volume_free: u64 = 0;

    // SAFETY: the path buffer is NUL-terminated and outlives the call; the
    // three out-pointers refer to live, writable `u64` locals.
    let ok = unsafe {
        GetDiskFreeSpaceExW(
            path_utf16.as_ptr(),
            &mut caller_free,
            &mut volume_total,
            &mut volume_free,
        )
    };

    // A zero return value signals failure.
    if ok == 0 {
        return None;
    }

    Some((volume_total, caller_free))
}
433
/// Fallback for targets that are neither Unix nor Windows: free space cannot
/// be queried here, so this always returns `None`.
#[cfg(not(any(unix, target_os = "windows")))]
pub fn get_available_space_mb(_path: &Path) -> Option<usize> {
    None
}
439
/// Fallback for targets that are neither Unix nor Windows: disk totals cannot
/// be queried here, so this always returns `None`.
#[cfg(not(any(unix, target_os = "windows")))]
pub fn get_disk_space(_path: &Path) -> Option<(u64, u64)> {
    None
}
444
/// Estimates the on-disk size, in MB (rounded up), of writing `num_entries`
/// entries in each of `formats`.
///
/// The estimate uses a fixed per-entry byte cost per format, divides the
/// total by five when `compression` is enabled, and adds 30% overhead.
/// An empty `formats` slice or zero entries yields `0`.
pub fn estimate_output_size_mb(
    num_entries: usize,
    formats: &[OutputFormat],
    compression: bool,
) -> usize {
    // Rough per-entry size in bytes for each format.
    let base_bytes_per_entry = |format: &OutputFormat| -> usize {
        match format {
            OutputFormat::Csv => 400,
            OutputFormat::Json => 800,
            OutputFormat::Parquet => 200,
        }
    };

    // Saturating arithmetic: an enormous entry count must yield a huge
    // estimate rather than a debug panic or a wrapped (tiny) value in release.
    let total = formats.iter().fold(0usize, |acc, format| {
        acc.saturating_add(num_entries.saturating_mul(base_bytes_per_entry(format)))
    });

    let with_compression = if compression { total / 5 } else { total };

    // Float-to-int `as` casts saturate, so this cannot overflow.
    let with_overhead = (with_compression as f64 * 1.3) as usize;

    with_overhead.div_ceil(1024 * 1024)
}

/// Output formats known to the size estimator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Estimated at 400 bytes per entry.
    Csv,
    /// Estimated at 800 bytes per entry.
    Json,
    /// Estimated at 200 bytes per entry.
    Parquet,
}
484
485pub fn check_sufficient_disk_space(
487 path: &Path,
488 planned_entries: usize,
489 formats: &[OutputFormat],
490 compression: bool,
491 min_free_mb: usize,
492) -> Result<(), String> {
493 let estimated = estimate_output_size_mb(planned_entries, formats, compression);
494 let available = get_available_space_mb(path)
495 .ok_or_else(|| "Unable to determine available disk space on this platform".to_string())?;
496
497 let required = estimated + min_free_mb;
498
499 if available < required {
500 Err(format!(
501 "Insufficient disk space: {} MB available, need {} MB \
502 (estimated output: {} MB, minimum free: {} MB). \
503 Reduce output volume or free up disk space.",
504 available, required, estimated, min_free_mb
505 ))
506 } else {
507 Ok(())
508 }
509}
510
#[cfg(test)]
mod tests {
    use super::*;

    /// `with_min_free` sets the hard limit and derives a 5x soft limit.
    #[test]
    fn test_disk_guard_creation() {
        let guard = DiskSpaceGuard::with_min_free(100);
        let cfg = &guard.config;
        assert_eq!(cfg.hard_limit_mb, 100);
        assert_eq!(cfg.soft_limit_mb, 500);
    }

    /// A hard limit of zero disables both the periodic and the forced check.
    #[test]
    fn test_disk_guard_disabled() {
        let guard = DiskSpaceGuard::new(DiskSpaceGuardConfig {
            hard_limit_mb: 0,
            ..Default::default()
        });
        assert!(guard.check().is_ok());
        assert!(guard.check_now().is_ok());
    }

    /// The estimate is positive, small for 1000 entries, and shrinks with
    /// compression.
    #[test]
    fn test_output_size_estimation() {
        let formats = [OutputFormat::Csv, OutputFormat::Json];

        let est = estimate_output_size_mb(1000, &formats, false);
        assert!(est > 0);
        assert!(est < 10);

        let est_compressed = estimate_output_size_mb(1000, &formats, true);
        assert!(est_compressed < est);
    }

    /// Checks and recorded writes show up in the stats snapshot.
    #[test]
    fn test_stats_tracking() {
        let guard = DiskSpaceGuard::with_min_free(1);

        (0..1000).for_each(|_| {
            // Only the counter matters here; the check result is irrelevant.
            let _ = guard.check();
        });

        guard.record_write(1024 * 1024);

        let snapshot = guard.stats();
        assert!(snapshot.checks_performed > 0);
        assert_eq!(snapshot.estimated_bytes_written, 1024 * 1024);
    }

    /// Free-space queries are expected to work on Unix targets.
    #[test]
    fn test_is_available() {
        #[cfg(unix)]
        assert!(DiskSpaceGuard::is_available());
    }

    /// A tiny write passes on a machine with a little free space.
    #[test]
    fn test_check_before_write() {
        let guard = DiskSpaceGuard::with_min_free(1);
        let outcome = guard.check_before_write(1024);
        assert!(outcome.is_ok());
    }
}