1use std::path::{Path, PathBuf};
7use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
8use std::sync::Arc;
9
/// Snapshot of filesystem usage and guard counters, as assembled by
/// `DiskSpaceGuard::stats`.
#[derive(Debug, Clone, Default)]
pub struct DiskStats {
    /// Total size of the monitored filesystem in bytes (0 when it could not be queried).
    pub total_bytes: u64,
    /// Bytes currently available on the monitored filesystem (0 when it could not be queried).
    pub available_bytes: u64,
    /// `total_bytes` minus `available_bytes`, saturating at zero.
    pub used_bytes: u64,
    /// Value of the guard's operation counter, which `DiskSpaceGuard::check`
    /// increments on every call while the guard is enabled.
    pub checks_performed: u64,
    /// Number of full checks that passed the hard limit but found the available
    /// space below the configured soft limit.
    pub soft_limit_warnings: u64,
    /// Whether a full check has found the available space below the hard limit
    /// plus reserve since the counters were last reset.
    pub hard_limit_exceeded: bool,
    /// Sum of the byte counts reported through `DiskSpaceGuard::record_write`.
    pub estimated_bytes_written: u64,
}
28
/// Tunables for `DiskSpaceGuard`. All `*_mb` values are in units of
/// 1024 * 1024 bytes.
#[derive(Debug, Clone)]
pub struct DiskSpaceGuardConfig {
    /// Minimum free space that must remain available; `0` disables the guard's checks.
    pub hard_limit_mb: usize,
    /// Free-space level below which a full check counts a soft-limit warning.
    pub soft_limit_mb: usize,
    /// Number of `DiskSpaceGuard::check` calls per real filesystem query.
    pub check_interval: usize,
    /// Extra headroom added on top of `hard_limit_mb` when deciding whether
    /// space is exhausted.
    pub reserve_buffer_mb: usize,
    /// Path whose filesystem is queried; `None` means the current directory (`.`).
    pub monitor_path: Option<PathBuf>,
}
43
44impl Default for DiskSpaceGuardConfig {
45 fn default() -> Self {
46 Self {
47 hard_limit_mb: 100, soft_limit_mb: 500, check_interval: 500, reserve_buffer_mb: 50, monitor_path: None,
52 }
53 }
54}
55
56impl DiskSpaceGuardConfig {
57 pub fn with_min_free_mb(hard_limit_mb: usize) -> Self {
59 Self {
60 hard_limit_mb,
61 soft_limit_mb: hard_limit_mb * 5, ..Default::default()
63 }
64 }
65
66 pub fn with_path<P: AsRef<Path>>(mut self, path: P) -> Self {
68 self.monitor_path = Some(path.as_ref().to_path_buf());
69 self
70 }
71
72 pub fn with_reserve(mut self, reserve_mb: usize) -> Self {
74 self.reserve_buffer_mb = reserve_mb;
75 self
76 }
77}
78
/// Error returned by the guard when the monitored filesystem does not have
/// enough free space.
#[derive(Debug, Clone)]
pub struct DiskSpaceExhausted {
    /// Free space observed at the time of the check, in units of 1024 * 1024 bytes.
    pub available_mb: usize,
    /// Free space the check required, in the same unit.
    pub required_mb: usize,
    /// Distinguishes soft-limit from hard-limit failures; every constructor
    /// visible in this file sets it to `false`.
    pub is_soft_limit: bool,
    /// Human-readable description; this is exactly what `Display` prints.
    pub message: String,
}
87
88impl std::fmt::Display for DiskSpaceExhausted {
89 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90 write!(f, "{}", self.message)
91 }
92}
93
// Marker impl: uses the default `Error` methods (no `source`), so the error can
// be boxed as `dyn Error` or propagated with `?`.
impl std::error::Error for DiskSpaceExhausted {}
95
/// Watches free space on a filesystem and reports when it drops below the
/// configured limits. All mutable state is held in atomics, so every method
/// takes `&self`.
#[derive(Debug)]
pub struct DiskSpaceGuard {
    /// Limits, check cadence and monitored path.
    config: DiskSpaceGuardConfig,
    /// Number of `check` calls made while the guard is enabled.
    operation_counter: AtomicU64,
    /// Number of full checks that found free space below the soft limit.
    soft_warnings_count: AtomicU64,
    /// Set once a full check finds free space below hard limit + reserve;
    /// cleared only by `reset_stats`.
    hard_limit_exceeded: AtomicBool,
    /// Running total of bytes reported via `record_write`.
    bytes_written_estimate: AtomicU64,
    /// Free space (in units of 1024 * 1024 bytes) seen by the most recent
    /// `check_now`; `usize::MAX` when it could not be determined.
    last_available_mb: AtomicUsize,
}
106
107impl DiskSpaceGuard {
108 pub fn new(config: DiskSpaceGuardConfig) -> Self {
110 Self {
111 config,
112 operation_counter: AtomicU64::new(0),
113 soft_warnings_count: AtomicU64::new(0),
114 hard_limit_exceeded: AtomicBool::new(false),
115 bytes_written_estimate: AtomicU64::new(0),
116 last_available_mb: AtomicUsize::new(0),
117 }
118 }
119
120 pub fn default_guard() -> Self {
122 Self::new(DiskSpaceGuardConfig::default())
123 }
124
125 pub fn with_min_free(min_free_mb: usize) -> Self {
127 Self::new(DiskSpaceGuardConfig::with_min_free_mb(min_free_mb))
128 }
129
130 pub fn shared(config: DiskSpaceGuardConfig) -> Arc<Self> {
132 Arc::new(Self::new(config))
133 }
134
135 pub fn check(&self) -> Result<(), DiskSpaceExhausted> {
141 if self.config.hard_limit_mb == 0 {
143 return Ok(());
144 }
145
146 let count = self.operation_counter.fetch_add(1, Ordering::Relaxed);
147
148 if count % self.config.check_interval as u64 != 0 {
150 return Ok(());
151 }
152
153 self.check_now()
154 }
155
156 pub fn check_now(&self) -> Result<(), DiskSpaceExhausted> {
158 if self.config.hard_limit_mb == 0 {
159 return Ok(());
160 }
161
162 let path = self
163 .config
164 .monitor_path
165 .as_deref()
166 .unwrap_or(Path::new("."));
167
168 let available_mb = get_available_space_mb(path).unwrap_or(usize::MAX);
169 self.last_available_mb
170 .store(available_mb, Ordering::Relaxed);
171
172 let required_mb = self.config.hard_limit_mb + self.config.reserve_buffer_mb;
174 if available_mb < required_mb {
175 self.hard_limit_exceeded.store(true, Ordering::Relaxed);
176 return Err(DiskSpaceExhausted {
177 available_mb,
178 required_mb,
179 is_soft_limit: false,
180 message: format!(
181 "Disk space exhausted: only {} MB available, need at least {} MB. \
182 Free up disk space or reduce output volume.",
183 available_mb, required_mb
184 ),
185 });
186 }
187
188 if available_mb < self.config.soft_limit_mb {
190 self.soft_warnings_count.fetch_add(1, Ordering::Relaxed);
191 }
193
194 Ok(())
195 }
196
197 pub fn check_before_write(&self, estimated_bytes: u64) -> Result<(), DiskSpaceExhausted> {
199 if self.config.hard_limit_mb == 0 {
200 return Ok(());
201 }
202
203 let path = self
204 .config
205 .monitor_path
206 .as_deref()
207 .unwrap_or(Path::new("."));
208
209 let available_mb = get_available_space_mb(path).unwrap_or(usize::MAX);
210 let estimated_mb = (estimated_bytes / (1024 * 1024)) as usize;
211 let required_mb = self.config.hard_limit_mb + self.config.reserve_buffer_mb + estimated_mb;
212
213 if available_mb < required_mb {
214 return Err(DiskSpaceExhausted {
215 available_mb,
216 required_mb,
217 is_soft_limit: false,
218 message: format!(
219 "Insufficient disk space for write: {} MB available, need {} MB \
220 (estimated write: {} MB, reserve: {} MB).",
221 available_mb, required_mb, estimated_mb, self.config.reserve_buffer_mb
222 ),
223 });
224 }
225
226 Ok(())
227 }
228
229 pub fn record_write(&self, bytes: u64) {
231 self.bytes_written_estimate
232 .fetch_add(bytes, Ordering::Relaxed);
233 }
234
235 pub fn stats(&self) -> DiskStats {
237 let path = self
238 .config
239 .monitor_path
240 .as_deref()
241 .unwrap_or(Path::new("."));
242
243 let (total, available) = get_disk_space(path).unwrap_or((0, 0));
244
245 DiskStats {
246 total_bytes: total,
247 available_bytes: available,
248 used_bytes: total.saturating_sub(available),
249 checks_performed: self.operation_counter.load(Ordering::Relaxed),
250 soft_limit_warnings: self.soft_warnings_count.load(Ordering::Relaxed),
251 hard_limit_exceeded: self.hard_limit_exceeded.load(Ordering::Relaxed),
252 estimated_bytes_written: self.bytes_written_estimate.load(Ordering::Relaxed),
253 }
254 }
255
256 pub fn available_space_mb(&self) -> usize {
258 let path = self
259 .config
260 .monitor_path
261 .as_deref()
262 .unwrap_or(Path::new("."));
263 get_available_space_mb(path).unwrap_or(0)
264 }
265
266 pub fn is_available() -> bool {
268 get_available_space_mb(Path::new(".")).is_some()
269 }
270
271 pub fn reset_stats(&self) {
273 self.operation_counter.store(0, Ordering::Relaxed);
274 self.soft_warnings_count.store(0, Ordering::Relaxed);
275 self.hard_limit_exceeded.store(false, Ordering::Relaxed);
276 self.bytes_written_estimate.store(0, Ordering::Relaxed);
277 }
278}
279
280impl Default for DiskSpaceGuard {
281 fn default() -> Self {
282 Self::default_guard()
283 }
284}
285
286#[cfg(unix)]
288#[allow(clippy::unnecessary_cast)] pub fn get_available_space_mb(path: &Path) -> Option<usize> {
290 use std::ffi::CString;
291 use std::os::unix::ffi::OsStrExt;
292
293 let path_cstr = CString::new(path.as_os_str().as_bytes()).ok()?;
294
295 #[repr(C)]
296 struct Statvfs {
297 f_bsize: libc::c_ulong,
298 f_frsize: libc::c_ulong,
299 f_blocks: libc::fsblkcnt_t,
300 f_bfree: libc::fsblkcnt_t,
301 f_bavail: libc::fsblkcnt_t,
302 _rest: [u8; 128],
304 }
305
306 let mut stat: Statvfs = unsafe { std::mem::zeroed() };
307
308 let result = unsafe { libc::statvfs(path_cstr.as_ptr(), &mut stat as *mut _ as *mut _) };
309
310 if result == 0 {
311 let block_size = stat.f_frsize as u64;
312 let available_blocks = stat.f_bavail as u64;
313 let available_bytes = available_blocks * block_size;
314 Some((available_bytes / (1024 * 1024)) as usize)
315 } else {
316 None
317 }
318}
319
320#[cfg(unix)]
322#[allow(clippy::unnecessary_cast)] pub fn get_disk_space(path: &Path) -> Option<(u64, u64)> {
324 use std::ffi::CString;
325 use std::os::unix::ffi::OsStrExt;
326
327 let path_cstr = CString::new(path.as_os_str().as_bytes()).ok()?;
328
329 #[repr(C)]
330 struct Statvfs {
331 f_bsize: libc::c_ulong,
332 f_frsize: libc::c_ulong,
333 f_blocks: libc::fsblkcnt_t,
334 f_bfree: libc::fsblkcnt_t,
335 f_bavail: libc::fsblkcnt_t,
336 _rest: [u8; 128],
337 }
338
339 let mut stat: Statvfs = unsafe { std::mem::zeroed() };
340
341 let result = unsafe { libc::statvfs(path_cstr.as_ptr(), &mut stat as *mut _ as *mut _) };
342
343 if result == 0 {
344 let block_size = stat.f_frsize as u64;
345 let total = stat.f_blocks as u64 * block_size;
346 let available = stat.f_bavail as u64 * block_size;
347 Some((total, available))
348 } else {
349 None
350 }
351}
352
/// Returns the free space available to the caller on the volume containing
/// `path`, in units of 1024 * 1024 bytes, or `None` when
/// `GetDiskFreeSpaceExW` returns zero.
#[cfg(target_os = "windows")]
pub fn get_available_space_mb(path: &Path) -> Option<usize> {
    use std::os::windows::ffi::OsStrExt;

    #[link(name = "kernel32")]
    extern "system" {
        fn GetDiskFreeSpaceExW(
            lpDirectoryName: *const u16,
            lpFreeBytesAvailableToCaller: *mut u64,
            lpTotalNumberOfBytes: *mut u64,
            lpTotalNumberOfFreeBytes: *mut u64,
        ) -> i32;
    }

    // The API expects a NUL-terminated UTF-16 string.
    let mut dir_utf16: Vec<u16> = path.as_os_str().encode_wide().collect();
    dir_utf16.push(0);

    let mut caller_free = 0u64;
    let mut volume_total = 0u64;
    let mut volume_free = 0u64;

    // SAFETY: `dir_utf16` is NUL-terminated and outlives the call; the three
    // out-pointers refer to live, writable `u64` locals.
    let status = unsafe {
        GetDiskFreeSpaceExW(
            dir_utf16.as_ptr(),
            &mut caller_free,
            &mut volume_total,
            &mut volume_free,
        )
    };

    if status == 0 {
        return None;
    }
    Some((caller_free / (1024 * 1024)) as usize)
}
394
/// Returns `(total_bytes, bytes_available_to_caller)` for the volume
/// containing `path`, or `None` when `GetDiskFreeSpaceExW` returns zero.
#[cfg(target_os = "windows")]
pub fn get_disk_space(path: &Path) -> Option<(u64, u64)> {
    use std::os::windows::ffi::OsStrExt;

    #[link(name = "kernel32")]
    extern "system" {
        fn GetDiskFreeSpaceExW(
            lpDirectoryName: *const u16,
            lpFreeBytesAvailableToCaller: *mut u64,
            lpTotalNumberOfBytes: *mut u64,
            lpTotalNumberOfFreeBytes: *mut u64,
        ) -> i32;
    }

    // The API expects a NUL-terminated UTF-16 string.
    let mut dir_utf16: Vec<u16> = path.as_os_str().encode_wide().collect();
    dir_utf16.push(0);

    let mut caller_free = 0u64;
    let mut volume_total = 0u64;
    let mut volume_free = 0u64;

    // SAFETY: `dir_utf16` is NUL-terminated and outlives the call; the three
    // out-pointers refer to live, writable `u64` locals.
    let status = unsafe {
        GetDiskFreeSpaceExW(
            dir_utf16.as_ptr(),
            &mut caller_free,
            &mut volume_total,
            &mut volume_free,
        )
    };

    if status == 0 {
        return None;
    }
    Some((volume_total, caller_free))
}
435
/// Fallback for targets that are neither unix nor Windows: free space cannot
/// be queried, so this always returns `None`.
#[cfg(not(any(unix, target_os = "windows")))]
pub fn get_available_space_mb(_path: &Path) -> Option<usize> {
    None
}
441
/// Fallback for targets that are neither unix nor Windows: disk totals cannot
/// be queried, so this always returns `None`.
#[cfg(not(any(unix, target_os = "windows")))]
pub fn get_disk_space(_path: &Path) -> Option<(u64, u64)> {
    None
}
446
447pub fn estimate_output_size_mb(
449 num_entries: usize,
450 formats: &[OutputFormat],
451 compression: bool,
452) -> usize {
453 let base_bytes_per_entry = |format: &OutputFormat| -> usize {
455 match format {
456 OutputFormat::Csv => 400, OutputFormat::Json => 800, OutputFormat::Parquet => 200, }
460 };
461
462 let total: usize = formats
463 .iter()
464 .map(|f| num_entries * base_bytes_per_entry(f))
465 .sum();
466
467 let with_compression = if compression {
468 total / 5 } else {
470 total
471 };
472
473 let with_overhead = (with_compression as f64 * 1.3) as usize;
475
476 with_overhead.div_ceil(1024 * 1024)
477}
478
/// Output file formats recognised by `estimate_output_size_mb`, which assigns
/// each a fixed per-entry byte cost.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Costed at 400 bytes per entry.
    Csv,
    /// Costed at 800 bytes per entry.
    Json,
    /// Costed at 200 bytes per entry.
    Parquet,
}
486
487pub fn check_sufficient_disk_space(
489 path: &Path,
490 planned_entries: usize,
491 formats: &[OutputFormat],
492 compression: bool,
493 min_free_mb: usize,
494) -> Result<(), String> {
495 let estimated = estimate_output_size_mb(planned_entries, formats, compression);
496 let available = get_available_space_mb(path)
497 .ok_or_else(|| "Unable to determine available disk space on this platform".to_string())?;
498
499 let required = estimated + min_free_mb;
500
501 if available < required {
502 Err(format!(
503 "Insufficient disk space: {} MB available, need {} MB \
504 (estimated output: {} MB, minimum free: {} MB). \
505 Reduce output volume or free up disk space.",
506 available, required, estimated, min_free_mb
507 ))
508 } else {
509 Ok(())
510 }
511}
512
#[cfg(test)]
mod tests {
    use super::*;

    /// `with_min_free` stores the hard limit and derives a 5x soft limit.
    #[test]
    fn test_disk_guard_creation() {
        let cfg = &DiskSpaceGuard::with_min_free(100).config;
        assert_eq!(cfg.hard_limit_mb, 100);
        assert_eq!(cfg.soft_limit_mb, 500);
    }

    /// A zero hard limit turns both check entry points into no-ops.
    #[test]
    fn test_disk_guard_disabled() {
        let disabled = DiskSpaceGuard::new(DiskSpaceGuardConfig {
            hard_limit_mb: 0,
            ..DiskSpaceGuardConfig::default()
        });
        assert!(disabled.check().is_ok());
        assert!(disabled.check_now().is_ok());
    }

    /// The estimate is positive, small for 1000 entries, and shrinks with compression.
    #[test]
    fn test_output_size_estimation() {
        let formats = [OutputFormat::Csv, OutputFormat::Json];

        let plain = estimate_output_size_mb(1000, &formats, false);
        assert!(plain > 0);
        assert!(plain < 10);

        let compressed = estimate_output_size_mb(1000, &formats, true);
        assert!(compressed < plain);
    }

    /// Calls to `check` and `record_write` show up in the stats snapshot.
    #[test]
    fn test_stats_tracking() {
        let guard = DiskSpaceGuard::with_min_free(1);

        (0..1000).for_each(|_| {
            let _ = guard.check();
        });
        guard.record_write(1024 * 1024);

        let snapshot = guard.stats();
        assert!(snapshot.checks_performed > 0);
        assert_eq!(snapshot.estimated_bytes_written, 1024 * 1024);
    }

    /// On unix the free-space probe must work for the current directory.
    #[test]
    fn test_is_available() {
        if cfg!(unix) {
            assert!(DiskSpaceGuard::is_available());
        }
    }

    /// A 1 KiB write fits on any machine with a little free space.
    #[test]
    fn test_check_before_write() {
        let outcome = DiskSpaceGuard::with_min_free(1).check_before_write(1024);
        assert!(outcome.is_ok());
    }
}