// st/scanner_safety.rs

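//! Safety mechanisms for scanning large directories.
//!
//! This module provides configurable safety limits (file count, elapsed
//! time, estimated memory) and a tracker that checks a running scan against
//! them, to prevent crashes when scanning very large directories such as
//! home directories.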
5
6use std::sync::atomic::{AtomicUsize, Ordering};
7use std::time::{Duration, Instant};
8
/// Safety limits for directory scanning.
///
/// This is plain configuration data: it holds the ceilings a scan is allowed
/// to reach but does not enforce anything by itself. Values compare by field,
/// so two presets can be checked for equality directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScannerSafetyLimits {
    /// Maximum number of files to scan (0 = unlimited).
    pub max_files: usize,
    /// Maximum time to spend scanning.
    pub max_duration: Duration,
    /// Maximum estimated memory usage, in bytes.
    pub max_memory_bytes: usize,
    /// Emit a warning once the file count exceeds this value.
    pub warn_threshold: usize,
}
21
22impl Default for ScannerSafetyLimits {
23    fn default() -> Self {
24        Self {
25            max_files: 1_000_000,                     // 1 million files max by default
26            max_duration: Duration::from_secs(300),   // 5 minutes max
27            max_memory_bytes: 2 * 1024 * 1024 * 1024, // 2GB max
28            warn_threshold: 100_000,                  // Warn at 100k files
29        }
30    }
31}
32
33impl ScannerSafetyLimits {
34    /// Create unlimited safety limits (use with caution!)
35    pub fn unlimited() -> Self {
36        Self {
37            max_files: 0,
38            max_duration: Duration::from_secs(u64::MAX),
39            max_memory_bytes: usize::MAX,
40            warn_threshold: usize::MAX,
41        }
42    }
43
44    /// Create limits suitable for home directory scanning
45    pub fn for_home_directory() -> Self {
46        Self {
47            max_files: 500_000,                     // 500k files max for home dirs
48            max_duration: Duration::from_secs(120), // 2 minutes max
49            max_memory_bytes: 1024 * 1024 * 1024,   // 1GB max
50            warn_threshold: 50_000,                 // Warn at 50k files
51        }
52    }
53
54    /// Create limits for MCP operations (more conservative)
55    pub fn for_mcp() -> Self {
56        Self {
57            max_files: 100_000,                    // 100k files max for MCP
58            max_duration: Duration::from_secs(60), // 1 minute max
59            max_memory_bytes: 512 * 1024 * 1024,   // 512MB max
60            warn_threshold: 10_000,                // Warn at 10k files
61        }
62    }
63}
64
65/// Tracks safety metrics during scanning
66pub struct ScannerSafetyTracker {
67    start_time: Instant,
68    file_count: AtomicUsize,
69    estimated_memory: AtomicUsize,
70    limits: ScannerSafetyLimits,
71    warned: AtomicUsize,
72}
73
74impl ScannerSafetyTracker {
75    pub fn new(limits: ScannerSafetyLimits) -> Self {
76        Self {
77            start_time: Instant::now(),
78            file_count: AtomicUsize::new(0),
79            estimated_memory: AtomicUsize::new(0),
80            limits,
81            warned: AtomicUsize::new(0),
82        }
83    }
84
85    /// Check if we should continue scanning
86    pub fn should_continue(&self) -> Result<(), String> {
87        // Check file count
88        let count = self.file_count.load(Ordering::Relaxed);
89        if self.limits.max_files > 0 && count >= self.limits.max_files {
90            return Err(format!(
91                "Scan aborted: Reached maximum file limit of {} files",
92                self.limits.max_files
93            ));
94        }
95
96        // Check duration
97        if self.start_time.elapsed() > self.limits.max_duration {
98            return Err(format!(
99                "Scan aborted: Exceeded maximum duration of {:?}",
100                self.limits.max_duration
101            ));
102        }
103
104        // Check memory (estimated)
105        let memory = self.estimated_memory.load(Ordering::Relaxed);
106        if memory > self.limits.max_memory_bytes {
107            return Err(format!(
108                "Scan aborted: Estimated memory usage ({} MB) exceeds limit ({} MB)",
109                memory / (1024 * 1024),
110                self.limits.max_memory_bytes / (1024 * 1024)
111            ));
112        }
113
114        // Warn if approaching limits
115        if count > self.limits.warn_threshold && self.warned.load(Ordering::Relaxed) == 0 {
116            self.warned.store(1, Ordering::Relaxed);
117            eprintln!(
118                "⚠️  Warning: Scanning large directory ({} files so far)",
119                count
120            );
121            eprintln!("   Consider using --max-depth or --stream mode");
122        }
123
124        Ok(())
125    }
126
127    /// Increment file count
128    pub fn add_file(&self, estimated_node_size: usize) {
129        self.file_count.fetch_add(1, Ordering::Relaxed);
130        self.estimated_memory
131            .fetch_add(estimated_node_size, Ordering::Relaxed);
132    }
133
134    /// Get current stats
135    pub fn stats(&self) -> (usize, Duration, usize) {
136        (
137            self.file_count.load(Ordering::Relaxed),
138            self.start_time.elapsed(),
139            self.estimated_memory.load(Ordering::Relaxed),
140        )
141    }
142}
143
144/// Estimate memory size of a FileNode (rough approximation)
145pub fn estimate_node_size(path_len: usize) -> usize {
146    // Base struct size + path string + some overhead
147    std::mem::size_of::<crate::scanner::FileNode>() + path_len + 64
148}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// The MCP preset must be stricter than the home-directory preset.
    #[test]
    fn test_safety_limits() {
        let limits = ScannerSafetyLimits::for_home_directory();
        assert_eq!(limits.max_files, 500_000);

        let mcp_limits = ScannerSafetyLimits::for_mcp();
        assert!(mcp_limits.max_files < limits.max_files);
    }

    /// The tracker must keep going below the file cap and stop, with the
    /// file-limit message, exactly when the cap is reached.
    #[test]
    fn test_safety_tracker() {
        let limits = ScannerSafetyLimits {
            max_files: 10,
            // Generous time budget so a slow test machine cannot trip the
            // duration check instead of the file-count check.
            max_duration: Duration::from_secs(60),
            max_memory_bytes: 1024,
            warn_threshold: 5,
        };

        let tracker = ScannerSafetyTracker::new(limits);

        // Should start OK
        assert!(tracker.should_continue().is_ok());

        // One file short of the cap is still fine (900 bytes < 1024 too).
        for _ in 0..9 {
            tracker.add_file(100);
        }
        assert!(tracker.should_continue().is_ok());

        // The tenth file reaches the cap.
        tracker.add_file(100);
        let err = tracker
            .should_continue()
            .expect_err("scan should stop once max_files is reached");

        // Pin WHICH limit fired: 10 * 100 bytes stays under the memory cap,
        // so only the file-count check may be responsible.
        assert!(
            err.contains("maximum file limit"),
            "unexpected abort reason: {}",
            err
        );

        let (files, _elapsed, memory) = tracker.stats();
        assert_eq!(files, 10);
        assert_eq!(memory, 1000);
    }
}