smart-tree 8.0.1

Smart Tree - An intelligent, AI-friendly directory visualization tool
Documentation
//! Safety mechanisms for scanning large directories
//!
//! This module provides safety limits and optimizations to prevent
//! crashes when scanning very large directories like home directories.

use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::{Duration, Instant};

/// Safety limits for directory scanning.
///
/// A plain bundle of thresholds consumed by [`ScannerSafetyTracker`]. The
/// type is comparable (`PartialEq`/`Eq`) so presets and user-supplied
/// configurations can be checked against each other directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScannerSafetyLimits {
    /// Maximum number of files to scan (0 = unlimited).
    pub max_files: usize,
    /// Maximum time to spend scanning; the scan is aborted once the elapsed
    /// time is strictly greater than this value.
    pub max_duration: Duration,
    /// Maximum memory usage in bytes (estimated); the scan is aborted once
    /// the running estimate is strictly greater than this value.
    pub max_memory_bytes: usize,
    /// Warn (once, on stderr) when the file count exceeds this many files.
    pub warn_threshold: usize,
}

impl Default for ScannerSafetyLimits {
    /// General-purpose limits: one million files, five minutes, 2 GiB of
    /// estimated memory, with a warning after 100k files.
    fn default() -> Self {
        const GIB: usize = 1024 * 1024 * 1024;
        const FIVE_MINUTES_SECS: u64 = 300;

        Self {
            warn_threshold: 100_000,
            max_files: 1_000_000,
            max_memory_bytes: 2 * GIB,
            max_duration: Duration::from_secs(FIVE_MINUTES_SECS),
        }
    }
}

impl ScannerSafetyLimits {
    /// Limits that effectively never trip (use with caution!).
    ///
    /// `max_files` is `0`, the "no cap" sentinel; the remaining thresholds
    /// are pushed to values a real scan cannot reach.
    pub fn unlimited() -> Self {
        let no_time_cap = Duration::from_secs(u64::MAX);
        Self {
            max_files: 0,
            warn_threshold: usize::MAX,
            max_memory_bytes: usize::MAX,
            max_duration: no_time_cap,
        }
    }

    /// Limits tuned for scanning a home directory: 500k files, two minutes,
    /// 1 GiB of estimated memory, warning after 50k files.
    pub fn for_home_directory() -> Self {
        const GIB: usize = 1024 * 1024 * 1024;
        let two_minutes = Duration::from_secs(120);
        Self {
            max_files: 500_000,
            warn_threshold: 50_000,
            max_memory_bytes: GIB,
            max_duration: two_minutes,
        }
    }

    /// More conservative limits for MCP operations: 100k files, one minute,
    /// 512 MiB of estimated memory, warning after 10k files.
    pub fn for_mcp() -> Self {
        const MIB: usize = 1024 * 1024;
        let one_minute = Duration::from_secs(60);
        Self {
            max_files: 100_000,
            warn_threshold: 10_000,
            max_memory_bytes: 512 * MIB,
            max_duration: one_minute,
        }
    }
}

/// Tracks safety metrics during scanning.
///
/// All counters are atomics, so the tracker is updated and queried through
/// a shared `&self` reference.
#[derive(Debug)]
pub struct ScannerSafetyTracker {
    /// Moment the tracker was created; elapsed time is measured from here.
    start_time: Instant,
    /// Number of files recorded so far.
    file_count: AtomicUsize,
    /// Running sum of the per-node size estimates, in bytes.
    estimated_memory: AtomicUsize,
    /// Thresholds this scan is checked against.
    limits: ScannerSafetyLimits,
    /// Warn-once flag: 0 until the large-directory warning has been printed,
    /// 1 afterwards.
    warned: AtomicUsize,
}

impl ScannerSafetyTracker {
    /// Create a tracker for a new scan.
    ///
    /// The elapsed-time clock starts at the moment of this call; the file
    /// count, the memory estimate and the warn-once flag all start at zero.
    pub fn new(limits: ScannerSafetyLimits) -> Self {
        Self {
            start_time: Instant::now(),
            file_count: AtomicUsize::new(0),
            estimated_memory: AtomicUsize::new(0),
            limits,
            warned: AtomicUsize::new(0),
        }
    }

    /// Check if we should continue scanning.
    ///
    /// As a side effect, prints a one-time warning to stderr the first time
    /// the file count exceeds `warn_threshold` without any limit being hit.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message when, checked in this order:
    /// - the file count has reached `max_files` (skipped when `max_files` is 0),
    /// - the elapsed time is greater than `max_duration`,
    /// - the estimated memory is greater than `max_memory_bytes`.
    pub fn should_continue(&self) -> Result<(), String> {
        // Relaxed ordering is sufficient throughout: each atomic is an
        // independent tally and no other memory is published through it.

        // Check file count
        let count = self.file_count.load(Ordering::Relaxed);
        if self.limits.max_files > 0 && count >= self.limits.max_files {
            return Err(format!(
                "Scan aborted: Reached maximum file limit of {} files",
                self.limits.max_files
            ));
        }

        // Check duration
        if self.start_time.elapsed() > self.limits.max_duration {
            return Err(format!(
                "Scan aborted: Exceeded maximum duration of {:?}",
                self.limits.max_duration
            ));
        }

        // Check memory (estimated)
        let memory = self.estimated_memory.load(Ordering::Relaxed);
        if memory > self.limits.max_memory_bytes {
            return Err(format!(
                "Scan aborted: Estimated memory usage ({} MB) exceeds limit ({} MB)",
                memory / (1024 * 1024),
                self.limits.max_memory_bytes / (1024 * 1024)
            ));
        }

        // Warn if approaching limits. The flag is claimed with a single
        // compare-exchange so that, when several threads cross the threshold
        // together, exactly one of them prints the warning.
        if count > self.limits.warn_threshold
            && self
                .warned
                .compare_exchange(0, 1, Ordering::Relaxed, Ordering::Relaxed)
                .is_ok()
        {
            eprintln!(
                "⚠️  Warning: Scanning large directory ({} files so far)",
                count
            );
            eprintln!("   Consider using --max-depth or --stream mode");
        }

        Ok(())
    }

    /// Record one scanned file and add `estimated_node_size` bytes to the
    /// running memory estimate.
    ///
    /// The memory estimate saturates at `usize::MAX` instead of wrapping, so
    /// an overflow can never reset it below the configured limit.
    pub fn add_file(&self, estimated_node_size: usize) {
        self.file_count.fetch_add(1, Ordering::Relaxed);
        // The closure always returns `Some`, so `fetch_update` cannot fail;
        // the previous value it returns is not needed.
        let _ = self.estimated_memory.fetch_update(
            Ordering::Relaxed,
            Ordering::Relaxed,
            |current| Some(current.saturating_add(estimated_node_size)),
        );
    }

    /// Get current stats as `(file_count, elapsed, estimated_memory_bytes)`.
    pub fn stats(&self) -> (usize, Duration, usize) {
        (
            self.file_count.load(Ordering::Relaxed),
            self.start_time.elapsed(),
            self.estimated_memory.load(Ordering::Relaxed),
        )
    }
}

/// Estimate memory size of a FileNode (rough approximation).
///
/// The estimate is the size of the `FileNode` struct itself, plus
/// `path_len` bytes for the path string, plus a fixed 64-byte allowance for
/// other overhead. The sum saturates at `usize::MAX` rather than
/// overflowing, so an absurdly large `path_len` cannot panic (debug builds)
/// or wrap around to a small value (release builds).
pub fn estimate_node_size(path_len: usize) -> usize {
    // Fixed allowance for allocator and container overhead per node.
    const PER_NODE_OVERHEAD: usize = 64;

    std::mem::size_of::<crate::scanner::FileNode>()
        .saturating_add(path_len)
        .saturating_add(PER_NODE_OVERHEAD)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build limits with a generous time budget so that only the limit under
    /// test can trip, regardless of how slowly the test machine runs.
    fn limits_with(
        max_files: usize,
        max_memory_bytes: usize,
        warn_threshold: usize,
    ) -> ScannerSafetyLimits {
        ScannerSafetyLimits {
            max_files,
            max_duration: Duration::from_secs(3600),
            max_memory_bytes,
            warn_threshold,
        }
    }

    #[test]
    fn test_safety_limits() {
        let limits = ScannerSafetyLimits::for_home_directory();
        assert_eq!(limits.max_files, 500_000);

        let mcp_limits = ScannerSafetyLimits::for_mcp();
        assert!(mcp_limits.max_files < limits.max_files);
        assert!(mcp_limits.max_duration < limits.max_duration);
        assert!(mcp_limits.max_memory_bytes < limits.max_memory_bytes);

        // The default preset is the most permissive of the bounded presets.
        let default_limits = ScannerSafetyLimits::default();
        assert!(limits.max_files < default_limits.max_files);
    }

    #[test]
    fn test_safety_tracker() {
        let tracker = ScannerSafetyTracker::new(limits_with(10, 1024 * 1024, 5));

        // Should start OK
        assert!(tracker.should_continue().is_ok());

        // One file short of the limit is still fine
        for _ in 0..9 {
            tracker.add_file(100);
        }
        assert!(tracker.should_continue().is_ok());

        // Reaching the limit aborts, and it must be the *file* limit that trips
        tracker.add_file(100);
        let err = tracker.should_continue().unwrap_err();
        assert!(err.contains("maximum file limit"), "unexpected error: {err}");

        let (files, _elapsed, memory) = tracker.stats();
        assert_eq!(files, 10);
        assert_eq!(memory, 1000);
    }

    #[test]
    fn test_memory_limit() {
        // max_files = 0 disables the file cap, isolating the memory check.
        let tracker = ScannerSafetyTracker::new(limits_with(0, 1024, usize::MAX));

        // Exactly at the limit is allowed; the check is strictly "greater than".
        tracker.add_file(1024);
        assert!(tracker.should_continue().is_ok());

        tracker.add_file(1);
        let err = tracker.should_continue().unwrap_err();
        assert!(err.contains("memory usage"), "unexpected error: {err}");
    }

    #[test]
    fn test_unlimited_never_trips() {
        let tracker = ScannerSafetyTracker::new(ScannerSafetyLimits::unlimited());
        for _ in 0..1_000 {
            tracker.add_file(4096);
        }
        assert!(tracker.should_continue().is_ok());
    }
}