// smart-tree 8.0.0

//
// -----------------------------------------------------------------------------
//  WELCOME TO THE JUNGLE! ...The filesystem jungle, that is. 🌴
//
//  You've found scanner.rs, the intrepid explorer and engine room of st.
//  This module is the Indiana Jones of our codebase. It bravely dives into
//  the deepest, darkest directories, dodges `.gitignore` traps, inspects
//  every file for treasure (metadata), and reports back its findings.
//
//  So grab your hat, and let's go on an adventure!
//
//  Brought to you by The Cheet - making filesystem traversal a rock concert! 🥁🧻
// -----------------------------------------------------------------------------
//

use crate::interest_calculator::InterestCalculator;
use crate::scanner_interest::{ChangeType, InterestScore, TraversalContext};
use crate::scanner_safety::{estimate_node_size, ScannerSafetyLimits, ScannerSafetyTracker};
use crate::scanner_state::ScanState;
use crate::security_scan::{SecurityFinding, SecurityScanner};
use anyhow::Result;
use globset::{Glob, GlobSet, GlobSetBuilder}; // For powerful gitignore-style pattern matching.
use regex::Regex; // For user-defined find patterns.
use std::collections::{HashMap, HashSet}; // Our trusty hash-based collections.
use std::fs; // Filesystem operations, the bread and butter here.
use std::io::{BufRead, BufReader}; // For efficient reading, especially for content search.
use std::path::{Path, PathBuf}; // Path manipulation is key.
use std::sync::mpsc; // For streaming results from a worker thread.
use std::time::SystemTime; // To know when files were last touched.
use walkdir::{DirEntry, WalkDir}; // The excellent `walkdir` crate does the actual directory walking.

// Unix-specific imports for richer metadata like permissions, UID, GID.
// On other platforms, we'll use sensible defaults.
#[cfg(unix)]
use std::os::unix::fs::{MetadataExt, PermissionsExt};

/// # FileNode: The Ultimate Backstage Pass
///
/// Every file and directory we meet gets one of these. It's a VIP pass that
/// holds all the juicy details: its name, size, when it was last cool (modified),
/// and whether it's on the super-secret "ignored" list. It's the atom of our
/// `st` universe.
///
/// The struct is (de)serializable through serde. The smart-scanning fields at the
/// bottom are left out of the serialized form whenever they are `None` / empty,
/// so older consumers only see them when they carry data.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FileNode {
    /// The full path to the file or directory. The source of truth for location!
    pub path: PathBuf,
    /// Is it a directory? `true` if yes, `false` if it's a file or symlink.
    pub is_dir: bool,
    /// Size of the file in bytes. For directories, this is often 0 or metadata-dependent.
    pub size: u64,
    /// File permissions (e.g., `rwxr-xr-x`). Stored as a u32, typically from Unix mode.
    pub permissions: u32,
    /// User ID of the owner (Unix-specific).
    pub uid: u32,
    /// Group ID of the owner (Unix-specific).
    pub gid: u32,
    /// Timestamp of the last modification. Tells us how fresh or ancient a file is.
    pub modified: SystemTime,
    /// Is it a symbolic link? `true` if yes. We handle these with care.
    pub is_symlink: bool,
    /// Is it a hidden file (e.g., starts with a `.` on Unix)?
    pub is_hidden: bool,
    /// Did we encounter a "Permission Denied" error when trying to access this?
    /// Important for gracefully handling parts of the filesystem we can't read.
    pub permission_denied: bool,
    /// Is this file or directory ignored based on `.gitignore` or default ignore rules?
    pub is_ignored: bool,
    /// The depth of this entry relative to the scan root (root is depth 0).
    pub depth: usize,
    /// The specific type of the file (e.g., RegularFile, Symlink, Executable).
    pub file_type: FileType,
    /// A category assigned based on extension or name, used for coloring and context.
    /// (e.g., Rust, Python, Image, Archive).
    pub category: FileCategory,
    /// For content search: information about where matches were found.
    /// `None` if no search was performed or no matches.
    /// Unlike the smart-scanning fields below, this is always serialized (as `null` when `None`).
    pub search_matches: Option<SearchMatches>,
    /// The filesystem type this file resides on.
    pub filesystem_type: FilesystemType,
    /// Git branch if this directory contains a .git folder.
    pub git_branch: Option<String>,

    // --- Smart Scanning Fields (Phase 2: Intelligent Context-Aware Scanning) ---
    // These fields enable "surface what matters" scanning.
    // Each one is skipped during serialization when it holds no data.

    /// How we reached this location (direct, symlink, mount, dependency).
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub traversal_context: Option<TraversalContext>,

    /// Interest score - how relevant is this file right now?
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub interest: Option<InterestScore>,

    /// Security findings detected during scan.
    /// Omitted from serialized output when empty; `default` lets deserialization
    /// succeed (yielding an empty list) when the key is absent.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub security_findings: Vec<SecurityFinding>,

    /// Change status since last scan (Added, Modified, Deleted, etc.).
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub change_status: Option<ChangeType>,

    /// Content hash for change detection (Blake3/SHA256).
    /// Omitted from serialized output when `None`.
    /// NOTE(review): the hash algorithm is not determined anywhere in this struct — confirm at the producer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_hash: Option<String>,
}

/// Information about search matches within a file.
///
/// Attached to a `FileNode` through its `search_matches` field when a content
/// search found something. Positions are `(line, column)` pairs.
/// NOTE(review): whether lines/columns are 0- or 1-based is not visible here — confirm at the producer.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SearchMatches {
    /// First match position (line, column)
    pub first_match: (usize, usize),
    /// Total number of matches found
    pub total_count: usize,
    /// List of all match positions (line, column) - limited to prevent memory issues,
    /// so its length may be smaller than `total_count`.
    pub positions: Vec<(usize, usize)>,
    /// Whether the search was truncated due to too many matches
    pub truncated: bool,
    /// Line content for each match (line number, line content, column) - optional for compatibility.
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub line_content: Option<Vec<(usize, String, usize)>>,
}

/// # FileType: Distinguishing Different Kinds of Filesystem Objects
///
/// This enum helps us categorize entries beyond just "file" or "directory".
/// It's especially useful on Unix-like systems where you have sockets, pipes, etc.
///
/// The type is `Copy`, comparable and hashable, so it can be passed by value
/// and used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum FileType {
    Directory,   // A folder, a container of other things.
    RegularFile, // Your everyday, garden-variety file.
    Symlink,     // A pointer to another file or directory.
    Executable,  // A file that can be run (has execute permissions).
    Socket,      // A Unix domain socket.
    Pipe,        // A named pipe (FIFO).
    BlockDevice, // A block special file (e.g., /dev/sda).
    CharDevice,  // A character special file (e.g., /dev/tty).
}

/// # FilesystemType: Identifying the underlying filesystem
///
/// This enum represents different filesystem types with single-character codes
/// for compact display. The mapping is designed to be memorable and intuitive.
///
/// The character quoted beside each variant is the code returned by
/// `FilesystemType::to_char`. Note that NFS uses 'R' (for "remote"), not 'N',
/// because 'N' is taken by NTFS.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum FilesystemType {
    Ext4,    // '4' - The most common Linux filesystem
    Ext3,    // '3' - Older ext filesystem
    Ext2,    // '2' - Even older ext filesystem
    Xfs,     // 'X' - XFS filesystem
    Btrfs,   // 'B' - Btrfs (B-tree filesystem)
    Zfs,     // 'Z' - ZFS filesystem
    Ntfs,    // 'N' - Windows NTFS
    Fat32,   // 'F' - FAT32
    ExFat,   // 'E' - exFAT
    Apfs,    // 'A' - Apple File System
    Hfs,     // 'H' - HFS+ (older Mac)
    Nfs,     // 'R' - Remote NFS mount
    Smb,     // 'S' - SMB/CIFS network filesystem
    Tmpfs,   // 'T' - Temporary filesystem (RAM); treated as virtual by `is_virtual`
    Procfs,  // 'P' - /proc virtual filesystem
    Sysfs,   // 'Y' - /sys virtual filesystem
    Devfs,   // 'D' - /dev virtual filesystem
    Mem8,    // 'M' - MEM|8 filesystem (Coming soon - Quantum File System) - https://m8.is
    Unknown, // '?' - Unknown filesystem (fallback when the type is not recognized)
}

impl FilesystemType {
    /// Get the single-character code for this filesystem type
    pub fn to_char(&self) -> char {
        match self {
            FilesystemType::Ext4 => '4',
            FilesystemType::Ext3 => '3',
            FilesystemType::Ext2 => '2',
            FilesystemType::Xfs => 'X',
            FilesystemType::Btrfs => 'B',
            FilesystemType::Zfs => 'Z',
            FilesystemType::Ntfs => 'N',
            FilesystemType::Fat32 => 'F',
            FilesystemType::ExFat => 'E',
            FilesystemType::Apfs => 'A',
            FilesystemType::Hfs => 'H',
            FilesystemType::Nfs => 'R',
            FilesystemType::Smb => 'S',
            FilesystemType::Tmpfs => 'T',
            FilesystemType::Procfs => 'P',
            FilesystemType::Sysfs => 'Y',
            FilesystemType::Devfs => 'D',
            FilesystemType::Mem8 => 'M',
            FilesystemType::Unknown => '?',
        }
    }

    /// Check if this is a virtual filesystem that should be skipped
    pub fn is_virtual(&self) -> bool {
        matches!(
            self,
            FilesystemType::Procfs
                | FilesystemType::Sysfs
                | FilesystemType::Devfs
                | FilesystemType::Tmpfs
        )
    }

    /// Check if this filesystem type should be shown by default
    /// (only "interesting" filesystems based on platform)
    pub fn should_show_by_default(&self) -> bool {
        #[cfg(target_os = "linux")]
        {
            matches!(
                self,
                FilesystemType::Ext4
                    | FilesystemType::Ext3
                    | FilesystemType::Xfs
                    | FilesystemType::Btrfs
                    | FilesystemType::Zfs
                    | FilesystemType::Nfs
                    | FilesystemType::Smb
                    | FilesystemType::Mem8
            )
        }
        #[cfg(target_os = "macos")]
        {
            matches!(
                self,
                FilesystemType::Apfs
                    | FilesystemType::Hfs
                    | FilesystemType::Nfs
                    | FilesystemType::Smb
                    | FilesystemType::Mem8
            )
        }
        #[cfg(target_os = "windows")]
        {
            matches!(
                self,
                FilesystemType::Ntfs
                    | FilesystemType::Fat32
                    | FilesystemType::ExFat
                    | FilesystemType::Mem8
            )
        }
        #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))]
        {
            // Show all non-virtual filesystems on other platforms
            !self.is_virtual()
        }
    }
}

/// # FileCategory: Adding Semantic Flavor to Files
///
/// This enum provides a higher-level categorization based on common file extensions
/// or names. It's primarily used for display purposes, like coloring output,
/// and can also help in understanding the nature of a directory's contents.
/// Trish loves how this makes the tree output more intuitive!
///
/// The extension lists beside each variant mirror the extension match in
/// `Scanner::get_file_category`, which lowercases the extension before matching.
/// NOTE(review): in the visible part of that function the name-based categories
/// (`Test`, `Package`, `Lock`, `Readme`, ...) are only tried for paths that have
/// no extension — confirm that names such as `package.json` or `README.md`
/// really end up in those categories rather than in `Json` / `Markdown`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum FileCategory {
    // --- Programming Languages ---
    Rust,       // .rs
    Python,     // .py, .pyw, .pyx, .pyi
    JavaScript, // .js, .mjs, .cjs
    TypeScript, // .ts, .tsx
    Java,       // .java, .class, .jar
    C,          // .c, .h
    Cpp,        // .cpp, .cc, .cxx, .hpp, .hxx
    Go,         // .go
    Ruby,       // .rb
    PHP,        // .php
    Shell,      // .sh, .bash, .zsh, .fish, .ps1, .bat, .cmd

    // --- Markup & Data Formats ---
    Markdown, // .md, .markdown
    Html,     // .html, .htm
    Css,      // .css, .scss, .sass, .less
    Json,     // .json, .jsonc, .geojson
    Yaml,     // .yaml, .yml
    Xml,      // .xml, .svg, .plist, .kml, .gpx (all XML-based)
    Toml,     // .toml
    Csv,      // NOTE(review): the visible matcher classifies .csv as Spreadsheet, not Csv

    // --- Build Systems & Configuration ---
    Makefile,   // Makefile, makefile, GNUmakefile (matched by name)
    Dockerfile, // Dockerfile (by name), .dockerfile (by extension)
    GitConfig,  // .gitignore, .gitconfig, .gitattributes, .gitmodules (matched by name)

    // --- Archives & Compressed Files ---
    Archive, // .zip, .tar, .gz, .tgz, .bz2, .tbz2, .xz, .txz, .7z, .rar

    // --- Media Files ---
    Image, // .jpg, .jpeg, .png, .gif, .bmp, .ico, .webp, .tiff, .tif, .heic, .heif
    Video, // .mp4, .avi, .mkv, .mov, .wmv, .flv, .webm, .mpeg, .mpg
    Audio, // .mp3, .wav, .flac, .aac, .ogg, .wma, .m4a

    // --- System & Binary Files ---
    SystemFile, // swap.img, swapfile, and names starting with vmlinuz or initrd
    Binary,     // .exe, .dll, .so, .dylib, .o, .a, .lib, .msi, .deb, .rpm, .app

    // --- Database ---
    Database, // .db, .sqlite, .sqlitedb, .sqlite3, .db3, .db4, .db5, .mdb, .accdb, .dbf

    // --- Office & Documents ---
    Office,      // .doc, .docx, .odt, .rtf
    Spreadsheet, // .xls, .xlsx, .ods, .csv, .tsv
    PowerPoint,  // .ppt, .pptx, .odp
    Pdf,         // .pdf
    Ebook,       // .epub, .mobi, .azw, .azw3, .fb2

    // --- Text Variants ---
    Log,     // .log
    Config,  // .ini, .cfg, .conf, .config, .properties, .env
    License, // LICENSE, LICENCE, COPYING files
    Readme,  // README files
    Txt,     // .txt, .text
    Rtf,     // NOTE(review): the visible matcher classifies .rtf as Office, not Rtf

    // --- Security & Crypto ---
    Certificate, // .crt, .cert, .pem, .key, .pub, .cer, .der
    Encrypted,   // .gpg, .pgp, .aes, .enc, .asc

    // --- Fonts ---
    Font, // .ttf, .otf, .woff, .woff2, .eot, .fon, .fnt

    // --- Virtual & Disk Images ---
    DiskImage, // .img, .iso, .vdi, .vmdk, .vhd, .vhdx, .dd, .hdd, .qcow, .qcow2, .dmg

    // --- 3D & CAD ---
    Model3D, // .obj, .stl, .dae, .fbx, .blend, .3ds, .ply, .gltf, .glb

    // --- Scientific & Data ---
    Jupyter, // .ipynb
    RData,   // .rdata, .rds, .rda
    Matlab,  // .m, .mat, .mlx

    // --- Web Assets ---
    WebAsset, // .wasm, .map, .sourcemap

    // --- Package & Dependencies ---
    Package, // package.json, Cargo.toml, requirements.txt, etc.
    Lock,    // package-lock.json, Cargo.lock, yarn.lock, etc.

    // --- Testing ---
    Test, // Files with test_, _test, .test, .spec patterns

    // --- Memory Files (Our special type!) ---
    Memory, // .mem8, .m8, .mq - MEM|8 memory files

    // --- Others ---
    Backup,  // .bak, .backup, .old, .orig (and `~` suffixes — not visible in the matcher, TODO confirm)
    Temp,    // .tmp, .temp, .swp, .swo, .swn
    Unknown, // If we can't categorize it, it's a mysterious Unknown! Also used for directories.
}

/// # TreeStats: The Final Scoreboard
///
/// After the concert is over, this is where we see how we did. It's the
/// scoreboard that tracks total files, total directories, the biggest hits
/// (largest files), and more. It's the answer to "So, how was the show?"
#[derive(Debug, Default)]
pub struct TreeStats {
    /// Total number of files encountered (excluding directories).
    pub total_files: u64,
    /// Total number of directories encountered.
    pub total_dirs: u64,
    /// Total size of all files (in bytes).
    pub total_size: u64,
    /// A map of file extensions to their counts (e.g., {"rs": 10, "toml": 2}).
    pub file_types: HashMap<String, u64>,
    /// Top N largest files found (path and size). N is usually 10.
    pub largest_files: Vec<(u64, PathBuf)>,
    /// Top N newest files found (path and modification time).
    pub newest_files: Vec<(SystemTime, PathBuf)>,
    /// Top N oldest files found (path and modification time).
    pub oldest_files: Vec<(SystemTime, PathBuf)>,
}

impl TreeStats {
    /// Updates the statistics based on a newly processed `FileNode`.
    /// This method is called for each non-permission-denied node.
    pub fn update_file(&mut self, node: &FileNode) {
        if node.is_dir {
            self.total_dirs += 1;
        } else {
            // It's a file!
            self.total_files += 1;
            self.total_size += node.size;

            // Track file extensions for type distribution.
            if let Some(ext) = node.path.extension() {
                if let Some(ext_str) = ext.to_str() {
                    *self.file_types.entry(ext_str.to_string()).or_insert(0) += 1;
                }
            }

            // --- Update Top N Lists ---
            // These lists are kept sorted and truncated to maintain a fixed size (e.g., top 10).

            // Update largest files: Add, sort by size (desc), truncate.
            self.largest_files.push((node.size, node.path.clone()));
            self.largest_files.sort_by(|a, b| b.0.cmp(&a.0)); // Largest first
            self.largest_files.truncate(10); // Keep only the top 10

            // Update newest files: Add, sort by modification time (desc), truncate.
            self.newest_files.push((node.modified, node.path.clone()));
            self.newest_files.sort_by(|a, b| b.0.cmp(&a.0)); // Newest first
            self.newest_files.truncate(10);

            // Update oldest files: Add, sort by modification time (asc), truncate.
            self.oldest_files.push((node.modified, node.path.clone()));
            self.oldest_files.sort_by(|a, b| a.0.cmp(&b.0)); // Oldest first
            self.oldest_files.truncate(10);
        }
    }
}

/// # ScannerConfig: The Rider for our Rock Star Scanner
///
/// This is the list of demands for our scanner. "Don't show me hidden files,"
/// "I only want to see files bigger than a tour bus," "Ignore the messy backstage
/// area (`.gitignore`)." We build this from the user's command-line arguments
/// to make sure the scanner puts on the exact show the user wants to see.
///
/// NOTE: `Default` is derived, so `ScannerConfig::default()` sets every `bool`
/// to `false`, every number to `0` and every `Option` to `None`. Any "default"
/// mentioned on a field below that differs from that presumably refers to the
/// value the CLI layer fills in — confirm against the code that builds the config.
#[derive(Default, Clone)]
pub struct ScannerConfig {
    /// Maximum depth to traverse into subdirectories.
    pub max_depth: usize,
    /// Should symbolic links be followed? (Currently always `false`).
    pub follow_symlinks: bool,
    /// Should `.gitignore` files be respected?
    pub respect_gitignore: bool,
    /// Should hidden files (starting with `.`) be shown?
    pub show_hidden: bool,
    /// Should ignored files/directories be shown (usually in brackets)?
    pub show_ignored: bool,
    /// An optional regex pattern to filter files/directories by name.
    pub find_pattern: Option<Regex>,
    /// An optional file extension to filter by (e.g., "rs").
    pub file_type_filter: Option<String>,
    /// Optional entry type filter ("f" for files, "d" for directories).
    pub entry_type_filter: Option<String>,
    /// Optional minimum file size filter (same unit as `FileNode::size`, i.e. bytes).
    pub min_size: Option<u64>,
    /// Optional maximum file size filter (same unit as `FileNode::size`, i.e. bytes).
    pub max_size: Option<u64>,
    /// Optional filter for files newer than a specific date.
    pub newer_than: Option<SystemTime>,
    /// Optional filter for files older than a specific date.
    pub older_than: Option<SystemTime>,
    /// Should the scanner use its built-in list of default ignore patterns
    /// (like `node_modules`, `__pycache__`, `target/`)?
    pub use_default_ignores: bool,
    /// An optional keyword to search for within file contents.
    pub search_keyword: Option<String>,
    /// Should filesystem type indicators be shown?
    pub show_filesystems: bool,
    /// Sort field for results (name, size, date, type)
    pub sort_field: Option<String>,
    /// Limit results to top N entries (useful with sort)
    pub top_n: Option<usize>,
    /// Include actual line content in search results (for AI/MCP use)
    pub include_line_content: bool,

    // --- Smart Scanning Options (Phase 2: Intelligent Context-Aware Scanning) ---

    /// Compute interest scores for each node (default: true when smart mode is enabled).
    /// The derived `Default` yields `false`.
    pub compute_interest: bool,
    /// Perform security scanning during traversal (default: true).
    /// The derived `Default` yields `false`.
    pub security_scan: bool,
    /// Minimum interest score to include in results (0.0-1.0, default: 0.0)
    pub min_interest: f32,
    /// Track how we reached each location (symlink, mount, etc.)
    pub track_traversal: bool,
    /// Only show changes since last scan
    pub changes_only: bool,
    /// Path to previous state file for comparison (or auto-detect from ~/.st/scan_states/)
    pub compare_state: Option<PathBuf>,
    /// Enable smart mode - groups by interest, shows changes, minimal output
    pub smart_mode: bool,
}

// --- Default Ignore Patterns: The "Please Don't Play These Songs" List ---
// Every band has songs they'd rather not play. This is our list of files and
// directories (`node_modules`, `target/`, etc.) that we usually skip to keep
// the show clean and focused on the hits. A tidy tree is a happy tree!
//
// Each entry is a glob pattern and must appear exactly once; put a new pattern
// in the section that fits best rather than repeating it under several headings.
const DEFAULT_IGNORE_PATTERNS: &[&str] = &[
    // Version control systems (but not all hidden dirs like .ssh)
    ".git",
    ".svn",
    ".hg",
    ".bzr",
    "_darcs",
    // Python artifacts
    "__pycache__",
    "*.pyc",
    "*.pyo",
    "*.pyd",
    ".Python",
    ".pytest_cache",
    ".tox",
    ".coverage",
    "*.egg-info",
    ".eggs",
    // Node.js / JavaScript artifacts
    "node_modules",
    ".npm",
    ".yarn",
    ".pnpm-store",
    "bower_components",
    ".next",
    ".nuxt",
    // General cache directories often found in projects
    ".cache", // Common cache dir name
    // Virtual environments
    "venv",
    "env",
    "ENV",
    "virtualenv",
    ".venv",
    ".env",
    "conda-meta",
    // Build/compilation artifacts from various languages/systems
    "target", // Rust
    "build",
    "dist",
    "out",
    "bin",
    "obj", // Common build output dirs
    "*.o",
    "*.a",
    "*.so",
    "*.dll",
    "*.dylib", // Object files, libraries
    // Package manager caches/data
    ".cargo",
    ".rustup", // Rust
    ".gem",
    ".bundle", // Ruby
    // IDEs and editor-specific files/directories
    ".idea",
    ".vscode",
    ".vs", // Common IDE metadata
    "*.swp",
    "*.swo",
    "*~", // Vim/editor backup/swap files
    ".project",
    ".classpath",
    ".settings", // Eclipse/Java
    // Development tool caches
    // (.pytest_cache, .tox and .coverage are already listed under "Python artifacts")
    ".mypy_cache",
    ".ruff_cache",
    ".hypothesis",
    ".sass-cache", // Sass CSS preprocessor
    // OS-specific junk files
    ".DS_Store",    // macOS
    "Thumbs.db",    // Windows
    "desktop.ini",  // Windows
    "$RECYCLE.BIN", // Windows recycle bin
    // Common temporary file/directory names and patterns
    "tmp",
    "temp",
    ".tmp",
    ".temp",
    "*.tmp",
    "*.temp",
    // Archive leftovers
    "__MACOSX", // macOS archive metadata
    // System directories that are almost never useful to traverse deeply from a user's project root.
    // These are more aggressively ignored if `st` is run on `/`.
    // "proc", "sys", "dev", "lost+found", "mnt", "media", // Handled by DEFAULT_SYSTEM_PATHS
    // Other common ignores
    ".vagrant",
    ".terraform",
];

// Default paths that are almost always too noisy or problematic to scan,
// especially if `st` is run from `/` or a very high-level directory.
// These are typically mount points for virtual filesystems or system-critical areas.
// All entries are absolute, Unix-style paths; `Scanner::system_paths` is documented
// as holding such paths.
// NOTE(review): how they are matched (exact path vs. prefix) is not visible here.
const DEFAULT_SYSTEM_PATHS: &[&str] = &[
    "/proc",
    "/sys",
    "/dev",
    "/run",
    "/tmp",
    "/var/tmp",
    "/lost+found",
    "/mnt",
    "/media",
    "/snap", // Common mount points or special dirs (applies to the last few entries)
];

// Specific individual files (absolute paths) that should always be ignored
// due to their special nature (e.g., virtual files representing system memory).
// All three live under /proc, which is also listed in DEFAULT_SYSTEM_PATHS.
const DEFAULT_IGNORE_FILES: &[&str] = &[
    "/proc/kcore",    // Virtual file representing physical memory, can be huge & slow.
    "/proc/kmsg",     // Kernel messages, can be an infinite stream.
    "/proc/kallsyms", // Kernel symbols, can be large.
];

/// # Scanner: The Rock Star of our Show
///
/// BEHOLD! The `Scanner` itself! This is the main act. It takes the config,
/// the ignore lists, and a path, and it puts on a spectacular show of directory
/// traversal. It's fast, it's smart, and it knows all the best moves.
///
/// All fields are private; the struct is built through `Scanner::new(root, config)`
/// (as used by `quick_scan`) and queried through its methods.
pub struct Scanner {
    /// The configuration for this scanning operation.
    config: ScannerConfig,
    /// Compiled `GlobSet` from `.gitignore` files, if respected and found.
    /// `None` otherwise.
    gitignore: Option<GlobSet>,
    /// Compiled `GlobSet` from our `DEFAULT_IGNORE_PATTERNS`.
    /// NOTE(review): presumably `None` when `config.use_default_ignores` is off — confirm in the constructor.
    default_ignores: Option<GlobSet>,
    /// A set of absolute system paths to ignore (e.g., /proc, /sys).
    system_paths: HashSet<PathBuf>,
    /// A set of specific absolute file paths to ignore (e.g., /proc/kcore).
    ignore_files: HashSet<PathBuf>,
    /// The root path from which the scan originates. Exposed read-only via `root()`.
    root: PathBuf,
    /// Safety limits to prevent crashes on large directories
    safety_limits: ScannerSafetyLimits,

    // --- Smart Scanning Components (Phase 4) ---
    // Both are optional.
    // NOTE(review): presumably only populated when the matching config flag
    // (`security_scan` / `compute_interest`) is set — confirm in the constructor.

    /// Security scanner for detecting supply chain attack patterns
    security_scanner: Option<SecurityScanner>,
    /// Interest calculator for scoring file relevance
    interest_calculator: Option<InterestCalculator>,
}

impl Scanner {
    /// Borrows the root path this scanner walks from.
    ///
    /// NOTE(review): described as canonicalized; that would happen in the
    /// constructor, which is not visible here — confirm.
    pub fn root(&self) -> &Path {
        self.root.as_path()
    }

    /// Quick scan for basic project analysis - lighter weight than full scan
    /// Returns only basic stats and key files for faster integration
    pub fn quick_scan(&self) -> Result<(Vec<FileNode>, TreeStats)> {
        let mut config = self.config.clone();
        config.max_depth = 3; // Limit depth for quick scan

        let quick_scanner = Scanner::new(&self.root, config)?;
        quick_scanner.scan()
    }

    /// Finds files modified within the last `hours_ago` hours.
    /// Useful for finding recent activity in projects.
    ///
    /// Runs a full `scan()` and keeps the non-directory nodes whose modification
    /// time is strictly later than `now - hours_ago`. If the window is so large
    /// that the cutoff cannot be represented (or `hours_ago * 3600` would
    /// overflow), every file counts as recent instead of panicking.
    ///
    /// # Errors
    /// Propagates any error returned by the scan.
    pub fn find_recent_files(&self, hours_ago: u64) -> Result<Vec<FileNode>> {
        // Saturate instead of overflowing for absurdly large hour counts.
        let window = std::time::Duration::from_secs(hours_ago.saturating_mul(3600));
        // `SystemTime - Duration` panics when the result is out of range;
        // `checked_sub` reports that as `None`, which we treat as "no lower bound".
        let cutoff_time = std::time::SystemTime::now().checked_sub(window);

        let (nodes, _) = self.scan()?;
        Ok(nodes
            .into_iter()
            .filter(|node| {
                !node.is_dir && cutoff_time.map_or(true, |cutoff| node.modified > cutoff)
            })
            .collect())
    }

    /// Collects the key project files (build configs, entry points, READMEs, ...).
    ///
    /// Performs a full `scan()` and keeps every non-directory node whose file
    /// name exactly matches one of a fixed list of well-known names. Names that
    /// are not valid UTF-8 never match.
    ///
    /// # Errors
    /// Propagates any error returned by the scan.
    pub fn find_key_files(&self) -> Result<Vec<FileNode>> {
        // Exact, case-sensitive file names that mark a file as "key".
        const KEY_FILE_NAMES: &[&str] = &[
            "main.rs",
            "lib.rs",
            "mod.rs",
            "package.json",
            "Cargo.toml",
            "requirements.txt",
            "pyproject.toml",
            "README.md",
            "LICENSE",
            "Makefile",
            "CMakeLists.txt",
            "index.js",
            "app.js",
            "server.js",
            "main.js",
            "main.py",
            "__init__.py",
            "setup.py",
            "go.mod",
            "main.go",
            "pom.xml",
            "build.gradle",
            "build.xml",
            ".gitignore",
            "docker-compose.yml",
            "Dockerfile",
        ];

        let (scanned, _stats) = self.scan()?;

        let mut key_files = Vec::new();
        for node in scanned {
            if node.is_dir {
                continue;
            }

            let is_key = node
                .path
                .file_name()
                .and_then(|name| name.to_str())
                .map_or(false, |name| KEY_FILE_NAMES.contains(&name));

            if is_key {
                key_files.push(node);
            }
        }

        Ok(key_files)
    }

    /// ## `get_file_category`
    /// Determines a `FileCategory` for a given path and `FileType`.
    /// This function uses a series of heuristics based on file extensions and common names
    /// to classify files into broad categories, useful for display and understanding content.
    /// It's like a quick identification guide for files!
    fn get_file_category(path: &Path, file_type: FileType) -> FileCategory {
        // Directories don't get a specific content category here; their content defines them.
        if matches!(file_type, FileType::Directory) {
            return FileCategory::Unknown;
        }

        // First, check for some very specific system file names.
        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
            if name == "swap.img"
                || name == "swapfile"
                || name.starts_with("vmlinuz")
                || name.starts_with("initrd")
            {
                return FileCategory::SystemFile;
            }
        }

        // Primary categorization is by file extension.
        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
            match ext.to_lowercase().as_str() {
                // --- Programming Languages ---
                "rs" => FileCategory::Rust,
                "py" | "pyw" | "pyx" | "pyi" => FileCategory::Python,
                "js" | "mjs" | "cjs" => FileCategory::JavaScript,
                "ts" | "tsx" => FileCategory::TypeScript,
                "java" | "class" | "jar" => FileCategory::Java,
                "c" | "h" => FileCategory::C,
                "cpp" | "cc" | "cxx" | "hpp" | "hxx" => FileCategory::Cpp,
                "go" => FileCategory::Go,
                "rb" => FileCategory::Ruby,
                "php" => FileCategory::PHP,
                "sh" | "bash" | "zsh" | "fish" | "ps1" | "bat" | "cmd" => FileCategory::Shell,

                // --- Markup/Data ---
                "md" | "markdown" => FileCategory::Markdown,
                "html" | "htm" => FileCategory::Html,
                "css" | "scss" | "sass" | "less" => FileCategory::Css,
                "json" | "jsonc" | "geojson" => FileCategory::Json,
                "yaml" | "yml" => FileCategory::Yaml,
                "xml" | "svg" | "plist" | "kml" | "gpx" => FileCategory::Xml, // SVG and others are XML-based
                "toml" => FileCategory::Toml,

                // --- Build/Config (some are also by name) ---
                "dockerfile" => FileCategory::Dockerfile, // Extension variant
                // .gitignore, .gitconfig are usually by name, handled below

                // --- Archives ---
                "zip" | "tar" | "gz" | "tgz" | "bz2" | "tbz2" | "xz" | "txz" | "7z" | "rar" => {
                    FileCategory::Archive
                }

                // --- Media ---
                "jpg" | "jpeg" | "png" | "gif" | "bmp" | "ico" | "webp" | "tiff" | "tif"
                | "heic" | "heif" => FileCategory::Image,
                "mp4" | "avi" | "mkv" | "mov" | "wmv" | "flv" | "webm" | "mpeg" | "mpg" => {
                    FileCategory::Video
                }
                "mp3" | "wav" | "flac" | "aac" | "ogg" | "wma" | "m4a" => FileCategory::Audio,

                // --- Binary/Executable (some overlap with system, but these are common distributable/object formats) ---
                "exe" | "dll" | "so" | "dylib" | "o" | "a" | "lib" | "msi" | "deb" | "rpm"
                | "app" => FileCategory::Binary,

                // --- Database Files ---
                "db" | "sqlite" | "sqlitedb" | "sqlite3" | "db3" | "db4" | "db5" | "mdb"
                | "accdb" | "dbf" => FileCategory::Database,

                // --- Office & Documents ---
                "doc" | "docx" | "odt" | "rtf" => FileCategory::Office,
                "xls" | "xlsx" | "ods" | "csv" | "tsv" => FileCategory::Spreadsheet,
                "ppt" | "pptx" | "odp" => FileCategory::PowerPoint,
                "pdf" => FileCategory::Pdf,
                "epub" | "mobi" | "azw" | "azw3" | "fb2" => FileCategory::Ebook,

                // --- Text & Config Files ---
                "txt" | "text" => FileCategory::Txt,
                "log" => FileCategory::Log,
                "ini" | "cfg" | "conf" | "config" | "properties" | "env" => FileCategory::Config,

                // --- Security & Crypto ---
                "crt" | "cert" | "pem" | "key" | "pub" | "cer" | "der" => FileCategory::Certificate,
                "gpg" | "pgp" | "aes" | "enc" | "asc" => FileCategory::Encrypted,

                // --- Fonts ---
                "ttf" | "otf" | "woff" | "woff2" | "eot" | "fon" | "fnt" => FileCategory::Font,

                // --- Disk Images ---
                "img" | "vdi" | "vmdk" | "vhd" | "vhdx" | "dd" | "hdd" | "qcow" | "qcow2" => {
                    FileCategory::DiskImage
                }
                "iso" | "dmg" => FileCategory::DiskImage, // These can be both archives and disk images, but treating as disk images

                // --- 3D & CAD ---
                "obj" | "stl" | "dae" | "fbx" | "blend" | "3ds" | "ply" | "gltf" | "glb" => {
                    FileCategory::Model3D
                }

                // --- Scientific & Data ---
                "ipynb" => FileCategory::Jupyter,
                "rdata" | "rds" | "rda" => FileCategory::RData,
                "m" | "mat" | "mlx" => FileCategory::Matlab,

                // --- Web Assets ---
                "wasm" | "map" | "sourcemap" => FileCategory::WebAsset,

                // --- Memory Files (MEM|8!) ---
                "mem8" | "m8" | "mq" => FileCategory::Memory,

                // --- Backup & Temp ---
                "bak" | "backup" | "old" | "orig" => FileCategory::Backup,
                "tmp" | "temp" | "swp" | "swo" | "swn" => FileCategory::Temp,

                _ => FileCategory::Unknown, // Extension not recognized
            }
        } else {
            // No extension, or extension parsing failed. Try common filenames.
            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
                // Check for test files
                if name.starts_with("test_")
                    || name.ends_with("_test")
                    || name.contains(".test.")
                    || name.contains(".spec.")
                {
                    return FileCategory::Test;
                }

                // Check for specific filenames
                match name {
                    "Makefile" | "makefile" | "GNUmakefile" => FileCategory::Makefile,
                    "Dockerfile" => FileCategory::Dockerfile,
                    ".gitignore" | ".gitconfig" | ".gitattributes" | ".gitmodules" => {
                        FileCategory::GitConfig
                    }
                    "LICENSE" | "LICENCE" | "COPYING" => FileCategory::License,
                    "README" | "README.md" | "README.txt" | "README.rst" => FileCategory::Readme,
                    "package.json" | "Cargo.toml" | "requirements.txt" | "pyproject.toml"
                    | "pom.xml" | "build.gradle" | "go.mod" | "composer.json" => {
                        FileCategory::Package
                    }
                    "package-lock.json" | "Cargo.lock" | "yarn.lock" | "pnpm-lock.yaml"
                    | "poetry.lock" | "Gemfile.lock" => FileCategory::Lock,
                    _ => {
                        // Check for backup files ending with ~
                        if name.ends_with('~') {
                            FileCategory::Backup
                        } else if matches!(file_type, FileType::Executable) {
                            FileCategory::Binary
                        } else {
                            FileCategory::Unknown
                        }
                    }
                }
            } else {
                FileCategory::Unknown // Path has no filename component (should be rare for actual files).
            }
        }
    }

    /// ## `Scanner::new` - Constructor
    ///
    /// Creates a new `Scanner` instance. This involves:
    /// 1. Storing the provided `config` and `root` path.
    /// 2. Loading and compiling `.gitignore` patterns if `config.respect_gitignore` is true.
    /// 3. Compiling the `DEFAULT_IGNORE_PATTERNS` if `config.use_default_ignores` is true.
    /// 4. Initializing sets of system paths and specific files to always ignore.
    ///
    /// This setup prepares the scanner for efficient `should_ignore` checks during traversal.
    pub fn new(root: &Path, config: ScannerConfig) -> Result<Self> {
        // Canonicalize the root path to get the absolute path
        // If canonicalize fails (e.g., path doesn't exist), fall back to absolute path
        let canonical_root = root
            .canonicalize()
            .or_else(|_| std::env::current_dir().map(|cwd| cwd.join(root)))
            .unwrap_or_else(|_| root.to_path_buf());

        // Load .gitignore patterns from the root directory if requested.
        let gitignore = if config.respect_gitignore {
            Self::load_gitignore(&canonical_root)? // This can return None if no .gitignore or error.
        } else {
            None // Not respecting .gitignore.
        };

        // Build the GlobSet for default ignore patterns if requested.
        let default_ignores = if config.use_default_ignores {
            Self::build_default_ignores()? // This can return None if patterns are invalid (unlikely for defaults).
        } else {
            None // Not using default ignores.st
        };

        // Initialize the set of system paths to ignore (e.g., /proc, /sys).
        let system_paths: HashSet<PathBuf> = if config.use_default_ignores {
            DEFAULT_SYSTEM_PATHS
                .iter()
                .map(PathBuf::from) // Convert string slices to PathBufs
                .collect() // Collect into a HashSet for quick lookups.
        } else {
            HashSet::new() // Empty set if not using default ignores.
        };

        // Initialize the set of specific files to ignore (e.g., /proc/kcore).
        let ignore_files: HashSet<PathBuf> = if config.use_default_ignores {
            DEFAULT_IGNORE_FILES.iter().map(PathBuf::from).collect()
        } else {
            HashSet::new()
        };

        // Determine appropriate safety limits based on the path
        let safety_limits =
            if canonical_root.as_os_str() == std::env::var("HOME").unwrap_or_default().as_str() {
                // Home directory needs special care
                ScannerSafetyLimits::for_home_directory()
            } else if canonical_root.starts_with("/") && canonical_root.components().count() <= 2 {
                // Root or near-root paths need limits
                ScannerSafetyLimits::for_home_directory()
            } else {
                // Regular directories can use default limits
                ScannerSafetyLimits::default()
            };

        // Initialize security scanner if enabled
        let security_scanner = if config.security_scan {
            Some(SecurityScanner::new())
        } else {
            None
        };

        // Initialize interest calculator if smart mode or interest computation enabled
        let interest_calculator = if config.compute_interest || config.smart_mode {
            // Try to load previous state for change detection
            let calc = InterestCalculator::new();
            let calc = if let Ok(Some(prev_state)) = ScanState::load(&canonical_root) {
                calc.with_previous_state(prev_state)
            } else {
                calc
            };
            Some(calc)
        } else {
            None
        };

        Ok(Self {
            config,
            gitignore,
            default_ignores,
            system_paths,
            ignore_files,
            root: canonical_root, // Store a copy of the root path.
            safety_limits,
            security_scanner,
            interest_calculator,
        })
    }

    /// ## `build_default_ignores`
    ///
    /// Compiles every entry of `DEFAULT_IGNORE_PATTERNS` into a single `GlobSet` so that the
    /// default ignore rules can be tested with one matcher.
    ///
    /// Entries that fail to parse as a glob are skipped without any diagnostic.
    ///
    /// Returns `Ok(Some(GlobSet))` whenever the set builds; this function never returns
    /// `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Returns the error from `GlobSetBuilder::build` if the combined set cannot be compiled.
    fn build_default_ignores() -> Result<Option<GlobSet>> {
        let mut builder = GlobSetBuilder::new();

        for pattern_str in DEFAULT_IGNORE_PATTERNS {
            // A malformed default pattern is dropped rather than failing the whole scanner.
            let Ok(glob) = Glob::new(pattern_str) else {
                continue;
            };
            builder.add(glob);
        }

        let compiled = builder.build()?;
        Ok(Some(compiled))
    }

    /// ## `load_gitignore`
    ///
    /// Reads `<root>/.gitignore` and compiles its patterns into a `GlobSet`.
    ///
    /// Each line is trimmed; blank lines and lines starting with `#` are ignored, and lines that
    /// do not parse as a glob are skipped silently. Non-UTF-8 bytes are replaced lossily.
    ///
    /// Returns:
    /// - `Ok(None)` when the file does not exist, or when it exists but cannot be read
    ///   (a warning is printed to stderr in the latter case);
    /// - `Ok(Some(GlobSet))` when the file was read and the set compiled.
    ///
    /// # Errors
    ///
    /// Returns the error from `GlobSetBuilder::build` if the combined set cannot be compiled.
    fn load_gitignore(root: &Path) -> Result<Option<GlobSet>> {
        let gitignore_path = root.join(".gitignore");
        if !gitignore_path.exists() {
            return Ok(None);
        }

        // Read raw bytes so a stray non-UTF-8 byte degrades to U+FFFD instead of an error.
        let bytes = match fs::read(&gitignore_path) {
            Ok(bytes) => bytes,
            Err(e) => {
                eprintln!(
                    "Warning: Could not read .gitignore at {:?}: {}",
                    gitignore_path, e
                );
                return Ok(None);
            }
        };
        let content = String::from_utf8_lossy(&bytes);

        // Keep only meaningful lines: trimmed, non-empty, not a comment.
        let pattern_lines = content
            .lines()
            .map(str::trim)
            .filter(|line| !line.is_empty() && !line.starts_with('#'));

        let mut builder = GlobSetBuilder::new();
        for pattern in pattern_lines {
            // Patterns the glob parser rejects are dropped without a diagnostic.
            if let Ok(glob) = Glob::new(pattern) {
                builder.add(glob);
            }
        }

        let compiled = builder.build()?;
        Ok(Some(compiled))
    }

    /// ## `scan_stream` - Stream nodes as they are discovered
    ///
    /// This version of scan is optimized for the `--stream` flag.
    /// It walks the tree under `self.root` and sends each accepted `FileNode` through `sender`
    /// as soon as it has been processed, so the consumer can start displaying output while the
    /// walk is still running.
    ///
    /// When `config.search_keyword` is set, streaming is not possible (all nodes are needed to
    /// decide which directories to show), so this delegates to `scan()` and then sends the
    /// collected nodes in order.
    ///
    /// The walk ends early, still returning `Ok`, when the receiver side of `sender` has been
    /// dropped or when the safety tracker reports that a limit was hit.
    ///
    /// Returns the `TreeStats` accumulated over every node that was sent and is not marked
    /// `permission_denied`, plus the synthetic permission-denied nodes created for walk errors.
    ///
    /// # Errors
    ///
    /// Propagates errors from `scan`, `should_ignore` and `process_entry`.
    pub fn scan_stream(&self, sender: mpsc::Sender<FileNode>) -> Result<TreeStats> {
        let mut stats = TreeStats::default();

        // When searching, we need to collect all nodes first to determine which directories to show
        if self.config.search_keyword.is_some() {
            // Use the non-streaming scan and then send results in order.
            // The `stats` bound here shadows the outer one and is what gets returned.
            let (nodes, stats) = self.scan()?;
            for node in nodes {
                if sender.send(node).is_err() {
                    break; // Receiver disconnected
                }
            }
            return Ok(stats);
        }

        // Initialize safety tracker for streaming mode
        let safety_tracker = ScannerSafetyTracker::new(self.safety_limits.clone());

        // Streaming logic for non-search cases. From here on `search_keyword` is `None`.
        let mut walker = WalkDir::new(&self.root)
            .max_depth(self.config.max_depth)
            .follow_links(self.config.follow_symlinks)
            .into_iter();

        // Loop through each entry provided by WalkDir. An explicit `while let` (rather than
        // `for`) is required because the body calls `walker.skip_current_dir()`.
        while let Some(entry_result) = walker.next() {
            // Check safety limits before touching the next entry.
            if let Err(safety_error) = safety_tracker.should_continue() {
                eprintln!("⚠️  {}", safety_error);
                eprintln!("   Use --max-depth or scan a more specific directory");
                break;
            }

            match entry_result {
                Ok(entry) => {
                    // Successfully read a directory entry.
                    let depth = entry.depth();
                    let path = entry.path();

                    // Determine if this entry should be ignored based on various rules.
                    let is_ignored_by_rules = self.should_ignore(path)?;

                    if is_ignored_by_rules {
                        // The entry matches an ignore rule.
                        if self.config.show_ignored {
                            // If we're showing ignored items, process it but mark as ignored.
                            if let Some(mut node) =
                                self.process_entry(&entry, depth, is_ignored_by_rules)?
                            {
                                // NOTE(review): `should_search_file` returns false when no search
                                // keyword is configured, and the keyword case returned above, so
                                // this search does not run on the streaming path.
                                if !node.is_dir && self.should_search_file(&node) {
                                    node.search_matches = self.search_in_file(&node.path);
                                }

                                // Track node for safety limits
                                safety_tracker.add_file(estimate_node_size(
                                    node.path.to_string_lossy().len(),
                                ));

                                // Send the (ignored) node through the channel. A clone is sent
                                // because `node` is still needed for the stats update below.
                                if sender.send(node.clone()).is_err() {
                                    break; // Receiver has disconnected, stop scanning.
                                }

                                // Update stats for ignored items if they aren't permission-denied.
                                // This ensures `show_ignored` gives a full picture.
                                if !node.permission_denied {
                                    stats.update_file(&node);
                                }
                            }
                            // If this ignored item is a directory, tell WalkDir not to descend into it:
                            // the directory itself is shown, its contents are not.
                            if entry.file_type().is_dir() {
                                walker.skip_current_dir();
                            }
                        } else {
                            // We are *not* showing ignored items, and this one is ignored.
                            // If it's a directory, skip its contents. An ignored file needs no
                            // action: it is dropped simply by not being sent.
                            if entry.file_type().is_dir() {
                                walker.skip_current_dir();
                            }
                        }
                    } else {
                        // The entry is NOT ignored by rules. Process it normally
                        // (the `false` argument is the "is ignored" flag).
                        if let Some(mut node) = self.process_entry(&entry, depth, false)? {
                            // NOTE(review): as above, no keyword is configured on this path, so
                            // `should_search_file` returns false and no search is performed.
                            if !node.is_dir && self.should_search_file(&node) {
                                node.search_matches = self.search_in_file(&node.path);
                            }

                            // True only if the node carries search matches with a non-zero count.
                            let has_search_match = node
                                .search_matches
                                .as_ref()
                                .is_some_and(|m| m.total_count > 0);

                            // With a search keyword, only files with matches are included;
                            // otherwise the regular filters in `should_include` decide.
                            // On this path the keyword is `None`, so the `else` arm is taken.
                            let should_include_file = if self.config.search_keyword.is_some() {
                                has_search_match
                            } else {
                                self.should_include(&node)
                            };

                            // Directories are always sent; files only when they pass the check.
                            if node.is_dir || should_include_file {
                                // Track node for safety limits
                                safety_tracker.add_file(estimate_node_size(
                                    node.path.to_string_lossy().len(),
                                ));

                                // Send the processed node through the channel.
                                if sender.send(node.clone()).is_err() {
                                    break; // Receiver disconnected.
                                }

                                // Update statistics for included, non-permission-denied items.
                                if !node.permission_denied {
                                    stats.update_file(&node);
                                }
                            }
                        } else {
                            // process_entry returned None, which means this is a hidden entry and show_hidden is false
                            // If it's a directory, we need to skip its contents
                            if entry.file_type().is_dir() {
                                walker.skip_current_dir();
                            }
                        }
                    }
                }
                Err(e) => {
                    // An error occurred trying to access a directory entry (e.g., permission denied).
                    if let Some(path) = e.path() {
                        let depth = e.depth();

                        // Check if this is a "directory contents" error vs "directory entry" error.
                        // If this is a permission error, it's likely we already processed the directory
                        // entry successfully but can't read its contents. In that case, skip creating
                        // a duplicate node since we already marked the original as permission_denied.
                        // Note that the test below is true for *any* PermissionDenied I/O error.
                        let is_contents_error = e.io_error().is_some_and(|io_err| {
                            io_err.kind() == std::io::ErrorKind::PermissionDenied
                        });

                        if !is_contents_error {
                            // Create a special node representing the inaccessible entry.
                            let node = self.create_permission_denied_node(path, depth);
                            safety_tracker
                                .add_file(estimate_node_size(node.path.to_string_lossy().len()));

                            if sender.send(node.clone()).is_err() {
                                break; // Receiver disconnected.
                            }
                            // Still update stats (e.g., directory count) for permission-denied entries if shown.
                            stats.update_file(&node);
                        }

                        // Tell WalkDir not to try to descend into this unreadable directory.
                        // NOTE(review): this runs for every path-bearing error, whereas `scan`
                        // only skips on PermissionDenied — confirm the difference is intended.
                        walker.skip_current_dir();
                    }
                    // Errors without an associated path are dropped silently here.
                }
            }
        }
        // Scan complete, return the accumulated statistics.
        Ok(stats)
    }

    /// ## `should_search_file`
    ///
    /// Cheap pre-check run before `search_in_file`: decides whether `node` is worth opening
    /// for a content search.
    ///
    /// Returns `true` only when all of the following hold:
    /// - a search keyword is configured;
    /// - if `config.file_type_filter` is set, the node's extension equals it exactly
    ///   (a node without an extension never matches);
    /// - the node is not a directory, not a symlink and not marked `permission_denied`;
    /// - the node's category is one of the source / markup / config categories listed below.
    fn should_search_file(&self, node: &FileNode) -> bool {
        // Nothing to look for.
        if self.config.search_keyword.is_none() {
            return false;
        }

        // Honour the extension filter, when there is one.
        if let Some(ref filter_ext) = self.config.file_type_filter {
            let Some(ext) = node.path.extension() else {
                return false;
            };
            if ext.to_str() != Some(filter_ext) {
                return false;
            }
        }

        // Only plain, accessible files are candidates.
        let is_plain_accessible_file =
            !(node.is_dir || node.is_symlink || node.permission_denied);
        if !is_plain_accessible_file {
            return false;
        }

        // Allow-list of categories that are searched; every other category is skipped.
        match node.category {
            FileCategory::Rust
            | FileCategory::Python
            | FileCategory::JavaScript
            | FileCategory::TypeScript
            | FileCategory::Java
            | FileCategory::C
            | FileCategory::Cpp
            | FileCategory::Go
            | FileCategory::Ruby
            | FileCategory::PHP
            | FileCategory::Shell
            | FileCategory::Markdown
            | FileCategory::Html
            | FileCategory::Css
            | FileCategory::Json
            | FileCategory::Yaml
            | FileCategory::Xml
            | FileCategory::Toml
            | FileCategory::Makefile
            | FileCategory::Dockerfile
            | FileCategory::GitConfig => true,
            _ => false,
        }
    }

    /// ## `search_in_file`
    ///
    /// Searches the file at `path` for the configured keyword (case-sensitive, plain substring)
    /// and reports where it occurs.
    ///
    /// The file is read line by line. For every occurrence the 1-based line number and the
    /// 1-based column are recorded; the column is derived from the byte offset of the match
    /// within its line. At most 100 positions and, when `config.include_line_content` is set,
    /// at most 100 matching lines (line number, line text, first match column) are kept.
    ///
    /// Scanning stops as soon as more than 100 occurrences have been counted; the result is then
    /// flagged `truncated`. The line on which that limit is crossed is still included in the
    /// line content (subject to the 100-line cap), so reported positions and line content stay
    /// consistent.
    ///
    /// If a line cannot be read (for example invalid UTF-8), scanning stops there and whatever
    /// was found up to that point is returned.
    ///
    /// Returns `None` when no keyword is configured, the keyword is empty, the file cannot be
    /// opened, or no occurrence was found.
    fn search_in_file(&self, path: &Path) -> Option<SearchMatches> {
        // Ensure there's a non-empty keyword to search for.
        let keyword = self.config.search_keyword.as_ref()?;
        if keyword.is_empty() {
            return None;
        }

        // Unopenable files are treated as "no matches".
        let file = fs::File::open(path).ok()?;
        let include_line_content = self.config.include_line_content;

        let mut positions = Vec::new();
        let mut line_content_vec = Vec::new();
        let mut first_match: Option<(usize, usize)> = None;
        let mut total_count = 0;
        let mut truncated = false;

        for (line_index, line_result) in BufReader::new(file).lines().enumerate() {
            // Invalid UTF-8 or another read error: stop searching this file.
            let Ok(line_content) = line_result else {
                break;
            };
            let line_number = line_index + 1;
            let mut first_column_in_line = None;

            for (byte_offset, _) in line_content.match_indices(keyword) {
                total_count += 1;

                // Line and column numbers are 1-based for user display.
                let match_pos = (line_number, byte_offset + 1);
                first_match = first_match.or(Some(match_pos));
                first_column_in_line = first_column_in_line.or(Some(byte_offset + 1));

                // Only store the first 100 positions to bound memory use.
                if positions.len() < 100 {
                    positions.push(match_pos);
                }

                // Too many matches: stop counting, but fall through so the current line is
                // still recorded below before the scan ends.
                if total_count > 100 {
                    truncated = true;
                    break;
                }
            }

            // Record the line text once per matching line, moving the buffer instead of cloning.
            if let Some(first_column) = first_column_in_line {
                if include_line_content && line_content_vec.len() < 100 {
                    line_content_vec.push((line_number, line_content, first_column));
                }
            }

            if truncated {
                break;
            }
        }

        // No occurrence at all means there is nothing to report.
        let first_match = first_match?;
        let line_content =
            (include_line_content && !line_content_vec.is_empty()).then_some(line_content_vec);

        Some(SearchMatches {
            first_match,
            total_count,
            positions,
            truncated,
            line_content,
        })
    }

    /// ## `enrich_with_smart_scanning` - Add Security & Interest Data
    ///
    /// Fills in `node.security_findings`, `node.interest` and `node.traversal_context`.
    ///
    /// - File content is read only for non-directory nodes of at most 10,000,000 bytes, and only
    ///   when a security scanner is configured and `should_scan_for_security` accepts the node.
    ///   A file that cannot be read as UTF-8 text is treated as having no content.
    /// - When content is available it is passed to the security scanner; non-empty findings
    ///   replace `node.security_findings`.
    /// - When an interest calculator is configured, the score is computed from the content if
    ///   there is any, otherwise from the node's metadata alone, and the traversal context is
    ///   rebuilt. Directories and oversized files therefore still receive an interest score.
    fn enrich_with_smart_scanning(&self, node: &mut FileNode) {
        // Content-based analysis is skipped for directories and very large files.
        let content_analysis_allowed = !node.is_dir && node.size <= 10_000_000;

        let content = if content_analysis_allowed
            && self.security_scanner.is_some()
            && self.should_scan_for_security(node)
        {
            fs::read_to_string(&node.path).ok()
        } else {
            None
        };

        // Security scanning needs both a scanner and readable content.
        if let Some(scanner) = &self.security_scanner {
            if let Some(text) = content.as_ref() {
                let findings = scanner.scan_file_content(&node.path, text);
                if !findings.is_empty() {
                    node.security_findings = findings;
                }
            }
        }

        // Interest scoring is optional; without a calculator there is nothing left to do.
        let Some(calc) = &self.interest_calculator else {
            return;
        };

        // NOTE(review): the extra findings returned by `calculate_with_security` are discarded
        // here — confirm that is intended.
        let score = match content.as_ref() {
            Some(text) => calc.calculate_with_security(node, Some(text)).0,
            None => calc.calculate(node),
        };
        node.interest = Some(score);
        node.traversal_context = Some(calc.build_traversal_context(node, None));
    }

    /// Decides, from the node's category alone, whether its content should be handed to the
    /// security scanner.
    ///
    /// Returns `false` for the binary-like categories listed below and `true` for every other
    /// category.
    fn should_scan_for_security(&self, node: &FileNode) -> bool {
        match node.category {
            // Categories whose content is not scanned.
            FileCategory::Binary
            | FileCategory::Archive
            | FileCategory::Image
            | FileCategory::Video
            | FileCategory::Audio
            | FileCategory::DiskImage
            | FileCategory::Font
            | FileCategory::Encrypted => false,
            _ => true,
        }
    }

    /// ## `scan` - The Full Scan (Non-Streaming)
    ///
    /// Performs a complete directory scan, collecting all `FileNode`s that meet the criteria
    /// (not ignored, or shown if ignored, and pass filters if any).
    /// This method first traverses the entire directory structure defined by `config.max_depth`,
    /// creating `FileNode` objects for each entry. It then performs a second pass if filters
    /// are active to ensure that directories are only included if they (or their subdirectories)
    /// contain files that match the filters.
    /// Returns a tuple: `(Vec<FileNode>, TreeStats)`.
    /// ### How it works: the "Scan-It-All-Then-Sort-It-Out" method
    ///
    /// This is the classic way to scan. It's a two-act show:
    /// 1. **Act I**: Walk through every single file and directory, collecting a huge list of `FileNode`s.
    /// 2. **Act II**: If there are filters, go through that huge list and pick out only the ones that
    ///    match, making sure to keep their parent directories so the tree still makes sense.
    ///    It's thorough and great for when you need the whole picture before making decisions.
    pub fn scan(&self) -> Result<(Vec<FileNode>, TreeStats)> {
        let mut all_nodes_collected = Vec::new(); // Stores all nodes initially encountered.
                                                  // `ignored_dirs` was here, but its primary use with `skip_current_dir` is within the loop.
                                                  // If we need to track them for other reasons post-loop, it could be reinstated.

        // Initialize safety tracker
        let safety_tracker = ScannerSafetyTracker::new(self.safety_limits.clone());

        let mut walker = WalkDir::new(&self.root)
            .max_depth(self.config.max_depth)
            .follow_links(self.config.follow_symlinks)
            .into_iter();

        while let Some(entry_result) = walker.next() {
            // Check safety limits
            if let Err(safety_error) = safety_tracker.should_continue() {
                eprintln!("⚠️  {}", safety_error);
                eprintln!("   Use --max-depth, --stream mode, or scan a more specific directory");
                break;
            }

            match entry_result {
                Ok(entry) => {
                    let depth = entry.depth();
                    let path = entry.path();
                    let is_ignored_by_rules = self.should_ignore(path)?;

                    if is_ignored_by_rules {
                        if self.config.show_ignored {
                            // Process and add the ignored entry.
                            if let Some(mut node) = self.process_entry(&entry, depth, true)? {
                                if !node.is_dir && self.should_search_file(&node) {
                                    node.search_matches = self.search_in_file(&node.path);
                                }
                                // Smart scanning even for ignored files (they might have security issues!)
                                self.enrich_with_smart_scanning(&mut node);
                                safety_tracker.add_file(estimate_node_size(
                                    node.path.to_string_lossy().len(),
                                ));
                                all_nodes_collected.push(node);
                            }
                            if entry.file_type().is_dir() {
                                walker.skip_current_dir(); // Don't descend into ignored dirs if showing them.
                            }
                        } else {
                            // Not showing ignored, and it's a directory: skip its contents.
                            if entry.file_type().is_dir() {
                                walker.skip_current_dir();
                            }
                            // If it's a file, it's simply skipped by not adding to `all_nodes_collected`.
                        }
                    } else {
                        // Not ignored by rules, process normally.
                        if let Some(mut node) = self.process_entry(&entry, depth, false)? {
                            if !node.is_dir && self.should_search_file(&node) {
                                node.search_matches = self.search_in_file(&node.path);
                            }
                            // Smart scanning: add security findings and interest scores
                            self.enrich_with_smart_scanning(&mut node);
                            all_nodes_collected.push(node);
                        } else {
                            // process_entry returned None, which means this is a hidden entry and show_hidden is false
                            // If it's a directory, we need to skip its contents
                            if entry.file_type().is_dir() {
                                walker.skip_current_dir();
                            }
                        }
                    }
                }
                Err(e) => {
                    // Handle errors like permission denied.
                    if let Some(path) = e.path() {
                        let depth = e.depth();
                        all_nodes_collected.push(self.create_permission_denied_node(path, depth));
                        if e.io_error().is_some_and(|io_err| {
                            io_err.kind() == std::io::ErrorKind::PermissionDenied
                        }) {
                            walker.skip_current_dir(); // Skip unreadable directory.
                        }
                    }
                }
            }
        }

        // If filters are active, we need a second pass to ensure directories are only included
        // if they contain (or lead to) matching files.
        // Also, calculate stats based on the *final* list of nodes.
        let (final_nodes, final_stats) = if self.has_active_filters() {
            self.filter_nodes_and_calculate_stats(all_nodes_collected)
        } else {
            // No filters, so all collected nodes are final. Calculate stats on them.
            let mut stats = TreeStats::default();
            for node in &all_nodes_collected {
                // Only update stats for non-permission-denied items, or items that are directories.
                // (Permission denied files usually have size 0 and aren't "counted" in the same way).
                if !node.permission_denied || node.is_dir {
                    stats.update_file(node);
                }
            }
            (all_nodes_collected, stats)
        };

        // Apply sorting and top-N filtering if requested
        let sorted_nodes = self.apply_sorting_and_limit(final_nodes);

        // Save scan state for future change detection (if smart mode enabled)
        if self.config.smart_mode || self.config.compute_interest {
            self.save_scan_state(&sorted_nodes);
        }

        Ok((sorted_nodes, final_stats))
    }

    /// Persists a signature for every scanned node so a later run can detect changes.
    ///
    /// Nodes whose signature cannot be computed are silently left out of the
    /// state, and a failure to write the state is only reported at debug level:
    /// the whole operation is best-effort and never affects the scan result.
    fn save_scan_state(&self, nodes: &[FileNode]) {
        use crate::scanner_state::FileSignature;

        let mut state = ScanState::new(self.root.clone());

        // Pair each node with its signature, dropping the ones that fail to sign.
        nodes
            .iter()
            .filter_map(|node| {
                FileSignature::from_path(&node.path)
                    .ok()
                    .map(|signature| (node, signature))
            })
            .for_each(|(node, signature)| {
                state.add_signature(node.path.clone(), signature);
            });

        // Best-effort save: keep normal output clean and only log when debugging.
        if let Err(save_error) = state.save() {
            tracing::debug!("Could not save scan state: {}", save_error);
        }
    }

    /// ## `has_active_filters`
    ///
    /// Reports whether any filtering criterion is set in the configuration:
    /// find pattern, file type, entry type, min/max size, newer/older-than date,
    /// or a search keyword. The scan uses the answer to decide whether the
    /// second filtering pass (`filter_nodes_and_calculate_stats`) has to run.
    fn has_active_filters(&self) -> bool {
        let cfg = &self.config;
        let criteria = [
            cfg.find_pattern.is_some(),
            cfg.file_type_filter.is_some(),
            cfg.entry_type_filter.is_some(),
            cfg.min_size.is_some(),
            cfg.max_size.is_some(),
            cfg.newer_than.is_some(),
            cfg.older_than.is_some(),
            // A search keyword counts as a filter too.
            cfg.search_keyword.is_some(),
        ];
        criteria.iter().any(|&is_set| is_set)
    }

    /// ## `filter_nodes_and_calculate_stats` (Formerly `filter_nodes_with_ancestors`)
    ///
    /// This crucial function takes all nodes collected during the initial traversal
    /// and filters them based on the `ScannerConfig`. It ensures that:
    /// 1. Files are included if they directly match all active filters OR if they contain a search match.
    /// 2. Directories are included if they themselves match a `--find` pattern OR
    ///    if they are an ancestor of an included file.
    ///    It then calculates `TreeStats` based on this final, filtered list of nodes.
    ///    This replaces the older `filter_nodes_with_ancestors` to integrate stat calculation
    ///    and clarify the logic for directory inclusion with `--find`.
    fn filter_nodes_and_calculate_stats(
        &self,
        all_nodes_collected: Vec<FileNode>,
    ) -> (Vec<FileNode>, TreeStats) {
        let mut final_stats = TreeStats::default();
        let mut included_files_and_matching_dirs = Vec::new(); // Files that pass filters, and Dirs that match --find
        let mut required_ancestor_dirs = HashSet::new(); // Ancestors of included_files

        // --- Pass 1: Identify matching files and directories that directly match --find ---
        for node in &all_nodes_collected {
            if node.permission_denied {
                // Skip permission denied entries for filtering logic
                continue;
            }

            let has_search_match = node
                .search_matches
                .as_ref()
                .is_some_and(|m| m.total_count > 0);

            if node.is_dir {
                // For directories, only the --find pattern applies directly.
                // Other filters (size, date, type) don't apply to directories themselves.
                if self
                    .config
                    .find_pattern
                    .as_ref()
                    .is_some_and(|p| p.is_match(&node.path.to_string_lossy()))
                {
                    included_files_and_matching_dirs.push(node.clone());
                    // Add ancestors of this directly matched directory
                    let mut current = node.path.parent();
                    while let Some(parent_path) = current {
                        if parent_path == self.root || required_ancestor_dirs.contains(parent_path)
                        {
                            break;
                        }
                        required_ancestor_dirs.insert(parent_path.to_path_buf());
                        current = parent_path.parent();
                    }
                }
            } else {
                // For files, check if it passes all filters OR has a search match.
                // If we have a search keyword, ONLY include files with search matches
                if self.config.search_keyword.is_some() {
                    if has_search_match {
                        // Even with search matches, the file must still pass other filters
                        if self.should_include(node) {
                            included_files_and_matching_dirs.push(node.clone());
                            // Add all ancestors of this matching file to `required_ancestor_dirs`.
                            let mut current = node.path.parent();
                            while let Some(parent_path) = current {
                                // Stop if we reach the root or an already added ancestor.
                                if parent_path == self.root
                                    || required_ancestor_dirs.contains(parent_path)
                                {
                                    break;
                                }
                                required_ancestor_dirs.insert(parent_path.to_path_buf());
                                current = parent_path.parent();
                            }
                        }
                    }
                } else {
                    // No search keyword, use normal filtering
                    if has_search_match || self.should_include(node) {
                        included_files_and_matching_dirs.push(node.clone());
                        // Add all ancestors of this matching file to `required_ancestor_dirs`.
                        let mut current = node.path.parent();
                        while let Some(parent_path) = current {
                            // Stop if we reach the root or an already added ancestor.
                            if parent_path == self.root
                                || required_ancestor_dirs.contains(parent_path)
                            {
                                break;
                            }
                            required_ancestor_dirs.insert(parent_path.to_path_buf());
                            current = parent_path.parent();
                        }
                    }
                }
            }
        }

        // --- Pass 2: Build the final list of nodes ---
        let mut final_node_list = Vec::new();
        let mut added_paths = HashSet::new(); // To prevent duplicates if a dir is both an ancestor and matches --find

        // Always add the root node if there's anything to show.
        if !included_files_and_matching_dirs.is_empty() {
            if let Some(root_node) = all_nodes_collected.iter().find(|n| n.path == self.root) {
                if added_paths.insert(root_node.path.clone()) {
                    final_node_list.push(root_node.clone());
                }
            }
        }

        // Add required ancestor directories and directly matching directories from `all_nodes_collected`.
        for node in &all_nodes_collected {
            if node.permission_denied {
                // Also include permission denied nodes if they are part of the path
                if (required_ancestor_dirs.contains(&node.path)
                    || node.path == self.root && !final_node_list.is_empty())
                    && added_paths.insert(node.path.clone())
                {
                    final_node_list.push(node.clone());
                }
                continue;
            }

            if node.is_dir {
                // Is it a required ancestor OR a directory that itself matched --find?
                let is_find_match = self
                    .config
                    .find_pattern
                    .as_ref()
                    .is_some_and(|p| p.is_match(&node.path.to_string_lossy()));
                if (required_ancestor_dirs.contains(&node.path)
                    || (is_find_match && node.path != self.root))
                    && added_paths.insert(node.path.clone())
                {
                    final_node_list.push(node.clone());
                }
            }
        }

        // Add the files that passed filters or had search matches.
        for node in included_files_and_matching_dirs {
            // If it's a directory, it was already handled above (if it matched --find).
            // If it's a file, add it now.
            if !node.is_dir {
                if added_paths.insert(node.path.clone()) {
                    final_node_list.push(node);
                }
            } else {
                // It's a directory that matched --find
                if added_paths.insert(node.path.clone()) {
                    final_node_list.push(node);
                }
            }
        }

        // Sort the final list by path for consistent output.
        final_node_list.sort_by(|a, b| a.path.cmp(&b.path));

        // --- Pass 3: Calculate stats on the final_node_list ---
        for node in &final_node_list {
            // Update stats, ensuring not to double-count or miscount permission-denied entries.
            if !node.permission_denied || node.is_dir {
                // Dirs (even denied) contribute to dir count.
                final_stats.update_file(node);
            }
        }

        (final_node_list, final_stats)
    }

    /// ## `process_entry`
    ///
    /// Converts a `walkdir::DirEntry` into our `FileNode` struct: fetches
    /// metadata and derives file type, category, size, hidden status, directory
    /// readability, git branch and filesystem type.
    ///
    /// * `entry` - the directory entry produced by the walker.
    /// * `depth` - depth of the entry, stored on the node as-is.
    /// * `is_ignored_by_rules` - whether `should_ignore` already flagged this
    ///   entry; stored as `is_ignored` and also exempts the entry from the
    ///   hidden-file skip below.
    ///
    /// Returns:
    /// * `Ok(None)` when the entry is hidden, hidden entries are not shown, and
    ///   it is not an ignored entry being displayed. The caller is responsible
    ///   for calling `skip_current_dir()` on the walker if the entry is a
    ///   directory, because this function has no access to the walker.
    /// * `Ok(Some(node))` otherwise. If `entry.metadata()` fails, the node is
    ///   the placeholder built by `create_permission_denied_node`.
    ///
    /// The body itself has no failing path; the `Result` wrapper is kept so
    /// callers can continue to use `?`.
    fn process_entry(
        &self,
        entry: &DirEntry,
        depth: usize,
        is_ignored_by_rules: bool,
    ) -> Result<Option<FileNode>> {
        let path = entry.path();

        // An entry is "hidden" when its file name starts with '.'.
        let is_hidden = path
            .file_name()
            .and_then(|name| name.to_str())
            .is_some_and(|name| name.starts_with('.'));

        // Hidden entries are skipped unless hidden files are shown or the entry
        // is an ignored item the caller decided to display anyway.
        if is_hidden && !self.config.show_hidden && !is_ignored_by_rules {
            return Ok(None);
        }

        // The walker yielded the entry but its metadata is unreadable (e.g.
        // permissions): report it as an inaccessible placeholder node.
        let Ok(metadata) = entry.metadata() else {
            return Ok(Some(self.create_permission_denied_node(path, depth)));
        };

        let is_dir = metadata.is_dir();
        let file_type = self.determine_file_type(&metadata);
        let category = Self::get_file_category(path, file_type);

        // Special virtual files (e.g. under /proc or /sys) report misleading
        // sizes, so they are recorded as size 0.
        let size = if self.is_special_virtual_file(path, &metadata) {
            0
        } else {
            metadata.len()
        };

        // A directory whose contents cannot be listed is flagged as denied.
        let permission_denied = is_dir && std::fs::read_dir(path).is_err();

        // Only directories can carry a `.git` folder.
        let git_branch = if is_dir {
            Self::get_git_branch(path)
        } else {
            None
        };

        Ok(Some(FileNode {
            path: path.to_path_buf(),
            is_dir,
            size,
            permissions: Self::get_permissions(&metadata),
            uid: Self::get_uid(&metadata),
            gid: Self::get_gid(&metadata),
            modified: metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH), // Fallback for modified time
            // Ask the walker rather than the metadata: when links are followed the
            // metadata describes the target and would never report a symlink.
            is_symlink: entry.path_is_symlink(),
            is_hidden,
            permission_denied,
            is_ignored: is_ignored_by_rules,
            depth,
            file_type,
            category,
            search_matches: None, // Filled in later by the caller if a search is active.
            filesystem_type: Self::get_filesystem_type(path),
            git_branch,
            // Smart scanning fields start out empty here.
            traversal_context: None,
            interest: None,
            security_findings: Vec::new(),
            change_status: None,
            content_hash: None,
        }))
    }

    /// ## `get_git_branch`
    ///
    /// Gets the current git branch if `path` contains a `.git` folder.
    /// Reads directly from `.git/HEAD` for speed (no subprocess).
    ///
    /// Returns:
    /// * `Some("branch-name")` when HEAD is `ref: refs/heads/branch-name`.
    /// * `Some(":abc1234")` (colon + first 7 hex digits) for a detached HEAD,
    ///   i.e. when HEAD consists solely of hexadecimal digits.
    /// * `None` when `.git/HEAD` cannot be read or holds anything else (for
    ///   example a ref outside `refs/heads/`).
    fn get_git_branch(path: &Path) -> Option<String> {
        // A missing `.git` directory simply makes this read fail -> None.
        let head = std::fs::read_to_string(path.join(".git").join("HEAD")).ok()?;
        let head = head.trim();

        // Normal case: HEAD points at a local branch.
        if let Some(branch_ref) = head.strip_prefix("ref: refs/heads/") {
            return Some(branch_ref.to_string());
        }

        // Detached HEAD: a raw commit hash. Requiring hex digits rejects other
        // content instead of echoing its first bytes, and guarantees the
        // content is ASCII so the 7-byte slice below cannot split a character.
        let is_commit_hash = head.len() >= 7 && head.bytes().all(|b| b.is_ascii_hexdigit());
        if is_commit_hash {
            Some(format!(":{}", &head[..7]))
        } else {
            None
        }
    }

    /// ## `get_filesystem_type`
    ///
    /// Detects the filesystem type for a given path (Unix implementation).
    ///
    /// * When the `CI` or `GITHUB_ACTIONS` environment variable is set, detection
    ///   is skipped and `FilesystemType::Unknown` is returned (avoids hangs in CI).
    /// * On Linux the work is delegated to `get_filesystem_type_linux`.
    /// * On other Unix systems only the well-known virtual roots are recognised:
    ///   paths located at or below `/proc`, `/sys` or `/dev`; everything else is
    ///   `Unknown`.
    #[cfg(unix)]
    fn get_filesystem_type(path: &Path) -> FilesystemType {
        // Skip filesystem detection in CI environments to avoid hangs
        if std::env::var("CI").is_ok() || std::env::var("GITHUB_ACTIONS").is_ok() {
            return FilesystemType::Unknown;
        }

        #[cfg(target_os = "linux")]
        {
            Self::get_filesystem_type_linux(path)
        }
        #[cfg(not(target_os = "linux"))]
        {
            // `Path::starts_with` compares whole path components, so `/dev/null`
            // matches `/dev` while `/devel/project` or `/system` do not.
            if path.starts_with("/proc") {
                FilesystemType::Procfs
            } else if path.starts_with("/sys") {
                FilesystemType::Sysfs
            } else if path.starts_with("/dev") {
                FilesystemType::Devfs
            } else {
                FilesystemType::Unknown
            }
        }
    }

    /// ## `get_filesystem_type_linux`
    ///
    /// Detects the filesystem type for a given path using `statfs` on Linux.
    ///
    /// Resolution order:
    /// 1. `Unknown` when running under CI (`CI` / `GITHUB_ACTIONS` set).
    /// 2. `Unknown` when the path contains an interior NUL byte.
    /// 3. If `statfs` fails: `Procfs` / `Sysfs` / `Devfs` for paths at or below
    ///    `/proc`, `/sys`, `/dev`; otherwise `Unknown`.
    /// 4. `Mem8` when the path has a `.mem8` entry or its text contains "mem8".
    /// 5. Otherwise the `f_type` magic number reported by `statfs` is mapped to
    ///    a `FilesystemType`, with `Unknown` for unrecognised values.
    #[cfg(target_os = "linux")]
    fn get_filesystem_type_linux(path: &Path) -> FilesystemType {
        use libc::statfs;
        use std::ffi::CString;
        use std::mem;
        use std::os::unix::ffi::OsStrExt;

        // Double-check for CI environment
        if std::env::var("CI").is_ok() || std::env::var("GITHUB_ACTIONS").is_ok() {
            return FilesystemType::Unknown;
        }

        // Filesystem magic numbers from statfs.h
        // NOTE(review): `FsType` must equal the type of `libc::statfs::f_type` for the
        // match below to compile; i64 is presumably right for 64-bit glibc - confirm
        // for other libc / word-size combinations.
        type FsType = i64;

        const EXT4_SUPER_MAGIC: FsType = 0xef53;
        const XFS_SUPER_MAGIC: FsType = 0x58465342;
        const BTRFS_SUPER_MAGIC: FsType = 0x9123683e;
        const ZFS_SUPER_MAGIC: FsType = 0x2fc12fc1;
        const NTFS_SB_MAGIC: FsType = 0x5346544e;
        const MSDOS_SUPER_MAGIC: FsType = 0x4d44; // FAT
        const EXFAT_SUPER_MAGIC: FsType = 0x2011bab0;
        const APFS_SUPER_MAGIC: FsType = 0x42535041; // 'APFS'
        const HFS_SUPER_MAGIC: FsType = 0x482b; // HFS+
        const NFS_SUPER_MAGIC: FsType = 0x6969;
        const SMB_SUPER_MAGIC: FsType = 0x517b;
        const TMPFS_MAGIC: FsType = 0x01021994;
        const PROC_SUPER_MAGIC: FsType = 0x9fa0;
        const SYSFS_MAGIC: FsType = 0x62656572;
        const DEVFS_SUPER_MAGIC: FsType = 0x1373;

        // Use the raw OS bytes of the path: a lossy UTF-8 conversion would rewrite
        // non-UTF-8 names and make statfs look at a path that does not exist.
        let path_cstr = match CString::new(path.as_os_str().as_bytes()) {
            Ok(s) => s,
            Err(_) => return FilesystemType::Unknown, // interior NUL byte
        };

        // SAFETY: `libc::statfs` is a plain C struct made of integer fields, for
        // which the all-zero bit pattern is a valid value.
        let mut stat_buf: libc::statfs = unsafe { mem::zeroed() };
        // SAFETY: `path_cstr` is a valid NUL-terminated string that outlives the
        // call, and `stat_buf` is a live, exclusively borrowed buffer of the
        // type statfs writes into.
        let result = unsafe { statfs(path_cstr.as_ptr(), &mut stat_buf) };

        if result != 0 {
            // statfs failed, fall back to path-based detection for virtual filesystems.
            // `Path::starts_with` matches whole components, so `/system` or `/devel`
            // are not mistaken for `/sys` or `/dev`.
            return if path.starts_with("/proc") {
                FilesystemType::Procfs
            } else if path.starts_with("/sys") {
                FilesystemType::Sysfs
            } else if path.starts_with("/dev") {
                FilesystemType::Devfs
            } else {
                FilesystemType::Unknown
            };
        }

        // Check for Mem8 filesystem by looking for .mem8 marker files
        if path.join(".mem8").exists() || path.to_string_lossy().contains("mem8") {
            return FilesystemType::Mem8;
        }

        match stat_buf.f_type {
            EXT4_SUPER_MAGIC => FilesystemType::Ext4, // TODO: Distinguish ext2/3/4
            XFS_SUPER_MAGIC => FilesystemType::Xfs,
            BTRFS_SUPER_MAGIC => FilesystemType::Btrfs,
            ZFS_SUPER_MAGIC => FilesystemType::Zfs,
            NTFS_SB_MAGIC => FilesystemType::Ntfs,
            MSDOS_SUPER_MAGIC => FilesystemType::Fat32,
            EXFAT_SUPER_MAGIC => FilesystemType::ExFat,
            APFS_SUPER_MAGIC => FilesystemType::Apfs,
            HFS_SUPER_MAGIC => FilesystemType::Hfs,
            NFS_SUPER_MAGIC => FilesystemType::Nfs,
            SMB_SUPER_MAGIC => FilesystemType::Smb,
            TMPFS_MAGIC => FilesystemType::Tmpfs,
            PROC_SUPER_MAGIC => FilesystemType::Procfs,
            SYSFS_MAGIC => FilesystemType::Sysfs,
            DEVFS_SUPER_MAGIC => FilesystemType::Devfs,
            _ => FilesystemType::Unknown,
        }
    }

    /// ## `get_filesystem_type` (non-Unix fallback)
    ///
    /// Counterpart of the Unix implementation for all other platforms.
    /// The path is not inspected; the result is always `FilesystemType::Unknown`.
    #[cfg(not(unix))]
    fn get_filesystem_type(_path: &Path) -> FilesystemType {
        // On non-Unix systems, we can't easily detect filesystem type
        FilesystemType::Unknown
    }

    /// ## `is_virtual_filesystem`
    ///
    /// Tells whether `path` lives on a virtual filesystem, as judged by
    /// `is_virtual()` on the type that `get_filesystem_type` detects for it.
    fn is_virtual_filesystem(path: &Path) -> bool {
        let detected = Self::get_filesystem_type(path);
        detected.is_virtual()
    }

    /// ## `is_special_virtual_file`
    ///
    /// Decides whether an entry should be treated as a special virtual file, for
    /// which the caller reports size 0. It answers `true` when any of these holds:
    ///
    /// * the path (as UTF-8) starts with `/proc/`, `/sys/` or `/dev/`;
    /// * the path is listed in `self.ignore_files`;
    /// * on Unix, the entry is a character device, block device, FIFO or socket.
    // `metadata` is only read inside the `cfg(unix)` block, hence the allow.
    #[allow(unused_variables)]
    fn is_special_virtual_file(&self, path: &Path, metadata: &fs::Metadata) -> bool {
        const VIRTUAL_ROOT_PREFIXES: [&str; 3] = ["/proc/", "/sys/", "/dev/"];

        // Known virtual filesystem prefixes (non-UTF-8 paths never match here).
        let under_virtual_root = path.to_str().is_some_and(|text| {
            VIRTUAL_ROOT_PREFIXES
                .iter()
                .any(|prefix| text.starts_with(*prefix))
        });

        // Either a virtual-root path or one of the individually listed problem files.
        if under_virtual_root || self.ignore_files.contains(path) {
            return true;
        }

        // Device nodes, FIFOs and sockets have no meaningful size either.
        #[cfg(unix)]
        {
            use std::os::unix::fs::FileTypeExt; // For is_char_device(), is_block_device(), etc.
            let kind = metadata.file_type();
            let is_special_node =
                kind.is_char_device() || kind.is_block_device() || kind.is_fifo() || kind.is_socket();
            if is_special_node {
                return true;
            }
        }

        false
    }

    /// ## `create_permission_denied_node`
    ///
    /// Builds the placeholder `FileNode` used for an entry that could not be
    /// accessed. Nothing is read from the entry itself: the node is assumed to
    /// be a directory, carries zeroed metadata, and has `permission_denied`
    /// set so formatters can flag it. Only `path`, `depth` and the detected
    /// filesystem type come from the arguments.
    fn create_permission_denied_node(&self, path: &Path, depth: usize) -> FileNode {
        let filesystem_type = Self::get_filesystem_type(path);

        FileNode {
            // --- Identity ---
            path: path.to_path_buf(),
            depth,
            filesystem_type,

            // --- Assumed kind: inaccessible entries are treated as directories ---
            is_dir: true,
            file_type: FileType::Directory,
            category: FileCategory::Unknown,
            is_symlink: false,

            // --- Metadata is unavailable, so everything is zeroed ---
            size: 0,
            permissions: 0,
            uid: 0,
            gid: 0,
            modified: SystemTime::UNIX_EPOCH,

            // --- Flags ---
            permission_denied: true, // The whole point of this node.
            is_hidden: false,        // Hidden status is not evaluated here.
            is_ignored: false,       // Inaccessible, not ignored by rules.

            // --- Nothing could be inspected inside ---
            search_matches: None,
            git_branch: None,

            // --- Smart scanning fields do not apply ---
            traversal_context: None,
            interest: None,
            security_findings: Vec::new(),
            change_status: None,
            content_hash: None,
        }
    }

    /// ## `should_ignore` - The Bouncer at the Club Door
    ///
    /// Decides whether `path` is on one of our "not tonight" lists. The checks
    /// run in this order and the first hit wins:
    ///
    /// 0. The scan root itself is never ignored.
    /// 1. Specific always-ignored files (only with default ignores enabled).
    /// 2. Anything on a virtual filesystem (always, regardless of settings).
    /// 3. System paths and everything below them (only with default ignores).
    /// 4. Default ignore patterns, matched against the bare file name and
    ///    against the path relative to the root.
    /// 5. `.gitignore` patterns, matched against the path relative to the root.
    ///
    /// Always returns `Ok`; the `Result` lets callers use `?`.
    fn should_ignore(&self, path: &Path) -> Result<bool> {
        // --- Rule 0: the user asked for this directory explicitly, so show it ---
        if path == self.root {
            return Ok(false);
        }

        let defaults_enabled = self.config.use_default_ignores;

        // --- Rule 1: specific problematic files (absolute paths) ---
        if defaults_enabled && self.ignore_files.contains(path) {
            return Ok(true);
        }

        // --- Rule 2: virtual filesystems are skipped no matter what ---
        // They are not real files and can cause trouble (fake sizes, hangs, etc.).
        if Self::is_virtual_filesystem(path) {
            return Ok(true);
        }

        // --- Rule 3: a registered system path, or anything inside one ---
        if defaults_enabled {
            let is_system_path = self.system_paths.contains(path)
                || self
                    .system_paths
                    .iter()
                    .any(|system_root| path.starts_with(system_root));
            if is_system_path {
                return Ok(true);
            }
        }

        // Patterns in rules 4 and 5 are matched against the path relative to the
        // scan root; a path outside the root has no relative form to match.
        let relative_to_root = path.strip_prefix(&self.root).ok();

        // --- Rule 4: default ignore patterns (GlobSet) ---
        if let Some(default_set) = self.default_ignores.as_ref() {
            // A bare name such as "node_modules" matches at any depth.
            let name_matches = path
                .file_name()
                .is_some_and(|name| default_set.is_match(Path::new(name)));
            if name_matches || relative_to_root.is_some_and(|rel| default_set.is_match(rel)) {
                return Ok(true);
            }
        }

        // --- Rule 5: .gitignore patterns (GlobSet) ---
        let ignored_by_gitignore = match (self.gitignore.as_ref(), relative_to_root) {
            (Some(gitignore_set), Some(rel)) => gitignore_set.is_match(rel),
            _ => false,
        };

        Ok(ignored_by_gitignore)
    }

    /// ## `should_include` - The Velvet Rope
    ///
    /// Once a file gets past the bouncer (`should_ignore`), it has to get past
    /// the velvet rope. This function checks if the file meets the specific criteria
    /// for this party: "Are you a `.rs` file? Are you bigger than 1MB?"
    /// Only the coolest files that match all the rules get in.
    fn should_include(&self, node: &FileNode) -> bool {
        // --- Filter by --find pattern (applies to both files and directories) ---
        if let Some(ref find_regex_pattern) = self.config.find_pattern {
            // Convert path to string for regex matching. Lossy conversion is acceptable for matching.
            let path_str = node.path.to_string_lossy();
            if !find_regex_pattern.is_match(&path_str) {
                return false; // Path doesn't match the --find pattern.
            }
        }

        // --- Filter by entry type (--entry-type) ---
        if let Some(ref entry_type) = self.config.entry_type_filter {
            match entry_type.as_str() {
                "f" => {
                    if node.is_dir {
                        return false; // Looking for files only, but this is a directory
                    }
                }
                "d" => {
                    if !node.is_dir {
                        return false; // Looking for directories only, but this is a file
                    }
                }
                _ => {} // Should not happen due to clap validation
            }
        }

        // --- Filters below only apply to files, not directories ---
        if !node.is_dir {
            // --- Filter by file extension (--type) ---
            if let Some(ref required_extension) = self.config.file_type_filter {
                match node
                    .path
                    .extension()
                    .and_then(|ext_osstr| ext_osstr.to_str())
                {
                    Some(file_ext_str) => {
                        if !file_ext_str.eq_ignore_ascii_case(required_extension) {
                            return false; // Extension doesn't match.
                        }
                    }
                    None => return false, // File has no extension, so cannot match.
                }
            }

            // --- Filter by minimum size (--min-size) ---
            if let Some(min_allowed_size) = self.config.min_size {
                if node.size < min_allowed_size {
                    return false; // File is too small.
                }
            }

            // --- Filter by maximum size (--max-size) ---
            if let Some(max_allowed_size) = self.config.max_size {
                if node.size > max_allowed_size {
                    return false; // File is too large.
                }
            }
        } // End of file-only filters

        // --- Date filters (apply to both files and directories based on their modification time) ---
        // --- Filter by newer_than date (--newer-than) ---
        if let Some(min_modification_date) = self.config.newer_than {
            if node.modified < min_modification_date {
                return false; // Entry is older than required.
            }
        }

        // --- Filter by older_than date (--older-than) ---
        if let Some(max_modification_date) = self.config.older_than {
            if node.modified > max_modification_date {
                return false; // Entry is newer than allowed.
            }
        }

        // If all applicable filters passed (or no filters were active for a category), include the node.
        true
    }

    /// ## `determine_file_type` (Helper for `process_entry`)
    ///
    /// Maps `fs::Metadata` onto the crate's `FileType` classification.
    ///
    /// Checks run in a fixed order and the first hit wins:
    /// directory, symlink, then (Unix only) socket, FIFO, block device,
    /// character device, and executable. Everything else is a regular file.
    ///
    /// "Executable" means a regular file with at least one execute bit set
    /// (user, group or other). Non-Unix platforms have no portable way to
    /// ask that question through the standard library, so they never report
    /// `Executable` or any of the special device types.
    fn determine_file_type(&self, metadata: &fs::Metadata) -> FileType {
        let kind = metadata.file_type();

        // Portable checks, shared by every platform.
        if kind.is_dir() {
            return FileType::Directory;
        }
        if kind.is_symlink() {
            // Tested before the special types because a link may point at any of them.
            return FileType::Symlink;
        }

        // Unix exposes the special file kinds and the permission bits.
        #[cfg(unix)]
        {
            use std::os::unix::fs::FileTypeExt; // For is_socket, is_fifo, etc.

            if kind.is_socket() {
                return FileType::Socket;
            }
            if kind.is_fifo() {
                // Named pipe
                return FileType::Pipe;
            }
            if kind.is_block_device() {
                return FileType::BlockDevice;
            }
            if kind.is_char_device() {
                return FileType::CharDevice;
            }
            // Any execute bit (0o111 = user | group | other) on a regular file.
            if kind.is_file() && (metadata.permissions().mode() & 0o111 != 0) {
                return FileType::Executable;
            }
        }

        // Nothing more specific applied: plain, non-executable file.
        FileType::RegularFile
    }

    // --- Platform-Dependent Metadata Helpers ---
    // These provide a consistent way to get permissions, UID, and GID,
    // with sensible defaults for non-Unix systems where these concepts might not directly apply
    // or be easily accessible via standard Rust fs::Metadata.

    /// Returns the permission bits of an entry (for example `0o755`).
    ///
    /// Unix: the mode is masked with `0o777`, so the file-type bits and the
    /// setuid/setgid/sticky bits are dropped.
    #[cfg(unix)]
    fn get_permissions(metadata: &fs::Metadata) -> u32 {
        const PERMISSION_MASK: u32 = 0o777;
        let mode = metadata.permissions().mode();
        mode & PERMISSION_MASK
    }
    /// Returns the permission bits of an entry.
    ///
    /// Non-Unix: the metadata is not consulted; a fixed `0o755` (rwxr-xr-x)
    /// placeholder is reported instead.
    #[cfg(not(unix))]
    fn get_permissions(_metadata: &fs::Metadata) -> u32 {
        const DEFAULT_PERMISSIONS: u32 = 0o755;
        DEFAULT_PERMISSIONS
    }

    /// Returns the owning user ID recorded in the metadata (Unix).
    #[cfg(unix)]
    fn get_uid(metadata: &fs::Metadata) -> u32 {
        MetadataExt::uid(metadata)
    }
    /// Returns a placeholder user ID (non-Unix).
    ///
    /// The metadata is not consulted; `1000` is reported for every entry.
    #[cfg(not(unix))]
    fn get_uid(_metadata: &fs::Metadata) -> u32 {
        const PLACEHOLDER_UID: u32 = 1000;
        PLACEHOLDER_UID
    }

    /// Returns the owning group ID recorded in the metadata (Unix).
    #[cfg(unix)]
    fn get_gid(metadata: &fs::Metadata) -> u32 {
        MetadataExt::gid(metadata)
    }
    /// Returns a placeholder group ID (non-Unix).
    ///
    /// The metadata is not consulted; `0` is reported for every entry.
    #[cfg(not(unix))]
    fn get_gid(_metadata: &fs::Metadata) -> u32 {
        const PLACEHOLDER_GID: u32 = 0;
        PLACEHOLDER_GID
    }

    /// Orders the collected nodes by the configured sort field and then
    /// applies the optional top-N cut.
    ///
    /// Recognised sort fields:
    /// - `"name"` / `"a-to-z"` - file name, ascending
    /// - `"z-to-a"` - file name, descending
    /// - `"size"` / `"largest"` - size, largest first
    /// - `"smallest"` - size, smallest first
    /// - `"date"` / `"newest"` - modification time, newest first
    /// - `"oldest"` - modification time, oldest first
    /// - `"type"` - extension ascending, ties broken by file name
    ///
    /// Name and extension comparisons are case-sensitive and operate on the
    /// lossily-decoded strings. All sorts are stable.
    ///
    /// When no sort field is configured the input is returned untouched and
    /// `top_n` is NOT applied. An unrecognised field prints a warning to
    /// stderr and leaves the order unchanged, but `top_n` still applies.
    fn apply_sorting_and_limit(&self, mut nodes: Vec<FileNode>) -> Vec<FileNode> {
        use std::borrow::Cow;
        use std::cmp::Ordering;

        /// Final path component as text (empty when the path has none).
        fn base_name(node: &FileNode) -> Cow<'_, str> {
            node.path.file_name().unwrap_or_default().to_string_lossy()
        }

        /// Extension as text (empty when the path has none).
        fn extension_of(node: &FileNode) -> Cow<'_, str> {
            node.path.extension().unwrap_or_default().to_string_lossy()
        }

        /// Ascending, case-sensitive comparison by file name.
        fn by_name(left: &FileNode, right: &FileNode) -> Ordering {
            base_name(left).cmp(&base_name(right))
        }

        // Sorting and the top-N cut only happen when a sort field was requested.
        if let Some(sort_field) = self.config.sort_field.as_ref() {
            match sort_field.as_str() {
                // Alphabetical by name (A to Z).
                "name" | "a-to-z" => nodes.sort_by(by_name),
                // Reverse alphabetical (Z to A): same comparator, operands swapped.
                "z-to-a" => nodes.sort_by(|left, right| by_name(right, left)),
                // Largest first.
                "size" | "largest" => {
                    nodes.sort_by(|left, right| left.size.cmp(&right.size).reverse())
                }
                // Smallest first.
                "smallest" => nodes.sort_by(|left, right| left.size.cmp(&right.size)),
                // Most recently modified first.
                "date" | "newest" => {
                    nodes.sort_by(|left, right| left.modified.cmp(&right.modified).reverse())
                }
                // Least recently modified first.
                "oldest" => nodes.sort_by(|left, right| left.modified.cmp(&right.modified)),
                // Group by extension, then alphabetically within each group.
                "type" => nodes.sort_by(|left, right| {
                    extension_of(left)
                        .cmp(&extension_of(right))
                        .then_with(|| by_name(left, right))
                }),
                // Unknown field: keep the incoming order and tell the user.
                _ => eprintln!("Warning: Unknown sort field '{}', ignoring", sort_field),
            }

            // Keep only the first N entries of the (now ordered) list.
            if let Some(limit) = self.config.top_n {
                nodes.truncate(limit);
            }
        }

        nodes
    }
} // end impl Scanner

/// # `parse_size` - The Universal Translator for Sizes
///
/// This handy function takes something a human understands, like "2.5M", and
/// translates it into something a computer understands (2,621,440 bytes).
///
/// ## Accepted syntax
///
/// Optional surrounding whitespace, a non-negative decimal number, optional
/// whitespace, then an optional case-insensitive unit:
/// `B`, `K`/`KB`, `M`/`MB`, `G`/`GB`, `T`/`TB`.
/// Units are binary (1K = 1024 bytes); a missing unit means bytes.
/// Fractional byte counts are truncated toward zero.
///
/// Whole numbers are converted with exact integer arithmetic, so large byte
/// counts are not rounded through floating point.
///
/// # Errors
///
/// Returns an error when the input is empty, has no numeric part, has a
/// number that does not parse, is negative, uses an unknown unit, or
/// describes a size that does not fit in a `u64`.
pub fn parse_size(size_str: &str) -> Result<u64> {
    const KIB: u64 = 1024;

    let size_str = size_str.trim().to_uppercase();
    if size_str.is_empty() {
        return Err(anyhow::anyhow!("Empty size string"));
    }

    // The first alphabetic character marks where the unit begins; with no
    // unit the whole string is the number.
    let unit_start_index = size_str
        .find(|c: char| c.is_alphabetic())
        .unwrap_or(size_str.len());
    let (num_part_str, unit_part) = size_str.split_at(unit_start_index);

    // Allow whitespace between the number and the unit ("2 MB").
    let num_part_str = num_part_str.trim();
    if num_part_str.is_empty() {
        return Err(anyhow::anyhow!("Missing number for size string"));
    }

    // The float parse validates the number and handles fractions. The integer
    // parse, when it succeeds, is used for the exact result below.
    let num = num_part_str
        .parse::<f64>()
        .map_err(|e| anyhow::anyhow!("Invalid number '{}': {}", num_part_str, e))?;
    let whole_number: Option<u64> = num_part_str.parse::<u64>().ok();

    // Check for negative numbers.
    if num.is_sign_negative() {
        return Err(anyhow::anyhow!("Size cannot be negative: {}", num));
    }

    let multiplier: u64 = match unit_part {
        "K" | "KB" => KIB,
        "M" | "MB" => KIB * KIB,
        "G" | "GB" => KIB * KIB * KIB,
        "T" | "TB" => KIB * KIB * KIB * KIB,
        "B" | "" => 1,
        _ => return Err(anyhow::anyhow!("Invalid size unit: '{}'", unit_part)),
    };

    // Whole numbers: exact arithmetic with overflow detection.
    if let Some(whole) = whole_number {
        return whole.checked_mul(multiplier).ok_or_else(|| {
            anyhow::anyhow!("Size too large: '{}' does not fit in 64 bits", size_str)
        });
    }

    // Fractional (or very large) numbers: compute in floating point, but
    // refuse values a u64 cannot hold instead of silently saturating.
    // `u64::MAX as f64` rounds up to 2^64, hence the `>=`.
    let bytes = num * multiplier as f64;
    if !bytes.is_finite() || bytes >= u64::MAX as f64 {
        return Err(anyhow::anyhow!(
            "Size too large: '{}' does not fit in 64 bits",
            size_str
        ));
    }

    // Truncation toward zero is intentional (e.g. "0.5B" is 0 bytes).
    Ok(bytes as u64)
}

// --- Unit Tests: Ensuring Our Scanner Behaves ---
// Aye, even the most brilliant code needs tests to keep it honest!
// These tests cover some basic functionality of the scanner.
#[cfg(test)]
mod tests {
    use super::*; // Import everything from the parent module (scanner.rs).

    /// Well-formed inputs: bare numbers, every unit spelling, mixed case,
    /// fractions and surrounding whitespace.
    #[test]
    fn test_parse_size_valid_inputs() {
        const KIB: u64 = 1024;

        let cases: [(&str, u64); 9] = [
            ("100", 100),
            ("100B", 100),
            ("1k", KIB),
            ("1K", KIB),
            ("1KB", KIB),
            ("2.5M", (2.5 * 1024.0 * 1024.0) as u64),
            ("1GB", KIB * KIB * KIB),
            ("0.5T", (0.5 * 1024.0 * 1024.0 * 1024.0 * 1024.0) as u64),
            ("  2 MB  ", 2 * KIB * KIB), // Test with whitespace
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(parse_size(input).unwrap(), expected, "input: {:?}", input);
        }
    }

    /// Malformed inputs: unknown unit, no number, negative value, bad number.
    #[test]
    fn test_parse_size_invalid_inputs() {
        let rejected = ["100X", "garbage", "-100M", "1..5K"];

        for input in rejected.iter() {
            assert!(parse_size(input).is_err(), "input: {:?}", input);
        }
    }

    /// Zero is a valid size; empty and whitespace-only strings are not.
    #[test]
    fn test_parse_size_zero_and_empty() {
        assert_eq!(parse_size("0").unwrap(), 0);

        for blank in ["", "  "].iter() {
            assert!(parse_size(blank).is_err(), "input: {:?}", blank);
        }
    }

    // Basic test for Scanner creation. More comprehensive tests would involve
    // creating a temporary directory structure and verifying scan results.
    #[test]
    fn test_scanner_creation_defaults() {
        let temp_dir = tempfile::tempdir().unwrap();

        // Every filter disabled, conservative traversal settings.
        let config = ScannerConfig {
            max_depth: 5,
            follow_symlinks: false,
            respect_gitignore: true,
            show_hidden: false,
            show_ignored: false,
            find_pattern: None,
            file_type_filter: None,
            entry_type_filter: None,
            min_size: None,
            max_size: None,
            newer_than: None,
            older_than: None,
            use_default_ignores: true,
            search_keyword: None,
            show_filesystems: false,
            sort_field: None,
            top_n: None,
            include_line_content: false,
            // Smart scanning options
            compute_interest: false,
            security_scan: false,
            min_interest: 0.0,
            track_traversal: false,
            changes_only: false,
            compare_state: None,
            smart_mode: false,
        };

        assert!(Scanner::new(temp_dir.path(), config).is_ok());
    }
}