go_brrr/callgraph/
scanner.rs

1//! Project file scanner for call graph analysis.
2//!
3//! Provides efficient file discovery with:
4//! - Parallel scanning using rayon for large projects
5//! - Respect for .gitignore and .brrrignore patterns
6//! - Language-based and extension-based filtering
7//! - File metadata collection (size, modification time, detected language)
8//! - Comprehensive error handling with visibility into skipped files
9//!
10//! # Performance
11//!
12//! Sequential scanning is used for projects with fewer than 15 files
13//! (see `MIN_FILES_FOR_PARALLEL`) to avoid thread spawn overhead.
14//! For larger projects, rayon's work-stealing parallelism provides significant
15//! speedups on multi-core systems. This threshold matches the Python implementation
16//! for consistent behavior across both backends.
17//!
18//! # Error Handling
19//!
20//! The scanner collects errors encountered during traversal rather than silently
21//! dropping them. Errors include permission denied, broken symlinks, and I/O errors.
22//! Users can choose to fail on errors or continue with warnings via `ScanConfig`.
23
24use std::collections::HashSet;
25use std::fmt;
26use std::fs;
27use std::path::{Path, PathBuf};
28use std::sync::Mutex;
29
30use ignore::overrides::OverrideBuilder;
31use ignore::WalkBuilder;
32use rayon::prelude::*;
33use tracing::{debug, warn};
34
35use crate::error::{Result, BrrrError};
36use crate::lang::LanguageRegistry;
37
/// Minimum number of files before parallel processing is enabled.
///
/// This threshold balances the overhead of thread spawning against
/// the benefits of parallel execution. For small projects, sequential
/// processing is often faster due to reduced synchronization overhead.
///
/// Value of 15 matches the Python implementation for consistent behavior
/// across both backends. Process spawn overhead in Python (~50-100ms per worker)
/// is similar to Rayon's thread pool initialization cost for small workloads.
///
/// # Performance Notes
///
/// The figures below are rules of thumb, not measurements taken in this file:
///
/// - Below 15 files: Sequential is typically faster (thread spawn overhead dominates)
/// - 15-50 files: Parallel provides modest speedup on multi-core systems
/// - 50+ files: Parallel provides significant speedup (2-4x on 4+ cores)
// NOTE(review): the comparison against this constant lives outside this
// section; the module docs describe it as "fewer than 15 files" => sequential.
const MIN_FILES_FOR_PARALLEL: usize = 15;
54
/// Coarse classification of a scan failure, intended for programmatic
/// handling (counting, filtering, deciding whether to retry).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScanErrorKind {
    /// Access to a file or directory was refused.
    PermissionDenied,
    /// A symbolic link could not be followed to its target.
    BrokenSymlink,
    /// Any other I/O failure raised during traversal.
    IoError,
    /// A symlink cycle was detected while walking directories.
    DirectoryLoop,
    /// Anything that does not fit the categories above.
    Other,
}

impl fmt::Display for ScanErrorKind {
    /// Writes the short, lowercase, human-readable label for the kind.
    ///
    /// The label is written verbatim; width/alignment flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::PermissionDenied => "permission denied",
            Self::BrokenSymlink => "broken symlink",
            Self::IoError => "I/O error",
            Self::DirectoryLoop => "directory loop",
            Self::Other => "other error",
        };
        f.write_str(label)
    }
}
81
82/// Error encountered during file scanning.
83///
84/// Contains details about what went wrong and where, allowing users
85/// to diagnose and potentially fix scanning issues.
86#[derive(Debug, Clone)]
87pub struct ScanError {
88    /// Path where the error occurred (if available).
89    pub path: Option<PathBuf>,
90    /// Human-readable error message.
91    pub message: String,
92    /// Category of the error for programmatic handling.
93    pub kind: ScanErrorKind,
94}
95
96impl ScanError {
97    /// Create a new scan error from an ignore::Error.
98    ///
99    /// Extracts path information by pattern matching on the error
100    /// variants, since `ignore::Error` doesn't provide direct accessor methods.
101    fn from_ignore_error(err: &ignore::Error) -> Self {
102        let message = err.to_string();
103
104        // Extract path by pattern matching on error variants
105        let path = Self::extract_path(err);
106
107        // Classify the error kind
108        let kind = if let Some(io_err) = err.io_error() {
109            match io_err.kind() {
110                std::io::ErrorKind::PermissionDenied => ScanErrorKind::PermissionDenied,
111                std::io::ErrorKind::NotFound => ScanErrorKind::BrokenSymlink,
112                _ => ScanErrorKind::IoError,
113            }
114        } else {
115            Self::classify_from_message(&message)
116        };
117
118        Self {
119            path,
120            message,
121            kind,
122        }
123    }
124
125    /// Extract path from ignore::Error variants recursively.
126    fn extract_path(err: &ignore::Error) -> Option<PathBuf> {
127        match err {
128            ignore::Error::WithPath { path, .. } => Some(path.clone()),
129            ignore::Error::WithDepth { err: inner, .. } => Self::extract_path(inner),
130            ignore::Error::Loop { child, .. } => Some(child.clone()),
131            _ => None,
132        }
133    }
134
135    /// Classify error kind from message content when io_error is unavailable.
136    fn classify_from_message(message: &str) -> ScanErrorKind {
137        let msg_lower = message.to_lowercase();
138        if msg_lower.contains("loop") || msg_lower.contains("cycle") {
139            ScanErrorKind::DirectoryLoop
140        } else if msg_lower.contains("symlink") || msg_lower.contains("link") {
141            ScanErrorKind::BrokenSymlink
142        } else if msg_lower.contains("permission") {
143            ScanErrorKind::PermissionDenied
144        } else {
145            ScanErrorKind::Other
146        }
147    }
148}
149
150impl fmt::Display for ScanError {
151    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
152        if let Some(ref path) = self.path {
153            write!(f, "{}: {} ({})", path.display(), self.message, self.kind)
154        } else {
155            write!(f, "{} ({})", self.message, self.kind)
156        }
157    }
158}
159
/// Behavior when scan errors are encountered.
///
/// The derived `Default` resolves to [`ErrorHandling::CollectAndContinue`]
/// (marked with `#[default]`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ErrorHandling {
    /// Continue scanning, collecting errors for later review.
    /// This is the default behavior for maximum file discovery.
    #[default]
    CollectAndContinue,
    /// Stop scanning immediately on first error.
    // NOTE(review): presumably only constructed by callers outside this
    // module — confirm the `dead_code` allowance is still needed.
    #[allow(dead_code)]
    FailFast,
    /// Log warnings but don't collect errors (legacy behavior).
    // NOTE(review): same as above — verify this variant is still reachable.
    #[allow(dead_code)]
    LogOnly,
}
174
/// Per-file information gathered during a scan.
#[derive(Debug, Clone)]
pub struct FileMetadata {
    /// Path of the file as handed to the constructor.
    pub path: PathBuf,
    /// Size of the file in bytes, as reported by the filesystem.
    pub size: u64,
    /// Name of the detected language (e.g., "python", "typescript"), if any.
    pub language: Option<String>,
}

impl FileMetadata {
    /// Build metadata for `path`, reusing a language that was already detected.
    ///
    /// Passing the language in avoids running detection a second time for
    /// files that were classified during filtering.
    ///
    /// Returns `None` when the filesystem metadata cannot be read or when
    /// the path does not refer to a regular file (e.g. a directory).
    fn from_path_with_language(path: PathBuf, cached_language: Option<String>) -> Option<Self> {
        match fs::metadata(&path) {
            // Only regular files produce metadata; anything else is skipped.
            Ok(stat) if stat.is_file() => Some(Self {
                size: stat.len(),
                path,
                language: cached_language,
            }),
            _ => None,
        }
    }
}
207
/// File entry with cached language detection result.
///
/// Used during scanning to avoid redundant language detection calls.
/// Language is detected exactly once when the file is first encountered,
/// then reused for filtering and metadata collection.
///
/// The language is held as a `&'static str`, so cloning an entry copies
/// only the path; the language name itself is never reallocated.
#[derive(Debug, Clone)]
struct ScannedFile {
    /// Path to the file.
    path: PathBuf,
    /// Cached language name (None if not a supported language).
    language: Option<&'static str>,
}
220
221/// Configuration for project scanning.
222#[derive(Debug, Clone, Default)]
223pub struct ScanConfig {
224    /// Filter by specific language (e.g., "python").
225    pub language: Option<String>,
226    /// Filter by file extensions (e.g., [".py", ".pyi"]).
227    pub extensions: Vec<String>,
228    /// Glob patterns to include (e.g., ["src/**/*.rs"]).
229    pub include_patterns: Vec<String>,
230    /// Glob patterns to exclude (e.g., ["**/test/**"]).
231    pub exclude_patterns: Vec<String>,
232    /// Whether to follow symbolic links (default: false).
233    pub follow_symlinks: bool,
234    /// Maximum directory depth (None for unlimited).
235    pub max_depth: Option<usize>,
236    /// Whether to collect file metadata (default: false for faster scanning).
237    pub collect_metadata: bool,
238    /// Whether to use parallel scanning (default: true for large projects).
239    pub parallel: bool,
240    /// Whether to disable default exclude patterns (node_modules, __pycache__, .git, etc.).
241    /// Default: false (default excludes are applied).
242    /// Set to true when you need to include files from typically-excluded directories
243    /// like vendored dependencies in node_modules.
244    pub disable_default_excludes: bool,
245    /// How to handle errors encountered during scanning.
246    /// Default: CollectAndContinue (collect errors but don't stop scanning).
247    pub error_handling: ErrorHandling,
248    /// Whether to ignore all ignore files (.gitignore, .brrrignore) and include all files.
249    /// Default: false (respect ignore files).
250    /// Set to true to bypass all ignore file processing (equivalent to --no-ignore flag).
251    pub no_ignore: bool,
252}
253
254impl ScanConfig {
255    /// Create a config for scanning a specific language.
256    pub fn for_language(lang: &str) -> Self {
257        Self {
258            language: Some(lang.to_string()),
259            ..Default::default()
260        }
261    }
262
263    /// Create a config for scanning specific file extensions.
264    #[allow(dead_code)]
265    pub fn for_extensions(exts: &[&str]) -> Self {
266        Self {
267            extensions: exts.iter().map(|s| (*s).to_string()).collect(),
268            ..Default::default()
269        }
270    }
271
272    /// Add include patterns.
273    #[allow(dead_code)]
274    pub fn with_includes(mut self, patterns: &[&str]) -> Self {
275        self.include_patterns = patterns.iter().map(|s| (*s).to_string()).collect();
276        self
277    }
278
279    /// Add exclude patterns.
280    #[allow(dead_code)]
281    pub fn with_excludes(mut self, patterns: &[&str]) -> Self {
282        self.exclude_patterns = patterns.iter().map(|s| (*s).to_string()).collect();
283        self
284    }
285
286    /// Enable metadata collection.
287    #[allow(dead_code)]
288    pub fn with_metadata(mut self) -> Self {
289        self.collect_metadata = true;
290        self
291    }
292
293    /// Set maximum traversal depth.
294    #[allow(dead_code)]
295    pub fn with_max_depth(mut self, depth: usize) -> Self {
296        self.max_depth = Some(depth);
297        self
298    }
299
300    /// Disable default exclude patterns (node_modules, __pycache__, .git, etc.).
301    ///
302    /// Use this when you need to include files from typically-excluded directories,
303    /// such as vendored dependencies in node_modules or build artifacts you want to analyze.
304    ///
305    /// # Example
306    ///
307    /// ```no_run
308    /// use go_brrr::callgraph::scanner::ScanConfig;
309    ///
310    /// // Include vendored dependencies from node_modules
311    /// let config = ScanConfig::default()
312    ///     .with_default_excludes_disabled()
313    ///     .with_includes(&["**/node_modules/vendor/**"]);
314    /// ```
315    #[allow(dead_code)]
316    pub fn with_default_excludes_disabled(mut self) -> Self {
317        self.disable_default_excludes = true;
318        self
319    }
320
321    /// Set how errors should be handled during scanning.
322    ///
323    /// # Example
324    ///
325    /// ```no_run
326    /// use go_brrr::callgraph::scanner::{ScanConfig, ErrorHandling};
327    ///
328    /// // Fail immediately on any error
329    /// let strict_config = ScanConfig::default()
330    ///     .with_error_handling(ErrorHandling::FailFast);
331    ///
332    /// // Collect errors but continue scanning (default)
333    /// let permissive_config = ScanConfig::default()
334    ///     .with_error_handling(ErrorHandling::CollectAndContinue);
335    /// ```
336    #[allow(dead_code)]
337    pub fn with_error_handling(mut self, handling: ErrorHandling) -> Self {
338        self.error_handling = handling;
339        self
340    }
341
342    /// Configure to fail fast on any error.
343    ///
344    /// Shorthand for `.with_error_handling(ErrorHandling::FailFast)`.
345    #[allow(dead_code)]
346    pub fn fail_on_error(mut self) -> Self {
347        self.error_handling = ErrorHandling::FailFast;
348        self
349    }
350
351    /// Set whether to ignore all ignore files (.gitignore, .brrrignore).
352    ///
353    /// When set to true, all files will be included regardless of ignore patterns.
354    /// This is equivalent to the `--no-ignore` CLI flag.
355    ///
356    /// # Example
357    ///
358    /// ```no_run
359    /// use go_brrr::callgraph::scanner::ScanConfig;
360    ///
361    /// // Include all files, ignoring .gitignore and .brrrignore
362    /// let config = ScanConfig::default()
363    ///     .with_no_ignore(true);
364    /// ```
365    #[allow(dead_code)]
366    pub fn with_no_ignore(mut self, no_ignore: bool) -> Self {
367        self.no_ignore = no_ignore;
368        self
369    }
370}
371
/// Extension-based file filter applied while walking the tree.
///
/// Keeping the matching logic in one place lets the walker test each file as
/// it is discovered instead of collecting everything and filtering afterwards.
struct ExtensionFilter {
    /// Allowed extensions, expected to be lowercase and without a leading dot.
    /// `None` disables extension filtering (every file is accepted).
    extensions: Option<HashSet<String>>,
}

impl ExtensionFilter {
    /// Build a filter from a set of extensions.
    ///
    /// An empty set disables filtering, so every path will match.
    fn new(extensions: HashSet<String>) -> Self {
        let extensions = (!extensions.is_empty()).then_some(extensions);
        Self { extensions }
    }

    /// Test a path against the filter.
    ///
    /// Returns `true` when filtering is disabled, or when the path has a
    /// UTF-8 extension whose lowercase form is in the allowed set. Paths
    /// without an extension never match an active filter.
    #[inline]
    fn matches(&self, path: &Path) -> bool {
        let Some(allowed) = self.extensions.as_ref() else {
            // No filter configured: accept everything.
            return true;
        };

        path.extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| allowed.contains(&ext.to_lowercase()))
    }

    /// Report whether an extension set is in effect.
    #[inline]
    fn is_filtering(&self) -> bool {
        self.extensions.is_some()
    }
}
425
/// Filter for matching files by language during scanning.
///
/// Uses the language registry to detect and match file languages,
/// enabling single-pass filtering during directory traversal.
///
/// Both fields borrow for `'a`, so the filter cannot outlive the registry
/// or the language name it was built from.
struct LanguageFilter<'a> {
    /// Resolved target language name to match (after alias resolution).
    /// None means accept all supported languages.
    target_language: Option<&'a str>,
    /// Reference to the language registry for detection.
    registry: &'a LanguageRegistry,
}
437
/// Result of language filter matching, includes cached language for reuse.
///
/// Returned by `LanguageFilter::matches_with_cache` so that the caller gets
/// the accept/reject decision and the detected language from a single
/// detection call.
struct LanguageMatchResult {
    /// Whether the file matches the filter criteria.
    matches: bool,
    /// Cached detected language name (None if not a supported language).
    /// This value can be reused for metadata collection to avoid re-detection.
    language: Option<&'static str>,
}
446
447impl<'a> LanguageFilter<'a> {
448    /// Create a filter for a specific language.
449    ///
450    /// The `resolved_name` should be the canonical language name after
451    /// alias resolution (e.g., "typescript" for both "javascript" and "typescript").
452    fn new(resolved_name: Option<&'a str>, registry: &'a LanguageRegistry) -> Self {
453        Self {
454            target_language: resolved_name,
455            registry,
456        }
457    }
458
459    /// Check if a path matches the language filter, returning cached language.
460    ///
461    /// This method detects language exactly once and returns both the match
462    /// result and the detected language for caching. Use this when you need
463    /// the language information later (e.g., for metadata collection).
464    ///
465    /// # Performance
466    ///
467    /// Language detection is O(1) per file (extension-based lookup), but still
468    /// involves string operations and HashMap lookups. Caching the result
469    /// eliminates redundant detection calls during metadata collection.
470    #[inline]
471    fn matches_with_cache(&self, path: &Path, ext_filter: &ExtensionFilter) -> LanguageMatchResult {
472        // Detect language once and cache the result
473        let detected = self.registry.detect_language(path);
474        let language = detected.map(|l| l.name());
475
476        let matches = match self.target_language {
477            Some(target_name) => {
478                // Language filter active: check if detected language matches target
479                language.is_some_and(|l| l == target_name)
480            }
481            None => {
482                // No language filter: accept if extension filter passes OR file is supported
483                if ext_filter.is_filtering() {
484                    true // Extension filter handles the filtering
485                } else {
486                    // No filters: only accept supported language files
487                    language.is_some()
488                }
489            }
490        };
491
492        LanguageMatchResult { matches, language }
493    }
494}
495
496/// Result of a project scan with optional metadata.
497#[derive(Debug, Clone)]
498pub struct ScanResult {
499    /// All matching file paths.
500    pub files: Vec<PathBuf>,
501    /// File metadata (only populated if `collect_metadata` was true).
502    pub metadata: Vec<FileMetadata>,
503    /// Total bytes scanned.
504    pub total_bytes: u64,
505    /// Number of files by language.
506    pub by_language: std::collections::HashMap<String, usize>,
507    /// Errors encountered during scanning.
508    /// Contains details about files/directories that could not be accessed.
509    pub errors: Vec<ScanError>,
510    /// Warning messages for non-fatal issues.
511    pub warnings: Vec<String>,
512}
513
514impl ScanResult {
515    fn new() -> Self {
516        Self {
517            files: Vec::new(),
518            metadata: Vec::new(),
519            total_bytes: 0,
520            by_language: std::collections::HashMap::new(),
521            errors: Vec::new(),
522            warnings: Vec::new(),
523        }
524    }
525
526    fn add_file(&mut self, path: PathBuf) {
527        self.files.push(path);
528    }
529
530    fn add_metadata(&mut self, meta: FileMetadata) {
531        self.total_bytes += meta.size;
532        if let Some(ref lang) = meta.language {
533            *self.by_language.entry(lang.clone()).or_insert(0) += 1;
534        }
535        self.metadata.push(meta);
536    }
537
538    fn add_error(&mut self, error: ScanError) {
539        self.errors.push(error);
540    }
541
542    fn add_warning(&mut self, warning: String) {
543        self.warnings.push(warning);
544    }
545
546    /// Check if the scan encountered any errors.
547    pub fn has_errors(&self) -> bool {
548        !self.errors.is_empty()
549    }
550
551    /// Get the count of errors by kind.
552    pub fn error_counts(&self) -> std::collections::HashMap<ScanErrorKind, usize> {
553        let mut counts = std::collections::HashMap::new();
554        for error in &self.errors {
555            *counts.entry(error.kind).or_insert(0) += 1;
556        }
557        counts
558    }
559
560    /// Get a summary of scan errors for logging or display.
561    pub fn error_summary(&self) -> String {
562        if self.errors.is_empty() {
563            return String::from("No errors");
564        }
565
566        let counts = self.error_counts();
567        let parts: Vec<String> = counts
568            .iter()
569            .map(|(kind, count)| format!("{}: {}", kind, count))
570            .collect();
571
572        format!(
573            "{} total errors ({})",
574            self.errors.len(),
575            parts.join(", ")
576        )
577    }
578}
579
/// Scans a project directory for source files.
///
/// Respects .gitignore and .brrrignore patterns, supports filtering by
/// language and extension, and optionally collects file metadata.
///
/// # Example
///
/// ```no_run
/// use go_brrr::callgraph::scanner::{ProjectScanner, ScanConfig};
///
/// let scanner = ProjectScanner::new("/path/to/project").unwrap();
///
/// // Scan all supported files
/// let files = scanner.scan_files().unwrap();
///
/// // Scan only Python files
/// let py_files = scanner.scan_language("python").unwrap();
///
/// // Advanced scanning with config
/// let config = ScanConfig::for_language("rust")
///     .with_excludes(&["**/target/**"])
///     .with_metadata();
/// let result = scanner.scan_with_config(&config).unwrap();
/// ```
pub struct ProjectScanner {
    /// Project root directory, stored exactly as supplied to `new`
    /// (it is not canonicalized there).
    root: PathBuf,
}
607
608impl ProjectScanner {
609    /// Create a new scanner for the given project root.
610    ///
611    /// # Errors
612    ///
613    /// Returns an error if the path does not exist or is not a directory.
614    pub fn new(path: &str) -> Result<Self> {
615        let root = PathBuf::from(path);
616
617        if !root.exists() {
618            return Err(BrrrError::Io(std::io::Error::new(
619                std::io::ErrorKind::NotFound,
620                format!("Project root does not exist: {}", path),
621            )));
622        }
623
624        if !root.is_dir() {
625            return Err(BrrrError::Io(std::io::Error::new(
626                std::io::ErrorKind::InvalidInput,
627                format!("Project root is not a directory: {}", path),
628            )));
629        }
630
631        Ok(Self { root })
632    }
633
634    /// Get the project root path.
635    #[allow(dead_code)]
636    pub fn root(&self) -> &Path {
637        &self.root
638    }
639
640    /// Scan for all supported source files.
641    ///
642    /// Returns file paths for all files with extensions recognized by
643    /// the language registry. Respects .gitignore and .brrrignore.
644    ///
645    /// Note: This method logs warnings for errors but continues scanning.
646    /// For full error details, use `scan_files_with_errors()` instead.
647    pub fn scan_files(&self) -> Result<Vec<PathBuf>> {
648        let result = self.scan_files_with_errors()?;
649
650        // Log warning if errors were encountered
651        if result.has_errors() {
652            warn!(
653                "File scan completed with errors: {}",
654                result.error_summary()
655            );
656            for error in &result.errors {
657                debug!("Scan error: {}", error);
658            }
659        }
660
661        Ok(result.files)
662    }
663
664    /// Scan for all supported source files with full error reporting.
665    ///
666    /// Returns a `ScanResult` containing both the found files and any
667    /// errors encountered during scanning (permission denied, broken
668    /// symlinks, I/O errors, etc.).
669    ///
670    /// # Example
671    ///
672    /// ```no_run
673    /// use go_brrr::callgraph::scanner::ProjectScanner;
674    ///
675    /// let scanner = ProjectScanner::new("/path/to/project").unwrap();
676    /// let result = scanner.scan_files_with_errors().unwrap();
677    ///
678    /// println!("Found {} files", result.files.len());
679    /// if result.has_errors() {
680    ///     eprintln!("Warning: {}", result.error_summary());
681    ///     for error in &result.errors {
682    ///         eprintln!("  - {}", error);
683    ///     }
684    /// }
685    /// ```
686    pub fn scan_files_with_errors(&self) -> Result<ScanResult> {
687        let registry = LanguageRegistry::global();
688        let mut result = ScanResult::new();
689
690        for entry_result in self.build_walker(None)? {
691            match entry_result {
692                Ok(entry) => {
693                    if entry.path().is_file() {
694                        if registry.detect_language(entry.path()).is_some() {
695                            result.add_file(entry.path().to_path_buf());
696                        }
697                    }
698                }
699                Err(e) => {
700                    let scan_error = ScanError::from_ignore_error(&e);
701                    warn!("Failed to scan entry: {}", scan_error);
702                    debug!("Error details: {:?}", e);
703                    result.add_error(scan_error);
704                }
705            }
706        }
707
708        Ok(result)
709    }
710
711    /// Scan for files of a specific language.
712    ///
713    /// # Arguments
714    ///
715    /// * `lang_name` - Language identifier (e.g., "python", "typescript", "rust")
716    ///
717    /// # Errors
718    ///
719    /// Returns `UnsupportedLanguage` error if the language is not recognized.
720    ///
721    /// Note: This method logs warnings for errors but continues scanning.
722    /// For full error details, use `scan_language_with_errors()` instead.
723    #[allow(dead_code)]
724    pub fn scan_language(&self, lang_name: &str) -> Result<Vec<PathBuf>> {
725        let result = self.scan_language_with_errors(lang_name)?;
726
727        // Log warning if errors were encountered
728        if result.has_errors() {
729            warn!(
730                "Language scan completed with errors: {}",
731                result.error_summary()
732            );
733            for error in &result.errors {
734                debug!("Scan error: {}", error);
735            }
736        }
737
738        Ok(result.files)
739    }
740
741    /// Scan for files of a specific language with full error reporting.
742    ///
743    /// Returns a `ScanResult` containing both the found files and any
744    /// errors encountered during scanning.
745    ///
746    /// # Arguments
747    ///
748    /// * `lang_name` - Language identifier (e.g., "python", "typescript", "rust")
749    ///
750    /// # Errors
751    ///
752    /// Returns `UnsupportedLanguage` error if the language is not recognized.
753    #[allow(dead_code)]
754    pub fn scan_language_with_errors(&self, lang_name: &str) -> Result<ScanResult> {
755        let registry = LanguageRegistry::global();
756
757        // Validate language exists and get the resolved language handler.
758        // This handles aliases like "javascript" -> "typescript".
759        let target_lang = registry
760            .get_by_name(lang_name)
761            .ok_or_else(|| BrrrError::UnsupportedLanguage(lang_name.to_string()))?;
762        let target_name = target_lang.name();
763
764        let mut result = ScanResult::new();
765
766        for entry_result in self.build_walker(None)? {
767            match entry_result {
768                Ok(entry) => {
769                    if entry.path().is_file() {
770                        // Compare against resolved canonical name to handle aliases correctly.
771                        // e.g., "javascript" resolves to "typescript", so we compare against "typescript"
772                        if registry
773                            .detect_language(entry.path())
774                            .is_some_and(|l| l.name() == target_name)
775                        {
776                            result.add_file(entry.path().to_path_buf());
777                        }
778                    }
779                }
780                Err(e) => {
781                    let scan_error = ScanError::from_ignore_error(&e);
782                    warn!("Failed to scan entry: {}", scan_error);
783                    debug!("Error details: {:?}", e);
784                    result.add_error(scan_error);
785                }
786            }
787        }
788
789        Ok(result)
790    }
791
792    /// Scan for files matching specific extensions.
793    ///
794    /// # Arguments
795    ///
796    /// * `extensions` - File extensions to match (e.g., [".py", ".pyi"])
797    ///
798    /// Note: This method logs warnings for errors but continues scanning.
799    /// For full error details, use `scan_extensions_with_errors()` instead.
800    #[allow(dead_code)]
801    pub fn scan_extensions(&self, extensions: &[&str]) -> Result<Vec<PathBuf>> {
802        let result = self.scan_extensions_with_errors(extensions)?;
803
804        // Log warning if errors were encountered
805        if result.has_errors() {
806            warn!(
807                "Extension scan completed with errors: {}",
808                result.error_summary()
809            );
810            for error in &result.errors {
811                debug!("Scan error: {}", error);
812            }
813        }
814
815        Ok(result.files)
816    }
817
818    /// Scan for files matching specific extensions with full error reporting.
819    ///
820    /// Returns a `ScanResult` containing both the found files and any
821    /// errors encountered during scanning.
822    ///
823    /// # Arguments
824    ///
825    /// * `extensions` - File extensions to match (e.g., [".py", ".pyi"])
826    #[allow(dead_code)]
827    pub fn scan_extensions_with_errors(&self, extensions: &[&str]) -> Result<ScanResult> {
828        // Normalize extensions to lowercase without leading dot for case-insensitive matching
829        let ext_set: std::collections::HashSet<String> = extensions
830            .iter()
831            .map(|e| e.trim_start_matches('.').to_lowercase())
832            .collect();
833        let mut result = ScanResult::new();
834
835        for entry_result in self.build_walker(None)? {
836            match entry_result {
837                Ok(entry) => {
838                    if entry.path().is_file() {
839                        // Case-insensitive extension matching: .py, .PY, .Py all match
840                        let matches = entry
841                            .path()
842                            .extension()
843                            .and_then(|ext| ext.to_str())
844                            .map(|ext| ext_set.contains(&ext.to_lowercase()))
845                            .unwrap_or(false);
846
847                        if matches {
848                            result.add_file(entry.path().to_path_buf());
849                        }
850                    }
851                }
852                Err(e) => {
853                    let scan_error = ScanError::from_ignore_error(&e);
854                    warn!("Failed to scan entry: {}", scan_error);
855                    debug!("Error details: {:?}", e);
856                    result.add_error(scan_error);
857                }
858            }
859        }
860
861        Ok(result)
862    }
863
864    /// Scan with full configuration options.
865    ///
866    /// This is the most flexible scanning method, supporting:
867    /// - Language and extension filtering
868    /// - Include/exclude glob patterns
869    /// - Metadata collection
870    /// - Parallel processing for large projects
871    /// - Configurable error handling (fail-fast, collect-and-continue, log-only)
872    ///
873    /// # Error Handling
874    ///
875    /// By default, errors are collected and scanning continues (`CollectAndContinue`).
876    /// Use `config.error_handling` to change this behavior:
877    /// - `FailFast`: Stop on first error
878    /// - `CollectAndContinue`: Collect errors, continue scanning (default)
879    /// - `LogOnly`: Log warnings, don't collect errors
880    ///
881    /// # Memory Efficiency
882    ///
883    /// This method uses single-pass filtering during directory traversal to avoid
884    /// creating intermediate collections. For a 100K file project, this reduces
885    /// memory usage by avoiding double-allocation of file entry vectors.
886    pub fn scan_with_config(&self, config: &ScanConfig) -> Result<ScanResult> {
887        let registry = LanguageRegistry::global();
888
889        // Validate language if specified and get resolved name for alias support.
890        // e.g., "javascript" -> "typescript"
891        let resolved_lang_name: Option<&str> = match &config.language {
892            Some(lang) => {
893                let resolved = registry
894                    .get_by_name(lang)
895                    .ok_or_else(|| BrrrError::UnsupportedLanguage(lang.clone()))?;
896                Some(resolved.name())
897            }
898            None => None,
899        };
900
901        // Build filters for single-pass filtering during traversal.
902        // This avoids collecting all files first and then filtering (double allocation).
903        let ext_filter = ExtensionFilter::new(
904            config
905                .extensions
906                .iter()
907                .map(|e| e.trim_start_matches('.').to_lowercase())
908                .collect(),
909        );
910        let lang_filter = LanguageFilter::new(resolved_lang_name, registry);
911
912        // Single-pass: iterate walker, apply filters inline, collect only matching files.
913        // This eliminates the memory double-allocation bug where we previously:
914        // 1. Collected all files into entries Vec
915        // 2. Filtered and collected into filtered Vec
916        //
917        // PERFORMANCE: Cache language detection during filtering to avoid redundant calls.
918        // Language is detected exactly once per file, then reused for metadata collection.
919        let walker = self.build_walker_with_config(config)?;
920        let mut result = ScanResult::new();
921        let mut filtered: Vec<ScannedFile> = Vec::new();
922
923        for entry_result in walker {
924            match entry_result {
925                Ok(entry) => {
926                    let path = entry.path();
927                    // Apply extension filter first (fast, no language detection needed)
928                    if path.is_file() && ext_filter.matches(path) {
929                        // Use matches_with_cache to detect language once and cache the result
930                        let match_result = lang_filter.matches_with_cache(path, &ext_filter);
931                        if match_result.matches {
932                            filtered.push(ScannedFile {
933                                path: path.to_path_buf(),
934                                language: match_result.language,
935                            });
936                        }
937                    }
938                }
939                Err(e) => {
940                    let scan_error = ScanError::from_ignore_error(&e);
941
942                    match config.error_handling {
943                        ErrorHandling::FailFast => {
944                            return Err(BrrrError::Io(std::io::Error::new(
945                                std::io::ErrorKind::Other,
946                                format!("Scan failed: {}", scan_error),
947                            )));
948                        }
949                        ErrorHandling::CollectAndContinue => {
950                            warn!("Failed to scan entry: {}", scan_error);
951                            debug!("Error details: {:?}", e);
952                            result.add_error(scan_error);
953                        }
954                        ErrorHandling::LogOnly => {
955                            warn!("Failed to scan entry: {}", scan_error);
956                            debug!("Error details: {:?}", e);
957                        }
958                    }
959                }
960            }
961        }
962
963        // Collect metadata if requested, using cached language from filtering
964        if config.collect_metadata {
965            let use_parallel = config.parallel && filtered.len() >= MIN_FILES_FOR_PARALLEL;
966
967            if use_parallel {
968                // Parallel metadata collection with thread-safe error collection
969                // Uses cached language from filtering - no redundant detection
970                let errors = Mutex::new(Vec::new());
971                let metadata: Vec<_> = filtered
972                    .par_iter()
973                    .filter_map(|scanned| {
974                        let cached_lang = scanned.language.map(|s| s.to_string());
975                        match FileMetadata::from_path_with_language(scanned.path.clone(), cached_lang) {
976                            Some(meta) => Some(meta),
977                            None => {
978                                // Metadata collection failed (likely permission or I/O error)
979                                let warning = format!(
980                                    "Could not collect metadata for: {}",
981                                    scanned.path.display()
982                                );
983                                warn!("{}", warning);
984                                if matches!(config.error_handling, ErrorHandling::CollectAndContinue)
985                                {
986                                    errors.lock().unwrap().push(warning);
987                                }
988                                None
989                            }
990                        }
991                    })
992                    .collect();
993
994                // Merge collected warnings
995                for warning in errors.into_inner().unwrap() {
996                    result.add_warning(warning);
997                }
998
999                for meta in metadata {
1000                    result.add_file(meta.path.clone());
1001                    result.add_metadata(meta);
1002                }
1003            } else {
1004                // Sequential metadata collection using cached language
1005                for scanned in filtered {
1006                    let cached_lang = scanned.language.map(|s| s.to_string());
1007                    if let Some(meta) = FileMetadata::from_path_with_language(scanned.path.clone(), cached_lang) {
1008                        result.add_file(meta.path.clone());
1009                        result.add_metadata(meta);
1010                    } else {
1011                        let warning =
1012                            format!("Could not collect metadata for: {}", scanned.path.display());
1013                        warn!("{}", warning);
1014                        if matches!(config.error_handling, ErrorHandling::CollectAndContinue) {
1015                            result.add_warning(warning);
1016                        }
1017                        result.add_file(scanned.path);
1018                    }
1019                }
1020            }
1021        } else {
1022            // No metadata collection - extract paths from cached ScannedFile entries
1023            result.files = filtered.into_iter().map(|f| f.path).collect();
1024        }
1025
1026        // Log summary if errors were encountered
1027        if result.has_errors() {
1028            warn!(
1029                "Scan completed with errors: {}",
1030                result.error_summary()
1031            );
1032        }
1033
1034        Ok(result)
1035    }
1036
1037    /// Scan and return detailed file metadata.
1038    ///
1039    /// This is a convenience method equivalent to:
1040    /// ```ignore
1041    /// scanner.scan_with_config(&ScanConfig::default().with_metadata())
1042    /// ```
1043    #[allow(dead_code)]
1044    pub fn scan_with_metadata(&self) -> Result<Vec<FileMetadata>> {
1045        let config = ScanConfig {
1046            collect_metadata: true,
1047            parallel: true,
1048            ..Default::default()
1049        };
1050
1051        Ok(self.scan_with_config(&config)?.metadata)
1052    }
1053
1054    /// Scan a specific language and return metadata.
1055    #[allow(dead_code)]
1056    pub fn scan_language_with_metadata(&self, lang_name: &str) -> Result<Vec<FileMetadata>> {
1057        let config = ScanConfig {
1058            language: Some(lang_name.to_string()),
1059            collect_metadata: true,
1060            parallel: true,
1061            ..Default::default()
1062        };
1063
1064        Ok(self.scan_with_config(&config)?.metadata)
1065    }
1066
1067    /// Build a WalkBuilder with default settings.
1068    ///
1069    /// # Ignore Handling Design
1070    ///
1071    /// WalkBuilder handles gitignore natively (efficient, integrated with walking).
1072    /// `.brrrignore` is added as a custom ignore file.
1073    ///
1074    /// Note: `BrrrIgnore` (in util/ignore.rs) intentionally does NOT load `.gitignore`
1075    /// to avoid duplicate processing. This scanner handles gitignore, while other code
1076    /// paths use `BrrrIgnore` for `.brrrignore` patterns only.
1077    fn build_walker(
1078        &self,
1079        max_depth: Option<usize>,
1080    ) -> Result<impl Iterator<Item = std::result::Result<ignore::DirEntry, ignore::Error>>> {
1081        let mut builder = WalkBuilder::new(&self.root);
1082
1083        // gitignore handling: WalkBuilder handles this natively and efficiently.
1084        // BrrrIgnore does NOT load .gitignore to avoid duplicate processing.
1085        builder
1086            .hidden(true) // Skip hidden files/dirs
1087            .parents(true) // Respect .gitignore in parent dirs
1088            .git_ignore(true) // Respect .gitignore
1089            .git_global(true) // Respect global gitignore
1090            .git_exclude(true) // Respect .git/info/exclude
1091            .add_custom_ignore_filename(".brrrignore");
1092
1093        if let Some(depth) = max_depth {
1094            builder.max_depth(Some(depth));
1095        }
1096
1097        // Add common exclude patterns
1098        let mut overrides = OverrideBuilder::new(&self.root);
1099        // Standard directories to always skip
1100        let _ = overrides.add("!**/node_modules/**");
1101        let _ = overrides.add("!**/__pycache__/**");
1102        let _ = overrides.add("!**/.venv/**");
1103        let _ = overrides.add("!**/venv/**");
1104        let _ = overrides.add("!**/target/debug/**");
1105        let _ = overrides.add("!**/target/release/**");
1106        let _ = overrides.add("!**/.git/**");
1107        let _ = overrides.add("!**/dist/**");
1108        let _ = overrides.add("!**/build/**");
1109        let _ = overrides.add("!**/*.min.js");
1110        let _ = overrides.add("!**/*.min.css");
1111
1112        if let Ok(built) = overrides.build() {
1113            builder.overrides(built);
1114        }
1115
1116        Ok(builder.build())
1117    }
1118
1119    /// Build a WalkBuilder with custom configuration.
1120    ///
1121    /// See `build_walker` for ignore handling design notes.
1122    fn build_walker_with_config(
1123        &self,
1124        config: &ScanConfig,
1125    ) -> Result<impl Iterator<Item = std::result::Result<ignore::DirEntry, ignore::Error>>> {
1126        let mut builder = WalkBuilder::new(&self.root);
1127
1128        // Handle no_ignore flag: when set, disable all ignore file processing
1129        if config.no_ignore {
1130            // Disable all ignore file processing when --no-ignore is set
1131            builder
1132                .hidden(false) // Include hidden files
1133                .parents(false) // Don't look for ignore files in parent dirs
1134                .git_ignore(false) // Ignore .gitignore
1135                .git_global(false) // Ignore global gitignore
1136                .git_exclude(false) // Ignore .git/info/exclude
1137                .ignore(false) // Ignore .ignore files
1138                .follow_links(config.follow_symlinks);
1139            // Note: Do NOT add .brrrignore when no_ignore is set
1140        } else {
1141            // gitignore handling: WalkBuilder handles this natively and efficiently.
1142            // BrrrIgnore does NOT load .gitignore to avoid duplicate processing.
1143            builder
1144                .hidden(true)
1145                .parents(true)
1146                .git_ignore(true)
1147                .git_global(true)
1148                .git_exclude(true)
1149                .follow_links(config.follow_symlinks)
1150                .add_custom_ignore_filename(".brrrignore");
1151        }
1152
1153        if let Some(depth) = config.max_depth {
1154            builder.max_depth(Some(depth));
1155        }
1156
1157        // Build overrides from config patterns
1158        let mut overrides = OverrideBuilder::new(&self.root);
1159
1160        // Standard excludes (only applied if not disabled by config and not no_ignore mode)
1161        // These can be disabled when users need to include files from typically-excluded
1162        // directories like vendored dependencies in node_modules.
1163        // When no_ignore is set, we also skip default excludes
1164        if !config.disable_default_excludes && !config.no_ignore {
1165            let _ = overrides.add("!**/node_modules/**");
1166            let _ = overrides.add("!**/__pycache__/**");
1167            let _ = overrides.add("!**/.venv/**");
1168            let _ = overrides.add("!**/venv/**");
1169            let _ = overrides.add("!**/target/debug/**");
1170            let _ = overrides.add("!**/target/release/**");
1171            let _ = overrides.add("!**/.git/**");
1172        }
1173
1174        // User-specified excludes
1175        for pattern in &config.exclude_patterns {
1176            let exclude = if pattern.starts_with('!') {
1177                pattern.clone()
1178            } else {
1179                format!("!{}", pattern)
1180            };
1181            let _ = overrides.add(&exclude);
1182        }
1183
1184        // User-specified includes (if any)
1185        for pattern in &config.include_patterns {
1186            let _ = overrides.add(pattern);
1187        }
1188
1189        if let Ok(built) = overrides.build() {
1190            builder.overrides(built);
1191        }
1192
1193        // Enable parallel directory traversal for large projects
1194        if config.parallel {
1195            builder.threads(0); // Use all available CPUs
1196        } else {
1197            builder.threads(1);
1198        }
1199
1200        Ok(builder.build())
1201    }
1202
1203    /// Get the count of supported source files in the project.
1204    ///
1205    /// Performs a full directory traversal to get an accurate count.
1206    /// Uses parallel walking for performance - on modern SSDs this completes
1207    /// in under 1 second for projects with up to 100K files.
1208    ///
1209    /// This count matches what `scan_files()` will return (filtered by
1210    /// supported languages, respecting .gitignore and .brrrignore).
1211    ///
1212    /// Useful for progress bars or deciding scan strategy.
1213    /// Note: Errors during counting are logged but do not affect the count.
1214    #[allow(dead_code)]
1215    pub fn estimate_file_count(&self) -> Result<usize> {
1216        let registry = LanguageRegistry::global();
1217        let mut error_count = 0;
1218
1219        // Single-pass traversal counting only supported language files.
1220        // This is both simpler and more accurate than the previous heuristic
1221        // which did TWO traversals with an inaccurate multiplication formula.
1222        let count = self
1223            .build_walker(None)?
1224            .filter_map(|e| match e {
1225                Ok(entry) => Some(entry),
1226                Err(err) => {
1227                    debug!("Error during file count: {:?}", err);
1228                    error_count += 1;
1229                    None
1230                }
1231            })
1232            .filter(|e| {
1233                e.path().is_file() && registry.detect_language(e.path()).is_some()
1234            })
1235            .count();
1236
1237        if error_count > 0 {
1238            warn!(
1239                "File count encountered {} errors (count may be incomplete)",
1240                error_count
1241            );
1242        }
1243
1244        Ok(count)
1245    }
1246}
1247
1248#[cfg(test)]
1249mod tests {
1250    use super::*;
1251    use std::fs::File;
1252    use tempfile::TempDir;
1253
1254    fn create_test_project() -> TempDir {
1255        let dir = TempDir::new().unwrap();
1256        let root = dir.path();
1257
1258        // Create some test files
1259        File::create(root.join("main.py")).unwrap();
1260        File::create(root.join("lib.py")).unwrap();
1261        File::create(root.join("utils.rs")).unwrap();
1262        File::create(root.join("app.ts")).unwrap();
1263
1264        // Create subdirectory
1265        std::fs::create_dir(root.join("src")).unwrap();
1266        File::create(root.join("src/module.py")).unwrap();
1267        File::create(root.join("src/helper.rs")).unwrap();
1268
1269        // Create ignored directory
1270        std::fs::create_dir(root.join("node_modules")).unwrap();
1271        File::create(root.join("node_modules/dep.js")).unwrap();
1272
1273        dir
1274    }
1275
1276    #[test]
1277    fn test_scan_files() {
1278        let dir = create_test_project();
1279        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1280
1281        let files = scanner.scan_files().unwrap();
1282
1283        // Should find Python, Rust, TypeScript files but not node_modules
1284        assert!(files.iter().any(|p| p.ends_with("main.py")));
1285        assert!(files.iter().any(|p| p.ends_with("utils.rs")));
1286        assert!(files.iter().any(|p| p.ends_with("app.ts")));
1287        assert!(!files
1288            .iter()
1289            .any(|p| p.to_str().unwrap().contains("node_modules")));
1290    }
1291
1292    #[test]
1293    fn test_scan_language() {
1294        let dir = create_test_project();
1295        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1296
1297        let py_files = scanner.scan_language("python").unwrap();
1298
1299        assert_eq!(py_files.len(), 3); // main.py, lib.py, src/module.py
1300        assert!(py_files.iter().all(|p| p.extension().unwrap() == "py"));
1301    }
1302
1303    #[test]
1304    fn test_scan_extensions() {
1305        let dir = create_test_project();
1306        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1307
1308        let rs_files = scanner.scan_extensions(&[".rs"]).unwrap();
1309
1310        assert_eq!(rs_files.len(), 2); // utils.rs, src/helper.rs
1311    }
1312
1313    #[test]
1314    fn test_scan_with_metadata() {
1315        let dir = create_test_project();
1316        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1317
1318        let metadata = scanner.scan_with_metadata().unwrap();
1319
1320        assert!(!metadata.is_empty());
1321        // All files should have language detected
1322        assert!(metadata.iter().all(|m| m.language.is_some()));
1323    }
1324
1325    #[test]
1326    fn test_scan_config() {
1327        let dir = create_test_project();
1328        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1329
1330        let config = ScanConfig::for_language("python")
1331            .with_excludes(&["**/src/**"])
1332            .with_metadata();
1333
1334        let result = scanner.scan_with_config(&config).unwrap();
1335
1336        // Should only find root-level Python files
1337        assert_eq!(result.files.len(), 2); // main.py, lib.py
1338        assert!(result.by_language.contains_key("python"));
1339    }
1340
1341    #[test]
1342    fn test_unsupported_language_error() {
1343        let dir = create_test_project();
1344        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1345
1346        let result = scanner.scan_language("brainfuck");
1347
1348        assert!(matches!(result, Err(BrrrError::UnsupportedLanguage(_))));
1349    }
1350
1351    #[test]
1352    fn test_scan_language_javascript_alias() {
1353        // BUG FIX TEST: "javascript" should be a valid language name (alias for TypeScript)
1354        // Previously, scan_language("javascript") would return UnsupportedLanguage error
1355        let dir = TempDir::new().unwrap();
1356        let root = dir.path();
1357
1358        // Create JavaScript files
1359        File::create(root.join("app.js")).unwrap();
1360        File::create(root.join("utils.mjs")).unwrap();
1361        File::create(root.join("config.cjs")).unwrap();
1362        std::fs::create_dir(root.join("src")).unwrap();
1363        File::create(root.join("src/helper.js")).unwrap();
1364
1365        let scanner = ProjectScanner::new(root.to_str().unwrap()).unwrap();
1366
1367        // This should NOT return UnsupportedLanguage error
1368        let js_files = scanner.scan_language("javascript");
1369        assert!(
1370            js_files.is_ok(),
1371            "scan_language('javascript') should work: {:?}",
1372            js_files.err()
1373        );
1374
1375        // Files should be found via the TypeScript parser (which handles JS)
1376        let files = js_files.unwrap();
1377        assert_eq!(files.len(), 4, "Should find all 4 JS files");
1378
1379        // Short aliases should also work
1380        assert!(
1381            scanner.scan_language("js").is_ok(),
1382            "scan_language('js') alias should work"
1383        );
1384    }
1385
1386    #[test]
1387    fn test_nonexistent_path_error() {
1388        let result = ProjectScanner::new("/nonexistent/path/12345");
1389
1390        assert!(matches!(result, Err(BrrrError::Io(_))));
1391    }
1392
1393    #[test]
1394    fn test_disable_default_excludes() {
1395        let dir = create_test_project();
1396        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1397
1398        // By default, node_modules should be excluded
1399        let default_config = ScanConfig::default();
1400        let result = scanner.scan_with_config(&default_config).unwrap();
1401        assert!(
1402            !result
1403                .files
1404                .iter()
1405                .any(|p| p.to_str().unwrap().contains("node_modules")),
1406            "node_modules should be excluded by default"
1407        );
1408
1409        // With disable_default_excludes, node_modules should be included
1410        let config_with_disabled = ScanConfig::default().with_default_excludes_disabled();
1411        let result = scanner.scan_with_config(&config_with_disabled).unwrap();
1412        assert!(
1413            result
1414                .files
1415                .iter()
1416                .any(|p| p.to_str().unwrap().contains("node_modules")),
1417            "node_modules should be included when default excludes are disabled"
1418        );
1419    }
1420
1421    #[test]
1422    fn test_disable_default_excludes_with_include_pattern() {
1423        let dir = create_test_project();
1424
1425        // Create a vendored file in node_modules
1426        std::fs::create_dir_all(dir.path().join("node_modules/vendor")).unwrap();
1427        File::create(dir.path().join("node_modules/vendor/lib.js")).unwrap();
1428
1429        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1430
1431        // With default excludes disabled and include pattern, should find vendored file
1432        let config = ScanConfig::default()
1433            .with_default_excludes_disabled()
1434            .with_includes(&["**/node_modules/vendor/**"]);
1435
1436        let result = scanner.scan_with_config(&config).unwrap();
1437        assert!(
1438            result
1439                .files
1440                .iter()
1441                .any(|p| p.to_str().unwrap().contains("node_modules/vendor")),
1442            "should find vendored files in node_modules when default excludes are disabled"
1443        );
1444    }
1445
1446    #[test]
1447    fn test_scan_files_with_errors_returns_scan_result() {
1448        let dir = create_test_project();
1449        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1450
1451        let result = scanner.scan_files_with_errors().unwrap();
1452
1453        // Should find files successfully
1454        assert!(!result.files.is_empty());
1455        // In a normal project, should have no errors
1456        assert!(!result.has_errors());
1457        assert_eq!(result.error_summary(), "No errors");
1458    }
1459
1460    #[test]
1461    fn test_scan_language_with_errors_returns_scan_result() {
1462        let dir = create_test_project();
1463        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1464
1465        let result = scanner.scan_language_with_errors("python").unwrap();
1466
1467        assert_eq!(result.files.len(), 3); // main.py, lib.py, src/module.py
1468        assert!(!result.has_errors());
1469    }
1470
1471    #[test]
1472    fn test_scan_extensions_with_errors_returns_scan_result() {
1473        let dir = create_test_project();
1474        let scanner = ProjectScanner::new(dir.path().to_str().unwrap()).unwrap();
1475
1476        let result = scanner.scan_extensions_with_errors(&[".rs"]).unwrap();
1477
1478        assert_eq!(result.files.len(), 2); // utils.rs, src/helper.rs
1479        assert!(!result.has_errors());
1480    }
1481
1482    #[test]
1483    fn test_error_handling_config() {
1484        let config = ScanConfig::default().with_error_handling(ErrorHandling::FailFast);
1485        assert_eq!(config.error_handling, ErrorHandling::FailFast);
1486
1487        let config = ScanConfig::default().fail_on_error();
1488        assert_eq!(config.error_handling, ErrorHandling::FailFast);
1489
1490        let config = ScanConfig::default().with_error_handling(ErrorHandling::CollectAndContinue);
1491        assert_eq!(config.error_handling, ErrorHandling::CollectAndContinue);
1492
1493        let config = ScanConfig::default().with_error_handling(ErrorHandling::LogOnly);
1494        assert_eq!(config.error_handling, ErrorHandling::LogOnly);
1495    }
1496
1497    #[test]
1498    fn test_scan_error_kind_display() {
1499        assert_eq!(
1500            format!("{}", ScanErrorKind::PermissionDenied),
1501            "permission denied"
1502        );
1503        assert_eq!(format!("{}", ScanErrorKind::BrokenSymlink), "broken symlink");
1504        assert_eq!(format!("{}", ScanErrorKind::IoError), "I/O error");
1505        assert_eq!(format!("{}", ScanErrorKind::DirectoryLoop), "directory loop");
1506        assert_eq!(format!("{}", ScanErrorKind::Other), "other error");
1507    }
1508
1509    #[test]
1510    fn test_scan_error_display() {
1511        let error_with_path = ScanError {
1512            path: Some(PathBuf::from("/test/file.txt")),
1513            message: "test error".to_string(),
1514            kind: ScanErrorKind::PermissionDenied,
1515        };
1516        assert!(format!("{}", error_with_path).contains("/test/file.txt"));
1517        assert!(format!("{}", error_with_path).contains("test error"));
1518        assert!(format!("{}", error_with_path).contains("permission denied"));
1519
1520        let error_without_path = ScanError {
1521            path: None,
1522            message: "test error".to_string(),
1523            kind: ScanErrorKind::IoError,
1524        };
1525        assert!(format!("{}", error_without_path).contains("test error"));
1526        assert!(format!("{}", error_without_path).contains("I/O error"));
1527    }
1528
1529    #[test]
1530    fn test_scan_result_error_counts() {
1531        let mut result = ScanResult::new();
1532        result.add_error(ScanError {
1533            path: Some(PathBuf::from("/a")),
1534            message: "error 1".to_string(),
1535            kind: ScanErrorKind::PermissionDenied,
1536        });
1537        result.add_error(ScanError {
1538            path: Some(PathBuf::from("/b")),
1539            message: "error 2".to_string(),
1540            kind: ScanErrorKind::PermissionDenied,
1541        });
1542        result.add_error(ScanError {
1543            path: Some(PathBuf::from("/c")),
1544            message: "error 3".to_string(),
1545            kind: ScanErrorKind::BrokenSymlink,
1546        });
1547
1548        let counts = result.error_counts();
1549        assert_eq!(counts.get(&ScanErrorKind::PermissionDenied), Some(&2));
1550        assert_eq!(counts.get(&ScanErrorKind::BrokenSymlink), Some(&1));
1551
1552        assert!(result.has_errors());
1553        let summary = result.error_summary();
1554        assert!(summary.contains("3 total errors"));
1555    }
1556
1557    #[test]
1558    fn test_scan_result_warnings() {
1559        let mut result = ScanResult::new();
1560        result.add_warning("warning 1".to_string());
1561        result.add_warning("warning 2".to_string());
1562
1563        assert_eq!(result.warnings.len(), 2);
1564        assert!(result.warnings.contains(&"warning 1".to_string()));
1565        assert!(result.warnings.contains(&"warning 2".to_string()));
1566    }
1567
1568    #[test]
1569    fn test_scan_extensions_case_insensitive() {
1570        let dir = TempDir::new().unwrap();
1571        let root = dir.path();
1572
1573        // Create files with various extension cases
1574        File::create(root.join("lowercase.py")).unwrap();
1575        File::create(root.join("uppercase.PY")).unwrap();
1576        File::create(root.join("mixed.Py")).unwrap();
1577        File::create(root.join("mixed2.pY")).unwrap();
1578        File::create(root.join("other.rs")).unwrap();
1579
1580        let scanner = ProjectScanner::new(root.to_str().unwrap()).unwrap();
1581
1582        // Test with lowercase extension in query
1583        let py_files = scanner.scan_extensions(&[".py"]).unwrap();
1584        assert_eq!(py_files.len(), 4, "Should match all .py variants regardless of case");
1585
1586        // Test with uppercase extension in query
1587        let py_files_upper = scanner.scan_extensions(&[".PY"]).unwrap();
1588        assert_eq!(py_files_upper.len(), 4, "Query with .PY should also match all variants");
1589
1590        // Test without leading dot
1591        let py_files_no_dot = scanner.scan_extensions(&["py"]).unwrap();
1592        assert_eq!(py_files_no_dot.len(), 4, "Query without dot should work");
1593    }
1594
1595    #[test]
1596    fn test_scan_config_extensions_case_insensitive() {
1597        let dir = TempDir::new().unwrap();
1598        let root = dir.path();
1599
1600        // Create files with various extension cases
1601        File::create(root.join("test1.rs")).unwrap();
1602        File::create(root.join("test2.RS")).unwrap();
1603        File::create(root.join("test3.Rs")).unwrap();
1604
1605        let scanner = ProjectScanner::new(root.to_str().unwrap()).unwrap();
1606
1607        let config = ScanConfig::for_extensions(&[".rs"]);
1608        let result = scanner.scan_with_config(&config).unwrap();
1609
1610        assert_eq!(result.files.len(), 3, "Should match all .rs variants regardless of case");
1611    }
1612
1613    #[test]
1614    fn test_estimate_file_count_accuracy() {
1615        // BUG FIX TEST: estimate_file_count should return accurate count
1616        // Previously, it used a broken heuristic: depth_1_files * total_dirs
1617        // which gave wildly inaccurate results (e.g., estimated 12, actual 101)
1618        let dir = TempDir::new().unwrap();
1619        let root = dir.path();
1620
1621        // Create a nested structure that would expose the old bug
1622        // Old formula: 2 files at depth 1 * 4 dirs = 8
1623        // But actual count is 6 supported files
1624        File::create(root.join("root1.py")).unwrap();
1625        File::create(root.join("root2.py")).unwrap();
1626
1627        std::fs::create_dir(root.join("subdir1")).unwrap();
1628        File::create(root.join("subdir1/file1.py")).unwrap();
1629        File::create(root.join("subdir1/file2.py")).unwrap();
1630
1631        std::fs::create_dir(root.join("subdir2")).unwrap();
1632        std::fs::create_dir(root.join("subdir2/nested")).unwrap();
1633        File::create(root.join("subdir2/nested/deep.py")).unwrap();
1634
1635        // Also create an unsupported file type (should not be counted)
1636        File::create(root.join("readme.txt")).unwrap();
1637
1638        let scanner = ProjectScanner::new(root.to_str().unwrap()).unwrap();
1639
1640        let estimate = scanner.estimate_file_count().unwrap();
1641        let actual_files = scanner.scan_files().unwrap();
1642
1643        // Estimate should now exactly match actual count
1644        assert_eq!(
1645            estimate,
1646            actual_files.len(),
1647            "estimate_file_count() should match scan_files() count exactly.\n\
1648             Estimate: {}, Actual: {}",
1649            estimate,
1650            actual_files.len()
1651        );
1652
1653        // Should have found exactly 5 Python files (not counting .txt)
1654        assert_eq!(actual_files.len(), 5);
1655    }
1656}