syncable_cli/analyzer/security/turbo/
file_discovery.rs

1//! # File Discovery Module
2//!
3//! Ultra-fast file discovery with git-aware filtering and smart prioritization.
4
5use std::fs;
6use std::path::{Path, PathBuf};
7use std::process::Command;
8use std::time::SystemTime;
9
10use ahash::AHashSet;
11use log::{debug, trace};
12use rayon::prelude::*;
13use walkdir::WalkDir;
14
15use super::{ScanMode, SecurityError};
16
/// Metadata collected for a single discovered file.
///
/// Instances are produced by `FileDiscovery::build_file_metadata` and are then
/// consumed by the inclusion filters and the priority helpers on this type.
#[derive(Debug, Clone)]
pub struct FileMetadata {
    /// Path of the file exactly as it was handed to the metadata builder.
    pub path: PathBuf,
    /// File length in bytes as reported by `fs::metadata`.
    pub size: usize,
    /// Lowercased extension without the leading dot; `None` when the path has
    /// no extension or the extension is not valid UTF-8.
    pub extension: Option<String>,
    /// `true` when `git check-ignore` exited successfully for this path;
    /// always `false` when the project root has no `.git` entry.
    pub is_gitignored: bool,
    /// Last modification time reported by the filesystem.
    pub modified: SystemTime,
    /// Name- and path-derived flags used for filtering and scoring.
    pub priority_hints: PriorityHints,
}
27
/// Boolean hints derived from a file's name, extension and path.
///
/// All flags default to `false`; they are filled in by
/// `FileDiscovery::build_file_metadata`.
#[derive(Debug, Clone, Default)]
pub struct PriorityHints {
    /// Lowercased file name starts with or ends with `.env`.
    pub is_env_file: bool,
    /// Extension or file name looks like configuration (see `is_config_file`).
    pub is_config_file: bool,
    /// File name or path contains a secret-related marker (see `is_secret_file`).
    pub is_secret_file: bool,
    /// Extension is one of the recognised source-code extensions.
    pub is_source_file: bool,
    /// Lowercased file name contains one of the configured secret keywords.
    pub has_secret_keywords: bool,
}
37
/// Caller-supplied settings that steer file discovery.
#[derive(Debug, Clone)]
pub struct DiscoveryConfig {
    /// Use git-based listing when the project root contains a `.git` entry;
    /// otherwise (or when `false`) the filesystem walker is used.
    pub use_git: bool,
    /// Files whose size in bytes exceeds this value are skipped.
    pub max_file_size: usize,
    /// NOTE(review): not read anywhere in this file — presumably consumed by
    /// another module; confirm before relying on it.
    pub priority_extensions: Vec<String>,
    /// Selects the extra ignored directories and the per-mode inclusion rules.
    pub scan_mode: ScanMode,
}
46
/// File discovery engine holding the configuration plus the lookup tables
/// that are computed once in `FileDiscovery::new`.
pub struct FileDiscovery {
    /// Settings supplied by the caller.
    config: DiscoveryConfig,
    /// Directory names pruned by the filesystem walker.
    ignored_dirs: AHashSet<String>,
    /// Substrings searched for in lowercased file names.
    secret_keywords: Vec<&'static str>,
    /// Lowercase extensions treated as binary content.
    binary_extensions: AHashSet<&'static str>,
    /// Exact lowercased file names that are always skipped.
    excluded_filenames: AHashSet<&'static str>,
    /// Lowercase extensions treated as assets (images, fonts, media).
    asset_extensions: AHashSet<&'static str>,
}
56
57impl FileDiscovery {
58    pub fn new(config: DiscoveryConfig) -> Self {
59        let ignored_dirs = Self::get_ignored_dirs(&config.scan_mode);
60        let secret_keywords = Self::get_secret_keywords();
61        let binary_extensions = Self::get_binary_extensions();
62        let excluded_filenames = Self::get_excluded_filenames();
63        let asset_extensions = Self::get_asset_extensions();
64
65        Self {
66            config,
67            ignored_dirs,
68            secret_keywords,
69            binary_extensions,
70            excluded_filenames,
71            asset_extensions,
72        }
73    }
74
75    /// Discover files with ultra-fast git-aware filtering
76    pub fn discover_files(&self, project_root: &Path) -> Result<Vec<FileMetadata>, SecurityError> {
77        let is_git_repo = project_root.join(".git").exists();
78
79        if is_git_repo && self.config.use_git {
80            self.git_aware_discovery(project_root)
81        } else {
82            self.filesystem_discovery(project_root)
83        }
84    }
85
86    /// Git-aware file discovery (fastest method)
87    fn git_aware_discovery(&self, project_root: &Path) -> Result<Vec<FileMetadata>, SecurityError> {
88        debug!("Using git-aware file discovery");
89
90        // Get all tracked files using git ls-files
91        let tracked_files = self.get_git_tracked_files(project_root)?;
92
93        // Get untracked files that might contain secrets
94        let untracked_files = self.get_untracked_secret_files(project_root)?;
95
96        // Combine and process in parallel
97        let all_paths: Vec<PathBuf> = tracked_files.into_iter().chain(untracked_files).collect();
98
99        // Process files in parallel to build metadata
100        let files: Vec<FileMetadata> = all_paths
101            .par_iter()
102            .filter_map(|path| self.build_file_metadata(path, project_root).ok())
103            .filter(|meta| self.should_include_file(meta))
104            .collect();
105
106        Ok(files)
107    }
108
109    /// Get tracked files from git
110    fn get_git_tracked_files(&self, project_root: &Path) -> Result<Vec<PathBuf>, SecurityError> {
111        let output = Command::new("git")
112            .args(["ls-files", "-z"]) // -z for null-terminated output
113            .current_dir(project_root)
114            .output()
115            .map_err(|e| SecurityError::FileDiscovery(format!("Git ls-files failed: {}", e)))?;
116
117        if !output.status.success() {
118            return Err(SecurityError::FileDiscovery(
119                "Git ls-files failed".to_string(),
120            ));
121        }
122
123        // Parse null-terminated paths
124        let paths: Vec<PathBuf> = output
125            .stdout
126            .split(|&b| b == 0)
127            .filter(|path| !path.is_empty())
128            .filter_map(|path| std::str::from_utf8(path).ok())
129            .map(|path| project_root.join(path))
130            .collect();
131
132        Ok(paths)
133    }
134
135    /// Get untracked files that might contain secrets (including gitignored files)
136    fn get_untracked_secret_files(
137        &self,
138        project_root: &Path,
139    ) -> Result<Vec<PathBuf>, SecurityError> {
140        // Common secret file patterns that might not be tracked
141        let secret_patterns = vec![
142            ".env*",
143            "*.key",
144            "*.pem",
145            "*.p12",
146            "*credentials*",
147            "*secret*",
148            "config/*.json",
149            "config/*.yml",
150        ];
151
152        let mut untracked_files = Vec::new();
153
154        for pattern in secret_patterns {
155            // First, get untracked files that are NOT gitignored (potential accidental exposure)
156            let output = Command::new("git")
157                .args(["ls-files", "--others", "--exclude-standard", pattern])
158                .current_dir(project_root)
159                .output();
160
161            if let Ok(output) = output
162                && output.status.success()
163            {
164                let paths: Vec<PathBuf> = String::from_utf8_lossy(&output.stdout)
165                    .lines()
166                    .filter(|line| !line.is_empty())
167                    .map(|line| project_root.join(line))
168                    .collect();
169                untracked_files.extend(paths);
170            }
171
172            // Also get gitignored files - these should be scanned to verify they exist
173            // and contain real secrets (important for security audit completeness)
174            let output = Command::new("git")
175                .args([
176                    "ls-files",
177                    "--others",
178                    "--ignored",
179                    "--exclude-standard",
180                    pattern,
181                ])
182                .current_dir(project_root)
183                .output();
184
185            if let Ok(output) = output
186                && output.status.success()
187            {
188                let paths: Vec<PathBuf> = String::from_utf8_lossy(&output.stdout)
189                    .lines()
190                    .filter(|line| !line.is_empty())
191                    .map(|line| project_root.join(line))
192                    .collect();
193                untracked_files.extend(paths);
194            }
195        }
196
197        Ok(untracked_files)
198    }
199
200    /// Fallback filesystem discovery
201    fn filesystem_discovery(
202        &self,
203        project_root: &Path,
204    ) -> Result<Vec<FileMetadata>, SecurityError> {
205        debug!("Using filesystem discovery");
206
207        let walker = WalkDir::new(project_root)
208            .follow_links(false)
209            .max_depth(20)
210            .into_iter()
211            .filter_entry(|entry| {
212                // Skip ignored directories
213                if entry.file_type().is_dir() {
214                    let dir_name = entry.file_name().to_string_lossy();
215                    return !self.ignored_dirs.contains(dir_name.as_ref());
216                }
217                true
218            });
219
220        let files: Vec<FileMetadata> = walker
221            .par_bridge()
222            .filter_map(|entry| entry.ok())
223            .filter(|entry| entry.file_type().is_file())
224            .filter_map(|entry| self.build_file_metadata(entry.path(), project_root).ok())
225            .filter(|meta| self.should_include_file(meta))
226            .collect();
227
228        Ok(files)
229    }
230
231    /// Build file metadata with priority hints
232    fn build_file_metadata(
233        &self,
234        path: &Path,
235        project_root: &Path,
236    ) -> Result<FileMetadata, std::io::Error> {
237        let metadata = fs::metadata(path)?;
238        let size = metadata.len() as usize;
239        let modified = metadata.modified()?;
240
241        let extension = path
242            .extension()
243            .and_then(|ext| ext.to_str())
244            .map(|s| s.to_lowercase());
245
246        let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
247
248        let file_name_lower = file_name.to_lowercase();
249
250        // Check gitignore status efficiently
251        let is_gitignored = if project_root.join(".git").exists() {
252            self.check_gitignore_batch(path, project_root)
253        } else {
254            false
255        };
256
257        // Build priority hints
258        let priority_hints = PriorityHints {
259            is_env_file: file_name_lower.starts_with(".env") || file_name_lower.ends_with(".env"),
260            is_config_file: self.is_config_file(&file_name_lower, &extension),
261            is_secret_file: self.is_secret_file(&file_name_lower, path),
262            is_source_file: self.is_source_file(&extension),
263            has_secret_keywords: self.has_secret_keywords(&file_name_lower),
264        };
265
266        Ok(FileMetadata {
267            path: path.to_path_buf(),
268            size,
269            extension,
270            is_gitignored,
271            modified,
272            priority_hints,
273        })
274    }
275
276    /// Batch check gitignore status
277    fn check_gitignore_batch(&self, path: &Path, project_root: &Path) -> bool {
278        // Quick check using git check-ignore
279        let output = Command::new("git")
280            .args(["check-ignore", path.to_str().unwrap_or("")])
281            .current_dir(project_root)
282            .output();
283
284        match output {
285            Ok(output) => output.status.success(),
286            Err(_) => false,
287        }
288    }
289
290    /// Check if file should be included based on filters
291    fn should_include_file(&self, meta: &FileMetadata) -> bool {
292        // Size filter
293        if meta.size > self.config.max_file_size {
294            trace!(
295                "Skipping large file: {} ({} bytes)",
296                meta.path.display(),
297                meta.size
298            );
299            return false;
300        }
301
302        // Enhanced binary file detection
303        if self.is_binary_file(meta) {
304            trace!("Skipping binary file: {}", meta.path.display());
305            return false;
306        }
307
308        // Asset file detection (images, fonts, media)
309        if self.is_asset_file(meta) {
310            trace!("Skipping asset file: {}", meta.path.display());
311            return false;
312        }
313
314        // Exclude files that are unlikely to contain real secrets
315        if self.should_exclude_from_security_scan(meta) {
316            trace!("Excluding from security scan: {}", meta.path.display());
317            return false;
318        }
319
320        // Critical files always included
321        if meta.is_critical() {
322            return true;
323        }
324
325        // Scan mode specific filtering
326        match self.config.scan_mode {
327            ScanMode::Lightning => {
328                // Only critical files (already handled above)
329                false
330            }
331            ScanMode::Fast => {
332                // Priority files or small source files
333                meta.is_priority() || (meta.priority_hints.is_source_file && meta.size < 50_000)
334            }
335            _ => true, // Include all for other modes
336        }
337    }
338
339    /// Enhanced binary file detection
340    fn is_binary_file(&self, meta: &FileMetadata) -> bool {
341        if let Some(ext) = &meta.extension
342            && self.binary_extensions.contains(ext.as_str())
343        {
344            return true;
345        }
346
347        // Check filename patterns
348        let filename = meta
349            .path
350            .file_name()
351            .and_then(|n| n.to_str())
352            .unwrap_or("")
353            .to_lowercase();
354
355        if self.excluded_filenames.contains(filename.as_str()) {
356            return true;
357        }
358
359        false
360    }
361
362    /// Check if file is an asset (images, fonts, media)
363    fn is_asset_file(&self, meta: &FileMetadata) -> bool {
364        if let Some(ext) = &meta.extension
365            && self.asset_extensions.contains(ext.as_str())
366        {
367            return true;
368        }
369
370        // Check for asset directories
371        let path_str = meta.path.to_string_lossy().to_lowercase();
372        let asset_dirs = [
373            "/assets/",
374            "/static/",
375            "/public/",
376            "/images/",
377            "/img/",
378            "/media/",
379            "/fonts/",
380            "/icons/",
381            "/graphics/",
382            "/pictures/",
383        ];
384
385        asset_dirs.iter().any(|&dir| path_str.contains(dir))
386    }
387
388    /// Check if file should be excluded from security scanning
389    fn should_exclude_from_security_scan(&self, meta: &FileMetadata) -> bool {
390        let path_str = meta.path.to_string_lossy().to_lowercase();
391
392        // DEPENDENCY LOCK FILES - These contain package hashes/metadata, not secrets
393        if self.is_dependency_lock_file(meta) {
394            return true;
395        }
396
397        // SVG files often contain base64 encoded graphics that trigger false positives
398        if meta.extension.as_deref() == Some("svg") {
399            return true;
400        }
401
402        // Minified and bundled files
403        if self.is_minified_or_bundled_file(meta) {
404            return true;
405        }
406
407        // Documentation and non-code files that rarely contain real secrets
408        let exclude_patterns = [
409            ".md",
410            ".txt",
411            ".rst",
412            ".adoc",
413            ".asciidoc",
414            "readme",
415            "changelog",
416            "license",
417            "todo",
418            "roadmap",
419            "contributing",
420            "authors",
421            // Test files (often contain fake/example data)
422            "/test/",
423            "/tests/",
424            "/spec/",
425            "/specs/",
426            "__test__",
427            "__spec__",
428            ".test.",
429            ".spec.",
430            "_test.",
431            "_spec.",
432            "fixtures",
433            "mocks",
434            "examples",
435            // Documentation directories
436            "/docs/",
437            "/doc/",
438            "/documentation/",
439            // Framework/library detection files (they contain patterns but not secrets)
440            "frameworks/",
441            "detector",
442            "rules",
443            "patterns",
444            // Build artifacts and generated files
445            "target/",
446            "build/",
447            "dist/",
448            ".next/",
449            "coverage/",
450            ".nuxt/",
451            ".output/",
452            ".vercel/",
453            ".netlify/",
454            // IDE and editor files
455            ".vscode/",
456            ".idea/",
457            ".vs/",
458            "*.swp",
459            "*.swo",
460            // OS files
461            ".ds_store",
462            "thumbs.db",
463            "desktop.ini",
464        ];
465
466        // Check patterns
467        if exclude_patterns
468            .iter()
469            .any(|&pattern| path_str.contains(pattern))
470        {
471            return true;
472        }
473
474        // Documentation file extensions
475        if let Some(ext) = &meta.extension {
476            let doc_extensions = ["md", "txt", "rst", "adoc", "asciidoc", "rtf"];
477            if doc_extensions.contains(&ext.as_str()) {
478                return true;
479            }
480        }
481
482        // Check if filename suggests it's documentation, examples, or code generation
483        let filename = meta
484            .path
485            .file_name()
486            .and_then(|n| n.to_str())
487            .unwrap_or("")
488            .to_lowercase();
489
490        let doc_filenames = [
491            "readme",
492            "changelog",
493            "license",
494            "authors",
495            "contributing",
496            "roadmap",
497            "todo",
498            "examples",
499            "demo",
500            "sample",
501            "fixture",
502            // Code generation and API example files
503            "apicodedialog",
504            "codedialog",
505            "codeexample",
506            "apiexample",
507            "codesnippet",
508            "snippets",
509            "templates",
510            "codegenerator",
511            "apitool",
512            "playground",
513            "sandbox",
514        ];
515
516        if doc_filenames.iter().any(|&name| filename.contains(name)) {
517            return true;
518        }
519
520        false
521    }
522
523    /// Check if file is minified or bundled
524    fn is_minified_or_bundled_file(&self, meta: &FileMetadata) -> bool {
525        let filename = meta
526            .path
527            .file_name()
528            .and_then(|n| n.to_str())
529            .unwrap_or("")
530            .to_lowercase();
531
532        // Minified file patterns
533        let minified_patterns = [
534            ".min.", ".bundle.", ".chunk.", ".vendor.", "-min.", "-bundle.", "-chunk.", "-vendor.",
535            "_min.", "_bundle.", "_chunk.", "_vendor.",
536        ];
537
538        minified_patterns
539            .iter()
540            .any(|&pattern| filename.contains(pattern))
541    }
542
543    /// Get ignored directories based on scan mode
544    fn get_ignored_dirs(scan_mode: &ScanMode) -> AHashSet<String> {
545        let mut dirs = AHashSet::new();
546
547        // Always ignore these
548        let always_ignore = vec![
549            ".git",
550            "node_modules",
551            "target",
552            "build",
553            "dist",
554            ".next",
555            "coverage",
556            "__pycache__",
557            ".pytest_cache",
558            ".mypy_cache",
559            "vendor",
560            "packages",
561            ".bundle",
562            "bower_components",
563            ".nuxt",
564            ".output",
565            ".vercel",
566            ".netlify",
567            ".vscode",
568            ".idea",
569            ".venv",
570            "venv", // Python virtual environments
571        ];
572
573        for dir in always_ignore {
574            dirs.insert(dir.to_string());
575        }
576
577        // Additional ignores for faster modes
578        if matches!(scan_mode, ScanMode::Lightning | ScanMode::Fast) {
579            let fast_ignore = vec!["test", "tests", "spec", "specs", "docs", "documentation"];
580            for dir in fast_ignore {
581                dirs.insert(dir.to_string());
582            }
583        }
584
585        dirs
586    }
587
588    /// Get comprehensive binary file extensions
589    fn get_binary_extensions() -> AHashSet<&'static str> {
590        let mut extensions = AHashSet::new();
591
592        // Executables and libraries
593        let binary_exts = [
594            "exe", "dll", "so", "dylib", "lib", "a", "o", "obj", "bin", "com", "scr", "msi", "deb",
595            "rpm", "pkg", // Archives
596            "zip", "tar", "gz", "bz2", "xz", "7z", "rar", "ace", "cab", "dmg", "iso", "img",
597            // Media files
598            "mp3", "mp4", "avi", "mov", "wmv", "flv", "mkv", "webm", "wav", "flac", "ogg", "aac",
599            "m4a", "wma", // Images (will be handled separately as assets)
600            "jpg", "jpeg", "png", "gif", "bmp", "tiff", "tga", "webp", "ico", "cur", "psd", "ai",
601            "eps", "raw", "cr2", "nef", // Fonts
602            "ttf", "otf", "woff", "woff2", "eot", // Documents
603            "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp", "rtf",
604            // Databases
605            "db", "sqlite", "sqlite3", "mdb", "accdb", "wt", // Other binary formats
606            "pyc", "pyo", "class", "jar", "war", "ear", "cer", "jks",
607        ];
608
609        for ext in binary_exts {
610            extensions.insert(ext);
611        }
612
613        extensions
614    }
615
616    /// Get asset file extensions (images, media, fonts)
617    fn get_asset_extensions() -> AHashSet<&'static str> {
618        let mut extensions = AHashSet::new();
619
620        let asset_exts = [
621            // Images
622            "jpg", "jpeg", "png", "gif", "bmp", "tiff", "tga", "webp", "ico", "cur", "psd", "ai",
623            "eps", "raw", "cr2", "nef", "svg", // Fonts
624            "ttf", "otf", "woff", "woff2", "eot", // Media
625            "mp3", "mp4", "avi", "mov", "wmv", "flv", "mkv", "webm", "wav", "flac", "ogg", "aac",
626            "m4a", "wma",
627        ];
628
629        for ext in asset_exts {
630            extensions.insert(ext);
631        }
632
633        extensions
634    }
635
636    /// Get filenames that should be excluded
637    fn get_excluded_filenames() -> AHashSet<&'static str> {
638        let mut filenames = AHashSet::new();
639
640        let excluded = [
641            // OS files
642            ".ds_store",
643            "thumbs.db",
644            "desktop.ini",
645            "folder.ico",
646            // Editor files
647            ".gitkeep",
648            ".keep",
649            ".placeholder",
650            // Temporary files
651            ".tmp",
652            ".temp",
653            ".swp",
654            ".swo",
655            ".bak",
656            ".backup",
657        ];
658
659        for filename in excluded {
660            filenames.insert(filename);
661        }
662
663        filenames
664    }
665
666    /// Get secret keywords for detection
667    fn get_secret_keywords() -> Vec<&'static str> {
668        vec![
669            "secret",
670            "key",
671            "token",
672            "password",
673            "credential",
674            "auth",
675            "api",
676            "private",
677            "access",
678            "bearer",
679        ]
680    }
681
682    fn is_config_file(&self, name: &str, extension: &Option<String>) -> bool {
683        let config_extensions = [
684            "json", "yml", "yaml", "toml", "ini", "conf", "config", "xml",
685        ];
686        let config_names = ["config", "settings", "configuration", ".env"];
687
688        if let Some(ext) = extension
689            && config_extensions.contains(&ext.as_str())
690        {
691            return true;
692        }
693
694        config_names.iter().any(|&n| name.contains(n))
695    }
696
697    fn is_secret_file(&self, name: &str, path: &Path) -> bool {
698        let secret_patterns = [
699            ".env",
700            ".key",
701            ".pem",
702            ".p12",
703            ".pfx",
704            "credentials",
705            "secret",
706            "private",
707            "cert",
708        ];
709
710        // Check filename
711        if secret_patterns.iter().any(|&p| name.contains(p)) {
712            return true;
713        }
714
715        // Check path components
716        let path_str = path.to_string_lossy().to_lowercase();
717        secret_patterns.iter().any(|&p| path_str.contains(p))
718    }
719
720    fn is_source_file(&self, extension: &Option<String>) -> bool {
721        if let Some(ext) = extension {
722            let source_extensions = [
723                "js", "jsx", "ts", "tsx", "py", "java", "kt", "go", "rs", "rb", "php", "cs", "cpp",
724                "c", "h", "swift", "scala", "clj", "ex", "exs",
725            ];
726            source_extensions.contains(&ext.as_str())
727        } else {
728            false
729        }
730    }
731
732    fn has_secret_keywords(&self, name: &str) -> bool {
733        self.secret_keywords
734            .iter()
735            .any(|&keyword| name.contains(keyword))
736    }
737
738    /// Enhanced dependency lock file detection
739    fn is_dependency_lock_file(&self, meta: &FileMetadata) -> bool {
740        let filename = meta
741            .path
742            .file_name()
743            .and_then(|n| n.to_str())
744            .unwrap_or("")
745            .to_lowercase();
746
747        // Common dependency lock files that contain package hashes and metadata
748        let lock_files = [
749            // JavaScript/Node.js
750            "package-lock.json",
751            "yarn.lock",
752            "pnpm-lock.yaml",
753            "bun.lockb", // Bun lock file (binary format)
754            // Python
755            "poetry.lock",
756            "pipfile.lock",
757            "pip-lock.txt",
758            "pdm.lock",
759            // Rust
760            "cargo.lock",
761            // Go
762            "go.sum",
763            "go.mod",
764            // Java
765            "gradle.lockfile",
766            "maven-dependency-plugin.log",
767            // Ruby
768            "gemfile.lock",
769            // PHP
770            "composer.lock",
771            // .NET
772            "packages.lock.json",
773            "paket.lock",
774            // Others
775            "mix.lock",       // Elixir
776            "pubspec.lock",   // Dart
777            "swift.resolved", // Swift
778            "flake.lock",     // Nix
779        ];
780
781        // Check if filename matches any lock file pattern
782        lock_files.iter().any(|&pattern| filename == pattern) ||
783        // Also check for common lock file patterns
784        filename.ends_with(".lock") ||
785        filename.ends_with("-lock.json") ||
786        filename.ends_with("-lock.yaml") ||
787        filename.ends_with("-lock.yml") ||
788        filename.ends_with(".lockb") ||  // Binary lock files
789        filename.contains("shrinkwrap") ||
790        filename.contains("lockfile")
791    }
792}
793
794impl FileMetadata {
795    /// Check if file is critical (must scan)
796    pub fn is_critical(&self) -> bool {
797        self.priority_hints.is_env_file
798            || self.priority_hints.is_secret_file
799            || self.extension.as_deref() == Some("pem")
800            || self.extension.as_deref() == Some("key")
801    }
802
803    /// Check if file is high priority
804    pub fn is_priority(&self) -> bool {
805        self.is_critical()
806            || self.priority_hints.is_config_file
807            || self.priority_hints.has_secret_keywords
808    }
809
810    /// Calculate priority score (higher = more important)
811    pub fn priority_score(&self) -> u32 {
812        let mut score: u32 = 0;
813
814        if self.priority_hints.is_env_file {
815            score += 1000;
816        }
817        if self.priority_hints.is_secret_file {
818            score += 900;
819        }
820        if self.priority_hints.is_config_file {
821            score += 500;
822        }
823        if self.priority_hints.has_secret_keywords {
824            score += 300;
825        }
826        if !self.is_gitignored {
827            score += 200;
828        }
829        if self.priority_hints.is_source_file {
830            score += 100;
831        }
832
833        // Penalize large files
834        if self.size > 1_000_000 {
835            score = score.saturating_sub(100);
836        }
837
838        score
839    }
840}
841
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Filesystem-only, fast-mode configuration with a 1 MiB size cap.
    fn fast_config(priority_extensions: Vec<String>) -> DiscoveryConfig {
        DiscoveryConfig {
            use_git: false,
            max_file_size: 1024 * 1024,
            priority_extensions,
            scan_mode: ScanMode::Fast,
        }
    }

    /// 100-byte, non-ignored metadata record for `path`.
    fn small_meta(path: &str, extension: Option<&str>, priority_hints: PriorityHints) -> FileMetadata {
        FileMetadata {
            path: PathBuf::from(path),
            size: 100,
            extension: extension.map(str::to_string),
            is_gitignored: false,
            modified: SystemTime::now(),
            priority_hints,
        }
    }

    #[test]
    fn test_file_priority_scoring() {
        let hints = PriorityHints {
            is_env_file: true,
            is_config_file: true,
            is_secret_file: true,
            is_source_file: false,
            has_secret_keywords: true,
        };
        let meta = small_meta(".env", Some("env"), hints);

        assert!(meta.is_critical());
        assert!(meta.is_priority());
        assert!(meta.priority_score() > 2000);
    }

    #[test]
    fn test_file_discovery() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::write(root.join(".env"), "SECRET=123").unwrap();
        fs::write(root.join("config.json"), "{}").unwrap();
        fs::create_dir(root.join("node_modules")).unwrap();
        fs::write(root.join("node_modules/test.js"), "code").unwrap();

        let discovery = FileDiscovery::new(fast_config(vec!["env".to_string()]));
        let files = discovery.discover_files(root).unwrap();

        // Should find .env and config.json but not node_modules/test.js
        assert_eq!(files.len(), 2);
        assert!(files.iter().any(|f| f.path.ends_with(".env")));
        assert!(files.iter().any(|f| f.path.ends_with("config.json")));
    }

    #[test]
    fn test_binary_file_detection() {
        let discovery = FileDiscovery::new(fast_config(vec![]));
        let binary_meta = small_meta("test.jpg", Some("jpg"), PriorityHints::default());

        assert!(discovery.is_binary_file(&binary_meta));
    }

    #[test]
    fn test_lock_file_detection() {
        let discovery = FileDiscovery::new(fast_config(vec![]));

        let lock_files = [
            "package-lock.json",
            "yarn.lock",
            "pnpm-lock.yaml",
            "bun.lockb",
            "cargo.lock",
            "go.sum",
        ];

        for lock_file in lock_files {
            let meta = small_meta(lock_file, None, PriorityHints::default());

            assert!(
                discovery.is_dependency_lock_file(&meta),
                "Failed to detect {}",
                lock_file
            );
        }
    }
}