syncable_cli/analyzer/security/turbo/
file_discovery.rs

1//! # File Discovery Module
2//! 
3//! Ultra-fast file discovery with git-aware filtering and smart prioritization.
4
5use std::path::{Path, PathBuf};
6use std::process::Command;
7use std::fs;
8use std::time::SystemTime;
9
10use ahash::AHashSet;
11use rayon::prelude::*;
12use walkdir::WalkDir;
13use log::{debug, trace};
14
15use super::{ScanMode, SecurityError};
16
/// File metadata for efficient filtering
///
/// One record per discovered file. Within this module it is produced by
/// `FileDiscovery::build_file_metadata` and consumed by the include/exclude
/// filters and the priority helpers on this type.
#[derive(Debug, Clone)]
pub struct FileMetadata {
    /// Path of the file as handed to discovery (git-listed paths are joined
    /// onto the project root).
    pub path: PathBuf,
    /// File size in bytes, taken from filesystem metadata.
    pub size: usize,
    /// Lowercased extension without the leading dot; `None` when the path has
    /// no extension or it is not valid UTF-8.
    pub extension: Option<String>,
    /// Outcome of the `git check-ignore` probe; `false` when the project root
    /// has no `.git` entry or the probe could not be run.
    pub is_gitignored: bool,
    /// Last-modification timestamp reported by the filesystem.
    pub modified: SystemTime,
    /// Name- and path-derived flags used for filtering and scoring.
    pub priority_hints: PriorityHints,
}
27
/// Priority hints for file scoring
///
/// All flags default to `false`. Within this module they are filled in by
/// `FileDiscovery::build_file_metadata` from the lowercased file name,
/// extension and path.
#[derive(Debug, Clone, Default)]
pub struct PriorityHints {
    /// File name starts with or ends with `.env`.
    pub is_env_file: bool,
    /// Extension or file name looks like configuration (json, yaml, toml, ...).
    pub is_config_file: bool,
    /// File name or path contains a secret-bearing marker (`.pem`, `credentials`, ...).
    pub is_secret_file: bool,
    /// Extension belongs to a recognised programming language.
    pub is_source_file: bool,
    /// File name contains one of the configured secret keywords.
    pub has_secret_keywords: bool,
}
37
/// Configuration for file discovery
#[derive(Debug, Clone)]
pub struct DiscoveryConfig {
    /// When `true` and the project root contains `.git`, files are listed via
    /// `git ls-files`; otherwise the directory tree is walked.
    pub use_git: bool,
    /// Files larger than this many bytes are skipped.
    pub max_file_size: usize,
    /// NOTE(review): not read anywhere in this module — presumably consumed by
    /// a later scoring/scanning stage; confirm against callers.
    pub priority_extensions: Vec<String>,
    /// Selects which directories are ignored and how aggressively non-critical
    /// files are filtered out.
    pub scan_mode: ScanMode,
}
46
/// High-performance file discovery
///
/// Holds the configuration plus lookup tables that are built once in
/// `FileDiscovery::new` and reused for every file.
pub struct FileDiscovery {
    /// Caller-supplied discovery settings.
    config: DiscoveryConfig,
    /// Directory names pruned during the filesystem walk (depends on scan mode).
    ignored_dirs: AHashSet<String>,
    /// Substrings that mark a file name as potentially secret-related.
    secret_keywords: Vec<&'static str>,
    /// Lowercase extensions treated as binary content.
    binary_extensions: AHashSet<&'static str>,
    /// Lowercase file names that are always skipped (compared against the whole name).
    excluded_filenames: AHashSet<&'static str>,
    /// Lowercase extensions of images, fonts and media.
    asset_extensions: AHashSet<&'static str>,
}
56
57impl FileDiscovery {
58    pub fn new(config: DiscoveryConfig) -> Self {
59        let ignored_dirs = Self::get_ignored_dirs(&config.scan_mode);
60        let secret_keywords = Self::get_secret_keywords();
61        let binary_extensions = Self::get_binary_extensions();
62        let excluded_filenames = Self::get_excluded_filenames();
63        let asset_extensions = Self::get_asset_extensions();
64        
65        Self {
66            config,
67            ignored_dirs,
68            secret_keywords,
69            binary_extensions,
70            excluded_filenames,
71            asset_extensions,
72        }
73    }
74    
75    /// Discover files with ultra-fast git-aware filtering
76    pub fn discover_files(&self, project_root: &Path) -> Result<Vec<FileMetadata>, SecurityError> {
77        let is_git_repo = project_root.join(".git").exists();
78        
79        if is_git_repo && self.config.use_git {
80            self.git_aware_discovery(project_root)
81        } else {
82            self.filesystem_discovery(project_root)
83        }
84    }
85    
86    /// Git-aware file discovery (fastest method)
87    fn git_aware_discovery(&self, project_root: &Path) -> Result<Vec<FileMetadata>, SecurityError> {
88        debug!("Using git-aware file discovery");
89        
90        // Get all tracked files using git ls-files
91        let tracked_files = self.get_git_tracked_files(project_root)?;
92        
93        // Get untracked files that might contain secrets
94        let untracked_files = self.get_untracked_secret_files(project_root)?;
95        
96        // Combine and process in parallel
97        let all_paths: Vec<PathBuf> = tracked_files.into_iter()
98            .chain(untracked_files)
99            .collect();
100        
101        // Process files in parallel to build metadata
102        let files: Vec<FileMetadata> = all_paths
103            .par_iter()
104            .filter_map(|path| self.build_file_metadata(path, project_root).ok())
105            .filter(|meta| self.should_include_file(meta))
106            .collect();
107        
108        Ok(files)
109    }
110    
111    /// Get tracked files from git
112    fn get_git_tracked_files(&self, project_root: &Path) -> Result<Vec<PathBuf>, SecurityError> {
113        let output = Command::new("git")
114            .args(&["ls-files", "-z"]) // -z for null-terminated output
115            .current_dir(project_root)
116            .output()
117            .map_err(|e| SecurityError::FileDiscovery(format!("Git ls-files failed: {}", e)))?;
118        
119        if !output.status.success() {
120            return Err(SecurityError::FileDiscovery("Git ls-files failed".to_string()));
121        }
122        
123        // Parse null-terminated paths
124        let paths: Vec<PathBuf> = output.stdout
125            .split(|&b| b == 0)
126            .filter(|path| !path.is_empty())
127            .filter_map(|path| std::str::from_utf8(path).ok())
128            .map(|path| project_root.join(path))
129            .collect();
130        
131        Ok(paths)
132    }
133    
134    /// Get untracked files that might contain secrets
135    fn get_untracked_secret_files(&self, project_root: &Path) -> Result<Vec<PathBuf>, SecurityError> {
136        // Common secret file patterns that might not be tracked
137        let secret_patterns = vec![
138            ".env*",
139            "*.key",
140            "*.pem",
141            "*.p12",
142            "*credentials*",
143            "*secret*",
144            "config/*.json",
145            "config/*.yml",
146        ];
147        
148        let mut untracked_files = Vec::new();
149        
150        for pattern in secret_patterns {
151            let output = Command::new("git")
152                .args(&["ls-files", "--others", "--exclude-standard", pattern])
153                .current_dir(project_root)
154                .output();
155            
156            if let Ok(output) = output {
157                if output.status.success() {
158                    let paths: Vec<PathBuf> = String::from_utf8_lossy(&output.stdout)
159                        .lines()
160                        .map(|line| project_root.join(line))
161                        .collect();
162                    untracked_files.extend(paths);
163                }
164            }
165        }
166        
167        Ok(untracked_files)
168    }
169    
170    /// Fallback filesystem discovery
171    fn filesystem_discovery(&self, project_root: &Path) -> Result<Vec<FileMetadata>, SecurityError> {
172        debug!("Using filesystem discovery");
173        
174        let walker = WalkDir::new(project_root)
175            .follow_links(false)
176            .max_depth(20)
177            .into_iter()
178            .filter_entry(|entry| {
179                // Skip ignored directories
180                if entry.file_type().is_dir() {
181                    let dir_name = entry.file_name().to_string_lossy();
182                    return !self.ignored_dirs.contains(dir_name.as_ref());
183                }
184                true
185            });
186        
187        let files: Vec<FileMetadata> = walker
188            .par_bridge()
189            .filter_map(|entry| entry.ok())
190            .filter(|entry| entry.file_type().is_file())
191            .filter_map(|entry| self.build_file_metadata(entry.path(), project_root).ok())
192            .filter(|meta| self.should_include_file(meta))
193            .collect();
194        
195        Ok(files)
196    }
197    
198    /// Build file metadata with priority hints
199    fn build_file_metadata(&self, path: &Path, project_root: &Path) -> Result<FileMetadata, std::io::Error> {
200        let metadata = fs::metadata(path)?;
201        let size = metadata.len() as usize;
202        let modified = metadata.modified()?;
203        
204        let extension = path.extension()
205            .and_then(|ext| ext.to_str())
206            .map(|s| s.to_lowercase());
207        
208        let file_name = path.file_name()
209            .and_then(|n| n.to_str())
210            .unwrap_or("");
211        
212        let file_name_lower = file_name.to_lowercase();
213        
214        // Check gitignore status efficiently
215        let is_gitignored = if project_root.join(".git").exists() {
216            self.check_gitignore_batch(path, project_root)
217        } else {
218            false
219        };
220        
221        // Build priority hints
222        let priority_hints = PriorityHints {
223            is_env_file: file_name_lower.starts_with(".env") || file_name_lower.ends_with(".env"),
224            is_config_file: self.is_config_file(&file_name_lower, &extension),
225            is_secret_file: self.is_secret_file(&file_name_lower, path),
226            is_source_file: self.is_source_file(&extension),
227            has_secret_keywords: self.has_secret_keywords(&file_name_lower),
228        };
229        
230        Ok(FileMetadata {
231            path: path.to_path_buf(),
232            size,
233            extension,
234            is_gitignored,
235            modified,
236            priority_hints,
237        })
238    }
239    
240    /// Batch check gitignore status
241    fn check_gitignore_batch(&self, path: &Path, project_root: &Path) -> bool {
242        // Quick check using git check-ignore
243        let output = Command::new("git")
244            .args(&["check-ignore", path.to_str().unwrap_or("")])
245            .current_dir(project_root)
246            .output();
247        
248        match output {
249            Ok(output) => output.status.success(),
250            Err(_) => false,
251        }
252    }
253    
254    /// Check if file should be included based on filters
255    fn should_include_file(&self, meta: &FileMetadata) -> bool {
256        // Size filter
257        if meta.size > self.config.max_file_size {
258            trace!("Skipping large file: {} ({} bytes)", meta.path.display(), meta.size);
259            return false;
260        }
261        
262        // Enhanced binary file detection
263        if self.is_binary_file(meta) {
264            trace!("Skipping binary file: {}", meta.path.display());
265            return false;
266        }
267        
268        // Asset file detection (images, fonts, media)
269        if self.is_asset_file(meta) {
270            trace!("Skipping asset file: {}", meta.path.display());
271            return false;
272        }
273        
274        // Exclude files that are unlikely to contain real secrets
275        if self.should_exclude_from_security_scan(meta) {
276            trace!("Excluding from security scan: {}", meta.path.display());
277            return false;
278        }
279        
280        // Critical files always included
281        if meta.is_critical() {
282            return true;
283        }
284        
285        // Scan mode specific filtering
286        match self.config.scan_mode {
287            ScanMode::Lightning => {
288                // Only critical files (already handled above)
289                false
290            }
291            ScanMode::Fast => {
292                // Priority files or small source files
293                meta.is_priority() || (meta.priority_hints.is_source_file && meta.size < 50_000)
294            }
295            _ => true, // Include all for other modes
296        }
297    }
298    
299    /// Enhanced binary file detection
300    fn is_binary_file(&self, meta: &FileMetadata) -> bool {
301        if let Some(ext) = &meta.extension {
302            if self.binary_extensions.contains(ext.as_str()) {
303                return true;
304            }
305        }
306        
307        // Check filename patterns
308        let filename = meta.path.file_name()
309            .and_then(|n| n.to_str())
310            .unwrap_or("")
311            .to_lowercase();
312        
313        if self.excluded_filenames.contains(filename.as_str()) {
314            return true;
315        }
316        
317        false
318    }
319    
320    /// Check if file is an asset (images, fonts, media)
321    fn is_asset_file(&self, meta: &FileMetadata) -> bool {
322        if let Some(ext) = &meta.extension {
323            if self.asset_extensions.contains(ext.as_str()) {
324                return true;
325            }
326        }
327        
328        // Check for asset directories
329        let path_str = meta.path.to_string_lossy().to_lowercase();
330        let asset_dirs = [
331            "/assets/", "/static/", "/public/", "/images/", "/img/", 
332            "/media/", "/fonts/", "/icons/", "/graphics/", "/pictures/"
333        ];
334        
335        asset_dirs.iter().any(|&dir| path_str.contains(dir))
336    }
337    
338    /// Check if file should be excluded from security scanning
339    fn should_exclude_from_security_scan(&self, meta: &FileMetadata) -> bool {
340        let path_str = meta.path.to_string_lossy().to_lowercase();
341        
342        // DEPENDENCY LOCK FILES - These contain package hashes/metadata, not secrets
343        if self.is_dependency_lock_file(meta) {
344            return true;
345        }
346        
347        // SVG files often contain base64 encoded graphics that trigger false positives
348        if meta.extension.as_deref() == Some("svg") {
349            return true;
350        }
351        
352        // Minified and bundled files
353        if self.is_minified_or_bundled_file(meta) {
354            return true;
355        }
356        
357        // Documentation and non-code files that rarely contain real secrets
358        let exclude_patterns = [
359            ".md", ".txt", ".rst", ".adoc", ".asciidoc",
360            "readme", "changelog", "license", "todo",
361            "roadmap", "contributing", "authors",
362            // Test files (often contain fake/example data)
363            "/test/", "/tests/", "/spec/", "/specs/",
364            "__test__", "__spec__", ".test.", ".spec.",
365            "_test.", "_spec.", "fixtures", "mocks", "examples",
366            // Documentation directories
367            "/docs/", "/doc/", "/documentation/",
368            // Framework/library detection files (they contain patterns but not secrets)
369            "frameworks/", "detector", "rules", "patterns",
370            // Build artifacts and generated files
371            "target/", "build/", "dist/", ".next/", "coverage/",
372            ".nuxt/", ".output/", ".vercel/", ".netlify/",
373            // IDE and editor files
374            ".vscode/", ".idea/", ".vs/", "*.swp", "*.swo",
375            // OS files
376            ".ds_store", "thumbs.db", "desktop.ini",
377        ];
378        
379        // Check patterns
380        if exclude_patterns.iter().any(|&pattern| path_str.contains(pattern)) {
381            return true;
382        }
383        
384        // Documentation file extensions
385        if let Some(ext) = &meta.extension {
386            let doc_extensions = ["md", "txt", "rst", "adoc", "asciidoc", "rtf"];
387            if doc_extensions.contains(&ext.as_str()) {
388                return true;
389            }
390        }
391        
392        // Check if filename suggests it's documentation, examples, or code generation
393        let filename = meta.path.file_name()
394            .and_then(|n| n.to_str())
395            .unwrap_or("")
396            .to_lowercase();
397        
398        let doc_filenames = [
399            "readme", "changelog", "license", "authors", "contributing",
400            "roadmap", "todo", "examples", "demo", "sample", "fixture",
401            // Code generation and API example files
402            "apicodedialog", "codedialog", "codeexample", "apiexample",
403            "codesnippet", "snippets", "templates", "codegenerator",
404            "apitool", "playground", "sandbox",
405        ];
406        
407        if doc_filenames.iter().any(|&name| filename.contains(name)) {
408            return true;
409        }
410        
411        false
412    }
413    
414    /// Check if file is minified or bundled
415    fn is_minified_or_bundled_file(&self, meta: &FileMetadata) -> bool {
416        let filename = meta.path.file_name()
417            .and_then(|n| n.to_str())
418            .unwrap_or("")
419            .to_lowercase();
420        
421        // Minified file patterns
422        let minified_patterns = [
423            ".min.", ".bundle.", ".chunk.", ".vendor.",
424            "-min.", "-bundle.", "-chunk.", "-vendor.",
425            "_min.", "_bundle.", "_chunk.", "_vendor.",
426        ];
427        
428        minified_patterns.iter().any(|&pattern| filename.contains(pattern))
429    }
430    
431    /// Get ignored directories based on scan mode
432    fn get_ignored_dirs(scan_mode: &ScanMode) -> AHashSet<String> {
433        let mut dirs = AHashSet::new();
434        
435        // Always ignore these
436        let always_ignore = vec![
437            ".git", "node_modules", "target", "build", "dist", ".next",
438            "coverage", "__pycache__", ".pytest_cache", ".mypy_cache",
439            "vendor", "packages", ".bundle", "bower_components",
440            ".nuxt", ".output", ".vercel", ".netlify", ".vscode", ".idea",
441            ".venv", "venv", // Python virtual environments
442        ];
443        
444        for dir in always_ignore {
445            dirs.insert(dir.to_string());
446        }
447        
448        // Additional ignores for faster modes
449        if matches!(scan_mode, ScanMode::Lightning | ScanMode::Fast) {
450            let fast_ignore = vec!["test", "tests", "spec", "specs", "docs", "documentation"];
451            for dir in fast_ignore {
452                dirs.insert(dir.to_string());
453            }
454        }
455        
456        dirs
457    }
458    
459    /// Get comprehensive binary file extensions
460    fn get_binary_extensions() -> AHashSet<&'static str> {
461        let mut extensions = AHashSet::new();
462        
463        // Executables and libraries
464        let binary_exts = [
465            "exe", "dll", "so", "dylib", "lib", "a", "o", "obj",
466            "bin", "com", "scr", "msi", "deb", "rpm", "pkg",
467            // Archives
468            "zip", "tar", "gz", "bz2", "xz", "7z", "rar", "ace",
469            "cab", "dmg", "iso", "img",
470            // Media files
471            "mp3", "mp4", "avi", "mov", "wmv", "flv", "mkv", "webm",
472            "wav", "flac", "ogg", "aac", "m4a", "wma",
473            // Images (will be handled separately as assets)
474            "jpg", "jpeg", "png", "gif", "bmp", "tiff", "tga", "webp",
475            "ico", "cur", "psd", "ai", "eps", "raw", "cr2", "nef",
476            // Fonts
477            "ttf", "otf", "woff", "woff2", "eot",
478            // Documents
479            "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
480            "odt", "ods", "odp", "rtf",
481            // Databases
482            "db", "sqlite", "sqlite3", "mdb", "accdb", "wt",
483            // Other binary formats
484            "pyc", "pyo", "class", "jar", "war", "ear", "cer", "jks",
485        ];
486        
487        for ext in binary_exts {
488            extensions.insert(ext);
489        }
490        
491        extensions
492    }
493    
494    /// Get asset file extensions (images, media, fonts)
495    fn get_asset_extensions() -> AHashSet<&'static str> {
496        let mut extensions = AHashSet::new();
497        
498        let asset_exts = [
499            // Images
500            "jpg", "jpeg", "png", "gif", "bmp", "tiff", "tga", "webp",
501            "ico", "cur", "psd", "ai", "eps", "raw", "cr2", "nef", "svg",
502            // Fonts
503            "ttf", "otf", "woff", "woff2", "eot",
504            // Media
505            "mp3", "mp4", "avi", "mov", "wmv", "flv", "mkv", "webm",
506            "wav", "flac", "ogg", "aac", "m4a", "wma",
507        ];
508        
509        for ext in asset_exts {
510            extensions.insert(ext);
511        }
512        
513        extensions
514    }
515    
516    /// Get filenames that should be excluded
517    fn get_excluded_filenames() -> AHashSet<&'static str> {
518        let mut filenames = AHashSet::new();
519        
520        let excluded = [
521            // OS files
522            ".ds_store", "thumbs.db", "desktop.ini", "folder.ico",
523            // Editor files
524            ".gitkeep", ".keep", ".placeholder",
525            // Temporary files
526            ".tmp", ".temp", ".swp", ".swo", ".bak", ".backup",
527        ];
528        
529        for filename in excluded {
530            filenames.insert(filename);
531        }
532        
533        filenames
534    }
535    
536    /// Get secret keywords for detection
537    fn get_secret_keywords() -> Vec<&'static str> {
538        vec![
539            "secret", "key", "token", "password", "credential",
540            "auth", "api", "private", "access", "bearer",
541        ]
542    }
543    
544    fn is_config_file(&self, name: &str, extension: &Option<String>) -> bool {
545        let config_extensions = ["json", "yml", "yaml", "toml", "ini", "conf", "config", "xml"];
546        let config_names = ["config", "settings", "configuration", ".env"];
547        
548        if let Some(ext) = extension {
549            if config_extensions.contains(&ext.as_str()) {
550                return true;
551            }
552        }
553        
554        config_names.iter().any(|&n| name.contains(n))
555    }
556    
557    fn is_secret_file(&self, name: &str, path: &Path) -> bool {
558        let secret_patterns = [
559            ".env", ".key", ".pem", ".p12", ".pfx",
560            "credentials", "secret", "private", "cert",
561        ];
562        
563        // Check filename
564        if secret_patterns.iter().any(|&p| name.contains(p)) {
565            return true;
566        }
567        
568        // Check path components
569        let path_str = path.to_string_lossy().to_lowercase();
570        secret_patterns.iter().any(|&p| path_str.contains(p))
571    }
572    
573    fn is_source_file(&self, extension: &Option<String>) -> bool {
574        if let Some(ext) = extension {
575            let source_extensions = [
576                "js", "jsx", "ts", "tsx", "py", "java", "kt", "go",
577                "rs", "rb", "php", "cs", "cpp", "c", "h", "swift",
578                "scala", "clj", "ex", "exs",
579            ];
580            source_extensions.contains(&ext.as_str())
581        } else {
582            false
583        }
584    }
585    
586    fn has_secret_keywords(&self, name: &str) -> bool {
587        self.secret_keywords.iter().any(|&keyword| name.contains(keyword))
588    }
589    
590    /// Enhanced dependency lock file detection
591    fn is_dependency_lock_file(&self, meta: &FileMetadata) -> bool {
592        let filename = meta.path.file_name()
593            .and_then(|n| n.to_str())
594            .unwrap_or("")
595            .to_lowercase();
596        
597        // Common dependency lock files that contain package hashes and metadata
598        let lock_files = [
599            // JavaScript/Node.js
600            "package-lock.json",
601            "yarn.lock", 
602            "pnpm-lock.yaml",
603            "bun.lockb",  // Bun lock file (binary format)
604            // Python
605            "poetry.lock",
606            "pipfile.lock",
607            "pip-lock.txt",
608            "pdm.lock",
609            // Rust
610            "cargo.lock",
611            // Go
612            "go.sum",
613            "go.mod",
614            // Java
615            "gradle.lockfile",
616            "maven-dependency-plugin.log",
617            // Ruby
618            "gemfile.lock",
619            // PHP
620            "composer.lock",
621            // .NET
622            "packages.lock.json",
623            "paket.lock",
624            // Others
625            "mix.lock",  // Elixir
626            "pubspec.lock",  // Dart
627            "swift.resolved", // Swift
628            "flake.lock", // Nix
629        ];
630        
631        // Check if filename matches any lock file pattern
632        lock_files.iter().any(|&pattern| filename == pattern) ||
633        // Also check for common lock file patterns
634        filename.ends_with(".lock") ||
635        filename.ends_with("-lock.json") ||
636        filename.ends_with("-lock.yaml") ||
637        filename.ends_with("-lock.yml") ||
638        filename.ends_with(".lockb") ||  // Binary lock files
639        filename.contains("shrinkwrap") ||
640        filename.contains("lockfile")
641    }
642}
643
644impl FileMetadata {
645    /// Check if file is critical (must scan)
646    pub fn is_critical(&self) -> bool {
647        self.priority_hints.is_env_file || 
648        self.priority_hints.is_secret_file ||
649        self.extension.as_deref() == Some("pem") ||
650        self.extension.as_deref() == Some("key")
651    }
652    
653    /// Check if file is high priority
654    pub fn is_priority(&self) -> bool {
655        self.is_critical() ||
656        self.priority_hints.is_config_file ||
657        self.priority_hints.has_secret_keywords
658    }
659    
660    /// Calculate priority score (higher = more important)
661    pub fn priority_score(&self) -> u32 {
662        let mut score: u32 = 0;
663        
664        if self.priority_hints.is_env_file { score += 1000; }
665        if self.priority_hints.is_secret_file { score += 900; }
666        if self.priority_hints.is_config_file { score += 500; }
667        if self.priority_hints.has_secret_keywords { score += 300; }
668        if !self.is_gitignored { score += 200; }
669        if self.priority_hints.is_source_file { score += 100; }
670        
671        // Penalize large files
672        if self.size > 1_000_000 { score = score.saturating_sub(100); }
673        
674        score
675    }
676}
677
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Git-disabled, fast-mode configuration with a 1 MiB size cap.
    fn fast_config(priority_extensions: Vec<String>) -> DiscoveryConfig {
        DiscoveryConfig {
            use_git: false,
            max_file_size: 1024 * 1024,
            priority_extensions,
            scan_mode: ScanMode::Fast,
        }
    }

    /// 100-byte, non-ignored metadata record with default priority hints.
    fn bare_metadata(name: &str, extension: Option<&str>) -> FileMetadata {
        FileMetadata {
            path: PathBuf::from(name),
            size: 100,
            extension: extension.map(|ext| ext.to_string()),
            is_gitignored: false,
            modified: SystemTime::now(),
            priority_hints: PriorityHints::default(),
        }
    }

    #[test]
    fn test_file_priority_scoring() {
        let mut meta = bare_metadata(".env", Some("env"));
        meta.priority_hints = PriorityHints {
            is_env_file: true,
            is_config_file: true,
            is_secret_file: true,
            is_source_file: false,
            has_secret_keywords: true,
        };

        assert!(meta.is_critical());
        assert!(meta.is_priority());
        assert!(meta.priority_score() > 2000);
    }

    #[test]
    fn test_file_discovery() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        fs::write(root.join(".env"), "SECRET=123").unwrap();
        fs::write(root.join("config.json"), "{}").unwrap();
        fs::create_dir(root.join("node_modules")).unwrap();
        fs::write(root.join("node_modules/test.js"), "code").unwrap();

        let discovery = FileDiscovery::new(fast_config(vec!["env".to_string()]));
        let files = discovery.discover_files(root).unwrap();

        // Should find .env and config.json but not node_modules/test.js
        assert_eq!(files.len(), 2);
        assert!(files.iter().any(|f| f.path.ends_with(".env")));
        assert!(files.iter().any(|f| f.path.ends_with("config.json")));
    }

    #[test]
    fn test_binary_file_detection() {
        let discovery = FileDiscovery::new(fast_config(vec![]));
        let binary_meta = bare_metadata("test.jpg", Some("jpg"));

        assert!(discovery.is_binary_file(&binary_meta));
    }

    #[test]
    fn test_lock_file_detection() {
        let discovery = FileDiscovery::new(fast_config(vec![]));

        let lock_files = [
            "package-lock.json",
            "yarn.lock",
            "pnpm-lock.yaml",
            "bun.lockb",
            "cargo.lock",
            "go.sum",
        ];

        for lock_file in lock_files {
            // Detection relies on the file name only, so no extension is supplied.
            let meta = bare_metadata(lock_file, None);

            assert!(discovery.is_dependency_lock_file(&meta), "Failed to detect {}", lock_file);
        }
    }
}