syncable_cli/analyzer/security/turbo/
file_discovery.rs

1//! # File Discovery Module
2//! 
3//! Ultra-fast file discovery with git-aware filtering and smart prioritization.
4
5use std::path::{Path, PathBuf};
6use std::process::Command;
7use std::fs;
8use std::time::SystemTime;
9
10use ahash::AHashSet;
11use rayon::prelude::*;
12use walkdir::WalkDir;
13use log::{debug, trace};
14
15use super::{ScanMode, SecurityError};
16
17/// File metadata for efficient filtering
18#[derive(Debug, Clone)]
19pub struct FileMetadata {
20    pub path: PathBuf,
21    pub size: usize,
22    pub extension: Option<String>,
23    pub is_gitignored: bool,
24    pub modified: SystemTime,
25    pub priority_hints: PriorityHints,
26}
27
/// Boolean signals derived from a file's name and extension.
///
/// Every flag defaults to `false`.
#[derive(Clone, Debug, Default)]
pub struct PriorityHints {
    /// File name starts or ends with `.env`.
    pub is_env_file: bool,
    /// Extension or name looks like a configuration file.
    pub is_config_file: bool,
    /// Name or path contains a secret-file marker such as `.pem` or `credentials`.
    pub is_secret_file: bool,
    /// Extension belongs to a recognised programming language.
    pub is_source_file: bool,
    /// File name contains one of the secret keywords.
    pub has_secret_keywords: bool,
}
37
38/// Configuration for file discovery
39#[derive(Debug, Clone)]
40pub struct DiscoveryConfig {
41    pub use_git: bool,
42    pub max_file_size: usize,
43    pub priority_extensions: Vec<String>,
44    pub scan_mode: ScanMode,
45}
46
47/// High-performance file discovery
48pub struct FileDiscovery {
49    config: DiscoveryConfig,
50    ignored_dirs: AHashSet<String>,
51    secret_keywords: Vec<&'static str>,
52    binary_extensions: AHashSet<&'static str>,
53    excluded_filenames: AHashSet<&'static str>,
54    asset_extensions: AHashSet<&'static str>,
55}
56
57impl FileDiscovery {
58    pub fn new(config: DiscoveryConfig) -> Self {
59        let ignored_dirs = Self::get_ignored_dirs(&config.scan_mode);
60        let secret_keywords = Self::get_secret_keywords();
61        let binary_extensions = Self::get_binary_extensions();
62        let excluded_filenames = Self::get_excluded_filenames();
63        let asset_extensions = Self::get_asset_extensions();
64        
65        Self {
66            config,
67            ignored_dirs,
68            secret_keywords,
69            binary_extensions,
70            excluded_filenames,
71            asset_extensions,
72        }
73    }
74    
75    /// Discover files with ultra-fast git-aware filtering
76    pub fn discover_files(&self, project_root: &Path) -> Result<Vec<FileMetadata>, SecurityError> {
77        let is_git_repo = project_root.join(".git").exists();
78        
79        if is_git_repo && self.config.use_git {
80            self.git_aware_discovery(project_root)
81        } else {
82            self.filesystem_discovery(project_root)
83        }
84    }
85    
86    /// Git-aware file discovery (fastest method)
87    fn git_aware_discovery(&self, project_root: &Path) -> Result<Vec<FileMetadata>, SecurityError> {
88        debug!("Using git-aware file discovery");
89        
90        // Get all tracked files using git ls-files
91        let tracked_files = self.get_git_tracked_files(project_root)?;
92        
93        // Get untracked files that might contain secrets
94        let untracked_files = self.get_untracked_secret_files(project_root)?;
95        
96        // Combine and process in parallel
97        let all_paths: Vec<PathBuf> = tracked_files.into_iter()
98            .chain(untracked_files)
99            .collect();
100        
101        // Process files in parallel to build metadata
102        let files: Vec<FileMetadata> = all_paths
103            .par_iter()
104            .filter_map(|path| self.build_file_metadata(path, project_root).ok())
105            .filter(|meta| self.should_include_file(meta))
106            .collect();
107        
108        Ok(files)
109    }
110    
111    /// Get tracked files from git
112    fn get_git_tracked_files(&self, project_root: &Path) -> Result<Vec<PathBuf>, SecurityError> {
113        let output = Command::new("git")
114            .args(&["ls-files", "-z"]) // -z for null-terminated output
115            .current_dir(project_root)
116            .output()
117            .map_err(|e| SecurityError::FileDiscovery(format!("Git ls-files failed: {}", e)))?;
118        
119        if !output.status.success() {
120            return Err(SecurityError::FileDiscovery("Git ls-files failed".to_string()));
121        }
122        
123        // Parse null-terminated paths
124        let paths: Vec<PathBuf> = output.stdout
125            .split(|&b| b == 0)
126            .filter(|path| !path.is_empty())
127            .filter_map(|path| std::str::from_utf8(path).ok())
128            .map(|path| project_root.join(path))
129            .collect();
130        
131        Ok(paths)
132    }
133    
134    /// Get untracked files that might contain secrets
135    fn get_untracked_secret_files(&self, project_root: &Path) -> Result<Vec<PathBuf>, SecurityError> {
136        // Common secret file patterns that might not be tracked
137        let secret_patterns = vec![
138            ".env*",
139            "*.key",
140            "*.pem",
141            "*.p12",
142            "*credentials*",
143            "*secret*",
144            "config/*.json",
145            "config/*.yml",
146        ];
147        
148        let mut untracked_files = Vec::new();
149        
150        for pattern in secret_patterns {
151            let output = Command::new("git")
152                .args(&["ls-files", "--others", "--exclude-standard", pattern])
153                .current_dir(project_root)
154                .output();
155            
156            if let Ok(output) = output {
157                if output.status.success() {
158                    let paths: Vec<PathBuf> = String::from_utf8_lossy(&output.stdout)
159                        .lines()
160                        .map(|line| project_root.join(line))
161                        .collect();
162                    untracked_files.extend(paths);
163                }
164            }
165        }
166        
167        Ok(untracked_files)
168    }
169    
170    /// Fallback filesystem discovery
171    fn filesystem_discovery(&self, project_root: &Path) -> Result<Vec<FileMetadata>, SecurityError> {
172        debug!("Using filesystem discovery");
173        
174        let walker = WalkDir::new(project_root)
175            .follow_links(false)
176            .max_depth(20)
177            .into_iter()
178            .filter_entry(|entry| {
179                // Skip ignored directories
180                if entry.file_type().is_dir() {
181                    let dir_name = entry.file_name().to_string_lossy();
182                    return !self.ignored_dirs.contains(dir_name.as_ref());
183                }
184                true
185            });
186        
187        let files: Vec<FileMetadata> = walker
188            .par_bridge()
189            .filter_map(|entry| entry.ok())
190            .filter(|entry| entry.file_type().is_file())
191            .filter_map(|entry| self.build_file_metadata(entry.path(), project_root).ok())
192            .filter(|meta| self.should_include_file(meta))
193            .collect();
194        
195        Ok(files)
196    }
197    
198    /// Build file metadata with priority hints
199    fn build_file_metadata(&self, path: &Path, project_root: &Path) -> Result<FileMetadata, std::io::Error> {
200        let metadata = fs::metadata(path)?;
201        let size = metadata.len() as usize;
202        let modified = metadata.modified()?;
203        
204        let extension = path.extension()
205            .and_then(|ext| ext.to_str())
206            .map(|s| s.to_lowercase());
207        
208        let file_name = path.file_name()
209            .and_then(|n| n.to_str())
210            .unwrap_or("");
211        
212        let file_name_lower = file_name.to_lowercase();
213        
214        // Check gitignore status efficiently
215        let is_gitignored = if project_root.join(".git").exists() {
216            self.check_gitignore_batch(path, project_root)
217        } else {
218            false
219        };
220        
221        // Build priority hints
222        let priority_hints = PriorityHints {
223            is_env_file: file_name_lower.starts_with(".env") || file_name_lower.ends_with(".env"),
224            is_config_file: self.is_config_file(&file_name_lower, &extension),
225            is_secret_file: self.is_secret_file(&file_name_lower, path),
226            is_source_file: self.is_source_file(&extension),
227            has_secret_keywords: self.has_secret_keywords(&file_name_lower),
228        };
229        
230        Ok(FileMetadata {
231            path: path.to_path_buf(),
232            size,
233            extension,
234            is_gitignored,
235            modified,
236            priority_hints,
237        })
238    }
239    
240    /// Batch check gitignore status
241    fn check_gitignore_batch(&self, path: &Path, project_root: &Path) -> bool {
242        // Quick check using git check-ignore
243        let output = Command::new("git")
244            .args(&["check-ignore", path.to_str().unwrap_or("")])
245            .current_dir(project_root)
246            .output();
247        
248        match output {
249            Ok(output) => output.status.success(),
250            Err(_) => false,
251        }
252    }
253    
254    /// Check if file should be included based on filters
255    fn should_include_file(&self, meta: &FileMetadata) -> bool {
256        // Size filter
257        if meta.size > self.config.max_file_size {
258            trace!("Skipping large file: {} ({} bytes)", meta.path.display(), meta.size);
259            return false;
260        }
261        
262        // Enhanced binary file detection
263        if self.is_binary_file(meta) {
264            trace!("Skipping binary file: {}", meta.path.display());
265            return false;
266        }
267        
268        // Asset file detection (images, fonts, media)
269        if self.is_asset_file(meta) {
270            trace!("Skipping asset file: {}", meta.path.display());
271            return false;
272        }
273        
274        // Exclude files that are unlikely to contain real secrets
275        if self.should_exclude_from_security_scan(meta) {
276            trace!("Excluding from security scan: {}", meta.path.display());
277            return false;
278        }
279        
280        // Critical files always included
281        if meta.is_critical() {
282            return true;
283        }
284        
285        // Scan mode specific filtering
286        match self.config.scan_mode {
287            ScanMode::Lightning => {
288                // Only critical files (already handled above)
289                false
290            }
291            ScanMode::Fast => {
292                // Priority files or small source files
293                meta.is_priority() || (meta.priority_hints.is_source_file && meta.size < 50_000)
294            }
295            _ => true, // Include all for other modes
296        }
297    }
298    
299    /// Enhanced binary file detection
300    fn is_binary_file(&self, meta: &FileMetadata) -> bool {
301        if let Some(ext) = &meta.extension {
302            if self.binary_extensions.contains(ext.as_str()) {
303                return true;
304            }
305        }
306        
307        // Check filename patterns
308        let filename = meta.path.file_name()
309            .and_then(|n| n.to_str())
310            .unwrap_or("")
311            .to_lowercase();
312        
313        if self.excluded_filenames.contains(filename.as_str()) {
314            return true;
315        }
316        
317        false
318    }
319    
320    /// Check if file is an asset (images, fonts, media)
321    fn is_asset_file(&self, meta: &FileMetadata) -> bool {
322        if let Some(ext) = &meta.extension {
323            if self.asset_extensions.contains(ext.as_str()) {
324                return true;
325            }
326        }
327        
328        // Check for asset directories
329        let path_str = meta.path.to_string_lossy().to_lowercase();
330        let asset_dirs = [
331            "/assets/", "/static/", "/public/", "/images/", "/img/", 
332            "/media/", "/fonts/", "/icons/", "/graphics/", "/pictures/"
333        ];
334        
335        asset_dirs.iter().any(|&dir| path_str.contains(dir))
336    }
337    
338    /// Check if file should be excluded from security scanning
339    fn should_exclude_from_security_scan(&self, meta: &FileMetadata) -> bool {
340        let path_str = meta.path.to_string_lossy().to_lowercase();
341        
342        // DEPENDENCY LOCK FILES - These contain package hashes/metadata, not secrets
343        if self.is_dependency_lock_file(meta) {
344            return true;
345        }
346        
347        // SVG files often contain base64 encoded graphics that trigger false positives
348        if meta.extension.as_deref() == Some("svg") {
349            return true;
350        }
351        
352        // Minified and bundled files
353        if self.is_minified_or_bundled_file(meta) {
354            return true;
355        }
356        
357        // Documentation and non-code files that rarely contain real secrets
358        let exclude_patterns = [
359            ".md", ".txt", ".rst", ".adoc", ".asciidoc",
360            "readme", "changelog", "license", "todo",
361            "roadmap", "contributing", "authors",
362            // Test files (often contain fake/example data)
363            "/test/", "/tests/", "/spec/", "/specs/",
364            "__test__", "__spec__", ".test.", ".spec.",
365            "_test.", "_spec.", "fixtures", "mocks", "examples",
366            // Documentation directories
367            "/docs/", "/doc/", "/documentation/",
368            // Framework/library detection files (they contain patterns but not secrets)
369            "frameworks/", "detector", "rules", "patterns",
370            // Build artifacts and generated files
371            "target/", "build/", "dist/", ".next/", "coverage/",
372            ".nuxt/", ".output/", ".vercel/", ".netlify/",
373            // IDE and editor files
374            ".vscode/", ".idea/", ".vs/", "*.swp", "*.swo",
375            // OS files
376            ".ds_store", "thumbs.db", "desktop.ini",
377        ];
378        
379        // Check patterns
380        if exclude_patterns.iter().any(|&pattern| path_str.contains(pattern)) {
381            return true;
382        }
383        
384        // Documentation file extensions
385        if let Some(ext) = &meta.extension {
386            let doc_extensions = ["md", "txt", "rst", "adoc", "asciidoc", "rtf"];
387            if doc_extensions.contains(&ext.as_str()) {
388                return true;
389            }
390        }
391        
392        // Check if filename suggests it's documentation, examples, or code generation
393        let filename = meta.path.file_name()
394            .and_then(|n| n.to_str())
395            .unwrap_or("")
396            .to_lowercase();
397        
398        let doc_filenames = [
399            "readme", "changelog", "license", "authors", "contributing",
400            "roadmap", "todo", "examples", "demo", "sample", "fixture",
401            // Code generation and API example files
402            "apicodedialog", "codedialog", "codeexample", "apiexample",
403            "codesnippet", "snippets", "templates", "codegenerator",
404            "apitool", "playground", "sandbox",
405        ];
406        
407        if doc_filenames.iter().any(|&name| filename.contains(name)) {
408            return true;
409        }
410        
411        false
412    }
413    
414    /// Check if file is minified or bundled
415    fn is_minified_or_bundled_file(&self, meta: &FileMetadata) -> bool {
416        let filename = meta.path.file_name()
417            .and_then(|n| n.to_str())
418            .unwrap_or("")
419            .to_lowercase();
420        
421        // Minified file patterns
422        let minified_patterns = [
423            ".min.", ".bundle.", ".chunk.", ".vendor.",
424            "-min.", "-bundle.", "-chunk.", "-vendor.",
425            "_min.", "_bundle.", "_chunk.", "_vendor.",
426        ];
427        
428        minified_patterns.iter().any(|&pattern| filename.contains(pattern))
429    }
430    
431    /// Get ignored directories based on scan mode
432    fn get_ignored_dirs(scan_mode: &ScanMode) -> AHashSet<String> {
433        let mut dirs = AHashSet::new();
434        
435        // Always ignore these
436        let always_ignore = vec![
437            ".git", "node_modules", "target", "build", "dist", ".next",
438            "coverage", "__pycache__", ".pytest_cache", ".mypy_cache",
439            "vendor", "packages", ".bundle", "bower_components",
440            ".nuxt", ".output", ".vercel", ".netlify", ".vscode", ".idea",
441        ];
442        
443        for dir in always_ignore {
444            dirs.insert(dir.to_string());
445        }
446        
447        // Additional ignores for faster modes
448        if matches!(scan_mode, ScanMode::Lightning | ScanMode::Fast) {
449            let fast_ignore = vec!["test", "tests", "spec", "specs", "docs", "documentation"];
450            for dir in fast_ignore {
451                dirs.insert(dir.to_string());
452            }
453        }
454        
455        dirs
456    }
457    
458    /// Get comprehensive binary file extensions
459    fn get_binary_extensions() -> AHashSet<&'static str> {
460        let mut extensions = AHashSet::new();
461        
462        // Executables and libraries
463        let binary_exts = [
464            "exe", "dll", "so", "dylib", "lib", "a", "o", "obj",
465            "bin", "com", "scr", "msi", "deb", "rpm", "pkg",
466            // Archives
467            "zip", "tar", "gz", "bz2", "xz", "7z", "rar", "ace",
468            "cab", "dmg", "iso", "img",
469            // Media files
470            "mp3", "mp4", "avi", "mov", "wmv", "flv", "mkv", "webm",
471            "wav", "flac", "ogg", "aac", "m4a", "wma",
472            // Images (will be handled separately as assets)
473            "jpg", "jpeg", "png", "gif", "bmp", "tiff", "tga", "webp",
474            "ico", "cur", "psd", "ai", "eps", "raw", "cr2", "nef",
475            // Fonts
476            "ttf", "otf", "woff", "woff2", "eot",
477            // Documents
478            "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
479            "odt", "ods", "odp", "rtf",
480            // Databases
481            "db", "sqlite", "sqlite3", "mdb", "accdb",
482            // Other binary formats
483            "pyc", "pyo", "class", "jar", "war", "ear",
484        ];
485        
486        for ext in binary_exts {
487            extensions.insert(ext);
488        }
489        
490        extensions
491    }
492    
493    /// Get asset file extensions (images, media, fonts)
494    fn get_asset_extensions() -> AHashSet<&'static str> {
495        let mut extensions = AHashSet::new();
496        
497        let asset_exts = [
498            // Images
499            "jpg", "jpeg", "png", "gif", "bmp", "tiff", "tga", "webp",
500            "ico", "cur", "psd", "ai", "eps", "raw", "cr2", "nef", "svg",
501            // Fonts
502            "ttf", "otf", "woff", "woff2", "eot",
503            // Media
504            "mp3", "mp4", "avi", "mov", "wmv", "flv", "mkv", "webm",
505            "wav", "flac", "ogg", "aac", "m4a", "wma",
506        ];
507        
508        for ext in asset_exts {
509            extensions.insert(ext);
510        }
511        
512        extensions
513    }
514    
515    /// Get filenames that should be excluded
516    fn get_excluded_filenames() -> AHashSet<&'static str> {
517        let mut filenames = AHashSet::new();
518        
519        let excluded = [
520            // OS files
521            ".ds_store", "thumbs.db", "desktop.ini", "folder.ico",
522            // Editor files
523            ".gitkeep", ".keep", ".placeholder",
524            // Temporary files
525            ".tmp", ".temp", ".swp", ".swo", ".bak", ".backup",
526        ];
527        
528        for filename in excluded {
529            filenames.insert(filename);
530        }
531        
532        filenames
533    }
534    
535    /// Get secret keywords for detection
536    fn get_secret_keywords() -> Vec<&'static str> {
537        vec![
538            "secret", "key", "token", "password", "credential",
539            "auth", "api", "private", "access", "bearer",
540        ]
541    }
542    
543    fn is_config_file(&self, name: &str, extension: &Option<String>) -> bool {
544        let config_extensions = ["json", "yml", "yaml", "toml", "ini", "conf", "config", "xml"];
545        let config_names = ["config", "settings", "configuration", ".env"];
546        
547        if let Some(ext) = extension {
548            if config_extensions.contains(&ext.as_str()) {
549                return true;
550            }
551        }
552        
553        config_names.iter().any(|&n| name.contains(n))
554    }
555    
556    fn is_secret_file(&self, name: &str, path: &Path) -> bool {
557        let secret_patterns = [
558            ".env", ".key", ".pem", ".p12", ".pfx",
559            "credentials", "secret", "private", "cert",
560        ];
561        
562        // Check filename
563        if secret_patterns.iter().any(|&p| name.contains(p)) {
564            return true;
565        }
566        
567        // Check path components
568        let path_str = path.to_string_lossy().to_lowercase();
569        secret_patterns.iter().any(|&p| path_str.contains(p))
570    }
571    
572    fn is_source_file(&self, extension: &Option<String>) -> bool {
573        if let Some(ext) = extension {
574            let source_extensions = [
575                "js", "jsx", "ts", "tsx", "py", "java", "kt", "go",
576                "rs", "rb", "php", "cs", "cpp", "c", "h", "swift",
577                "scala", "clj", "ex", "exs",
578            ];
579            source_extensions.contains(&ext.as_str())
580        } else {
581            false
582        }
583    }
584    
585    fn has_secret_keywords(&self, name: &str) -> bool {
586        self.secret_keywords.iter().any(|&keyword| name.contains(keyword))
587    }
588    
589    /// Enhanced dependency lock file detection
590    fn is_dependency_lock_file(&self, meta: &FileMetadata) -> bool {
591        let filename = meta.path.file_name()
592            .and_then(|n| n.to_str())
593            .unwrap_or("")
594            .to_lowercase();
595        
596        // Common dependency lock files that contain package hashes and metadata
597        let lock_files = [
598            // JavaScript/Node.js
599            "package-lock.json",
600            "yarn.lock", 
601            "pnpm-lock.yaml",
602            "bun.lockb",  // Bun lock file (binary format)
603            // Python
604            "poetry.lock",
605            "pipfile.lock",
606            "pip-lock.txt",
607            "pdm.lock",
608            // Rust
609            "cargo.lock",
610            // Go
611            "go.sum",
612            "go.mod",
613            // Java
614            "gradle.lockfile",
615            "maven-dependency-plugin.log",
616            // Ruby
617            "gemfile.lock",
618            // PHP
619            "composer.lock",
620            // .NET
621            "packages.lock.json",
622            "paket.lock",
623            // Others
624            "mix.lock",  // Elixir
625            "pubspec.lock",  // Dart
626            "swift.resolved", // Swift
627            "flake.lock", // Nix
628        ];
629        
630        // Check if filename matches any lock file pattern
631        lock_files.iter().any(|&pattern| filename == pattern) ||
632        // Also check for common lock file patterns
633        filename.ends_with(".lock") ||
634        filename.ends_with("-lock.json") ||
635        filename.ends_with("-lock.yaml") ||
636        filename.ends_with("-lock.yml") ||
637        filename.ends_with(".lockb") ||  // Binary lock files
638        filename.contains("shrinkwrap") ||
639        filename.contains("lockfile")
640    }
641}
642
643impl FileMetadata {
644    /// Check if file is critical (must scan)
645    pub fn is_critical(&self) -> bool {
646        self.priority_hints.is_env_file || 
647        self.priority_hints.is_secret_file ||
648        self.extension.as_deref() == Some("pem") ||
649        self.extension.as_deref() == Some("key")
650    }
651    
652    /// Check if file is high priority
653    pub fn is_priority(&self) -> bool {
654        self.is_critical() ||
655        self.priority_hints.is_config_file ||
656        self.priority_hints.has_secret_keywords
657    }
658    
659    /// Calculate priority score (higher = more important)
660    pub fn priority_score(&self) -> u32 {
661        let mut score: u32 = 0;
662        
663        if self.priority_hints.is_env_file { score += 1000; }
664        if self.priority_hints.is_secret_file { score += 900; }
665        if self.priority_hints.is_config_file { score += 500; }
666        if self.priority_hints.has_secret_keywords { score += 300; }
667        if !self.is_gitignored { score += 200; }
668        if self.priority_hints.is_source_file { score += 100; }
669        
670        // Penalize large files
671        if self.size > 1_000_000 { score = score.saturating_sub(100); }
672        
673        score
674    }
675}
676
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Fast-mode configuration without git and with a 1 MiB size limit.
    fn fast_scan_config(priority_extensions: Vec<String>) -> DiscoveryConfig {
        DiscoveryConfig {
            use_git: false,
            max_file_size: 1024 * 1024,
            priority_extensions,
            scan_mode: ScanMode::Fast,
        }
    }

    /// Small, non-ignored metadata record with default priority hints.
    fn plain_metadata(name: &str, extension: Option<&str>) -> FileMetadata {
        FileMetadata {
            path: PathBuf::from(name),
            size: 100,
            extension: extension.map(|ext| ext.to_string()),
            is_gitignored: false,
            modified: SystemTime::now(),
            priority_hints: PriorityHints::default(),
        }
    }

    #[test]
    fn test_file_priority_scoring() {
        let mut meta = plain_metadata(".env", Some("env"));
        meta.priority_hints = PriorityHints {
            is_env_file: true,
            is_config_file: true,
            is_secret_file: true,
            is_source_file: false,
            has_secret_keywords: true,
        };

        assert!(meta.is_critical());
        assert!(meta.is_priority());
        assert!(meta.priority_score() > 2000);
    }

    #[test]
    fn test_file_discovery() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        fs::write(root.join(".env"), "SECRET=123").unwrap();
        fs::write(root.join("config.json"), "{}").unwrap();
        fs::create_dir(root.join("node_modules")).unwrap();
        fs::write(root.join("node_modules/test.js"), "code").unwrap();

        let discovery = FileDiscovery::new(fast_scan_config(vec!["env".to_string()]));
        let files = discovery.discover_files(root).unwrap();

        // Should find .env and config.json but not node_modules/test.js
        assert_eq!(files.len(), 2);
        assert!(files.iter().any(|f| f.path.ends_with(".env")));
        assert!(files.iter().any(|f| f.path.ends_with("config.json")));
    }

    #[test]
    fn test_binary_file_detection() {
        let discovery = FileDiscovery::new(fast_scan_config(vec![]));
        let binary_meta = plain_metadata("test.jpg", Some("jpg"));

        assert!(discovery.is_binary_file(&binary_meta));
    }

    #[test]
    fn test_lock_file_detection() {
        let discovery = FileDiscovery::new(fast_scan_config(vec![]));

        let lock_files = [
            "package-lock.json",
            "yarn.lock",
            "pnpm-lock.yaml",
            "bun.lockb",
            "cargo.lock",
            "go.sum",
        ];

        for lock_file in lock_files.iter().copied() {
            let meta = plain_metadata(lock_file, None);

            assert!(discovery.is_dependency_lock_file(&meta), "Failed to detect {}", lock_file);
        }
    }
}