syncable_cli/analyzer/
security_analyzer.rs

1//! # Security Analyzer
2//!
3//! Comprehensive security analysis module that performs multi-layered security assessment:
4//! - Configuration security analysis (secrets, insecure settings)
5//! - Code security patterns (language/framework-specific issues)
6//! - Infrastructure security (Docker, compose configurations)
7//! - Security policy recommendations and compliance guidance
8//! - Security scoring with actionable remediation steps
9
10use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
11use log::{debug, info};
12use rayon::prelude::*;
13use regex::Regex;
14use serde::{Deserialize, Serialize};
15use std::collections::HashMap;
16use std::fs;
17use std::path::{Path, PathBuf};
18use std::process::Command;
19use std::time::Instant;
20use thiserror::Error;
21
22use crate::analyzer::dependency_parser::Language;
23use crate::analyzer::{DetectedLanguage, DetectedTechnology, EnvVar, ProjectAnalysis};
24
25#[derive(Debug, Error)]
26pub enum SecurityError {
27    #[error("Security analysis failed: {0}")]
28    AnalysisFailed(String),
29
30    #[error("Configuration analysis error: {0}")]
31    ConfigAnalysisError(String),
32
33    #[error("Code pattern analysis error: {0}")]
34    CodePatternError(String),
35
36    #[error("Infrastructure analysis error: {0}")]
37    InfrastructureError(String),
38
39    #[error("IO error: {0}")]
40    Io(#[from] std::io::Error),
41
42    #[error("Regex error: {0}")]
43    Regex(#[from] regex::Error),
44}
45
46/// Security finding severity levels
47#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
48pub enum SecuritySeverity {
49    Critical,
50    High,
51    Medium,
52    Low,
53    Info,
54}
55
56/// Categories of security findings
57#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
58pub enum SecurityCategory {
59    /// Exposed secrets, API keys, passwords
60    SecretsExposure,
61    /// Insecure configuration settings
62    InsecureConfiguration,
63    /// Language/framework-specific security patterns
64    CodeSecurityPattern,
65    /// Infrastructure and deployment security
66    InfrastructureSecurity,
67    /// Authentication and authorization issues
68    AuthenticationSecurity,
69    /// Data protection and privacy concerns
70    DataProtection,
71    /// Network and communication security
72    NetworkSecurity,
73    /// Compliance and regulatory requirements
74    Compliance,
75}
76
77/// A security finding with details and remediation
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct SecurityFinding {
80    pub id: String,
81    pub title: String,
82    pub description: String,
83    pub severity: SecuritySeverity,
84    pub category: SecurityCategory,
85    pub file_path: Option<PathBuf>,
86    pub line_number: Option<usize>,
87    pub column_number: Option<usize>,
88    pub evidence: Option<String>,
89    pub remediation: Vec<String>,
90    pub references: Vec<String>,
91    pub cwe_id: Option<String>,
92    pub compliance_frameworks: Vec<String>,
93}
94
95/// Comprehensive security analysis report
96#[derive(Debug, Serialize, Deserialize)]
97pub struct SecurityReport {
98    pub analyzed_at: chrono::DateTime<chrono::Utc>,
99    pub overall_score: f32, // 0-100, higher is better
100    pub risk_level: SecuritySeverity,
101    pub total_findings: usize,
102    pub findings_by_severity: HashMap<SecuritySeverity, usize>,
103    pub findings_by_category: HashMap<SecurityCategory, usize>,
104    pub findings: Vec<SecurityFinding>,
105    pub recommendations: Vec<String>,
106    pub compliance_status: HashMap<String, ComplianceStatus>,
107}
108
109/// Compliance framework status
110#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct ComplianceStatus {
112    pub framework: String,
113    pub coverage: f32, // 0-100%
114    pub missing_controls: Vec<String>,
115    pub recommendations: Vec<String>,
116}
117
/// Options that control what the security analyzer checks and reports.
#[derive(Debug, Clone)]
pub struct SecurityAnalysisConfig {
    /// Keep `Low` and `Info` findings in the report instead of dropping them.
    pub include_low_severity: bool,
    /// Run the configuration/secrets phase.
    pub check_secrets: bool,
    /// Run the code security pattern phase.
    pub check_code_patterns: bool,
    /// Enable infrastructure checks.
    pub check_infrastructure: bool,
    /// Enable compliance checks.
    pub check_compliance: bool,
    /// Names of the compliance frameworks to consider.
    pub frameworks_to_check: Vec<String>,
    /// Directory names and glob-like patterns to leave out of scanning.
    pub ignore_patterns: Vec<String>,
    /// Whether to skip scanning files that are gitignored
    pub skip_gitignored_files: bool,
    /// Whether to downgrade severity for gitignored files instead of skipping
    pub downgrade_gitignored_severity: bool,
}

impl Default for SecurityAnalysisConfig {
    /// Builds the stock configuration: every check enabled, low-severity
    /// findings hidden, and gitignored files skipped outright rather than
    /// having their severity downgraded.
    fn default() -> Self {
        // Small helper so the string tables below stay readable.
        let owned = |items: &[&str]| -> Vec<String> {
            items.iter().map(|item| item.to_string()).collect()
        };

        let frameworks_to_check = owned(&["SOC2", "GDPR", "OWASP"]);
        let ignore_patterns = owned(&[
            "node_modules",
            ".git",
            "target",
            "build",
            ".next",
            "dist",
            "test",
            "tests",
            "*.json",     // JSON files often contain hashes
            "*.lock",     // lock files carry checksums
            "*_sample.*", // sample files
            "*audit*",    // audit reports
        ]);

        Self {
            include_low_severity: false,
            check_secrets: true,
            check_code_patterns: true,
            check_infrastructure: true,
            check_compliance: true,
            frameworks_to_check,
            ignore_patterns,
            // Skip gitignored files entirely by default...
            skip_gitignored_files: true,
            // ...so severity downgrading stays off.
            downgrade_gitignored_severity: false,
        }
    }
}
162
163pub struct SecurityAnalyzer {
164    config: SecurityAnalysisConfig,
165    secret_patterns: Vec<SecretPattern>,
166    security_rules: HashMap<Language, Vec<SecurityRule>>,
167    git_ignore_cache: std::sync::Mutex<HashMap<PathBuf, bool>>,
168    project_root: Option<PathBuf>,
169}
170
171/// Pattern for detecting secrets and sensitive data
172struct SecretPattern {
173    name: String,
174    pattern: Regex,
175    severity: SecuritySeverity,
176    description: String,
177}
178
179/// Security rule for code pattern analysis
180struct SecurityRule {
181    id: String,
182    name: String,
183    pattern: Regex,
184    severity: SecuritySeverity,
185    category: SecurityCategory,
186    description: String,
187    remediation: Vec<String>,
188    cwe_id: Option<String>,
189}
190
191impl SecurityAnalyzer {
192    pub fn new() -> Result<Self, SecurityError> {
193        Self::with_config(SecurityAnalysisConfig::default())
194    }
195
196    pub fn with_config(config: SecurityAnalysisConfig) -> Result<Self, SecurityError> {
197        let secret_patterns = Self::initialize_secret_patterns()?;
198        let security_rules = Self::initialize_security_rules()?;
199
200        Ok(Self {
201            config,
202            secret_patterns,
203            security_rules,
204            git_ignore_cache: std::sync::Mutex::new(HashMap::new()),
205            project_root: None,
206        })
207    }
208
209    /// Perform comprehensive security analysis with appropriate progress for verbosity level
210    pub fn analyze_security(
211        &mut self,
212        analysis: &ProjectAnalysis,
213    ) -> Result<SecurityReport, SecurityError> {
214        let start_time = Instant::now();
215        info!("Starting comprehensive security analysis");
216
217        // Set project root for gitignore checking
218        self.project_root = Some(analysis.project_root.clone());
219
220        // Check if we're in verbose mode by checking log level
221        let is_verbose = log::max_level() >= log::LevelFilter::Info;
222
223        // Set up progress tracking appropriate for verbosity
224        let multi_progress = MultiProgress::new();
225
226        // In verbose mode, we'll completely skip adding progress bars to avoid visual conflicts
227
228        // Count enabled analysis phases
229        let mut total_phases = 0;
230        if self.config.check_secrets {
231            total_phases += 1;
232        }
233        if self.config.check_code_patterns {
234            total_phases += 1;
235        }
236        if self.config.check_infrastructure {
237            total_phases += 1;
238        }
239        total_phases += 2; // env vars and framework analysis always run
240
241        // Create appropriate progress indicator based on verbosity
242        let main_pb = if is_verbose {
243            None // No main progress bar in verbose mode to avoid conflicts with logs
244        } else {
245            // Normal mode: Rich progress bar
246            let pb = multi_progress.add(ProgressBar::new(100));
247            pb.set_style(
248                ProgressStyle::default_bar()
249                    .template("🛡️  {msg} {bar:50.cyan/blue} {percent}% [{elapsed_precise}]")
250                    .unwrap()
251                    .progress_chars("██▉▊▋▌▍▎▏  "),
252            );
253            Some(pb)
254        };
255
256        let mut findings = Vec::new();
257        let phase_weight = if is_verbose {
258            1u64
259        } else {
260            100 / total_phases as u64
261        };
262        let mut current_progress = 0u64;
263
264        // 1. Configuration Security Analysis
265        if self.config.check_secrets {
266            if let Some(ref pb) = main_pb {
267                pb.set_message("Analyzing configuration & secrets...");
268                pb.set_position(current_progress);
269            }
270
271            if is_verbose {
272                findings.extend(self.analyze_configuration_security(&analysis.project_root)?);
273            } else {
274                findings.extend(self.analyze_configuration_security_with_progress(
275                    &analysis.project_root,
276                    &multi_progress,
277                )?);
278            }
279
280            if let Some(ref pb) = main_pb {
281                current_progress += phase_weight;
282                pb.set_position(current_progress);
283            }
284        }
285
286        // 2. Code Security Patterns
287        if self.config.check_code_patterns {
288            if let Some(ref pb) = main_pb {
289                pb.set_message("Analyzing code security patterns...");
290            }
291
292            if is_verbose {
293                findings.extend(
294                    self.analyze_code_security_patterns(
295                        &analysis.project_root,
296                        &analysis.languages,
297                    )?,
298                );
299            } else {
300                findings.extend(self.analyze_code_security_patterns_with_progress(
301                    &analysis.project_root,
302                    &analysis.languages,
303                    &multi_progress,
304                )?);
305            }
306
307            if let Some(ref pb) = main_pb {
308                current_progress += phase_weight;
309                pb.set_position(current_progress);
310            }
311        }
312
313        // 3. Infrastructure Security (skipped - not implemented yet)
314        // TODO: Implement infrastructure security analysis
315        // Currently all infrastructure analysis methods return empty results
316
317        // 4. Environment Variables Security
318        if let Some(ref pb) = main_pb {
319            pb.set_message("Analyzing environment variables...");
320        }
321
322        findings.extend(self.analyze_environment_security(&analysis.environment_variables));
323        if let Some(ref pb) = main_pb {
324            current_progress += phase_weight;
325            pb.set_position(current_progress);
326        }
327
328        // 5. Framework-specific Security (skipped - not implemented yet)
329        // TODO: Implement framework-specific security analysis
330        // Currently all framework analysis methods return empty results
331
332        if let Some(ref pb) = main_pb {
333            current_progress = 100;
334            pb.set_position(current_progress);
335        }
336
337        // Processing phase
338        if let Some(ref pb) = main_pb {
339            pb.set_message("Processing findings & generating report...");
340        }
341
342        // DEDUPLICATION: Remove duplicate findings for the same secret/issue
343        let pre_dedup_count = findings.len();
344        findings = self.deduplicate_findings(findings);
345        let post_dedup_count = findings.len();
346
347        if pre_dedup_count != post_dedup_count {
348            info!(
349                "Deduplicated {} redundant findings, {} unique findings remain",
350                pre_dedup_count - post_dedup_count,
351                post_dedup_count
352            );
353        }
354
355        // Filter findings based on configuration
356        let pre_filter_count = findings.len();
357        if !self.config.include_low_severity {
358            findings.retain(|f| {
359                f.severity != SecuritySeverity::Low && f.severity != SecuritySeverity::Info
360            });
361        }
362
363        // Sort by severity (most critical first)
364        findings.sort_by(|a, b| a.severity.cmp(&b.severity));
365
366        // Calculate metrics
367        let total_findings = findings.len();
368        let findings_by_severity = self.count_by_severity(&findings);
369        let findings_by_category = self.count_by_category(&findings);
370        let overall_score = self.calculate_security_score(&findings);
371        let risk_level = self.determine_risk_level(&findings);
372
373        // Generate compliance status (disabled - not implemented yet)
374        // TODO: Implement compliance assessment
375        let compliance_status = HashMap::new();
376
377        // Generate recommendations
378        let recommendations = self.generate_recommendations(&findings, &analysis.technologies);
379
380        // Complete with summary
381        let duration = start_time.elapsed().as_secs_f32();
382        if let Some(pb) = main_pb {
383            pb.finish_with_message(format!(
384                "✅ Security analysis completed in {:.1}s - Found {} issues",
385                duration, total_findings
386            ));
387        }
388
389        // Print summary
390        if pre_filter_count != total_findings {
391            info!(
392                "Found {} total findings, showing {} after filtering",
393                pre_filter_count, total_findings
394            );
395        } else {
396            info!("Found {} security findings", total_findings);
397        }
398
399        Ok(SecurityReport {
400            analyzed_at: chrono::Utc::now(),
401            overall_score,
402            risk_level,
403            total_findings,
404            findings_by_severity,
405            findings_by_category,
406            findings,
407            recommendations,
408            compliance_status,
409        })
410    }
411
412    /// Check if a file is gitignored using git check-ignore command
413    fn is_file_gitignored(&self, file_path: &Path) -> bool {
414        // Return false if we don't have project root set
415        let project_root = match &self.project_root {
416            Some(root) => root,
417            None => return false,
418        };
419
420        // Use cache to avoid repeated git calls
421        if let Ok(cache) = self.git_ignore_cache.lock() {
422            if let Some(&cached_result) = cache.get(file_path) {
423                return cached_result;
424            }
425        }
426
427        // Check if this is a git repository
428        if !project_root.join(".git").exists() {
429            debug!("Not a git repository, treating all files as tracked");
430            return false;
431        }
432
433        // First, try git check-ignore for the most accurate result
434        let git_result = Command::new("git")
435            .args(&["check-ignore", "--quiet"])
436            .arg(file_path)
437            .current_dir(project_root)
438            .output()
439            .map(|output| output.status.success())
440            .unwrap_or(false);
441
442        // If git check-ignore says it's ignored, trust it
443        if git_result {
444            if let Ok(mut cache) = self.git_ignore_cache.lock() {
445                cache.insert(file_path.to_path_buf(), true);
446            }
447            return true;
448        }
449
450        // Fallback: Parse .gitignore files manually for common patterns
451        // This helps when git check-ignore might not work perfectly in all scenarios
452        let manual_result = self.check_gitignore_patterns(file_path, project_root);
453
454        // Cache the result (prefer git result, fallback to manual)
455        let final_result = git_result || manual_result;
456        if let Ok(mut cache) = self.git_ignore_cache.lock() {
457            cache.insert(file_path.to_path_buf(), final_result);
458        }
459
460        final_result
461    }
462
463    /// Manually check gitignore patterns as a fallback
464    fn check_gitignore_patterns(&self, file_path: &Path, project_root: &Path) -> bool {
465        // Get relative path from project root
466        let relative_path = match file_path.strip_prefix(project_root) {
467            Ok(rel) => rel,
468            Err(_) => return false,
469        };
470
471        let path_str = relative_path.to_string_lossy();
472        let file_name = relative_path
473            .file_name()
474            .and_then(|n| n.to_str())
475            .unwrap_or("");
476
477        // Read .gitignore file
478        let gitignore_path = project_root.join(".gitignore");
479        if let Ok(gitignore_content) = fs::read_to_string(&gitignore_path) {
480            for line in gitignore_content.lines() {
481                let pattern = line.trim();
482                if pattern.is_empty() || pattern.starts_with('#') {
483                    continue;
484                }
485
486                // Check if this pattern matches our file
487                if self.matches_gitignore_pattern(pattern, &path_str, file_name) {
488                    debug!("File {} matches gitignore pattern: {}", path_str, pattern);
489                    return true;
490                }
491            }
492        }
493
494        // Also check global gitignore patterns for common .env patterns
495        self.matches_common_env_patterns(file_name)
496    }
497
498    /// Check if a file matches a specific gitignore pattern
499    fn matches_gitignore_pattern(&self, pattern: &str, path_str: &str, file_name: &str) -> bool {
500        // Handle different types of patterns
501        if pattern.contains('*') {
502            // Wildcard patterns
503            if let Ok(glob_pattern) = glob::Pattern::new(pattern) {
504                // Try matching both full path and just filename
505                if glob_pattern.matches(path_str) || glob_pattern.matches(file_name) {
506                    return true;
507                }
508            }
509        } else if pattern.starts_with('/') {
510            // Absolute path from repo root
511            let abs_pattern = &pattern[1..];
512            if path_str == abs_pattern {
513                return true;
514            }
515        } else {
516            // Simple pattern - could match anywhere in path
517            if path_str == pattern
518                || file_name == pattern
519                || path_str.ends_with(&format!("/{}", pattern))
520            {
521                return true;
522            }
523        }
524
525        false
526    }
527
528    /// Check against common .env file patterns that should typically be ignored
529    fn matches_common_env_patterns(&self, file_name: &str) -> bool {
530        let common_env_patterns = [
531            ".env",
532            ".env.local",
533            ".env.development",
534            ".env.production",
535            ".env.staging",
536            ".env.test",
537            ".env.example", // Usually committed but should be treated carefully
538        ];
539
540        // Exact matches
541        if common_env_patterns.contains(&file_name) {
542            return file_name != ".env.example"; // .env.example is usually committed
543        }
544
545        // Pattern matches
546        if file_name.starts_with(".env.")
547            || file_name.ends_with(".env")
548            || (file_name.starts_with(".") && file_name.contains("env"))
549        {
550            // Be conservative - only ignore if it's clearly a local/environment specific file
551            return !file_name.contains("example")
552                && !file_name.contains("sample")
553                && !file_name.contains("template");
554        }
555
556        false
557    }
558
559    /// Check if a file is actually tracked by git
560    fn is_file_tracked(&self, file_path: &Path) -> bool {
561        let project_root = match &self.project_root {
562            Some(root) => root,
563            None => return true, // Assume tracked if no project root
564        };
565
566        // Check if this is a git repository
567        if !project_root.join(".git").exists() {
568            return true; // Not a git repo, treat as tracked
569        }
570
571        // Use git ls-files to check if file is tracked
572        Command::new("git")
573            .args(&["ls-files", "--error-unmatch"])
574            .arg(file_path)
575            .current_dir(project_root)
576            .output()
577            .map(|output| output.status.success())
578            .unwrap_or(true) // Default to tracked if git command fails
579    }
580
581    /// Determine the appropriate severity for a secret finding based on git status
582    fn determine_secret_severity(
583        &self,
584        file_path: &Path,
585        original_severity: SecuritySeverity,
586    ) -> (SecuritySeverity, Vec<String>) {
587        let mut additional_remediation = Vec::new();
588
589        // Check if file is gitignored
590        if self.is_file_gitignored(file_path) {
591            if self.config.skip_gitignored_files {
592                // Return Info level to indicate this should be skipped
593                return (
594                    SecuritySeverity::Info,
595                    vec!["File is properly gitignored".to_string()],
596                );
597            } else if self.config.downgrade_gitignored_severity {
598                // Downgrade severity for gitignored files
599                let downgraded = match original_severity {
600                    SecuritySeverity::Critical => SecuritySeverity::Medium,
601                    SecuritySeverity::High => SecuritySeverity::Low,
602                    SecuritySeverity::Medium => SecuritySeverity::Low,
603                    SecuritySeverity::Low => SecuritySeverity::Info,
604                    SecuritySeverity::Info => SecuritySeverity::Info,
605                };
606                additional_remediation
607                    .push("Note: File is gitignored, reducing severity".to_string());
608                return (downgraded, additional_remediation);
609            }
610        }
611
612        // Check if file is tracked by git
613        if !self.is_file_tracked(file_path) {
614            additional_remediation.push(
615                "Ensure this file is added to .gitignore to prevent accidental commits".to_string(),
616            );
617        } else {
618            // File is tracked - this is a serious issue
619            additional_remediation.push(
620                "⚠️  CRITICAL: This file is tracked by git! Secrets may be in version history."
621                    .to_string(),
622            );
623            additional_remediation.push(
624                "Consider using git-filter-branch or BFG Repo-Cleaner to remove from history"
625                    .to_string(),
626            );
627            additional_remediation.push("Rotate any exposed secrets immediately".to_string());
628
629            // Upgrade severity for tracked files
630            let upgraded = match original_severity {
631                SecuritySeverity::High => SecuritySeverity::Critical,
632                SecuritySeverity::Medium => SecuritySeverity::High,
633                SecuritySeverity::Low => SecuritySeverity::Medium,
634                other => other,
635            };
636            return (upgraded, additional_remediation);
637        }
638
639        (original_severity, additional_remediation)
640    }
641
642    /// Initialize secret detection patterns
643    fn initialize_secret_patterns() -> Result<Vec<SecretPattern>, SecurityError> {
644        let patterns = vec![
645            // API Keys and Tokens - Specific patterns first
646            (
647                "AWS Access Key",
648                r"AKIA[0-9A-Z]{16}",
649                SecuritySeverity::Critical,
650            ),
651            (
652                "AWS Secret Key",
653                r#"(?i)(aws[_-]?secret|secret[_-]?access[_-]?key)["']?\s*[:=]\s*["']?[A-Za-z0-9/+=]{40}["']?"#,
654                SecuritySeverity::Critical,
655            ),
656            (
657                "S3 Secret Key",
658                r#"(?i)(s3[_-]?secret[_-]?key|linode[_-]?s3[_-]?secret)["']?\s*[:=]\s*["']?[A-Za-z0-9/+=]{20,}["']?"#,
659                SecuritySeverity::High,
660            ),
661            (
662                "GitHub Token",
663                r"gh[pousr]_[A-Za-z0-9_]{36,255}",
664                SecuritySeverity::High,
665            ),
666            (
667                "OpenAI API Key",
668                r"sk-[A-Za-z0-9]{48}",
669                SecuritySeverity::High,
670            ),
671            (
672                "Stripe API Key",
673                r"sk_live_[0-9a-zA-Z]{24}",
674                SecuritySeverity::Critical,
675            ),
676            (
677                "Stripe Publishable Key",
678                r"pk_live_[0-9a-zA-Z]{24}",
679                SecuritySeverity::Medium,
680            ),
681            // Database URLs and Passwords - Enhanced to avoid env var false positives
682            (
683                "Hardcoded Database URL",
684                r#"(?i)(database_url|db_url)["']?\s*[:=]\s*["']?(postgresql|mysql|mongodb)://[^"'\s]+"#,
685                SecuritySeverity::Critical,
686            ),
687            (
688                "Hardcoded Password",
689                r#"(?i)(password|passwd|pwd)["']?\s*[:=]\s*["']?[^"']{6,}["']?"#,
690                SecuritySeverity::High,
691            ),
692            (
693                "JWT Secret",
694                r#"(?i)(jwt[_-]?secret)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-+/=]{20,}"#,
695                SecuritySeverity::High,
696            ),
697            // Private Keys
698            (
699                "RSA Private Key",
700                r"-----BEGIN RSA PRIVATE KEY-----",
701                SecuritySeverity::Critical,
702            ),
703            (
704                "SSH Private Key",
705                r"-----BEGIN OPENSSH PRIVATE KEY-----",
706                SecuritySeverity::Critical,
707            ),
708            (
709                "PGP Private Key",
710                r"-----BEGIN PGP PRIVATE KEY BLOCK-----",
711                SecuritySeverity::Critical,
712            ),
713            // Cloud Provider Keys
714            (
715                "Google Cloud Service Account",
716                r#""type":\s*"service_account""#,
717                SecuritySeverity::High,
718            ),
719            (
720                "Azure Storage Key",
721                r"DefaultEndpointsProtocol=https;AccountName=",
722                SecuritySeverity::High,
723            ),
724            // Client-side exposed environment variables (these are the real security issues)
725            (
726                "Client-side Exposed Secret",
727                r#"(?i)(REACT_APP_|NEXT_PUBLIC_|VUE_APP_|VITE_)[A-Z_]*(?:SECRET|KEY|TOKEN|PASSWORD|API)[A-Z_]*["']?\s*[:=]\s*["']?[A-Za-z0-9_\-+/=]{10,}"#,
728                SecuritySeverity::High,
729            ),
730            // Hardcoded API keys (not environment variable access)
731            (
732                "Hardcoded API Key",
733                r#"(?i)(api[_-]?key|apikey)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-]{20,}["']?"#,
734                SecuritySeverity::High,
735            ),
736            // Generic secrets that are clearly hardcoded (not env var access)
737            (
738                "Hardcoded Secret",
739                r#"(?i)(secret|token)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-+/=]{24,}["']?"#,
740                SecuritySeverity::Medium,
741            ),
742        ];
743
744        patterns
745            .into_iter()
746            .map(|(name, pattern, severity)| {
747                Ok(SecretPattern {
748                    name: name.to_string(),
749                    pattern: Regex::new(pattern)?,
750                    severity,
751                    description: format!("Potential {} found in code", name),
752                })
753            })
754            .collect()
755    }
756
757    /// Initialize language-specific security rules
758    fn initialize_security_rules() -> Result<HashMap<Language, Vec<SecurityRule>>, SecurityError> {
759        let mut rules = HashMap::new();
760
761        // JavaScript/TypeScript Rules
762        rules.insert(Language::JavaScript, vec![
763            SecurityRule {
764                id: "js-001".to_string(),
765                name: "Eval Usage".to_string(),
766                pattern: Regex::new(r"\beval\s*\(")?,
767                severity: SecuritySeverity::High,
768                category: SecurityCategory::CodeSecurityPattern,
769                description: "Use of eval() can lead to code injection vulnerabilities".to_string(),
770                remediation: vec![
771                    "Avoid using eval() with user input".to_string(),
772                    "Use JSON.parse() for parsing JSON data".to_string(),
773                    "Consider using safer alternatives like Function constructor with validation".to_string(),
774                ],
775                cwe_id: Some("CWE-95".to_string()),
776            },
777            SecurityRule {
778                id: "js-002".to_string(),
779                name: "innerHTML Usage".to_string(),
780                pattern: Regex::new(r"\.innerHTML\s*=")?,
781                severity: SecuritySeverity::Medium,
782                category: SecurityCategory::CodeSecurityPattern,
783                description: "innerHTML can lead to XSS vulnerabilities if used with unsanitized data".to_string(),
784                remediation: vec![
785                    "Use textContent instead of innerHTML for text".to_string(),
786                    "Sanitize HTML content before setting innerHTML".to_string(),
787                    "Consider using secure templating libraries".to_string(),
788                ],
789                cwe_id: Some("CWE-79".to_string()),
790            },
791        ]);
792
793        // Python Rules
794        rules.insert(
795            Language::Python,
796            vec![
797                SecurityRule {
798                    id: "py-001".to_string(),
799                    name: "SQL Injection Risk".to_string(),
800                    pattern: Regex::new(r#"\.execute\s*\(\s*[f]?["'][^"']*%[sd]"#)?,
801                    severity: SecuritySeverity::High,
802                    category: SecurityCategory::CodeSecurityPattern,
803                    description: "String formatting in SQL queries can lead to SQL injection"
804                        .to_string(),
805                    remediation: vec![
806                        "Use parameterized queries instead of string formatting".to_string(),
807                        "Use ORM query builders where possible".to_string(),
808                        "Validate and sanitize all user inputs".to_string(),
809                    ],
810                    cwe_id: Some("CWE-89".to_string()),
811                },
812                SecurityRule {
813                    id: "py-002".to_string(),
814                    name: "Pickle Usage".to_string(),
815                    pattern: Regex::new(r"\bpickle\.loads?\s*\(")?,
816                    severity: SecuritySeverity::High,
817                    category: SecurityCategory::CodeSecurityPattern,
818                    description: "Pickle can execute arbitrary code during deserialization"
819                        .to_string(),
820                    remediation: vec![
821                        "Avoid pickle for untrusted data".to_string(),
822                        "Use JSON or other safe serialization formats".to_string(),
823                        "If pickle is necessary, validate data sources".to_string(),
824                    ],
825                    cwe_id: Some("CWE-502".to_string()),
826                },
827            ],
828        );
829
830        // Add more language rules as needed...
831
832        Ok(rules)
833    }
834
835    /// Analyze configuration files for security issues with appropriate progress tracking
836    fn analyze_configuration_security_with_progress(
837        &self,
838        project_root: &Path,
839        multi_progress: &MultiProgress,
840    ) -> Result<Vec<SecurityFinding>, SecurityError> {
841        debug!("Analyzing configuration security");
842        let mut findings = Vec::new();
843
844        // Collect relevant files
845        let config_files = self.collect_config_files(project_root)?;
846
847        if config_files.is_empty() {
848            info!("No configuration files found");
849            return Ok(findings);
850        }
851
852        let is_verbose = log::max_level() >= log::LevelFilter::Info;
853
854        info!(
855            "📁 Found {} configuration files to analyze",
856            config_files.len()
857        );
858
859        // Create appropriate progress tracking - completely skip in verbose mode
860        let file_pb = if is_verbose {
861            None // No progress bars at all in verbose mode
862        } else {
863            // Normal mode: Show detailed progress
864            let pb = multi_progress.add(ProgressBar::new(config_files.len() as u64));
865            pb.set_style(
866                ProgressStyle::default_bar()
867                    .template("  🔍 {msg} {bar:40.cyan/blue} {pos}/{len} files ({percent}%)")
868                    .unwrap()
869                    .progress_chars("████▉▊▋▌▍▎▏  "),
870            );
871            pb.set_message("Scanning configuration files...");
872            Some(pb)
873        };
874
875        // Use atomic counter for progress updates if needed
876        use std::sync::Arc;
877        use std::sync::atomic::{AtomicUsize, Ordering};
878        let processed_count = Arc::new(AtomicUsize::new(0));
879
880        // Analyze each file with appropriate progress tracking
881        let file_findings: Vec<Vec<SecurityFinding>> = config_files
882            .par_iter()
883            .map(|file_path| {
884                let result = self.analyze_file_for_secrets(file_path);
885
886                // Update progress only in non-verbose mode
887                if let Some(ref pb) = file_pb {
888                    let current = processed_count.fetch_add(1, Ordering::Relaxed) + 1;
889                    if let Some(file_name) = file_path.file_name().and_then(|n| n.to_str()) {
890                        // Truncate long filenames for better display
891                        let display_name = if file_name.len() > 30 {
892                            format!("...{}", &file_name[file_name.len() - 27..])
893                        } else {
894                            file_name.to_string()
895                        };
896                        pb.set_message(format!("Scanning {}", display_name));
897                    }
898                    pb.set_position(current as u64);
899                }
900
901                result
902            })
903            .filter_map(|result| result.ok())
904            .collect();
905
906        // Finish progress tracking
907        if let Some(pb) = file_pb {
908            pb.finish_with_message(format!(
909                "✅ Scanned {} configuration files",
910                config_files.len()
911            ));
912        }
913
914        for mut file_findings in file_findings {
915            findings.append(&mut file_findings);
916        }
917
918        // Check for common insecure configurations
919        findings.extend(self.check_insecure_configurations(project_root)?);
920
921        info!(
922            "🔍 Found {} configuration security findings",
923            findings.len()
924        );
925        Ok(findings)
926    }
927
928    /// Direct configuration security analysis without progress bars
929    fn analyze_configuration_security(
930        &self,
931        project_root: &Path,
932    ) -> Result<Vec<SecurityFinding>, SecurityError> {
933        debug!("Analyzing configuration security");
934        let mut findings = Vec::new();
935
936        // Collect relevant files
937        let config_files = self.collect_config_files(project_root)?;
938
939        if config_files.is_empty() {
940            info!("No configuration files found");
941            return Ok(findings);
942        }
943
944        info!(
945            "📁 Found {} configuration files to analyze",
946            config_files.len()
947        );
948
949        // Analyze each file directly without progress tracking
950        let file_findings: Vec<Vec<SecurityFinding>> = config_files
951            .par_iter()
952            .map(|file_path| self.analyze_file_for_secrets(file_path))
953            .filter_map(|result| result.ok())
954            .collect();
955
956        for mut file_findings in file_findings {
957            findings.append(&mut file_findings);
958        }
959
960        // Check for common insecure configurations
961        findings.extend(self.check_insecure_configurations(project_root)?);
962
963        info!(
964            "🔍 Found {} configuration security findings",
965            findings.len()
966        );
967        Ok(findings)
968    }
969
970    /// Analyze code for security patterns with appropriate progress tracking
971    fn analyze_code_security_patterns_with_progress(
972        &self,
973        project_root: &Path,
974        languages: &[DetectedLanguage],
975        multi_progress: &MultiProgress,
976    ) -> Result<Vec<SecurityFinding>, SecurityError> {
977        debug!("Analyzing code security patterns");
978        let mut findings = Vec::new();
979
980        // Count total source files across all languages
981        let mut total_files = 0;
982        let mut language_files = Vec::new();
983
984        for language in languages {
985            if let Some(lang) = Language::from_string(&language.name) {
986                if let Some(_rules) = self.security_rules.get(&lang) {
987                    let source_files = self.collect_source_files(project_root, &language.name)?;
988                    total_files += source_files.len();
989                    language_files.push((language, source_files));
990                }
991            }
992        }
993
994        if total_files == 0 {
995            info!("No source files found for code pattern analysis");
996            return Ok(findings);
997        }
998
999        let is_verbose = log::max_level() >= log::LevelFilter::Info;
1000
1001        info!(
1002            "📄 Found {} source files across {} languages",
1003            total_files,
1004            language_files.len()
1005        );
1006
1007        // Create appropriate progress tracking
1008        let code_pb = if is_verbose {
1009            // Verbose mode: No sub-progress to avoid visual clutter
1010            None
1011        } else {
1012            // Normal mode: Show detailed progress
1013            let pb = multi_progress.add(ProgressBar::new(total_files as u64));
1014            pb.set_style(
1015                ProgressStyle::default_bar()
1016                    .template("  📄 {msg} {bar:40.yellow/white} {pos}/{len} files ({percent}%)")
1017                    .unwrap()
1018                    .progress_chars("████▉▊▋▌▍▎▏  "),
1019            );
1020            pb.set_message("Scanning source code...");
1021            Some(pb)
1022        };
1023
1024        // Use atomic counter for progress if needed
1025        use std::sync::Arc;
1026        use std::sync::atomic::{AtomicUsize, Ordering};
1027        let processed_count = Arc::new(AtomicUsize::new(0));
1028
1029        // Process all languages
1030        for (language, source_files) in language_files {
1031            if let Some(lang) = Language::from_string(&language.name) {
1032                if let Some(rules) = self.security_rules.get(&lang) {
1033                    let file_findings: Vec<Vec<SecurityFinding>> = source_files
1034                        .par_iter()
1035                        .map(|file_path| {
1036                            let result = self.analyze_file_with_rules(file_path, rules);
1037
1038                            // Update progress only in non-verbose mode
1039                            if let Some(ref pb) = code_pb {
1040                                let current = processed_count.fetch_add(1, Ordering::Relaxed) + 1;
1041                                if let Some(file_name) =
1042                                    file_path.file_name().and_then(|n| n.to_str())
1043                                {
1044                                    let display_name = if file_name.len() > 25 {
1045                                        format!("...{}", &file_name[file_name.len() - 22..])
1046                                    } else {
1047                                        file_name.to_string()
1048                                    };
1049                                    pb.set_message(format!(
1050                                        "Scanning {} ({})",
1051                                        display_name, language.name
1052                                    ));
1053                                }
1054                                pb.set_position(current as u64);
1055                            }
1056
1057                            result
1058                        })
1059                        .filter_map(|result| result.ok())
1060                        .collect();
1061
1062                    for mut file_findings in file_findings {
1063                        findings.append(&mut file_findings);
1064                    }
1065                }
1066            }
1067        }
1068
1069        // Finish progress tracking
1070        if let Some(pb) = code_pb {
1071            pb.finish_with_message(format!("✅ Scanned {} source files", total_files));
1072        }
1073
1074        info!("🔍 Found {} code security findings", findings.len());
1075        Ok(findings)
1076    }
1077
1078    /// Direct code security analysis without progress bars
1079    fn analyze_code_security_patterns(
1080        &self,
1081        project_root: &Path,
1082        languages: &[DetectedLanguage],
1083    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1084        debug!("Analyzing code security patterns");
1085        let mut findings = Vec::new();
1086
1087        // Count total source files across all languages
1088        let mut total_files = 0;
1089        let mut language_files = Vec::new();
1090
1091        for language in languages {
1092            if let Some(lang) = Language::from_string(&language.name) {
1093                if let Some(_rules) = self.security_rules.get(&lang) {
1094                    let source_files = self.collect_source_files(project_root, &language.name)?;
1095                    total_files += source_files.len();
1096                    language_files.push((language, source_files));
1097                }
1098            }
1099        }
1100
1101        if total_files == 0 {
1102            info!("No source files found for code pattern analysis");
1103            return Ok(findings);
1104        }
1105
1106        info!(
1107            "📄 Found {} source files across {} languages",
1108            total_files,
1109            language_files.len()
1110        );
1111
1112        // Process all languages without progress tracking
1113        for (language, source_files) in language_files {
1114            if let Some(lang) = Language::from_string(&language.name) {
1115                if let Some(rules) = self.security_rules.get(&lang) {
1116                    let file_findings: Vec<Vec<SecurityFinding>> = source_files
1117                        .par_iter()
1118                        .map(|file_path| self.analyze_file_with_rules(file_path, rules))
1119                        .filter_map(|result| result.ok())
1120                        .collect();
1121
1122                    for mut file_findings in file_findings {
1123                        findings.append(&mut file_findings);
1124                    }
1125                }
1126            }
1127        }
1128
1129        info!("🔍 Found {} code security findings", findings.len());
1130        Ok(findings)
1131    }
1132
1133    /// Analyze infrastructure configurations with appropriate progress tracking
1134    fn analyze_infrastructure_security_with_progress(
1135        &self,
1136        project_root: &Path,
1137        _technologies: &[DetectedTechnology],
1138        multi_progress: &MultiProgress,
1139    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1140        debug!("Analyzing infrastructure security");
1141        let mut findings = Vec::new();
1142
1143        let is_verbose = log::max_level() >= log::LevelFilter::Info;
1144
1145        // Create appropriate progress indicator
1146        let infra_pb = if is_verbose {
1147            // Verbose mode: No spinner to avoid conflicts with logs
1148            None
1149        } else {
1150            // Normal mode: Show spinner
1151            let pb = multi_progress.add(ProgressBar::new_spinner());
1152            pb.set_style(
1153                ProgressStyle::default_spinner()
1154                    .template("  🏗️  {msg} {spinner:.magenta}")
1155                    .unwrap()
1156                    .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ "),
1157            );
1158            pb.enable_steady_tick(std::time::Duration::from_millis(100));
1159            Some(pb)
1160        };
1161
1162        // Check Dockerfile security
1163        if let Some(ref pb) = infra_pb {
1164            pb.set_message("Checking Dockerfiles & Compose files...");
1165        }
1166        findings.extend(self.analyze_dockerfile_security(project_root)?);
1167        findings.extend(self.analyze_compose_security(project_root)?);
1168
1169        // Check CI/CD configurations
1170        if let Some(ref pb) = infra_pb {
1171            pb.set_message("Checking CI/CD configurations...");
1172        }
1173        findings.extend(self.analyze_cicd_security(project_root)?);
1174
1175        // Finish progress tracking
1176        if let Some(pb) = infra_pb {
1177            pb.finish_with_message("✅ Infrastructure analysis complete");
1178        }
1179        info!(
1180            "🔍 Found {} infrastructure security findings",
1181            findings.len()
1182        );
1183
1184        Ok(findings)
1185    }
1186
1187    /// Direct infrastructure security analysis without progress bars
1188    fn analyze_infrastructure_security(
1189        &self,
1190        project_root: &Path,
1191        _technologies: &[DetectedTechnology],
1192    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1193        debug!("Analyzing infrastructure security");
1194        let mut findings = Vec::new();
1195
1196        // Check Dockerfile security
1197        findings.extend(self.analyze_dockerfile_security(project_root)?);
1198        findings.extend(self.analyze_compose_security(project_root)?);
1199
1200        // Check CI/CD configurations
1201        findings.extend(self.analyze_cicd_security(project_root)?);
1202
1203        info!(
1204            "🔍 Found {} infrastructure security findings",
1205            findings.len()
1206        );
1207        Ok(findings)
1208    }
1209
1210    /// Analyze environment variables for security issues
1211    fn analyze_environment_security(&self, env_vars: &[EnvVar]) -> Vec<SecurityFinding> {
1212        let mut findings = Vec::new();
1213
1214        for env_var in env_vars {
1215            // Check for sensitive variable names without proper protection
1216            if self.is_sensitive_env_var(&env_var.name) && env_var.default_value.is_some() {
1217                findings.push(SecurityFinding {
1218                    id: format!("env-{}", env_var.name.to_lowercase()),
1219                    title: "Sensitive Environment Variable with Default Value".to_string(),
1220                    description: format!("Environment variable '{}' appears to contain sensitive data but has a default value", env_var.name),
1221                    severity: SecuritySeverity::Medium,
1222                    category: SecurityCategory::SecretsExposure,
1223                    file_path: None,
1224                    line_number: None,
1225                    column_number: None,
1226                    evidence: Some(format!("Variable: {} = {:?}", env_var.name, env_var.default_value)),
1227                    remediation: vec![
1228                        "Remove default value for sensitive environment variables".to_string(),
1229                        "Use a secure secret management system".to_string(),
1230                        "Document required environment variables separately".to_string(),
1231                    ],
1232                    references: vec![
1233                        "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(),
1234                    ],
1235                    cwe_id: Some("CWE-200".to_string()),
1236                    compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()],
1237                });
1238            }
1239        }
1240
1241        findings
1242    }
1243
1244    /// Analyze framework-specific security configurations with appropriate progress
1245    fn analyze_framework_security_with_progress(
1246        &self,
1247        project_root: &Path,
1248        technologies: &[DetectedTechnology],
1249        multi_progress: &MultiProgress,
1250    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1251        debug!("Analyzing framework-specific security");
1252        let mut findings = Vec::new();
1253
1254        let framework_count = technologies.len();
1255        if framework_count == 0 {
1256            info!("No frameworks detected for security analysis");
1257            return Ok(findings);
1258        }
1259
1260        let is_verbose = log::max_level() >= log::LevelFilter::Info;
1261
1262        info!("🔧 Found {} frameworks to analyze", framework_count);
1263
1264        // Create appropriate progress indicator
1265        let fw_pb = if is_verbose {
1266            // Verbose mode: No spinner to avoid conflicts with logs
1267            None
1268        } else {
1269            // Normal mode: Show spinner
1270            let pb = multi_progress.add(ProgressBar::new_spinner());
1271            pb.set_style(
1272                ProgressStyle::default_spinner()
1273                    .template("  🔧 {msg} {spinner:.cyan}")
1274                    .unwrap()
1275                    .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ "),
1276            );
1277            pb.enable_steady_tick(std::time::Duration::from_millis(120));
1278            Some(pb)
1279        };
1280
1281        for tech in technologies {
1282            if let Some(ref pb) = fw_pb {
1283                pb.set_message(format!("Checking {} configuration...", tech.name));
1284            }
1285
1286            match tech.name.as_str() {
1287                "Express.js" | "Express" => {
1288                    findings.extend(self.analyze_express_security(project_root)?);
1289                }
1290                "Django" => {
1291                    findings.extend(self.analyze_django_security(project_root)?);
1292                }
1293                "Spring Boot" => {
1294                    findings.extend(self.analyze_spring_security(project_root)?);
1295                }
1296                "Next.js" => {
1297                    findings.extend(self.analyze_nextjs_security(project_root)?);
1298                }
1299                // Add more frameworks as needed
1300                _ => {}
1301            }
1302        }
1303
1304        // Finish progress tracking
1305        if let Some(pb) = fw_pb {
1306            pb.finish_with_message("✅ Framework analysis complete");
1307        }
1308        info!("🔍 Found {} framework security findings", findings.len());
1309
1310        Ok(findings)
1311    }
1312
1313    /// Direct framework security analysis without progress bars
1314    fn analyze_framework_security(
1315        &self,
1316        project_root: &Path,
1317        technologies: &[DetectedTechnology],
1318    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1319        debug!("Analyzing framework-specific security");
1320        let mut findings = Vec::new();
1321
1322        let framework_count = technologies.len();
1323        if framework_count == 0 {
1324            info!("No frameworks detected for security analysis");
1325            return Ok(findings);
1326        }
1327
1328        info!("🔧 Found {} frameworks to analyze", framework_count);
1329
1330        for tech in technologies {
1331            match tech.name.as_str() {
1332                "Express.js" | "Express" => {
1333                    findings.extend(self.analyze_express_security(project_root)?);
1334                }
1335                "Django" => {
1336                    findings.extend(self.analyze_django_security(project_root)?);
1337                }
1338                "Spring Boot" => {
1339                    findings.extend(self.analyze_spring_security(project_root)?);
1340                }
1341                "Next.js" => {
1342                    findings.extend(self.analyze_nextjs_security(project_root)?);
1343                }
1344                // Add more frameworks as needed
1345                _ => {}
1346            }
1347        }
1348
1349        info!("🔍 Found {} framework security findings", findings.len());
1350        Ok(findings)
1351    }
1352
1353    // Helper methods for specific analyses...
1354
1355    fn collect_config_files(&self, project_root: &Path) -> Result<Vec<PathBuf>, SecurityError> {
1356        let patterns = vec![
1357            "*.env*",
1358            "*.conf",
1359            "*.config",
1360            "*.ini",
1361            "*.yaml",
1362            "*.yml",
1363            "*.toml",
1364            "docker-compose*.yml",
1365            "Dockerfile*",
1366            ".github/**/*.yml",
1367            ".gitlab-ci.yml",
1368            "package.json",
1369            "requirements.txt",
1370            "Cargo.toml",
1371            "go.mod",
1372            "pom.xml",
1373        ];
1374
1375        let mut files = crate::common::file_utils::find_files_by_patterns(project_root, &patterns)
1376            .map_err(|e| SecurityError::Io(e))?;
1377
1378        // Filter out files matching ignore patterns
1379        files.retain(|file| {
1380            let file_name = file.file_name().and_then(|n| n.to_str()).unwrap_or("");
1381            let file_path = file.to_string_lossy();
1382
1383            !self.config.ignore_patterns.iter().any(|pattern| {
1384                if pattern.contains('*') {
1385                    // Use glob matching for wildcard patterns
1386                    glob::Pattern::new(pattern)
1387                        .map(|p| p.matches(&file_path) || p.matches(file_name))
1388                        .unwrap_or(false)
1389                } else {
1390                    // Exact string matching
1391                    file_path.contains(pattern) || file_name.contains(pattern)
1392                }
1393            })
1394        });
1395
1396        Ok(files)
1397    }
1398
1399    fn analyze_file_for_secrets(
1400        &self,
1401        file_path: &Path,
1402    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1403        let content = fs::read_to_string(file_path)?;
1404        let mut findings = Vec::new();
1405
1406        for (line_num, line) in content.lines().enumerate() {
1407            for pattern in &self.secret_patterns {
1408                if let Some(match_) = pattern.pattern.find(line) {
1409                    // Skip if it looks like a placeholder or example
1410                    if self.is_likely_placeholder(line) {
1411                        continue;
1412                    }
1413
1414                    // NEW: Skip if this is legitimate environment variable usage
1415                    if self.is_legitimate_env_var_usage(line, file_path) {
1416                        debug!("Skipping legitimate env var usage: {}", line.trim());
1417                        continue;
1418                    }
1419
1420                    // Determine severity based on git status
1421                    let (severity, additional_remediation) =
1422                        self.determine_secret_severity(file_path, pattern.severity.clone());
1423
1424                    // Skip if severity is Info (indicates gitignored and should be skipped)
1425                    if self.config.skip_gitignored_files && severity == SecuritySeverity::Info {
1426                        debug!(
1427                            "Skipping secret in gitignored file: {}",
1428                            file_path.display()
1429                        );
1430                        continue;
1431                    }
1432
1433                    // Build base remediation steps
1434                    let mut remediation = vec![
1435                        "Remove sensitive data from source code".to_string(),
1436                        "Use environment variables for secrets".to_string(),
1437                        "Consider using a secure secret management service".to_string(),
1438                    ];
1439
1440                    // Add git-specific remediation based on file status
1441                    remediation.extend(additional_remediation);
1442
1443                    // Add generic gitignore advice if not already covered
1444                    if !self.is_file_gitignored(file_path) && !self.is_file_tracked(file_path) {
1445                        remediation.push(
1446                            "Add this file to .gitignore to prevent accidental commits".to_string(),
1447                        );
1448                    }
1449
1450                    // Create enhanced finding with git-aware severity and remediation
1451                    let mut description = pattern.description.clone();
1452                    if self.is_file_tracked(file_path) {
1453                        description.push_str(" (⚠️  WARNING: File is tracked by git - secrets may be in version history!)");
1454                    } else if self.is_file_gitignored(file_path) {
1455                        description.push_str(" (ℹ️  Note: File is gitignored)");
1456                    }
1457
1458                    findings.push(SecurityFinding {
1459                        id: format!("secret-{}-{}", pattern.name.to_lowercase().replace(' ', "-"), line_num),
1460                        title: format!("Potential {} Exposure", pattern.name),
1461                        description,
1462                        severity,
1463                        category: SecurityCategory::SecretsExposure,
1464                        file_path: Some(file_path.to_path_buf()),
1465                        line_number: Some(line_num + 1),
1466                        column_number: Some(match_.start() + 1), // 1-indexed column position
1467                        evidence: Some(format!("Line: {}", line.trim())),
1468                        remediation,
1469                        references: vec![
1470                            "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(),
1471                        ],
1472                        cwe_id: Some("CWE-200".to_string()),
1473                        compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()],
1474                    });
1475                }
1476            }
1477        }
1478
1479        Ok(findings)
1480    }
1481
1482    /// Check if a line represents legitimate environment variable usage (not a security issue)
1483    fn is_legitimate_env_var_usage(&self, line: &str, file_path: &Path) -> bool {
1484        let line_trimmed = line.trim();
1485
1486        // Check for common legitimate environment variable access patterns
1487        let legitimate_env_patterns = [
1488            // Node.js/JavaScript patterns
1489            r"process\.env\.[A-Z_]+",
1490            r#"process\.env\[['""][A-Z_]+['"]\]"#,
1491            // Vite/Modern JS patterns
1492            r"import\.meta\.env\.[A-Z_]+",
1493            r#"import\.meta\.env\[['""][A-Z_]+['"]\]"#,
1494            // Python patterns
1495            r#"os\.environ\.get\(["'][A-Z_]+["']\)"#,
1496            r#"os\.environ\[["'][A-Z_]+["']\]"#,
1497            r#"getenv\(["'][A-Z_]+["']\)"#,
1498            // Rust patterns
1499            r#"env::var\("([A-Z_]+)"\)"#,
1500            r#"std::env::var\("([A-Z_]+)"\)"#,
1501            // Go patterns
1502            r#"os\.Getenv\(["'][A-Z_]+["']\)"#,
1503            // Java patterns
1504            r#"System\.getenv\(["'][A-Z_]+["']\)"#,
1505            // Shell/Docker patterns
1506            r"\$\{?[A-Z_]+\}?",
1507            r"ENV [A-Z_]+",
1508            // Config file access patterns
1509            r"config\.[a-z_]+\.[A-Z_]+",
1510            r"settings\.[A-Z_]+",
1511            r"env\.[A-Z_]+",
1512        ];
1513
1514        // Check if the line matches any legitimate environment variable access pattern
1515        for pattern_str in &legitimate_env_patterns {
1516            if let Ok(pattern) = Regex::new(pattern_str) {
1517                if pattern.is_match(line_trimmed) {
1518                    // Additional context checks to make sure this is really legitimate
1519
1520                    // Check if this is in a server-side context (not client-side)
1521                    if self.is_server_side_file(file_path) {
1522                        return true;
1523                    }
1524
1525                    // Check if this is NOT a client-side exposed variable
1526                    if !self.is_client_side_exposed_env_var(line_trimmed) {
1527                        return true;
1528                    }
1529                }
1530            }
1531        }
1532
1533        // Check for assignment vs access - assignments might be setting up environment variables
1534        // which could be legitimate in certain contexts
1535        if self.is_env_var_assignment_context(line_trimmed, file_path) {
1536            return true;
1537        }
1538
1539        false
1540    }
1541
1542    /// Check if a file is likely server-side code (vs client-side)
1543    fn is_server_side_file(&self, file_path: &Path) -> bool {
1544        let path_str = file_path.to_string_lossy().to_lowercase();
1545        let file_name = file_path
1546            .file_name()
1547            .and_then(|n| n.to_str())
1548            .unwrap_or("")
1549            .to_lowercase();
1550
1551        // Server-side indicators
1552        let server_indicators = [
1553            "/server/",
1554            "/api/",
1555            "/backend/",
1556            "/src/app/api/",
1557            "/pages/api/",
1558            "/routes/",
1559            "/controllers/",
1560            "/middleware/",
1561            "/models/",
1562            "/lib/",
1563            "/utils/",
1564            "/services/",
1565            "/config/",
1566            "server.js",
1567            "index.js",
1568            "app.js",
1569            "main.js",
1570            ".env",
1571            "dockerfile",
1572            "docker-compose",
1573        ];
1574
1575        // Client-side indicators (these should return false)
1576        let client_indicators = [
1577            "/public/",
1578            "/static/",
1579            "/assets/",
1580            "/components/",
1581            "/pages/",
1582            "/src/components/",
1583            "/src/pages/",
1584            "/client/",
1585            "/frontend/",
1586            "index.html",
1587            ".html",
1588            "/dist/",
1589            "/build/",
1590            "dist/",
1591            "build/",
1592            "public/",
1593            "static/",
1594            "assets/",
1595        ];
1596
1597        // If it's clearly client-side, return false
1598        if client_indicators
1599            .iter()
1600            .any(|indicator| path_str.contains(indicator))
1601        {
1602            return false;
1603        }
1604
1605        // If it has server-side indicators, return true
1606        if server_indicators
1607            .iter()
1608            .any(|indicator| path_str.contains(indicator) || file_name.contains(indicator))
1609        {
1610            return true;
1611        }
1612
1613        // Default to true for ambiguous cases (be conservative about flagging env var usage)
1614        true
1615    }
1616
1617    /// Check if an environment variable is exposed to client-side (security issue)
1618    fn is_client_side_exposed_env_var(&self, line: &str) -> bool {
1619        let client_prefixes = [
1620            "REACT_APP_",
1621            "NEXT_PUBLIC_",
1622            "VUE_APP_",
1623            "VITE_",
1624            "GATSBY_",
1625            "PUBLIC_",
1626            "NUXT_PUBLIC_",
1627        ];
1628
1629        client_prefixes.iter().any(|prefix| line.contains(prefix))
1630    }
1631
1632    /// Check if this is a legitimate environment variable assignment context
1633    fn is_env_var_assignment_context(&self, line: &str, file_path: &Path) -> bool {
1634        let path_str = file_path.to_string_lossy().to_lowercase();
1635        let file_name = file_path
1636            .file_name()
1637            .and_then(|n| n.to_str())
1638            .unwrap_or("")
1639            .to_lowercase();
1640
1641        // Only very specific configuration files where env var assignments are expected
1642        // Be more restrictive to avoid false positives
1643        let env_config_files = [
1644            ".env",
1645            "docker-compose.yml",
1646            "docker-compose.yaml",
1647            ".env.example",
1648            ".env.sample",
1649            ".env.template",
1650            ".env.local",
1651            ".env.development",
1652            ".env.production",
1653            ".env.staging",
1654        ];
1655
1656        // Check for exact filename matches for .env files (most common legitimate case)
1657        if env_config_files.iter().any(|pattern| file_name == *pattern) {
1658            return true;
1659        }
1660
1661        // Docker files are also legitimate for environment variable assignment
1662        if file_name.starts_with("dockerfile") || file_name == "dockerfile" {
1663            return true;
1664        }
1665
1666        // Shell scripts or CI/CD files
1667        if file_name.ends_with(".sh")
1668            || file_name.ends_with(".bash")
1669            || path_str.contains(".github/workflows/")
1670            || path_str.contains(".gitlab-ci")
1671        {
1672            return true;
1673        }
1674
1675        // Lines that are clearly setting up environment variables for child processes
1676        // Only match very specific patterns that indicate legitimate environment setup
1677        let setup_patterns = [
1678            r"export [A-Z_]+=",         // Shell export
1679            r"ENV [A-Z_]+=",            // Dockerfile ENV
1680            r"^\s*environment:\s*$",    // Docker Compose environment section header
1681            r"^\s*env:\s*$",            // Kubernetes env section header
1682            r"process\.env\.[A-Z_]+ =", // Explicitly setting process.env (rare but legitimate)
1683        ];
1684
1685        for pattern_str in &setup_patterns {
1686            if let Ok(pattern) = Regex::new(pattern_str) {
1687                if pattern.is_match(line) {
1688                    return true;
1689                }
1690            }
1691        }
1692
1693        false
1694    }
1695
1696    fn is_likely_placeholder(&self, line: &str) -> bool {
1697        let placeholder_indicators = [
1698            "example",
1699            "placeholder",
1700            "your_",
1701            "insert_",
1702            "replace_",
1703            "xxx",
1704            "yyy",
1705            "zzz",
1706            "fake",
1707            "dummy",
1708            "test_key",
1709            "sk-xxxxxxxx",
1710            "AKIA00000000",
1711        ];
1712
1713        let hash_indicators = [
1714            "checksum",
1715            "hash",
1716            "sha1",
1717            "sha256",
1718            "md5",
1719            "commit",
1720            "fingerprint",
1721            "digest",
1722            "advisory",
1723            "ghsa-",
1724            "cve-",
1725            "rustc_fingerprint",
1726            "last-commit",
1727            "references",
1728        ];
1729
1730        let line_lower = line.to_lowercase();
1731
1732        // Check for placeholder indicators
1733        if placeholder_indicators
1734            .iter()
1735            .any(|indicator| line_lower.contains(indicator))
1736        {
1737            return true;
1738        }
1739
1740        // Check for hash/checksum context
1741        if hash_indicators
1742            .iter()
1743            .any(|indicator| line_lower.contains(indicator))
1744        {
1745            return true;
1746        }
1747
1748        // Check if it's a URL or path (often contains hash-like strings)
1749        if line_lower.contains("http") || line_lower.contains("github.com") {
1750            return true;
1751        }
1752
1753        // Check if it's likely a hex-only string (git commits, checksums)
1754        if let Some(potential_hash) = self.extract_potential_hash(line) {
1755            if potential_hash.len() >= 32 && self.is_hex_only(&potential_hash) {
1756                return true; // Likely a SHA hash
1757            }
1758        }
1759
1760        false
1761    }
1762
1763    fn extract_potential_hash(&self, line: &str) -> Option<String> {
1764        // Look for quoted strings that might be hashes
1765        if let Some(start) = line.find('"') {
1766            if let Some(end) = line[start + 1..].find('"') {
1767                let potential = &line[start + 1..start + 1 + end];
1768                if potential.len() >= 32 {
1769                    return Some(potential.to_string());
1770                }
1771            }
1772        }
1773        None
1774    }
1775
1776    fn is_hex_only(&self, s: &str) -> bool {
1777        s.chars().all(|c| c.is_ascii_hexdigit())
1778    }
1779
1780    fn is_sensitive_env_var(&self, name: &str) -> bool {
1781        let sensitive_patterns = [
1782            "password",
1783            "secret",
1784            "key",
1785            "token",
1786            "auth",
1787            "api",
1788            "private",
1789            "credential",
1790            "cert",
1791            "ssl",
1792            "tls",
1793        ];
1794
1795        let name_lower = name.to_lowercase();
1796        sensitive_patterns
1797            .iter()
1798            .any(|pattern| name_lower.contains(pattern))
1799    }
1800
1801    // Placeholder implementations for specific framework analysis
1802    fn analyze_express_security(
1803        &self,
1804        _project_root: &Path,
1805    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1806        // TODO: Implement Express.js specific security checks
1807        Ok(vec![])
1808    }
1809
1810    fn analyze_django_security(
1811        &self,
1812        _project_root: &Path,
1813    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1814        // TODO: Implement Django specific security checks
1815        Ok(vec![])
1816    }
1817
1818    fn analyze_spring_security(
1819        &self,
1820        _project_root: &Path,
1821    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1822        // TODO: Implement Spring Boot specific security checks
1823        Ok(vec![])
1824    }
1825
1826    fn analyze_nextjs_security(
1827        &self,
1828        _project_root: &Path,
1829    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1830        // TODO: Implement Next.js specific security checks
1831        Ok(vec![])
1832    }
1833
1834    fn analyze_dockerfile_security(
1835        &self,
1836        _project_root: &Path,
1837    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1838        // TODO: Implement Dockerfile security analysis
1839        Ok(vec![])
1840    }
1841
1842    fn analyze_compose_security(
1843        &self,
1844        _project_root: &Path,
1845    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1846        // TODO: Implement Docker Compose security analysis
1847        Ok(vec![])
1848    }
1849
1850    fn analyze_cicd_security(
1851        &self,
1852        _project_root: &Path,
1853    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1854        // TODO: Implement CI/CD security analysis
1855        Ok(vec![])
1856    }
1857
1858    // Additional helper methods...
1859    fn collect_source_files(
1860        &self,
1861        _project_root: &Path,
1862        _language: &str,
1863    ) -> Result<Vec<PathBuf>, SecurityError> {
1864        // TODO: Implement source file collection based on language
1865        Ok(vec![])
1866    }
1867
1868    fn analyze_file_with_rules(
1869        &self,
1870        _file_path: &Path,
1871        _rules: &[SecurityRule],
1872    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1873        // TODO: Implement rule-based file analysis
1874        Ok(vec![])
1875    }
1876
1877    fn check_insecure_configurations(
1878        &self,
1879        _project_root: &Path,
1880    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1881        // TODO: Implement insecure configuration checks
1882        Ok(vec![])
1883    }
1884
1885    /// Deduplicate findings to avoid multiple reports for the same secret/issue
1886    fn deduplicate_findings(&self, mut findings: Vec<SecurityFinding>) -> Vec<SecurityFinding> {
1887        use std::collections::HashSet;
1888
1889        let mut seen_secrets: HashSet<String> = HashSet::new();
1890        let mut deduplicated = Vec::new();
1891
1892        // Sort by priority: more specific patterns first, then by severity
1893        findings.sort_by(|a, b| {
1894            // First, prioritize specific patterns over generic ones
1895            let a_priority = self.get_pattern_priority(&a.title);
1896            let b_priority = self.get_pattern_priority(&b.title);
1897
1898            match a_priority.cmp(&b_priority) {
1899                std::cmp::Ordering::Equal => {
1900                    // If same priority, sort by severity (most critical first)
1901                    a.severity.cmp(&b.severity)
1902                }
1903                other => other,
1904            }
1905        });
1906
1907        for finding in findings {
1908            let key = self.generate_finding_key(&finding);
1909
1910            if !seen_secrets.contains(&key) {
1911                seen_secrets.insert(key);
1912                deduplicated.push(finding);
1913            }
1914        }
1915
1916        deduplicated
1917    }
1918
1919    /// Generate a unique key for deduplication based on the type of finding
1920    fn generate_finding_key(&self, finding: &SecurityFinding) -> String {
1921        match finding.category {
1922            SecurityCategory::SecretsExposure => {
1923                // For secrets, deduplicate based on file path and the actual secret content
1924                if let Some(evidence) = &finding.evidence {
1925                    if let Some(file_path) = &finding.file_path {
1926                        // Extract the secret value from the evidence line
1927                        if let Some(secret_value) = self.extract_secret_value(evidence) {
1928                            return format!("secret:{}:{}", file_path.display(), secret_value);
1929                        }
1930                        // Fallback to file + line if we can't extract the value
1931                        if let Some(line_num) = finding.line_number {
1932                            return format!("secret:{}:{}", file_path.display(), line_num);
1933                        }
1934                    }
1935                }
1936                // Fallback for environment variables or other secrets without file paths
1937                format!("secret:{}", finding.title)
1938            }
1939            _ => {
1940                // For non-secret findings, use file path + line number + title
1941                if let Some(file_path) = &finding.file_path {
1942                    if let Some(line_num) = finding.line_number {
1943                        format!(
1944                            "other:{}:{}:{}",
1945                            file_path.display(),
1946                            line_num,
1947                            finding.title
1948                        )
1949                    } else {
1950                        format!("other:{}:{}", file_path.display(), finding.title)
1951                    }
1952                } else {
1953                    format!("other:{}", finding.title)
1954                }
1955            }
1956        }
1957    }
1958
1959    /// Extract secret value from evidence line for deduplication
1960    fn extract_secret_value(&self, evidence: &str) -> Option<String> {
1961        // Look for patterns like "KEY=value" or "KEY: value"
1962        if let Some(pos) = evidence.find('=') {
1963            let value = evidence[pos + 1..].trim();
1964            // Remove quotes if present
1965            let value = value.trim_matches('"').trim_matches('\'');
1966            if value.len() > 10 {
1967                // Only consider substantial values
1968                return Some(value.to_string());
1969            }
1970        }
1971
1972        // Look for patterns like "key: value" in YAML/JSON
1973        if let Some(pos) = evidence.find(':') {
1974            let value = evidence[pos + 1..].trim();
1975            let value = value.trim_matches('"').trim_matches('\'');
1976            if value.len() > 10 {
1977                return Some(value.to_string());
1978            }
1979        }
1980
1981        None
1982    }
1983
1984    /// Get pattern priority for deduplication (lower number = higher priority)
1985    fn get_pattern_priority(&self, title: &str) -> u8 {
1986        // Most specific patterns get highest priority (lowest number)
1987        if title.contains("AWS Access Key") {
1988            return 1;
1989        }
1990        if title.contains("AWS Secret Key") {
1991            return 1;
1992        }
1993        if title.contains("S3 Secret Key") {
1994            return 1;
1995        }
1996        if title.contains("GitHub Token") {
1997            return 1;
1998        }
1999        if title.contains("OpenAI API Key") {
2000            return 1;
2001        }
2002        if title.contains("Stripe") {
2003            return 1;
2004        }
2005        if title.contains("RSA Private Key") {
2006            return 1;
2007        }
2008        if title.contains("SSH Private Key") {
2009            return 1;
2010        }
2011
2012        // JWT and specific API keys are more specific than generic
2013        if title.contains("JWT Secret") {
2014            return 2;
2015        }
2016        if title.contains("Database URL") {
2017            return 2;
2018        }
2019
2020        // Generic API key patterns are less specific
2021        if title.contains("API Key") {
2022            return 3;
2023        }
2024
2025        // Environment variable findings are less specific
2026        if title.contains("Environment Variable") {
2027            return 4;
2028        }
2029
2030        // Generic patterns get lowest priority (highest number)
2031        if title.contains("Generic Secret") {
2032            return 5;
2033        }
2034
2035        // Default priority for other patterns
2036        3
2037    }
2038
2039    fn count_by_severity(&self, findings: &[SecurityFinding]) -> HashMap<SecuritySeverity, usize> {
2040        let mut counts = HashMap::new();
2041        for finding in findings {
2042            *counts.entry(finding.severity.clone()).or_insert(0) += 1;
2043        }
2044        counts
2045    }
2046
2047    fn count_by_category(&self, findings: &[SecurityFinding]) -> HashMap<SecurityCategory, usize> {
2048        let mut counts = HashMap::new();
2049        for finding in findings {
2050            *counts.entry(finding.category.clone()).or_insert(0) += 1;
2051        }
2052        counts
2053    }
2054
2055    fn calculate_security_score(&self, findings: &[SecurityFinding]) -> f32 {
2056        if findings.is_empty() {
2057            return 100.0;
2058        }
2059
2060        let total_penalty = findings
2061            .iter()
2062            .map(|f| match f.severity {
2063                SecuritySeverity::Critical => 25.0,
2064                SecuritySeverity::High => 15.0,
2065                SecuritySeverity::Medium => 8.0,
2066                SecuritySeverity::Low => 3.0,
2067                SecuritySeverity::Info => 1.0,
2068            })
2069            .sum::<f32>();
2070
2071        (100.0 - total_penalty).max(0.0)
2072    }
2073
2074    fn determine_risk_level(&self, findings: &[SecurityFinding]) -> SecuritySeverity {
2075        if findings
2076            .iter()
2077            .any(|f| f.severity == SecuritySeverity::Critical)
2078        {
2079            SecuritySeverity::Critical
2080        } else if findings
2081            .iter()
2082            .any(|f| f.severity == SecuritySeverity::High)
2083        {
2084            SecuritySeverity::High
2085        } else if findings
2086            .iter()
2087            .any(|f| f.severity == SecuritySeverity::Medium)
2088        {
2089            SecuritySeverity::Medium
2090        } else if !findings.is_empty() {
2091            SecuritySeverity::Low
2092        } else {
2093            SecuritySeverity::Info
2094        }
2095    }
2096
2097    fn assess_compliance(
2098        &self,
2099        _findings: &[SecurityFinding],
2100        _technologies: &[DetectedTechnology],
2101    ) -> HashMap<String, ComplianceStatus> {
2102        // TODO: Implement compliance assessment
2103        HashMap::new()
2104    }
2105
2106    fn generate_recommendations(
2107        &self,
2108        findings: &[SecurityFinding],
2109        _technologies: &[DetectedTechnology],
2110    ) -> Vec<String> {
2111        let mut recommendations = Vec::new();
2112
2113        if findings
2114            .iter()
2115            .any(|f| f.category == SecurityCategory::SecretsExposure)
2116        {
2117            recommendations.push("Implement a secure secret management strategy".to_string());
2118        }
2119
2120        if findings
2121            .iter()
2122            .any(|f| f.severity == SecuritySeverity::Critical)
2123        {
2124            recommendations.push("Address critical security findings immediately".to_string());
2125        }
2126
2127        recommendations
2128    }
2129}
2130
2131#[cfg(test)]
2132mod tests {
2133    use super::*;
2134
2135    #[test]
2136    fn test_security_score_calculation() {
2137        let analyzer = SecurityAnalyzer::new().unwrap();
2138
2139        let findings = vec![SecurityFinding {
2140            id: "test-1".to_string(),
2141            title: "Test Critical".to_string(),
2142            description: "Test".to_string(),
2143            severity: SecuritySeverity::Critical,
2144            category: SecurityCategory::SecretsExposure,
2145            file_path: None,
2146            line_number: None,
2147            column_number: None,
2148            evidence: None,
2149            remediation: vec![],
2150            references: vec![],
2151            cwe_id: None,
2152            compliance_frameworks: vec![],
2153        }];
2154
2155        let score = analyzer.calculate_security_score(&findings);
2156        assert_eq!(score, 75.0); // 100 - 25 (critical penalty)
2157    }
2158
2159    #[test]
2160    fn test_secret_pattern_matching() {
2161        let analyzer = SecurityAnalyzer::new().unwrap();
2162
2163        // Test if placeholder detection works
2164        assert!(analyzer.is_likely_placeholder("API_KEY=sk-xxxxxxxxxxxxxxxx"));
2165        assert!(!analyzer.is_likely_placeholder("API_KEY=sk-1234567890abcdef"));
2166    }
2167
2168    #[test]
2169    fn test_sensitive_env_var_detection() {
2170        let analyzer = SecurityAnalyzer::new().unwrap();
2171
2172        assert!(analyzer.is_sensitive_env_var("DATABASE_PASSWORD"));
2173        assert!(analyzer.is_sensitive_env_var("JWT_SECRET"));
2174        assert!(!analyzer.is_sensitive_env_var("PORT"));
2175        assert!(!analyzer.is_sensitive_env_var("NODE_ENV"));
2176    }
2177
2178    #[test]
2179    fn test_gitignore_aware_severity() {
2180        use std::fs;
2181        use std::process::Command;
2182        use tempfile::TempDir;
2183
2184        let temp_dir = TempDir::new().unwrap();
2185        let project_root = temp_dir.path();
2186
2187        // Initialize a real git repo
2188        let git_init = Command::new("git")
2189            .args(&["init"])
2190            .current_dir(project_root)
2191            .output();
2192
2193        // Skip test if git is not available
2194        if git_init.is_err() {
2195            println!("Skipping gitignore test - git not available");
2196            return;
2197        }
2198
2199        // Create .gitignore file
2200        fs::write(project_root.join(".gitignore"), ".env\n.env.local\n").unwrap();
2201
2202        // Stage and commit .gitignore to make it effective
2203        let _ = Command::new("git")
2204            .args(&["add", ".gitignore"])
2205            .current_dir(project_root)
2206            .output();
2207        let _ = Command::new("git")
2208            .args(&["config", "user.email", "test@example.com"])
2209            .current_dir(project_root)
2210            .output();
2211        let _ = Command::new("git")
2212            .args(&["config", "user.name", "Test User"])
2213            .current_dir(project_root)
2214            .output();
2215        let _ = Command::new("git")
2216            .args(&["commit", "-m", "Add gitignore"])
2217            .current_dir(project_root)
2218            .output();
2219
2220        let mut analyzer = SecurityAnalyzer::new().unwrap();
2221        analyzer.project_root = Some(project_root.to_path_buf());
2222
2223        // Test file that would be gitignored
2224        let env_file = project_root.join(".env");
2225        fs::write(&env_file, "API_KEY=sk-1234567890abcdef").unwrap();
2226
2227        // Test severity determination for gitignored file
2228        let (severity, remediation) =
2229            analyzer.determine_secret_severity(&env_file, SecuritySeverity::High);
2230
2231        // With default config, gitignored files should be marked as Info (skipped)
2232        assert_eq!(severity, SecuritySeverity::Info);
2233        assert!(remediation.iter().any(|r| r.contains("gitignored")));
2234    }
2235
2236    #[test]
2237    fn test_gitignore_config_options() {
2238        let mut config = SecurityAnalysisConfig::default();
2239
2240        // Test default configuration
2241        assert!(config.skip_gitignored_files);
2242        assert!(!config.downgrade_gitignored_severity);
2243
2244        // Test downgrade mode
2245        config.skip_gitignored_files = false;
2246        config.downgrade_gitignored_severity = true;
2247
2248        let _analyzer = SecurityAnalyzer::with_config(config).unwrap();
2249        // Additional test logic could be added here for downgrade behavior
2250    }
2251
2252    #[test]
2253    fn test_gitignore_pattern_matching() {
2254        let analyzer = SecurityAnalyzer::new().unwrap();
2255
2256        // Test wildcard patterns - *.env matches files ending with .env
2257        assert!(!analyzer.matches_gitignore_pattern("*.env", ".env.local", ".env.local")); // Doesn't end with .env
2258        assert!(analyzer.matches_gitignore_pattern("*.env", "production.env", "production.env")); // Ends with .env
2259        assert!(analyzer.matches_gitignore_pattern(".env*", ".env.production", ".env.production")); // Starts with .env
2260        assert!(analyzer.matches_gitignore_pattern("*.log", "app.log", "app.log"));
2261
2262        // Test exact patterns
2263        assert!(analyzer.matches_gitignore_pattern(".env", ".env", ".env"));
2264        assert!(!analyzer.matches_gitignore_pattern(".env", ".env.local", ".env.local"));
2265
2266        // Test directory patterns
2267        assert!(analyzer.matches_gitignore_pattern("/config.json", "config.json", "config.json"));
2268        assert!(!analyzer.matches_gitignore_pattern(
2269            "/config.json",
2270            "src/config.json",
2271            "config.json"
2272        ));
2273
2274        // Test common .env patterns that should work
2275        assert!(analyzer.matches_gitignore_pattern(".env*", ".env", ".env"));
2276        assert!(analyzer.matches_gitignore_pattern(".env*", ".env.local", ".env.local"));
2277        assert!(analyzer.matches_gitignore_pattern(".env.*", ".env.production", ".env.production"));
2278    }
2279
2280    #[test]
2281    fn test_common_env_patterns() {
2282        let analyzer = SecurityAnalyzer::new().unwrap();
2283
2284        // Should match common .env files
2285        assert!(analyzer.matches_common_env_patterns(".env"));
2286        assert!(analyzer.matches_common_env_patterns(".env.local"));
2287        assert!(analyzer.matches_common_env_patterns(".env.production"));
2288        assert!(analyzer.matches_common_env_patterns(".env.development"));
2289        assert!(analyzer.matches_common_env_patterns(".env.test"));
2290
2291        // Should NOT match example/template files (usually committed)
2292        assert!(!analyzer.matches_common_env_patterns(".env.example"));
2293        assert!(!analyzer.matches_common_env_patterns(".env.sample"));
2294        assert!(!analyzer.matches_common_env_patterns(".env.template"));
2295
2296        // Should not match non-env files
2297        assert!(!analyzer.matches_common_env_patterns("config.json"));
2298        assert!(!analyzer.matches_common_env_patterns("package.json"));
2299    }
2300
2301    #[test]
2302    fn test_legitimate_env_var_usage() {
2303        let analyzer = SecurityAnalyzer::new().unwrap();
2304
2305        // Create mock file paths
2306        let server_file = Path::new("src/server/config.js");
2307        let client_file = Path::new("src/components/MyComponent.js");
2308
2309        // Test legitimate server-side environment variable usage (should NOT be flagged)
2310        assert!(analyzer.is_legitimate_env_var_usage(
2311            "const apiKey = process.env.RESEND_API_KEY;",
2312            server_file
2313        ));
2314        assert!(
2315            analyzer.is_legitimate_env_var_usage(
2316                "const dbUrl = process.env.DATABASE_URL;",
2317                server_file
2318            )
2319        );
2320        assert!(
2321            analyzer
2322                .is_legitimate_env_var_usage("api_key = os.environ.get('API_KEY')", server_file)
2323        );
2324        assert!(
2325            analyzer.is_legitimate_env_var_usage(
2326                "let secret = env::var(\"JWT_SECRET\")?;",
2327                server_file
2328            )
2329        );
2330
2331        // Test client-side environment variable usage (legitimate if not exposed)
2332        assert!(
2333            analyzer
2334                .is_legitimate_env_var_usage("const apiUrl = process.env.API_URL;", client_file)
2335        );
2336
2337        // Test client-side exposed variables (these ARE client-side exposed - security issues)
2338        assert!(analyzer.is_client_side_exposed_env_var("process.env.REACT_APP_SECRET_KEY"));
2339        assert!(analyzer.is_client_side_exposed_env_var("process.env.NEXT_PUBLIC_API_SECRET"));
2340
2341        // Test hardcoded secrets (should NOT be legitimate)
2342        assert!(
2343            !analyzer
2344                .is_legitimate_env_var_usage("const apiKey = 'sk-1234567890abcdef';", server_file)
2345        );
2346        assert!(!analyzer.is_legitimate_env_var_usage("password = 'hardcoded123'", server_file));
2347    }
2348
2349    #[test]
2350    fn test_server_vs_client_side_detection() {
2351        let analyzer = SecurityAnalyzer::new().unwrap();
2352
2353        // Server-side files
2354        assert!(analyzer.is_server_side_file(Path::new("src/server/app.js")));
2355        assert!(analyzer.is_server_side_file(Path::new("src/api/users.js")));
2356        assert!(analyzer.is_server_side_file(Path::new("pages/api/auth.js")));
2357        assert!(analyzer.is_server_side_file(Path::new("src/lib/database.js")));
2358        assert!(analyzer.is_server_side_file(Path::new(".env")));
2359        assert!(analyzer.is_server_side_file(Path::new("server.js")));
2360
2361        // Client-side files
2362        assert!(!analyzer.is_server_side_file(Path::new("src/components/Button.jsx")));
2363        assert!(!analyzer.is_server_side_file(Path::new("public/index.html")));
2364        assert!(!analyzer.is_server_side_file(Path::new("src/pages/home.js")));
2365        assert!(!analyzer.is_server_side_file(Path::new("dist/bundle.js")));
2366
2367        // Ambiguous files (default to server-side for conservative detection)
2368        assert!(analyzer.is_server_side_file(Path::new("src/utils/helper.js")));
2369        assert!(analyzer.is_server_side_file(Path::new("config/settings.js")));
2370    }
2371
2372    #[test]
2373    fn test_client_side_exposed_env_vars() {
2374        let analyzer = SecurityAnalyzer::new().unwrap();
2375
2376        // These should be flagged as client-side exposed (security issues)
2377        assert!(analyzer.is_client_side_exposed_env_var("process.env.REACT_APP_SECRET"));
2378        assert!(analyzer.is_client_side_exposed_env_var("import.meta.env.VITE_API_KEY"));
2379        assert!(analyzer.is_client_side_exposed_env_var("process.env.NEXT_PUBLIC_SECRET"));
2380        assert!(analyzer.is_client_side_exposed_env_var("process.env.VUE_APP_TOKEN"));
2381
2382        // These should NOT be flagged as client-side exposed
2383        assert!(!analyzer.is_client_side_exposed_env_var("process.env.DATABASE_URL"));
2384        assert!(!analyzer.is_client_side_exposed_env_var("process.env.JWT_SECRET"));
2385        assert!(!analyzer.is_client_side_exposed_env_var("process.env.API_KEY"));
2386    }
2387
2388    #[test]
2389    fn test_env_var_assignment_context() {
2390        let analyzer = SecurityAnalyzer::new().unwrap();
2391
2392        // Configuration files where assignments are legitimate
2393        assert!(analyzer.is_env_var_assignment_context("API_KEY=sk-test123", Path::new(".env")));
2394        assert!(analyzer.is_env_var_assignment_context(
2395            "DATABASE_URL=postgres://",
2396            Path::new("docker-compose.yml")
2397        ));
2398        assert!(
2399            analyzer.is_env_var_assignment_context("export SECRET=test", Path::new("setup.sh"))
2400        );
2401
2402        // Regular source files where assignments might be suspicious
2403        assert!(
2404            !analyzer.is_env_var_assignment_context(
2405                "const secret = 'hardcoded'",
2406                Path::new("src/app.js")
2407            )
2408        );
2409    }
2410
2411    #[test]
2412    fn test_enhanced_secret_patterns() {
2413        let analyzer = SecurityAnalyzer::new().unwrap();
2414
2415        // Test that hardcoded secrets are still detected
2416        let hardcoded_patterns = [
2417            "apikey = 'sk-1234567890abcdef1234567890abcdef12345678'",
2418            "const secret = 'my-super-secret-token-12345678901234567890'",
2419            "password = 'hardcoded123456'",
2420        ];
2421
2422        for pattern in &hardcoded_patterns {
2423            let has_secret = analyzer
2424                .secret_patterns
2425                .iter()
2426                .any(|sp| sp.pattern.is_match(pattern));
2427            assert!(has_secret, "Should detect hardcoded secret in: {}", pattern);
2428        }
2429
2430        // Test that legitimate env var usage is NOT detected as secret
2431        let legitimate_patterns = [
2432            "const apiKey = process.env.API_KEY;",
2433            "const dbUrl = process.env.DATABASE_URL || 'fallback';",
2434            "api_key = os.environ.get('API_KEY')",
2435            "let secret = env::var(\"JWT_SECRET\")?;",
2436        ];
2437
2438        for pattern in &legitimate_patterns {
2439            // These should either not match any secret pattern, or be filtered out by context detection
2440            let _matches_old_generic_pattern =
2441                pattern.to_lowercase().contains("secret") || pattern.to_lowercase().contains("key");
2442
2443            // Our new patterns should be more specific and not match env var access
2444            let matches_new_patterns = analyzer
2445                .secret_patterns
2446                .iter()
2447                .filter(|sp| sp.name.contains("Hardcoded"))
2448                .any(|sp| sp.pattern.is_match(pattern));
2449
2450            assert!(
2451                !matches_new_patterns,
2452                "Should NOT detect legitimate env var usage as hardcoded secret: {}",
2453                pattern
2454            );
2455        }
2456    }
2457
2458    #[test]
2459    fn test_context_aware_false_positive_reduction() {
2460        use tempfile::TempDir;
2461
2462        let temp_dir = TempDir::new().unwrap();
2463        let server_file = temp_dir.path().join("src/server/config.js");
2464
2465        // Create directory structure
2466        std::fs::create_dir_all(server_file.parent().unwrap()).unwrap();
2467
2468        // Write a file with legitimate environment variable usage
2469        let content = r#"
2470const config = {
2471    apiKey: process.env.RESEND_API_KEY,
2472    databaseUrl: process.env.DATABASE_URL,
2473    jwtSecret: process.env.JWT_SECRET,
2474    port: process.env.PORT || 3000
2475};
2476"#;
2477
2478        std::fs::write(&server_file, content).unwrap();
2479
2480        let analyzer = SecurityAnalyzer::new().unwrap();
2481        let findings = analyzer.analyze_file_for_secrets(&server_file).unwrap();
2482
2483        // Should have zero findings because all are legitimate env var usage
2484        assert_eq!(
2485            findings.len(),
2486            0,
2487            "Should not flag legitimate environment variable usage as security issues"
2488        );
2489    }
2490}