syncable_cli/analyzer/
security_analyzer.rs

1//! # Security Analyzer
2//!
3//! Comprehensive security analysis module that performs multi-layered security assessment:
4//! - Configuration security analysis (secrets, insecure settings)
5//! - Code security patterns (language/framework-specific issues)
6//! - Infrastructure security (Docker, compose configurations)
7//! - Security policy recommendations and compliance guidance
8//! - Security scoring with actionable remediation steps
9
10use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
11use log::{debug, info};
12use rayon::prelude::*;
13use regex::Regex;
14use serde::{Deserialize, Serialize};
15use std::collections::HashMap;
16use std::fs;
17use std::path::{Path, PathBuf};
18use std::process::Command;
19use std::time::Instant;
20use thiserror::Error;
21
22use crate::analyzer::dependency_parser::Language;
23use crate::analyzer::{DetectedLanguage, DetectedTechnology, EnvVar, ProjectAnalysis};
24
/// Errors surfaced by the security analyzer.
///
/// `Display` output comes from the `#[error(...)]` attributes; the `Io` and
/// `Regex` variants can be created implicitly through `?` thanks to `#[from]`.
#[derive(Debug, Error)]
pub enum SecurityError {
    /// Generic analysis failure carrying a free-form message.
    #[error("Security analysis failed: {0}")]
    AnalysisFailed(String),

    /// Failure attributed to configuration analysis, with a free-form message.
    #[error("Configuration analysis error: {0}")]
    ConfigAnalysisError(String),

    /// Failure attributed to code pattern analysis, with a free-form message.
    #[error("Code pattern analysis error: {0}")]
    CodePatternError(String),

    /// Failure attributed to infrastructure analysis, with a free-form message.
    #[error("Infrastructure analysis error: {0}")]
    InfrastructureError(String),

    /// Wrapped [`std::io::Error`].
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Wrapped [`regex::Error`], e.g. from compiling a detection pattern.
    #[error("Regex error: {0}")]
    Regex(#[from] regex::Error),
}
45
/// Security finding severity levels.
///
/// Variants are declared from most to least severe. The derived `Ord`
/// follows declaration order, so `Critical` compares as the smallest value
/// and an ascending sort lists the most severe findings first.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SecuritySeverity {
    /// Most severe level.
    Critical,
    /// Second most severe level.
    High,
    /// Middle level.
    Medium,
    /// Low level; removed from reports unless `include_low_severity` is set.
    Low,
    /// Informational level; removed from reports unless `include_low_severity` is set.
    Info,
}
55
/// Categories of security findings.
///
/// Derives `Eq` and `Hash` so it can key the per-category counters in
/// [`SecurityReport::findings_by_category`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum SecurityCategory {
    /// Exposed secrets, API keys, passwords
    SecretsExposure,
    /// Insecure configuration settings
    InsecureConfiguration,
    /// Language/framework-specific security patterns
    CodeSecurityPattern,
    /// Infrastructure and deployment security
    InfrastructureSecurity,
    /// Authentication and authorization issues
    AuthenticationSecurity,
    /// Data protection and privacy concerns
    DataProtection,
    /// Network and communication security
    NetworkSecurity,
    /// Compliance and regulatory requirements
    Compliance,
}
76
/// A security finding with details and remediation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecurityFinding {
    /// Identifier of the finding (presumably the rule or pattern id that produced it).
    pub id: String,
    /// Short human-readable title.
    pub title: String,
    /// Longer explanation of the issue.
    pub description: String,
    /// Severity assigned to this finding.
    pub severity: SecuritySeverity,
    /// Category the finding belongs to.
    pub category: SecurityCategory,
    /// File the finding refers to, when it is tied to a file.
    pub file_path: Option<PathBuf>,
    /// Line of the finding within `file_path`, when known.
    pub line_number: Option<usize>,
    /// Column of the finding within the line, when known.
    pub column_number: Option<usize>,
    /// Supporting excerpt or detail for the finding, when available.
    pub evidence: Option<String>,
    /// Suggested remediation steps.
    pub remediation: Vec<String>,
    /// Further reading for the finding.
    pub references: Vec<String>,
    /// CWE identifier such as `"CWE-95"`, when one applies.
    pub cwe_id: Option<String>,
    /// Names of compliance frameworks this finding relates to.
    pub compliance_frameworks: Vec<String>,
}
94
/// Comprehensive security analysis report.
#[derive(Debug, Serialize, Deserialize)]
pub struct SecurityReport {
    /// UTC timestamp taken when the report was assembled.
    pub analyzed_at: chrono::DateTime<chrono::Utc>,
    /// Overall security score.
    pub overall_score: f32, // 0-100, higher is better
    /// Aggregate risk level derived from the findings.
    pub risk_level: SecuritySeverity,
    /// Number of entries in `findings`.
    pub total_findings: usize,
    /// Count of findings per severity level.
    pub findings_by_severity: HashMap<SecuritySeverity, usize>,
    /// Count of findings per category.
    pub findings_by_category: HashMap<SecurityCategory, usize>,
    /// The findings themselves, as kept after deduplication and filtering.
    pub findings: Vec<SecurityFinding>,
    /// Recommendations generated from the findings.
    pub recommendations: Vec<String>,
    /// Per-framework compliance status, keyed by a `String`
    /// (presumably the framework name — confirm against the producer).
    pub compliance_status: HashMap<String, ComplianceStatus>,
}
108
/// Compliance framework status.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplianceStatus {
    /// Name of the framework this status describes.
    pub framework: String,
    /// Coverage of the framework's controls, as a percentage.
    pub coverage: f32, // 0-100%
    /// Controls that are not covered.
    pub missing_controls: Vec<String>,
    /// Suggested actions for improving coverage.
    pub recommendations: Vec<String>,
}
117
/// Configuration for security analysis.
///
/// See the `Default` implementation for the out-of-the-box values.
#[derive(Debug, Clone)]
pub struct SecurityAnalysisConfig {
    /// Keep `Low` and `Info` findings in the report instead of filtering them out.
    pub include_low_severity: bool,
    /// Run the configuration/secrets analysis phase.
    pub check_secrets: bool,
    /// Run the code security pattern phase.
    pub check_code_patterns: bool,
    /// Enable infrastructure analysis.
    pub check_infrastructure: bool,
    /// Enable compliance assessment.
    pub check_compliance: bool,
    /// Names of the compliance frameworks to evaluate.
    pub frameworks_to_check: Vec<String>,
    /// Directory names and glob-like patterns to exclude from scanning.
    pub ignore_patterns: Vec<String>,
    /// Whether to skip scanning files that are gitignored
    pub skip_gitignored_files: bool,
    /// Whether to downgrade severity for gitignored files instead of skipping
    pub downgrade_gitignored_severity: bool,
}
133
134impl Default for SecurityAnalysisConfig {
135    fn default() -> Self {
136        Self {
137            include_low_severity: false,
138            check_secrets: true,
139            check_code_patterns: true,
140            check_infrastructure: true,
141            check_compliance: true,
142            frameworks_to_check: vec!["SOC2".to_string(), "GDPR".to_string(), "OWASP".to_string()],
143            ignore_patterns: vec![
144                "node_modules".to_string(),
145                ".git".to_string(),
146                "target".to_string(),
147                "build".to_string(),
148                ".next".to_string(),
149                "dist".to_string(),
150                "test".to_string(),
151                "tests".to_string(),
152                "*.json".to_string(), // Exclude JSON files that often contain hashes
153                "*.lock".to_string(), // Exclude lock files with checksums
154                "*_sample.*".to_string(), // Exclude sample files
155                "*audit*".to_string(), // Exclude audit reports
156            ],
157            skip_gitignored_files: true, // Default to skipping gitignored files
158            downgrade_gitignored_severity: false, // Skip entirely by default
159        }
160    }
161}
162
/// Security analyzer holding its configuration, the compiled detection
/// patterns/rules and per-run git state.
pub struct SecurityAnalyzer {
    /// Settings controlling which phases run and how findings are filtered.
    config: SecurityAnalysisConfig,
    /// Compiled secret-detection patterns.
    secret_patterns: Vec<SecretPattern>,
    /// Compiled code-pattern rules, grouped by language.
    security_rules: HashMap<Language, Vec<SecurityRule>>,
    /// Memoized "is this path gitignored?" answers, keyed by the queried path.
    git_ignore_cache: std::sync::Mutex<HashMap<PathBuf, bool>>,
    /// Root of the project being analyzed; `None` until an analysis sets it.
    project_root: Option<PathBuf>,
}
170
/// Pattern for detecting secrets and sensitive data
struct SecretPattern {
    /// Human-readable name of the secret type (e.g. "AWS Access Key").
    name: String,
    /// Compiled regular expression that detects the secret.
    pattern: Regex,
    /// Baseline severity assigned to matches of this pattern.
    severity: SecuritySeverity,
    /// Description attached to matches of this pattern.
    description: String,
}
178
/// Security rule for code pattern analysis
// `dead_code` is allowed on the whole struct; presumably not every field is
// read yet — confirm before removing the attribute.
#[allow(dead_code)]
struct SecurityRule {
    /// Stable rule identifier such as `"js-001"`.
    id: String,
    /// Short rule name.
    name: String,
    /// Compiled regular expression that flags the risky construct.
    pattern: Regex,
    /// Severity assigned to matches of this rule.
    severity: SecuritySeverity,
    /// Category assigned to matches of this rule.
    category: SecurityCategory,
    /// Explanation of why the construct is risky.
    description: String,
    /// Suggested remediation steps.
    remediation: Vec<String>,
    /// CWE identifier such as `"CWE-79"`, when one applies.
    cwe_id: Option<String>,
}
191
192impl SecurityAnalyzer {
193    pub fn new() -> Result<Self, SecurityError> {
194        Self::with_config(SecurityAnalysisConfig::default())
195    }
196
197    pub fn with_config(config: SecurityAnalysisConfig) -> Result<Self, SecurityError> {
198        let secret_patterns = Self::initialize_secret_patterns()?;
199        let security_rules = Self::initialize_security_rules()?;
200
201        Ok(Self {
202            config,
203            secret_patterns,
204            security_rules,
205            git_ignore_cache: std::sync::Mutex::new(HashMap::new()),
206            project_root: None,
207        })
208    }
209
210    /// Perform comprehensive security analysis with appropriate progress for verbosity level
211    pub fn analyze_security(
212        &mut self,
213        analysis: &ProjectAnalysis,
214    ) -> Result<SecurityReport, SecurityError> {
215        let start_time = Instant::now();
216        info!("Starting comprehensive security analysis");
217
218        // Set project root for gitignore checking
219        self.project_root = Some(analysis.project_root.clone());
220
221        // Check if we're in verbose mode by checking log level
222        let is_verbose = log::max_level() >= log::LevelFilter::Info;
223
224        // Set up progress tracking appropriate for verbosity
225        let multi_progress = MultiProgress::new();
226
227        // In verbose mode, we'll completely skip adding progress bars to avoid visual conflicts
228
229        // Count enabled analysis phases
230        let mut total_phases = 0;
231        if self.config.check_secrets {
232            total_phases += 1;
233        }
234        if self.config.check_code_patterns {
235            total_phases += 1;
236        }
237        if self.config.check_infrastructure {
238            total_phases += 1;
239        }
240        total_phases += 2; // env vars and framework analysis always run
241
242        // Create appropriate progress indicator based on verbosity
243        let main_pb = if is_verbose {
244            None // No main progress bar in verbose mode to avoid conflicts with logs
245        } else {
246            // Normal mode: Rich progress bar
247            let pb = multi_progress.add(ProgressBar::new(100));
248            pb.set_style(
249                ProgressStyle::default_bar()
250                    .template("🛡️  {msg} {bar:50.cyan/blue} {percent}% [{elapsed_precise}]")
251                    .unwrap()
252                    .progress_chars("██▉▊▋▌▍▎▏  "),
253            );
254            Some(pb)
255        };
256
257        let mut findings = Vec::new();
258        let phase_weight = if is_verbose {
259            1u64
260        } else {
261            100 / total_phases as u64
262        };
263        let mut current_progress = 0u64;
264
265        // 1. Configuration Security Analysis
266        if self.config.check_secrets {
267            if let Some(ref pb) = main_pb {
268                pb.set_message("Analyzing configuration & secrets...");
269                pb.set_position(current_progress);
270            }
271
272            if is_verbose {
273                findings.extend(self.analyze_configuration_security(&analysis.project_root)?);
274            } else {
275                findings.extend(self.analyze_configuration_security_with_progress(
276                    &analysis.project_root,
277                    &multi_progress,
278                )?);
279            }
280
281            if let Some(ref pb) = main_pb {
282                current_progress += phase_weight;
283                pb.set_position(current_progress);
284            }
285        }
286
287        // 2. Code Security Patterns
288        if self.config.check_code_patterns {
289            if let Some(ref pb) = main_pb {
290                pb.set_message("Analyzing code security patterns...");
291            }
292
293            if is_verbose {
294                findings.extend(
295                    self.analyze_code_security_patterns(
296                        &analysis.project_root,
297                        &analysis.languages,
298                    )?,
299                );
300            } else {
301                findings.extend(self.analyze_code_security_patterns_with_progress(
302                    &analysis.project_root,
303                    &analysis.languages,
304                    &multi_progress,
305                )?);
306            }
307
308            if let Some(ref pb) = main_pb {
309                current_progress += phase_weight;
310                pb.set_position(current_progress);
311            }
312        }
313
314        // 3. Infrastructure Security (skipped - not implemented yet)
315        // TODO: Implement infrastructure security analysis
316        // Currently all infrastructure analysis methods return empty results
317
318        // 4. Environment Variables Security
319        if let Some(ref pb) = main_pb {
320            pb.set_message("Analyzing environment variables...");
321        }
322
323        findings.extend(self.analyze_environment_security(&analysis.environment_variables));
324        if let Some(ref pb) = main_pb {
325            current_progress += phase_weight;
326            pb.set_position(current_progress);
327        }
328
329        // 5. Framework-specific Security (skipped - not implemented yet)
330        // TODO: Implement framework-specific security analysis
331        // Currently all framework analysis methods return empty results
332
333        if let Some(ref pb) = main_pb {
334            current_progress = 100;
335            pb.set_position(current_progress);
336        }
337
338        // Processing phase
339        if let Some(ref pb) = main_pb {
340            pb.set_message("Processing findings & generating report...");
341        }
342
343        // DEDUPLICATION: Remove duplicate findings for the same secret/issue
344        let pre_dedup_count = findings.len();
345        findings = self.deduplicate_findings(findings);
346        let post_dedup_count = findings.len();
347
348        if pre_dedup_count != post_dedup_count {
349            info!(
350                "Deduplicated {} redundant findings, {} unique findings remain",
351                pre_dedup_count - post_dedup_count,
352                post_dedup_count
353            );
354        }
355
356        // Filter findings based on configuration
357        let pre_filter_count = findings.len();
358        if !self.config.include_low_severity {
359            findings.retain(|f| {
360                f.severity != SecuritySeverity::Low && f.severity != SecuritySeverity::Info
361            });
362        }
363
364        // Sort by severity (most critical first)
365        findings.sort_by(|a, b| a.severity.cmp(&b.severity));
366
367        // Calculate metrics
368        let total_findings = findings.len();
369        let findings_by_severity = self.count_by_severity(&findings);
370        let findings_by_category = self.count_by_category(&findings);
371        let overall_score = self.calculate_security_score(&findings);
372        let risk_level = self.determine_risk_level(&findings);
373
374        // Generate compliance status (disabled - not implemented yet)
375        // TODO: Implement compliance assessment
376        let compliance_status = HashMap::new();
377
378        // Generate recommendations
379        let recommendations = self.generate_recommendations(&findings, &analysis.technologies);
380
381        // Complete with summary
382        let duration = start_time.elapsed().as_secs_f32();
383        if let Some(pb) = main_pb {
384            pb.finish_with_message(format!(
385                "✅ Security analysis completed in {:.1}s - Found {} issues",
386                duration, total_findings
387            ));
388        }
389
390        // Print summary
391        if pre_filter_count != total_findings {
392            info!(
393                "Found {} total findings, showing {} after filtering",
394                pre_filter_count, total_findings
395            );
396        } else {
397            info!("Found {} security findings", total_findings);
398        }
399
400        Ok(SecurityReport {
401            analyzed_at: chrono::Utc::now(),
402            overall_score,
403            risk_level,
404            total_findings,
405            findings_by_severity,
406            findings_by_category,
407            findings,
408            recommendations,
409            compliance_status,
410        })
411    }
412
413    /// Check if a file is gitignored using git check-ignore command
414    fn is_file_gitignored(&self, file_path: &Path) -> bool {
415        // Return false if we don't have project root set
416        let project_root = match &self.project_root {
417            Some(root) => root,
418            None => return false,
419        };
420
421        // Use cache to avoid repeated git calls
422        if let Ok(cache) = self.git_ignore_cache.lock()
423            && let Some(&cached_result) = cache.get(file_path)
424        {
425            return cached_result;
426        }
427
428        // Check if this is a git repository
429        if !project_root.join(".git").exists() {
430            debug!("Not a git repository, treating all files as tracked");
431            return false;
432        }
433
434        // First, try git check-ignore for the most accurate result
435        let git_result = Command::new("git")
436            .args(["check-ignore", "--quiet"])
437            .arg(file_path)
438            .current_dir(project_root)
439            .output()
440            .map(|output| output.status.success())
441            .unwrap_or(false);
442
443        // If git check-ignore says it's ignored, trust it
444        if git_result {
445            if let Ok(mut cache) = self.git_ignore_cache.lock() {
446                cache.insert(file_path.to_path_buf(), true);
447            }
448            return true;
449        }
450
451        // Fallback: Parse .gitignore files manually for common patterns
452        // This helps when git check-ignore might not work perfectly in all scenarios
453        let manual_result = self.check_gitignore_patterns(file_path, project_root);
454
455        // Cache the result (prefer git result, fallback to manual)
456        let final_result = git_result || manual_result;
457        if let Ok(mut cache) = self.git_ignore_cache.lock() {
458            cache.insert(file_path.to_path_buf(), final_result);
459        }
460
461        final_result
462    }
463
464    /// Manually check gitignore patterns as a fallback
465    fn check_gitignore_patterns(&self, file_path: &Path, project_root: &Path) -> bool {
466        // Get relative path from project root
467        let relative_path = match file_path.strip_prefix(project_root) {
468            Ok(rel) => rel,
469            Err(_) => return false,
470        };
471
472        let path_str = relative_path.to_string_lossy();
473        let file_name = relative_path
474            .file_name()
475            .and_then(|n| n.to_str())
476            .unwrap_or("");
477
478        // Read .gitignore file
479        let gitignore_path = project_root.join(".gitignore");
480        if let Ok(gitignore_content) = fs::read_to_string(&gitignore_path) {
481            for line in gitignore_content.lines() {
482                let pattern = line.trim();
483                if pattern.is_empty() || pattern.starts_with('#') {
484                    continue;
485                }
486
487                // Check if this pattern matches our file
488                if self.matches_gitignore_pattern(pattern, &path_str, file_name) {
489                    debug!("File {} matches gitignore pattern: {}", path_str, pattern);
490                    return true;
491                }
492            }
493        }
494
495        // Also check global gitignore patterns for common .env patterns
496        self.matches_common_env_patterns(file_name)
497    }
498
499    /// Check if a file matches a specific gitignore pattern
500    fn matches_gitignore_pattern(&self, pattern: &str, path_str: &str, file_name: &str) -> bool {
501        // Handle different types of patterns
502        if pattern.contains('*') {
503            // Wildcard patterns
504            if let Ok(glob_pattern) = glob::Pattern::new(pattern) {
505                // Try matching both full path and just filename
506                if glob_pattern.matches(path_str) || glob_pattern.matches(file_name) {
507                    return true;
508                }
509            }
510        } else if let Some(abs_pattern) = pattern.strip_prefix('/') {
511            // Absolute path from repo root
512            if path_str == abs_pattern {
513                return true;
514            }
515        } else {
516            // Simple pattern - could match anywhere in path
517            if path_str == pattern
518                || file_name == pattern
519                || path_str.ends_with(&format!("/{}", pattern))
520            {
521                return true;
522            }
523        }
524
525        false
526    }
527
528    /// Check against common .env file patterns that should typically be ignored
529    fn matches_common_env_patterns(&self, file_name: &str) -> bool {
530        let common_env_patterns = [
531            ".env",
532            ".env.local",
533            ".env.development",
534            ".env.production",
535            ".env.staging",
536            ".env.test",
537            ".env.example", // Usually committed but should be treated carefully
538        ];
539
540        // Exact matches
541        if common_env_patterns.contains(&file_name) {
542            return file_name != ".env.example"; // .env.example is usually committed
543        }
544
545        // Pattern matches
546        if file_name.starts_with(".env.")
547            || file_name.ends_with(".env")
548            || (file_name.starts_with(".") && file_name.contains("env"))
549        {
550            // Be conservative - only ignore if it's clearly a local/environment specific file
551            return !file_name.contains("example")
552                && !file_name.contains("sample")
553                && !file_name.contains("template");
554        }
555
556        false
557    }
558
559    /// Check if a file is actually tracked by git
560    fn is_file_tracked(&self, file_path: &Path) -> bool {
561        let project_root = match &self.project_root {
562            Some(root) => root,
563            None => return true, // Assume tracked if no project root
564        };
565
566        // Check if this is a git repository
567        if !project_root.join(".git").exists() {
568            return true; // Not a git repo, treat as tracked
569        }
570
571        // Use git ls-files to check if file is tracked
572        Command::new("git")
573            .args(["ls-files", "--error-unmatch"])
574            .arg(file_path)
575            .current_dir(project_root)
576            .output()
577            .map(|output| output.status.success())
578            .unwrap_or(true) // Default to tracked if git command fails
579    }
580
581    /// Determine the appropriate severity for a secret finding based on git status
582    fn determine_secret_severity(
583        &self,
584        file_path: &Path,
585        original_severity: SecuritySeverity,
586    ) -> (SecuritySeverity, Vec<String>) {
587        let mut additional_remediation = Vec::new();
588
589        // Check if file is gitignored
590        if self.is_file_gitignored(file_path) {
591            if self.config.skip_gitignored_files {
592                // Return Info level to indicate this should be skipped
593                return (
594                    SecuritySeverity::Info,
595                    vec!["File is properly gitignored".to_string()],
596                );
597            } else if self.config.downgrade_gitignored_severity {
598                // Downgrade severity for gitignored files
599                let downgraded = match original_severity {
600                    SecuritySeverity::Critical => SecuritySeverity::Medium,
601                    SecuritySeverity::High => SecuritySeverity::Low,
602                    SecuritySeverity::Medium => SecuritySeverity::Low,
603                    SecuritySeverity::Low => SecuritySeverity::Info,
604                    SecuritySeverity::Info => SecuritySeverity::Info,
605                };
606                additional_remediation
607                    .push("Note: File is gitignored, reducing severity".to_string());
608                return (downgraded, additional_remediation);
609            }
610        }
611
612        // Check if file is tracked by git
613        if !self.is_file_tracked(file_path) {
614            additional_remediation.push(
615                "Ensure this file is added to .gitignore to prevent accidental commits".to_string(),
616            );
617        } else {
618            // File is tracked - this is a serious issue
619            additional_remediation.push(
620                "⚠️  CRITICAL: This file is tracked by git! Secrets may be in version history."
621                    .to_string(),
622            );
623            additional_remediation.push(
624                "Consider using git-filter-branch or BFG Repo-Cleaner to remove from history"
625                    .to_string(),
626            );
627            additional_remediation.push("Rotate any exposed secrets immediately".to_string());
628
629            // Upgrade severity for tracked files
630            let upgraded = match original_severity {
631                SecuritySeverity::High => SecuritySeverity::Critical,
632                SecuritySeverity::Medium => SecuritySeverity::High,
633                SecuritySeverity::Low => SecuritySeverity::Medium,
634                other => other,
635            };
636            return (upgraded, additional_remediation);
637        }
638
639        (original_severity, additional_remediation)
640    }
641
642    /// Initialize secret detection patterns
643    fn initialize_secret_patterns() -> Result<Vec<SecretPattern>, SecurityError> {
644        let patterns = vec![
645            // API Keys and Tokens - Specific patterns first
646            (
647                "AWS Access Key",
648                r"AKIA[0-9A-Z]{16}",
649                SecuritySeverity::Critical,
650            ),
651            (
652                "AWS Secret Key",
653                r#"(?i)(aws[_-]?secret|secret[_-]?access[_-]?key)["']?\s*[:=]\s*["']?[A-Za-z0-9/+=]{40}["']?"#,
654                SecuritySeverity::Critical,
655            ),
656            (
657                "S3 Secret Key",
658                r#"(?i)(s3[_-]?secret[_-]?key|linode[_-]?s3[_-]?secret)["']?\s*[:=]\s*["']?[A-Za-z0-9/+=]{20,}["']?"#,
659                SecuritySeverity::High,
660            ),
661            (
662                "GitHub Token",
663                r"gh[pousr]_[A-Za-z0-9_]{36,255}",
664                SecuritySeverity::High,
665            ),
666            (
667                "OpenAI API Key",
668                r"sk-[A-Za-z0-9]{48}",
669                SecuritySeverity::High,
670            ),
671            (
672                "Stripe API Key",
673                r"sk_live_[0-9a-zA-Z]{24}",
674                SecuritySeverity::Critical,
675            ),
676            (
677                "Stripe Publishable Key",
678                r"pk_live_[0-9a-zA-Z]{24}",
679                SecuritySeverity::Medium,
680            ),
681            // Database URLs and Passwords - Enhanced to avoid env var false positives
682            (
683                "Hardcoded Database URL",
684                r#"(?i)(database_url|db_url)["']?\s*[:=]\s*["']?(postgresql|mysql|mongodb)://[^"'\s]+"#,
685                SecuritySeverity::Critical,
686            ),
687            (
688                "Hardcoded Password",
689                r#"(?i)(password|passwd|pwd)["']?\s*[:=]\s*["']?[^"']{6,}["']?"#,
690                SecuritySeverity::High,
691            ),
692            (
693                "JWT Secret",
694                r#"(?i)(jwt[_-]?secret)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-+/=]{20,}"#,
695                SecuritySeverity::High,
696            ),
697            // Private Keys
698            (
699                "RSA Private Key",
700                r"-----BEGIN RSA PRIVATE KEY-----",
701                SecuritySeverity::Critical,
702            ),
703            (
704                "SSH Private Key",
705                r"-----BEGIN OPENSSH PRIVATE KEY-----",
706                SecuritySeverity::Critical,
707            ),
708            (
709                "PGP Private Key",
710                r"-----BEGIN PGP PRIVATE KEY BLOCK-----",
711                SecuritySeverity::Critical,
712            ),
713            // Cloud Provider Keys
714            (
715                "Google Cloud Service Account",
716                r#""type":\s*"service_account""#,
717                SecuritySeverity::High,
718            ),
719            (
720                "Azure Storage Key",
721                r"DefaultEndpointsProtocol=https;AccountName=",
722                SecuritySeverity::High,
723            ),
724            // Client-side exposed environment variables (these are the real security issues)
725            (
726                "Client-side Exposed Secret",
727                r#"(?i)(REACT_APP_|NEXT_PUBLIC_|VUE_APP_|VITE_)[A-Z_]*(?:SECRET|KEY|TOKEN|PASSWORD|API)[A-Z_]*["']?\s*[:=]\s*["']?[A-Za-z0-9_\-+/=]{10,}"#,
728                SecuritySeverity::High,
729            ),
730            // Hardcoded API keys (not environment variable access)
731            (
732                "Hardcoded API Key",
733                r#"(?i)(api[_-]?key|apikey)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-]{20,}["']?"#,
734                SecuritySeverity::High,
735            ),
736            // Generic secrets that are clearly hardcoded (not env var access)
737            (
738                "Hardcoded Secret",
739                r#"(?i)(secret|token)["']?\s*[:=]\s*["']?[A-Za-z0-9_\-+/=]{24,}["']?"#,
740                SecuritySeverity::Medium,
741            ),
742        ];
743
744        patterns
745            .into_iter()
746            .map(|(name, pattern, severity)| {
747                Ok(SecretPattern {
748                    name: name.to_string(),
749                    pattern: Regex::new(pattern)?,
750                    severity,
751                    description: format!("Potential {} found in code", name),
752                })
753            })
754            .collect()
755    }
756
757    /// Initialize language-specific security rules
758    fn initialize_security_rules() -> Result<HashMap<Language, Vec<SecurityRule>>, SecurityError> {
759        let mut rules = HashMap::new();
760
761        // JavaScript/TypeScript Rules
762        rules.insert(Language::JavaScript, vec![
763            SecurityRule {
764                id: "js-001".to_string(),
765                name: "Eval Usage".to_string(),
766                pattern: Regex::new(r"\beval\s*\(")?,
767                severity: SecuritySeverity::High,
768                category: SecurityCategory::CodeSecurityPattern,
769                description: "Use of eval() can lead to code injection vulnerabilities".to_string(),
770                remediation: vec![
771                    "Avoid using eval() with user input".to_string(),
772                    "Use JSON.parse() for parsing JSON data".to_string(),
773                    "Consider using safer alternatives like Function constructor with validation".to_string(),
774                ],
775                cwe_id: Some("CWE-95".to_string()),
776            },
777            SecurityRule {
778                id: "js-002".to_string(),
779                name: "innerHTML Usage".to_string(),
780                pattern: Regex::new(r"\.innerHTML\s*=")?,
781                severity: SecuritySeverity::Medium,
782                category: SecurityCategory::CodeSecurityPattern,
783                description: "innerHTML can lead to XSS vulnerabilities if used with unsanitized data".to_string(),
784                remediation: vec![
785                    "Use textContent instead of innerHTML for text".to_string(),
786                    "Sanitize HTML content before setting innerHTML".to_string(),
787                    "Consider using secure templating libraries".to_string(),
788                ],
789                cwe_id: Some("CWE-79".to_string()),
790            },
791        ]);
792
793        // Python Rules
794        rules.insert(
795            Language::Python,
796            vec![
797                SecurityRule {
798                    id: "py-001".to_string(),
799                    name: "SQL Injection Risk".to_string(),
800                    pattern: Regex::new(r#"\.execute\s*\(\s*[f]?["'][^"']*%[sd]"#)?,
801                    severity: SecuritySeverity::High,
802                    category: SecurityCategory::CodeSecurityPattern,
803                    description: "String formatting in SQL queries can lead to SQL injection"
804                        .to_string(),
805                    remediation: vec![
806                        "Use parameterized queries instead of string formatting".to_string(),
807                        "Use ORM query builders where possible".to_string(),
808                        "Validate and sanitize all user inputs".to_string(),
809                    ],
810                    cwe_id: Some("CWE-89".to_string()),
811                },
812                SecurityRule {
813                    id: "py-002".to_string(),
814                    name: "Pickle Usage".to_string(),
815                    pattern: Regex::new(r"\bpickle\.loads?\s*\(")?,
816                    severity: SecuritySeverity::High,
817                    category: SecurityCategory::CodeSecurityPattern,
818                    description: "Pickle can execute arbitrary code during deserialization"
819                        .to_string(),
820                    remediation: vec![
821                        "Avoid pickle for untrusted data".to_string(),
822                        "Use JSON or other safe serialization formats".to_string(),
823                        "If pickle is necessary, validate data sources".to_string(),
824                    ],
825                    cwe_id: Some("CWE-502".to_string()),
826                },
827            ],
828        );
829
830        // Add more language rules as needed...
831
832        Ok(rules)
833    }
834
835    /// Analyze configuration files for security issues with appropriate progress tracking
836    fn analyze_configuration_security_with_progress(
837        &self,
838        project_root: &Path,
839        multi_progress: &MultiProgress,
840    ) -> Result<Vec<SecurityFinding>, SecurityError> {
841        debug!("Analyzing configuration security");
842        let mut findings = Vec::new();
843
844        // Collect relevant files
845        let config_files = self.collect_config_files(project_root)?;
846
847        if config_files.is_empty() {
848            info!("No configuration files found");
849            return Ok(findings);
850        }
851
852        let is_verbose = log::max_level() >= log::LevelFilter::Info;
853
854        info!(
855            "📁 Found {} configuration files to analyze",
856            config_files.len()
857        );
858
859        // Create appropriate progress tracking - completely skip in verbose mode
860        let file_pb = if is_verbose {
861            None // No progress bars at all in verbose mode
862        } else {
863            // Normal mode: Show detailed progress
864            let pb = multi_progress.add(ProgressBar::new(config_files.len() as u64));
865            pb.set_style(
866                ProgressStyle::default_bar()
867                    .template("  🔍 {msg} {bar:40.cyan/blue} {pos}/{len} files ({percent}%)")
868                    .unwrap()
869                    .progress_chars("████▉▊▋▌▍▎▏  "),
870            );
871            pb.set_message("Scanning configuration files...");
872            Some(pb)
873        };
874
875        // Use atomic counter for progress updates if needed
876        use std::sync::Arc;
877        use std::sync::atomic::{AtomicUsize, Ordering};
878        let processed_count = Arc::new(AtomicUsize::new(0));
879
880        // Analyze each file with appropriate progress tracking
881        let file_findings: Vec<Vec<SecurityFinding>> = config_files
882            .par_iter()
883            .map(|file_path| {
884                let result = self.analyze_file_for_secrets(file_path);
885
886                // Update progress only in non-verbose mode
887                if let Some(ref pb) = file_pb {
888                    let current = processed_count.fetch_add(1, Ordering::Relaxed) + 1;
889                    if let Some(file_name) = file_path.file_name().and_then(|n| n.to_str()) {
890                        // Truncate long filenames for better display
891                        let display_name = if file_name.len() > 30 {
892                            format!("...{}", &file_name[file_name.len() - 27..])
893                        } else {
894                            file_name.to_string()
895                        };
896                        pb.set_message(format!("Scanning {}", display_name));
897                    }
898                    pb.set_position(current as u64);
899                }
900
901                result
902            })
903            .filter_map(|result| result.ok())
904            .collect();
905
906        // Finish progress tracking
907        if let Some(pb) = file_pb {
908            pb.finish_with_message(format!(
909                "✅ Scanned {} configuration files",
910                config_files.len()
911            ));
912        }
913
914        for mut file_findings in file_findings {
915            findings.append(&mut file_findings);
916        }
917
918        // Check for common insecure configurations
919        findings.extend(self.check_insecure_configurations(project_root)?);
920
921        info!(
922            "🔍 Found {} configuration security findings",
923            findings.len()
924        );
925        Ok(findings)
926    }
927
928    /// Direct configuration security analysis without progress bars
929    fn analyze_configuration_security(
930        &self,
931        project_root: &Path,
932    ) -> Result<Vec<SecurityFinding>, SecurityError> {
933        debug!("Analyzing configuration security");
934        let mut findings = Vec::new();
935
936        // Collect relevant files
937        let config_files = self.collect_config_files(project_root)?;
938
939        if config_files.is_empty() {
940            info!("No configuration files found");
941            return Ok(findings);
942        }
943
944        info!(
945            "📁 Found {} configuration files to analyze",
946            config_files.len()
947        );
948
949        // Analyze each file directly without progress tracking
950        let file_findings: Vec<Vec<SecurityFinding>> = config_files
951            .par_iter()
952            .map(|file_path| self.analyze_file_for_secrets(file_path))
953            .filter_map(|result| result.ok())
954            .collect();
955
956        for mut file_findings in file_findings {
957            findings.append(&mut file_findings);
958        }
959
960        // Check for common insecure configurations
961        findings.extend(self.check_insecure_configurations(project_root)?);
962
963        info!(
964            "🔍 Found {} configuration security findings",
965            findings.len()
966        );
967        Ok(findings)
968    }
969
970    /// Analyze code for security patterns with appropriate progress tracking
971    fn analyze_code_security_patterns_with_progress(
972        &self,
973        project_root: &Path,
974        languages: &[DetectedLanguage],
975        multi_progress: &MultiProgress,
976    ) -> Result<Vec<SecurityFinding>, SecurityError> {
977        debug!("Analyzing code security patterns");
978        let mut findings = Vec::new();
979
980        // Count total source files across all languages
981        let mut total_files = 0;
982        let mut language_files = Vec::new();
983
984        for language in languages {
985            if let Some(lang) = Language::from_string(&language.name)
986                && let Some(_rules) = self.security_rules.get(&lang)
987            {
988                let source_files = self.collect_source_files(project_root, &language.name)?;
989                total_files += source_files.len();
990                language_files.push((language, source_files));
991            }
992        }
993
994        if total_files == 0 {
995            info!("No source files found for code pattern analysis");
996            return Ok(findings);
997        }
998
999        let is_verbose = log::max_level() >= log::LevelFilter::Info;
1000
1001        info!(
1002            "📄 Found {} source files across {} languages",
1003            total_files,
1004            language_files.len()
1005        );
1006
1007        // Create appropriate progress tracking
1008        let code_pb = if is_verbose {
1009            // Verbose mode: No sub-progress to avoid visual clutter
1010            None
1011        } else {
1012            // Normal mode: Show detailed progress
1013            let pb = multi_progress.add(ProgressBar::new(total_files as u64));
1014            pb.set_style(
1015                ProgressStyle::default_bar()
1016                    .template("  📄 {msg} {bar:40.yellow/white} {pos}/{len} files ({percent}%)")
1017                    .unwrap()
1018                    .progress_chars("████▉▊▋▌▍▎▏  "),
1019            );
1020            pb.set_message("Scanning source code...");
1021            Some(pb)
1022        };
1023
1024        // Use atomic counter for progress if needed
1025        use std::sync::Arc;
1026        use std::sync::atomic::{AtomicUsize, Ordering};
1027        let processed_count = Arc::new(AtomicUsize::new(0));
1028
1029        // Process all languages
1030        for (language, source_files) in language_files {
1031            if let Some(lang) = Language::from_string(&language.name)
1032                && let Some(rules) = self.security_rules.get(&lang)
1033            {
1034                let file_findings: Vec<Vec<SecurityFinding>> = source_files
1035                    .par_iter()
1036                    .map(|file_path| {
1037                        let result = self.analyze_file_with_rules(file_path, rules);
1038
1039                        // Update progress only in non-verbose mode
1040                        if let Some(ref pb) = code_pb {
1041                            let current = processed_count.fetch_add(1, Ordering::Relaxed) + 1;
1042                            if let Some(file_name) = file_path.file_name().and_then(|n| n.to_str())
1043                            {
1044                                let display_name = if file_name.len() > 25 {
1045                                    format!("...{}", &file_name[file_name.len() - 22..])
1046                                } else {
1047                                    file_name.to_string()
1048                                };
1049                                pb.set_message(format!(
1050                                    "Scanning {} ({})",
1051                                    display_name, language.name
1052                                ));
1053                            }
1054                            pb.set_position(current as u64);
1055                        }
1056
1057                        result
1058                    })
1059                    .filter_map(|result| result.ok())
1060                    .collect();
1061
1062                for mut file_findings in file_findings {
1063                    findings.append(&mut file_findings);
1064                }
1065            }
1066        }
1067
1068        // Finish progress tracking
1069        if let Some(pb) = code_pb {
1070            pb.finish_with_message(format!("✅ Scanned {} source files", total_files));
1071        }
1072
1073        info!("🔍 Found {} code security findings", findings.len());
1074        Ok(findings)
1075    }
1076
1077    /// Direct code security analysis without progress bars
1078    fn analyze_code_security_patterns(
1079        &self,
1080        project_root: &Path,
1081        languages: &[DetectedLanguage],
1082    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1083        debug!("Analyzing code security patterns");
1084        let mut findings = Vec::new();
1085
1086        // Count total source files across all languages
1087        let mut total_files = 0;
1088        let mut language_files = Vec::new();
1089
1090        for language in languages {
1091            if let Some(lang) = Language::from_string(&language.name)
1092                && let Some(_rules) = self.security_rules.get(&lang)
1093            {
1094                let source_files = self.collect_source_files(project_root, &language.name)?;
1095                total_files += source_files.len();
1096                language_files.push((language, source_files));
1097            }
1098        }
1099
1100        if total_files == 0 {
1101            info!("No source files found for code pattern analysis");
1102            return Ok(findings);
1103        }
1104
1105        info!(
1106            "📄 Found {} source files across {} languages",
1107            total_files,
1108            language_files.len()
1109        );
1110
1111        // Process all languages without progress tracking
1112        for (language, source_files) in language_files {
1113            if let Some(lang) = Language::from_string(&language.name)
1114                && let Some(rules) = self.security_rules.get(&lang)
1115            {
1116                let file_findings: Vec<Vec<SecurityFinding>> = source_files
1117                    .par_iter()
1118                    .map(|file_path| self.analyze_file_with_rules(file_path, rules))
1119                    .filter_map(|result| result.ok())
1120                    .collect();
1121
1122                for mut file_findings in file_findings {
1123                    findings.append(&mut file_findings);
1124                }
1125            }
1126        }
1127
1128        info!("🔍 Found {} code security findings", findings.len());
1129        Ok(findings)
1130    }
1131
1132    /// Analyze infrastructure configurations with appropriate progress tracking
1133    #[allow(dead_code)]
1134    fn analyze_infrastructure_security_with_progress(
1135        &self,
1136        project_root: &Path,
1137        _technologies: &[DetectedTechnology],
1138        multi_progress: &MultiProgress,
1139    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1140        debug!("Analyzing infrastructure security");
1141        let mut findings = Vec::new();
1142
1143        let is_verbose = log::max_level() >= log::LevelFilter::Info;
1144
1145        // Create appropriate progress indicator
1146        let infra_pb = if is_verbose {
1147            // Verbose mode: No spinner to avoid conflicts with logs
1148            None
1149        } else {
1150            // Normal mode: Show spinner
1151            let pb = multi_progress.add(ProgressBar::new_spinner());
1152            pb.set_style(
1153                ProgressStyle::default_spinner()
1154                    .template("  🏗️  {msg} {spinner:.magenta}")
1155                    .unwrap()
1156                    .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ "),
1157            );
1158            pb.enable_steady_tick(std::time::Duration::from_millis(100));
1159            Some(pb)
1160        };
1161
1162        // Check Dockerfile security
1163        if let Some(ref pb) = infra_pb {
1164            pb.set_message("Checking Dockerfiles & Compose files...");
1165        }
1166        findings.extend(self.analyze_dockerfile_security(project_root)?);
1167        findings.extend(self.analyze_compose_security(project_root)?);
1168
1169        // Check CI/CD configurations
1170        if let Some(ref pb) = infra_pb {
1171            pb.set_message("Checking CI/CD configurations...");
1172        }
1173        findings.extend(self.analyze_cicd_security(project_root)?);
1174
1175        // Finish progress tracking
1176        if let Some(pb) = infra_pb {
1177            pb.finish_with_message("✅ Infrastructure analysis complete");
1178        }
1179        info!(
1180            "🔍 Found {} infrastructure security findings",
1181            findings.len()
1182        );
1183
1184        Ok(findings)
1185    }
1186
1187    /// Direct infrastructure security analysis without progress bars
1188    #[allow(dead_code)]
1189    fn analyze_infrastructure_security(
1190        &self,
1191        project_root: &Path,
1192        _technologies: &[DetectedTechnology],
1193    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1194        debug!("Analyzing infrastructure security");
1195        let mut findings = Vec::new();
1196
1197        // Check Dockerfile security
1198        findings.extend(self.analyze_dockerfile_security(project_root)?);
1199        findings.extend(self.analyze_compose_security(project_root)?);
1200
1201        // Check CI/CD configurations
1202        findings.extend(self.analyze_cicd_security(project_root)?);
1203
1204        info!(
1205            "🔍 Found {} infrastructure security findings",
1206            findings.len()
1207        );
1208        Ok(findings)
1209    }
1210
1211    /// Analyze environment variables for security issues
1212    fn analyze_environment_security(&self, env_vars: &[EnvVar]) -> Vec<SecurityFinding> {
1213        let mut findings = Vec::new();
1214
1215        for env_var in env_vars {
1216            // Check for sensitive variable names without proper protection
1217            if self.is_sensitive_env_var(&env_var.name) && env_var.default_value.is_some() {
1218                findings.push(SecurityFinding {
1219                    id: format!("env-{}", env_var.name.to_lowercase()),
1220                    title: "Sensitive Environment Variable with Default Value".to_string(),
1221                    description: format!("Environment variable '{}' appears to contain sensitive data but has a default value", env_var.name),
1222                    severity: SecuritySeverity::Medium,
1223                    category: SecurityCategory::SecretsExposure,
1224                    file_path: None,
1225                    line_number: None,
1226                    column_number: None,
1227                    evidence: Some(format!("Variable: {} = {:?}", env_var.name, env_var.default_value)),
1228                    remediation: vec![
1229                        "Remove default value for sensitive environment variables".to_string(),
1230                        "Use a secure secret management system".to_string(),
1231                        "Document required environment variables separately".to_string(),
1232                    ],
1233                    references: vec![
1234                        "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(),
1235                    ],
1236                    cwe_id: Some("CWE-200".to_string()),
1237                    compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()],
1238                });
1239            }
1240        }
1241
1242        findings
1243    }
1244
1245    /// Analyze framework-specific security configurations with appropriate progress
1246    #[allow(dead_code)]
1247    fn analyze_framework_security_with_progress(
1248        &self,
1249        project_root: &Path,
1250        technologies: &[DetectedTechnology],
1251        multi_progress: &MultiProgress,
1252    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1253        debug!("Analyzing framework-specific security");
1254        let mut findings = Vec::new();
1255
1256        let framework_count = technologies.len();
1257        if framework_count == 0 {
1258            info!("No frameworks detected for security analysis");
1259            return Ok(findings);
1260        }
1261
1262        let is_verbose = log::max_level() >= log::LevelFilter::Info;
1263
1264        info!("🔧 Found {} frameworks to analyze", framework_count);
1265
1266        // Create appropriate progress indicator
1267        let fw_pb = if is_verbose {
1268            // Verbose mode: No spinner to avoid conflicts with logs
1269            None
1270        } else {
1271            // Normal mode: Show spinner
1272            let pb = multi_progress.add(ProgressBar::new_spinner());
1273            pb.set_style(
1274                ProgressStyle::default_spinner()
1275                    .template("  🔧 {msg} {spinner:.cyan}")
1276                    .unwrap()
1277                    .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ "),
1278            );
1279            pb.enable_steady_tick(std::time::Duration::from_millis(120));
1280            Some(pb)
1281        };
1282
1283        for tech in technologies {
1284            if let Some(ref pb) = fw_pb {
1285                pb.set_message(format!("Checking {} configuration...", tech.name));
1286            }
1287
1288            match tech.name.as_str() {
1289                "Express.js" | "Express" => {
1290                    findings.extend(self.analyze_express_security(project_root)?);
1291                }
1292                "Django" => {
1293                    findings.extend(self.analyze_django_security(project_root)?);
1294                }
1295                "Spring Boot" => {
1296                    findings.extend(self.analyze_spring_security(project_root)?);
1297                }
1298                "Next.js" => {
1299                    findings.extend(self.analyze_nextjs_security(project_root)?);
1300                }
1301                // Add more frameworks as needed
1302                _ => {}
1303            }
1304        }
1305
1306        // Finish progress tracking
1307        if let Some(pb) = fw_pb {
1308            pb.finish_with_message("✅ Framework analysis complete");
1309        }
1310        info!("🔍 Found {} framework security findings", findings.len());
1311
1312        Ok(findings)
1313    }
1314
1315    /// Direct framework security analysis without progress bars
1316    #[allow(dead_code)]
1317    fn analyze_framework_security(
1318        &self,
1319        project_root: &Path,
1320        technologies: &[DetectedTechnology],
1321    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1322        debug!("Analyzing framework-specific security");
1323        let mut findings = Vec::new();
1324
1325        let framework_count = technologies.len();
1326        if framework_count == 0 {
1327            info!("No frameworks detected for security analysis");
1328            return Ok(findings);
1329        }
1330
1331        info!("🔧 Found {} frameworks to analyze", framework_count);
1332
1333        for tech in technologies {
1334            match tech.name.as_str() {
1335                "Express.js" | "Express" => {
1336                    findings.extend(self.analyze_express_security(project_root)?);
1337                }
1338                "Django" => {
1339                    findings.extend(self.analyze_django_security(project_root)?);
1340                }
1341                "Spring Boot" => {
1342                    findings.extend(self.analyze_spring_security(project_root)?);
1343                }
1344                "Next.js" => {
1345                    findings.extend(self.analyze_nextjs_security(project_root)?);
1346                }
1347                // Add more frameworks as needed
1348                _ => {}
1349            }
1350        }
1351
1352        info!("🔍 Found {} framework security findings", findings.len());
1353        Ok(findings)
1354    }
1355
1356    // Helper methods for specific analyses...
1357
1358    fn collect_config_files(&self, project_root: &Path) -> Result<Vec<PathBuf>, SecurityError> {
1359        let patterns = vec![
1360            "*.env*",
1361            "*.conf",
1362            "*.config",
1363            "*.ini",
1364            "*.yaml",
1365            "*.yml",
1366            "*.toml",
1367            "docker-compose*.yml",
1368            "Dockerfile*",
1369            ".github/**/*.yml",
1370            ".gitlab-ci.yml",
1371            "package.json",
1372            "requirements.txt",
1373            "Cargo.toml",
1374            "go.mod",
1375            "pom.xml",
1376        ];
1377
1378        let mut files = crate::common::file_utils::find_files_by_patterns(project_root, &patterns)
1379            .map_err(SecurityError::Io)?;
1380
1381        // Filter out files matching ignore patterns
1382        files.retain(|file| {
1383            let file_name = file.file_name().and_then(|n| n.to_str()).unwrap_or("");
1384            let file_path = file.to_string_lossy();
1385
1386            !self.config.ignore_patterns.iter().any(|pattern| {
1387                if pattern.contains('*') {
1388                    // Use glob matching for wildcard patterns
1389                    glob::Pattern::new(pattern)
1390                        .map(|p| p.matches(&file_path) || p.matches(file_name))
1391                        .unwrap_or(false)
1392                } else {
1393                    // Exact string matching
1394                    file_path.contains(pattern) || file_name.contains(pattern)
1395                }
1396            })
1397        });
1398
1399        Ok(files)
1400    }
1401
1402    fn analyze_file_for_secrets(
1403        &self,
1404        file_path: &Path,
1405    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1406        let content = fs::read_to_string(file_path)?;
1407        let mut findings = Vec::new();
1408
1409        for (line_num, line) in content.lines().enumerate() {
1410            for pattern in &self.secret_patterns {
1411                if let Some(match_) = pattern.pattern.find(line) {
1412                    // Skip if it looks like a placeholder or example
1413                    if self.is_likely_placeholder(line) {
1414                        continue;
1415                    }
1416
1417                    // NEW: Skip if this is legitimate environment variable usage
1418                    if self.is_legitimate_env_var_usage(line, file_path) {
1419                        debug!("Skipping legitimate env var usage: {}", line.trim());
1420                        continue;
1421                    }
1422
1423                    // Determine severity based on git status
1424                    let (severity, additional_remediation) =
1425                        self.determine_secret_severity(file_path, pattern.severity.clone());
1426
1427                    // Skip if severity is Info (indicates gitignored and should be skipped)
1428                    if self.config.skip_gitignored_files && severity == SecuritySeverity::Info {
1429                        debug!(
1430                            "Skipping secret in gitignored file: {}",
1431                            file_path.display()
1432                        );
1433                        continue;
1434                    }
1435
1436                    // Build base remediation steps
1437                    let mut remediation = vec![
1438                        "Remove sensitive data from source code".to_string(),
1439                        "Use environment variables for secrets".to_string(),
1440                        "Consider using a secure secret management service".to_string(),
1441                    ];
1442
1443                    // Add git-specific remediation based on file status
1444                    remediation.extend(additional_remediation);
1445
1446                    // Add generic gitignore advice if not already covered
1447                    if !self.is_file_gitignored(file_path) && !self.is_file_tracked(file_path) {
1448                        remediation.push(
1449                            "Add this file to .gitignore to prevent accidental commits".to_string(),
1450                        );
1451                    }
1452
1453                    // Create enhanced finding with git-aware severity and remediation
1454                    let mut description = pattern.description.clone();
1455                    if self.is_file_tracked(file_path) {
1456                        description.push_str(" (⚠️  WARNING: File is tracked by git - secrets may be in version history!)");
1457                    } else if self.is_file_gitignored(file_path) {
1458                        description.push_str(" (ℹ️  Note: File is gitignored)");
1459                    }
1460
1461                    findings.push(SecurityFinding {
1462                        id: format!("secret-{}-{}", pattern.name.to_lowercase().replace(' ', "-"), line_num),
1463                        title: format!("Potential {} Exposure", pattern.name),
1464                        description,
1465                        severity,
1466                        category: SecurityCategory::SecretsExposure,
1467                        file_path: Some(file_path.to_path_buf()),
1468                        line_number: Some(line_num + 1),
1469                        column_number: Some(match_.start() + 1), // 1-indexed column position
1470                        evidence: Some(format!("Line: {}", line.trim())),
1471                        remediation,
1472                        references: vec![
1473                            "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(),
1474                        ],
1475                        cwe_id: Some("CWE-200".to_string()),
1476                        compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()],
1477                    });
1478                }
1479            }
1480        }
1481
1482        Ok(findings)
1483    }
1484
1485    /// Check if a line represents legitimate environment variable usage (not a security issue)
1486    fn is_legitimate_env_var_usage(&self, line: &str, file_path: &Path) -> bool {
1487        let line_trimmed = line.trim();
1488
1489        // Check for common legitimate environment variable access patterns
1490        let legitimate_env_patterns = [
1491            // Node.js/JavaScript patterns
1492            r"process\.env\.[A-Z_]+",
1493            r#"process\.env\[['""][A-Z_]+['"]\]"#,
1494            // Vite/Modern JS patterns
1495            r"import\.meta\.env\.[A-Z_]+",
1496            r#"import\.meta\.env\[['""][A-Z_]+['"]\]"#,
1497            // Python patterns
1498            r#"os\.environ\.get\(["'][A-Z_]+["']\)"#,
1499            r#"os\.environ\[["'][A-Z_]+["']\]"#,
1500            r#"getenv\(["'][A-Z_]+["']\)"#,
1501            // Rust patterns
1502            r#"env::var\("([A-Z_]+)"\)"#,
1503            r#"std::env::var\("([A-Z_]+)"\)"#,
1504            // Go patterns
1505            r#"os\.Getenv\(["'][A-Z_]+["']\)"#,
1506            // Java patterns
1507            r#"System\.getenv\(["'][A-Z_]+["']\)"#,
1508            // Shell/Docker patterns
1509            r"\$\{?[A-Z_]+\}?",
1510            r"ENV [A-Z_]+",
1511            // Config file access patterns
1512            r"config\.[a-z_]+\.[A-Z_]+",
1513            r"settings\.[A-Z_]+",
1514            r"env\.[A-Z_]+",
1515        ];
1516
1517        // Check if the line matches any legitimate environment variable access pattern
1518        for pattern_str in &legitimate_env_patterns {
1519            if let Ok(pattern) = Regex::new(pattern_str)
1520                && pattern.is_match(line_trimmed)
1521            {
1522                // Additional context checks to make sure this is really legitimate
1523
1524                // Check if this is in a server-side context (not client-side)
1525                if self.is_server_side_file(file_path) {
1526                    return true;
1527                }
1528
1529                // Check if this is NOT a client-side exposed variable
1530                if !self.is_client_side_exposed_env_var(line_trimmed) {
1531                    return true;
1532                }
1533            }
1534        }
1535
1536        // Check for assignment vs access - assignments might be setting up environment variables
1537        // which could be legitimate in certain contexts
1538        if self.is_env_var_assignment_context(line_trimmed, file_path) {
1539            return true;
1540        }
1541
1542        false
1543    }
1544
1545    /// Check if a file is likely server-side code (vs client-side)
1546    fn is_server_side_file(&self, file_path: &Path) -> bool {
1547        let path_str = file_path.to_string_lossy().to_lowercase();
1548        let file_name = file_path
1549            .file_name()
1550            .and_then(|n| n.to_str())
1551            .unwrap_or("")
1552            .to_lowercase();
1553
1554        // Server-side indicators
1555        let server_indicators = [
1556            "/server/",
1557            "/api/",
1558            "/backend/",
1559            "/src/app/api/",
1560            "/pages/api/",
1561            "/routes/",
1562            "/controllers/",
1563            "/middleware/",
1564            "/models/",
1565            "/lib/",
1566            "/utils/",
1567            "/services/",
1568            "/config/",
1569            "server.js",
1570            "index.js",
1571            "app.js",
1572            "main.js",
1573            ".env",
1574            "dockerfile",
1575            "docker-compose",
1576        ];
1577
1578        // Client-side indicators (these should return false)
1579        let client_indicators = [
1580            "/public/",
1581            "/static/",
1582            "/assets/",
1583            "/components/",
1584            "/pages/",
1585            "/src/components/",
1586            "/src/pages/",
1587            "/client/",
1588            "/frontend/",
1589            "index.html",
1590            ".html",
1591            "/dist/",
1592            "/build/",
1593            "dist/",
1594            "build/",
1595            "public/",
1596            "static/",
1597            "assets/",
1598        ];
1599
1600        // If it's clearly client-side, return false
1601        if client_indicators
1602            .iter()
1603            .any(|indicator| path_str.contains(indicator))
1604        {
1605            return false;
1606        }
1607
1608        // If it has server-side indicators, return true
1609        if server_indicators
1610            .iter()
1611            .any(|indicator| path_str.contains(indicator) || file_name.contains(indicator))
1612        {
1613            return true;
1614        }
1615
1616        // Default to true for ambiguous cases (be conservative about flagging env var usage)
1617        true
1618    }
1619
1620    /// Check if an environment variable is exposed to client-side (security issue)
1621    fn is_client_side_exposed_env_var(&self, line: &str) -> bool {
1622        let client_prefixes = [
1623            "REACT_APP_",
1624            "NEXT_PUBLIC_",
1625            "VUE_APP_",
1626            "VITE_",
1627            "GATSBY_",
1628            "PUBLIC_",
1629            "NUXT_PUBLIC_",
1630        ];
1631
1632        client_prefixes.iter().any(|prefix| line.contains(prefix))
1633    }
1634
1635    /// Check if this is a legitimate environment variable assignment context
1636    fn is_env_var_assignment_context(&self, line: &str, file_path: &Path) -> bool {
1637        let path_str = file_path.to_string_lossy().to_lowercase();
1638        let file_name = file_path
1639            .file_name()
1640            .and_then(|n| n.to_str())
1641            .unwrap_or("")
1642            .to_lowercase();
1643
1644        // Only very specific configuration files where env var assignments are expected
1645        // Be more restrictive to avoid false positives
1646        let env_config_files = [
1647            ".env",
1648            "docker-compose.yml",
1649            "docker-compose.yaml",
1650            ".env.example",
1651            ".env.sample",
1652            ".env.template",
1653            ".env.local",
1654            ".env.development",
1655            ".env.production",
1656            ".env.staging",
1657        ];
1658
1659        // Check for exact filename matches for .env files (most common legitimate case)
1660        if env_config_files.iter().any(|pattern| file_name == *pattern) {
1661            return true;
1662        }
1663
1664        // Docker files are also legitimate for environment variable assignment
1665        if file_name.starts_with("dockerfile") || file_name == "dockerfile" {
1666            return true;
1667        }
1668
1669        // Shell scripts or CI/CD files
1670        if file_name.ends_with(".sh")
1671            || file_name.ends_with(".bash")
1672            || path_str.contains(".github/workflows/")
1673            || path_str.contains(".gitlab-ci")
1674        {
1675            return true;
1676        }
1677
1678        // Lines that are clearly setting up environment variables for child processes
1679        // Only match very specific patterns that indicate legitimate environment setup
1680        let setup_patterns = [
1681            r"export [A-Z_]+=",         // Shell export
1682            r"ENV [A-Z_]+=",            // Dockerfile ENV
1683            r"^\s*environment:\s*$",    // Docker Compose environment section header
1684            r"^\s*env:\s*$",            // Kubernetes env section header
1685            r"process\.env\.[A-Z_]+ =", // Explicitly setting process.env (rare but legitimate)
1686        ];
1687
1688        for pattern_str in &setup_patterns {
1689            if let Ok(pattern) = Regex::new(pattern_str)
1690                && pattern.is_match(line)
1691            {
1692                return true;
1693            }
1694        }
1695
1696        false
1697    }
1698
1699    fn is_likely_placeholder(&self, line: &str) -> bool {
1700        let placeholder_indicators = [
1701            "example",
1702            "placeholder",
1703            "your_",
1704            "insert_",
1705            "replace_",
1706            "xxx",
1707            "yyy",
1708            "zzz",
1709            "fake",
1710            "dummy",
1711            "test_key",
1712            "sk-xxxxxxxx",
1713            "AKIA00000000",
1714        ];
1715
1716        let hash_indicators = [
1717            "checksum",
1718            "hash",
1719            "sha1",
1720            "sha256",
1721            "md5",
1722            "commit",
1723            "fingerprint",
1724            "digest",
1725            "advisory",
1726            "ghsa-",
1727            "cve-",
1728            "rustc_fingerprint",
1729            "last-commit",
1730            "references",
1731        ];
1732
1733        let line_lower = line.to_lowercase();
1734
1735        // Check for placeholder indicators
1736        if placeholder_indicators
1737            .iter()
1738            .any(|indicator| line_lower.contains(indicator))
1739        {
1740            return true;
1741        }
1742
1743        // Check for hash/checksum context
1744        if hash_indicators
1745            .iter()
1746            .any(|indicator| line_lower.contains(indicator))
1747        {
1748            return true;
1749        }
1750
1751        // Check if it's a URL or path (often contains hash-like strings)
1752        if line_lower.contains("http") || line_lower.contains("github.com") {
1753            return true;
1754        }
1755
1756        // Check if it's likely a hex-only string (git commits, checksums)
1757        if let Some(potential_hash) = self.extract_potential_hash(line)
1758            && potential_hash.len() >= 32
1759            && self.is_hex_only(&potential_hash)
1760        {
1761            return true; // Likely a SHA hash
1762        }
1763
1764        false
1765    }
1766
1767    fn extract_potential_hash(&self, line: &str) -> Option<String> {
1768        // Look for quoted strings that might be hashes
1769        if let Some(start) = line.find('"')
1770            && let Some(end) = line[start + 1..].find('"')
1771        {
1772            let potential = &line[start + 1..start + 1 + end];
1773            if potential.len() >= 32 {
1774                return Some(potential.to_string());
1775            }
1776        }
1777        None
1778    }
1779
1780    fn is_hex_only(&self, s: &str) -> bool {
1781        s.chars().all(|c| c.is_ascii_hexdigit())
1782    }
1783
1784    fn is_sensitive_env_var(&self, name: &str) -> bool {
1785        let sensitive_patterns = [
1786            "password",
1787            "secret",
1788            "key",
1789            "token",
1790            "auth",
1791            "api",
1792            "private",
1793            "credential",
1794            "cert",
1795            "ssl",
1796            "tls",
1797        ];
1798
1799        let name_lower = name.to_lowercase();
1800        sensitive_patterns
1801            .iter()
1802            .any(|pattern| name_lower.contains(pattern))
1803    }
1804
1805    // Placeholder implementations for specific framework analysis
1806    fn analyze_express_security(
1807        &self,
1808        _project_root: &Path,
1809    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1810        // TODO: Implement Express.js specific security checks
1811        Ok(vec![])
1812    }
1813
1814    fn analyze_django_security(
1815        &self,
1816        _project_root: &Path,
1817    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1818        // TODO: Implement Django specific security checks
1819        Ok(vec![])
1820    }
1821
1822    fn analyze_spring_security(
1823        &self,
1824        _project_root: &Path,
1825    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1826        // TODO: Implement Spring Boot specific security checks
1827        Ok(vec![])
1828    }
1829
1830    fn analyze_nextjs_security(
1831        &self,
1832        _project_root: &Path,
1833    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1834        // TODO: Implement Next.js specific security checks
1835        Ok(vec![])
1836    }
1837
1838    fn analyze_dockerfile_security(
1839        &self,
1840        _project_root: &Path,
1841    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1842        // TODO: Implement Dockerfile security analysis
1843        Ok(vec![])
1844    }
1845
1846    fn analyze_compose_security(
1847        &self,
1848        _project_root: &Path,
1849    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1850        // TODO: Implement Docker Compose security analysis
1851        Ok(vec![])
1852    }
1853
1854    fn analyze_cicd_security(
1855        &self,
1856        _project_root: &Path,
1857    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1858        // TODO: Implement CI/CD security analysis
1859        Ok(vec![])
1860    }
1861
1862    // Additional helper methods...
1863    fn collect_source_files(
1864        &self,
1865        _project_root: &Path,
1866        _language: &str,
1867    ) -> Result<Vec<PathBuf>, SecurityError> {
1868        // TODO: Implement source file collection based on language
1869        Ok(vec![])
1870    }
1871
1872    fn analyze_file_with_rules(
1873        &self,
1874        _file_path: &Path,
1875        _rules: &[SecurityRule],
1876    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1877        // TODO: Implement rule-based file analysis
1878        Ok(vec![])
1879    }
1880
1881    fn check_insecure_configurations(
1882        &self,
1883        _project_root: &Path,
1884    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1885        // TODO: Implement insecure configuration checks
1886        Ok(vec![])
1887    }
1888
1889    /// Deduplicate findings to avoid multiple reports for the same secret/issue
1890    fn deduplicate_findings(&self, mut findings: Vec<SecurityFinding>) -> Vec<SecurityFinding> {
1891        use std::collections::HashSet;
1892
1893        let mut seen_secrets: HashSet<String> = HashSet::new();
1894        let mut deduplicated = Vec::new();
1895
1896        // Sort by priority: more specific patterns first, then by severity
1897        findings.sort_by(|a, b| {
1898            // First, prioritize specific patterns over generic ones
1899            let a_priority = self.get_pattern_priority(&a.title);
1900            let b_priority = self.get_pattern_priority(&b.title);
1901
1902            match a_priority.cmp(&b_priority) {
1903                std::cmp::Ordering::Equal => {
1904                    // If same priority, sort by severity (most critical first)
1905                    a.severity.cmp(&b.severity)
1906                }
1907                other => other,
1908            }
1909        });
1910
1911        for finding in findings {
1912            let key = self.generate_finding_key(&finding);
1913
1914            if !seen_secrets.contains(&key) {
1915                seen_secrets.insert(key);
1916                deduplicated.push(finding);
1917            }
1918        }
1919
1920        deduplicated
1921    }
1922
1923    /// Generate a unique key for deduplication based on the type of finding
1924    fn generate_finding_key(&self, finding: &SecurityFinding) -> String {
1925        match finding.category {
1926            SecurityCategory::SecretsExposure => {
1927                // For secrets, deduplicate based on file path and the actual secret content
1928                if let Some(evidence) = &finding.evidence
1929                    && let Some(file_path) = &finding.file_path
1930                {
1931                    // Extract the secret value from the evidence line
1932                    if let Some(secret_value) = self.extract_secret_value(evidence) {
1933                        return format!("secret:{}:{}", file_path.display(), secret_value);
1934                    }
1935                    // Fallback to file + line if we can't extract the value
1936                    if let Some(line_num) = finding.line_number {
1937                        return format!("secret:{}:{}", file_path.display(), line_num);
1938                    }
1939                }
1940                // Fallback for environment variables or other secrets without file paths
1941                format!("secret:{}", finding.title)
1942            }
1943            _ => {
1944                // For non-secret findings, use file path + line number + title
1945                if let Some(file_path) = &finding.file_path
1946                    && let Some(line_num) = finding.line_number
1947                {
1948                    format!(
1949                        "other:{}:{}:{}",
1950                        file_path.display(),
1951                        line_num,
1952                        finding.title
1953                    )
1954                } else if let Some(file_path) = &finding.file_path {
1955                    format!("other:{}:{}", file_path.display(), finding.title)
1956                } else {
1957                    format!("other:{}", finding.title)
1958                }
1959            }
1960        }
1961    }
1962
1963    /// Extract secret value from evidence line for deduplication
1964    fn extract_secret_value(&self, evidence: &str) -> Option<String> {
1965        // Look for patterns like "KEY=value" or "KEY: value"
1966        if let Some(pos) = evidence.find('=') {
1967            let value = evidence[pos + 1..].trim();
1968            // Remove quotes if present
1969            let value = value.trim_matches('"').trim_matches('\'');
1970            if value.len() > 10 {
1971                // Only consider substantial values
1972                return Some(value.to_string());
1973            }
1974        }
1975
1976        // Look for patterns like "key: value" in YAML/JSON
1977        if let Some(pos) = evidence.find(':') {
1978            let value = evidence[pos + 1..].trim();
1979            let value = value.trim_matches('"').trim_matches('\'');
1980            if value.len() > 10 {
1981                return Some(value.to_string());
1982            }
1983        }
1984
1985        None
1986    }
1987
1988    /// Get pattern priority for deduplication (lower number = higher priority)
1989    fn get_pattern_priority(&self, title: &str) -> u8 {
1990        // Most specific patterns get highest priority (lowest number)
1991        if title.contains("AWS Access Key") {
1992            return 1;
1993        }
1994        if title.contains("AWS Secret Key") {
1995            return 1;
1996        }
1997        if title.contains("S3 Secret Key") {
1998            return 1;
1999        }
2000        if title.contains("GitHub Token") {
2001            return 1;
2002        }
2003        if title.contains("OpenAI API Key") {
2004            return 1;
2005        }
2006        if title.contains("Stripe") {
2007            return 1;
2008        }
2009        if title.contains("RSA Private Key") {
2010            return 1;
2011        }
2012        if title.contains("SSH Private Key") {
2013            return 1;
2014        }
2015
2016        // JWT and specific API keys are more specific than generic
2017        if title.contains("JWT Secret") {
2018            return 2;
2019        }
2020        if title.contains("Database URL") {
2021            return 2;
2022        }
2023
2024        // Generic API key patterns are less specific
2025        if title.contains("API Key") {
2026            return 3;
2027        }
2028
2029        // Environment variable findings are less specific
2030        if title.contains("Environment Variable") {
2031            return 4;
2032        }
2033
2034        // Generic patterns get lowest priority (highest number)
2035        if title.contains("Generic Secret") {
2036            return 5;
2037        }
2038
2039        // Default priority for other patterns
2040        3
2041    }
2042
2043    fn count_by_severity(&self, findings: &[SecurityFinding]) -> HashMap<SecuritySeverity, usize> {
2044        let mut counts = HashMap::new();
2045        for finding in findings {
2046            *counts.entry(finding.severity.clone()).or_insert(0) += 1;
2047        }
2048        counts
2049    }
2050
2051    fn count_by_category(&self, findings: &[SecurityFinding]) -> HashMap<SecurityCategory, usize> {
2052        let mut counts = HashMap::new();
2053        for finding in findings {
2054            *counts.entry(finding.category.clone()).or_insert(0) += 1;
2055        }
2056        counts
2057    }
2058
2059    fn calculate_security_score(&self, findings: &[SecurityFinding]) -> f32 {
2060        if findings.is_empty() {
2061            return 100.0;
2062        }
2063
2064        let total_penalty = findings
2065            .iter()
2066            .map(|f| match f.severity {
2067                SecuritySeverity::Critical => 25.0,
2068                SecuritySeverity::High => 15.0,
2069                SecuritySeverity::Medium => 8.0,
2070                SecuritySeverity::Low => 3.0,
2071                SecuritySeverity::Info => 1.0,
2072            })
2073            .sum::<f32>();
2074
2075        (100.0 - total_penalty).max(0.0)
2076    }
2077
2078    fn determine_risk_level(&self, findings: &[SecurityFinding]) -> SecuritySeverity {
2079        if findings
2080            .iter()
2081            .any(|f| f.severity == SecuritySeverity::Critical)
2082        {
2083            SecuritySeverity::Critical
2084        } else if findings
2085            .iter()
2086            .any(|f| f.severity == SecuritySeverity::High)
2087        {
2088            SecuritySeverity::High
2089        } else if findings
2090            .iter()
2091            .any(|f| f.severity == SecuritySeverity::Medium)
2092        {
2093            SecuritySeverity::Medium
2094        } else if !findings.is_empty() {
2095            SecuritySeverity::Low
2096        } else {
2097            SecuritySeverity::Info
2098        }
2099    }
2100
2101    #[allow(dead_code)]
2102    fn assess_compliance(
2103        &self,
2104        _findings: &[SecurityFinding],
2105        _technologies: &[DetectedTechnology],
2106    ) -> HashMap<String, ComplianceStatus> {
2107        // TODO: Implement compliance assessment
2108        HashMap::new()
2109    }
2110
2111    fn generate_recommendations(
2112        &self,
2113        findings: &[SecurityFinding],
2114        _technologies: &[DetectedTechnology],
2115    ) -> Vec<String> {
2116        let mut recommendations = Vec::new();
2117
2118        if findings
2119            .iter()
2120            .any(|f| f.category == SecurityCategory::SecretsExposure)
2121        {
2122            recommendations.push("Implement a secure secret management strategy".to_string());
2123        }
2124
2125        if findings
2126            .iter()
2127            .any(|f| f.severity == SecuritySeverity::Critical)
2128        {
2129            recommendations.push("Address critical security findings immediately".to_string());
2130        }
2131
2132        recommendations
2133    }
2134}
2135
2136#[cfg(test)]
2137mod tests {
2138    use super::*;
2139
2140    #[test]
2141    fn test_security_score_calculation() {
2142        let analyzer = SecurityAnalyzer::new().unwrap();
2143
2144        let findings = vec![SecurityFinding {
2145            id: "test-1".to_string(),
2146            title: "Test Critical".to_string(),
2147            description: "Test".to_string(),
2148            severity: SecuritySeverity::Critical,
2149            category: SecurityCategory::SecretsExposure,
2150            file_path: None,
2151            line_number: None,
2152            column_number: None,
2153            evidence: None,
2154            remediation: vec![],
2155            references: vec![],
2156            cwe_id: None,
2157            compliance_frameworks: vec![],
2158        }];
2159
2160        let score = analyzer.calculate_security_score(&findings);
2161        assert_eq!(score, 75.0); // 100 - 25 (critical penalty)
2162    }
2163
2164    #[test]
2165    fn test_secret_pattern_matching() {
2166        let analyzer = SecurityAnalyzer::new().unwrap();
2167
2168        // Test if placeholder detection works
2169        assert!(analyzer.is_likely_placeholder("API_KEY=sk-xxxxxxxxxxxxxxxx"));
2170        assert!(!analyzer.is_likely_placeholder("API_KEY=sk-1234567890abcdef"));
2171    }
2172
2173    #[test]
2174    fn test_sensitive_env_var_detection() {
2175        let analyzer = SecurityAnalyzer::new().unwrap();
2176
2177        assert!(analyzer.is_sensitive_env_var("DATABASE_PASSWORD"));
2178        assert!(analyzer.is_sensitive_env_var("JWT_SECRET"));
2179        assert!(!analyzer.is_sensitive_env_var("PORT"));
2180        assert!(!analyzer.is_sensitive_env_var("NODE_ENV"));
2181    }
2182
2183    #[test]
2184    fn test_gitignore_aware_severity() {
2185        use std::fs;
2186        use std::process::Command;
2187        use tempfile::TempDir;
2188
2189        let temp_dir = TempDir::new().unwrap();
2190        let project_root = temp_dir.path();
2191
2192        // Initialize a real git repo
2193        let git_init = Command::new("git")
2194            .args(["init"])
2195            .current_dir(project_root)
2196            .output();
2197
2198        // Skip test if git is not available
2199        if git_init.is_err() {
2200            println!("Skipping gitignore test - git not available");
2201            return;
2202        }
2203
2204        // Create .gitignore file
2205        fs::write(project_root.join(".gitignore"), ".env\n.env.local\n").unwrap();
2206
2207        // Stage and commit .gitignore to make it effective
2208        let _ = Command::new("git")
2209            .args(["add", ".gitignore"])
2210            .current_dir(project_root)
2211            .output();
2212        let _ = Command::new("git")
2213            .args(["config", "user.email", "test@example.com"])
2214            .current_dir(project_root)
2215            .output();
2216        let _ = Command::new("git")
2217            .args(["config", "user.name", "Test User"])
2218            .current_dir(project_root)
2219            .output();
2220        let _ = Command::new("git")
2221            .args(["commit", "-m", "Add gitignore"])
2222            .current_dir(project_root)
2223            .output();
2224
2225        let mut analyzer = SecurityAnalyzer::new().unwrap();
2226        analyzer.project_root = Some(project_root.to_path_buf());
2227
2228        // Test file that would be gitignored
2229        let env_file = project_root.join(".env");
2230        fs::write(&env_file, "API_KEY=sk-1234567890abcdef").unwrap();
2231
2232        // Test severity determination for gitignored file
2233        let (severity, remediation) =
2234            analyzer.determine_secret_severity(&env_file, SecuritySeverity::High);
2235
2236        // With default config, gitignored files should be marked as Info (skipped)
2237        assert_eq!(severity, SecuritySeverity::Info);
2238        assert!(remediation.iter().any(|r| r.contains("gitignored")));
2239    }
2240
2241    #[test]
2242    fn test_gitignore_config_options() {
2243        let mut config = SecurityAnalysisConfig::default();
2244
2245        // Test default configuration
2246        assert!(config.skip_gitignored_files);
2247        assert!(!config.downgrade_gitignored_severity);
2248
2249        // Test downgrade mode
2250        config.skip_gitignored_files = false;
2251        config.downgrade_gitignored_severity = true;
2252
2253        let _analyzer = SecurityAnalyzer::with_config(config).unwrap();
2254        // Additional test logic could be added here for downgrade behavior
2255    }
2256
2257    #[test]
2258    fn test_gitignore_pattern_matching() {
2259        let analyzer = SecurityAnalyzer::new().unwrap();
2260
2261        // Test wildcard patterns - *.env matches files ending with .env
2262        assert!(!analyzer.matches_gitignore_pattern("*.env", ".env.local", ".env.local")); // Doesn't end with .env
2263        assert!(analyzer.matches_gitignore_pattern("*.env", "production.env", "production.env")); // Ends with .env
2264        assert!(analyzer.matches_gitignore_pattern(".env*", ".env.production", ".env.production")); // Starts with .env
2265        assert!(analyzer.matches_gitignore_pattern("*.log", "app.log", "app.log"));
2266
2267        // Test exact patterns
2268        assert!(analyzer.matches_gitignore_pattern(".env", ".env", ".env"));
2269        assert!(!analyzer.matches_gitignore_pattern(".env", ".env.local", ".env.local"));
2270
2271        // Test directory patterns
2272        assert!(analyzer.matches_gitignore_pattern("/config.json", "config.json", "config.json"));
2273        assert!(!analyzer.matches_gitignore_pattern(
2274            "/config.json",
2275            "src/config.json",
2276            "config.json"
2277        ));
2278
2279        // Test common .env patterns that should work
2280        assert!(analyzer.matches_gitignore_pattern(".env*", ".env", ".env"));
2281        assert!(analyzer.matches_gitignore_pattern(".env*", ".env.local", ".env.local"));
2282        assert!(analyzer.matches_gitignore_pattern(".env.*", ".env.production", ".env.production"));
2283    }
2284
2285    #[test]
2286    fn test_common_env_patterns() {
2287        let analyzer = SecurityAnalyzer::new().unwrap();
2288
2289        // Should match common .env files
2290        assert!(analyzer.matches_common_env_patterns(".env"));
2291        assert!(analyzer.matches_common_env_patterns(".env.local"));
2292        assert!(analyzer.matches_common_env_patterns(".env.production"));
2293        assert!(analyzer.matches_common_env_patterns(".env.development"));
2294        assert!(analyzer.matches_common_env_patterns(".env.test"));
2295
2296        // Should NOT match example/template files (usually committed)
2297        assert!(!analyzer.matches_common_env_patterns(".env.example"));
2298        assert!(!analyzer.matches_common_env_patterns(".env.sample"));
2299        assert!(!analyzer.matches_common_env_patterns(".env.template"));
2300
2301        // Should not match non-env files
2302        assert!(!analyzer.matches_common_env_patterns("config.json"));
2303        assert!(!analyzer.matches_common_env_patterns("package.json"));
2304    }
2305
2306    #[test]
2307    fn test_legitimate_env_var_usage() {
2308        let analyzer = SecurityAnalyzer::new().unwrap();
2309
2310        // Create mock file paths
2311        let server_file = Path::new("src/server/config.js");
2312        let client_file = Path::new("src/components/MyComponent.js");
2313
2314        // Test legitimate server-side environment variable usage (should NOT be flagged)
2315        assert!(analyzer.is_legitimate_env_var_usage(
2316            "const apiKey = process.env.RESEND_API_KEY;",
2317            server_file
2318        ));
2319        assert!(
2320            analyzer.is_legitimate_env_var_usage(
2321                "const dbUrl = process.env.DATABASE_URL;",
2322                server_file
2323            )
2324        );
2325        assert!(
2326            analyzer
2327                .is_legitimate_env_var_usage("api_key = os.environ.get('API_KEY')", server_file)
2328        );
2329        assert!(
2330            analyzer.is_legitimate_env_var_usage(
2331                "let secret = env::var(\"JWT_SECRET\")?;",
2332                server_file
2333            )
2334        );
2335
2336        // Test client-side environment variable usage (legitimate if not exposed)
2337        assert!(
2338            analyzer
2339                .is_legitimate_env_var_usage("const apiUrl = process.env.API_URL;", client_file)
2340        );
2341
2342        // Test client-side exposed variables (these ARE client-side exposed - security issues)
2343        assert!(analyzer.is_client_side_exposed_env_var("process.env.REACT_APP_SECRET_KEY"));
2344        assert!(analyzer.is_client_side_exposed_env_var("process.env.NEXT_PUBLIC_API_SECRET"));
2345
2346        // Test hardcoded secrets (should NOT be legitimate)
2347        assert!(
2348            !analyzer
2349                .is_legitimate_env_var_usage("const apiKey = 'sk-1234567890abcdef';", server_file)
2350        );
2351        assert!(!analyzer.is_legitimate_env_var_usage("password = 'hardcoded123'", server_file));
2352    }
2353
2354    #[test]
2355    fn test_server_vs_client_side_detection() {
2356        let analyzer = SecurityAnalyzer::new().unwrap();
2357
2358        // Server-side files
2359        assert!(analyzer.is_server_side_file(Path::new("src/server/app.js")));
2360        assert!(analyzer.is_server_side_file(Path::new("src/api/users.js")));
2361        assert!(analyzer.is_server_side_file(Path::new("pages/api/auth.js")));
2362        assert!(analyzer.is_server_side_file(Path::new("src/lib/database.js")));
2363        assert!(analyzer.is_server_side_file(Path::new(".env")));
2364        assert!(analyzer.is_server_side_file(Path::new("server.js")));
2365
2366        // Client-side files
2367        assert!(!analyzer.is_server_side_file(Path::new("src/components/Button.jsx")));
2368        assert!(!analyzer.is_server_side_file(Path::new("public/index.html")));
2369        assert!(!analyzer.is_server_side_file(Path::new("src/pages/home.js")));
2370        assert!(!analyzer.is_server_side_file(Path::new("dist/bundle.js")));
2371
2372        // Ambiguous files (default to server-side for conservative detection)
2373        assert!(analyzer.is_server_side_file(Path::new("src/utils/helper.js")));
2374        assert!(analyzer.is_server_side_file(Path::new("config/settings.js")));
2375    }
2376
2377    #[test]
2378    fn test_client_side_exposed_env_vars() {
2379        let analyzer = SecurityAnalyzer::new().unwrap();
2380
2381        // These should be flagged as client-side exposed (security issues)
2382        assert!(analyzer.is_client_side_exposed_env_var("process.env.REACT_APP_SECRET"));
2383        assert!(analyzer.is_client_side_exposed_env_var("import.meta.env.VITE_API_KEY"));
2384        assert!(analyzer.is_client_side_exposed_env_var("process.env.NEXT_PUBLIC_SECRET"));
2385        assert!(analyzer.is_client_side_exposed_env_var("process.env.VUE_APP_TOKEN"));
2386
2387        // These should NOT be flagged as client-side exposed
2388        assert!(!analyzer.is_client_side_exposed_env_var("process.env.DATABASE_URL"));
2389        assert!(!analyzer.is_client_side_exposed_env_var("process.env.JWT_SECRET"));
2390        assert!(!analyzer.is_client_side_exposed_env_var("process.env.API_KEY"));
2391    }
2392
2393    #[test]
2394    fn test_env_var_assignment_context() {
2395        let analyzer = SecurityAnalyzer::new().unwrap();
2396
2397        // Configuration files where assignments are legitimate
2398        assert!(analyzer.is_env_var_assignment_context("API_KEY=sk-test123", Path::new(".env")));
2399        assert!(analyzer.is_env_var_assignment_context(
2400            "DATABASE_URL=postgres://",
2401            Path::new("docker-compose.yml")
2402        ));
2403        assert!(
2404            analyzer.is_env_var_assignment_context("export SECRET=test", Path::new("setup.sh"))
2405        );
2406
2407        // Regular source files where assignments might be suspicious
2408        assert!(
2409            !analyzer.is_env_var_assignment_context(
2410                "const secret = 'hardcoded'",
2411                Path::new("src/app.js")
2412            )
2413        );
2414    }
2415
2416    #[test]
2417    fn test_enhanced_secret_patterns() {
2418        let analyzer = SecurityAnalyzer::new().unwrap();
2419
2420        // Test that hardcoded secrets are still detected
2421        let hardcoded_patterns = [
2422            "apikey = 'sk-1234567890abcdef1234567890abcdef12345678'",
2423            "const secret = 'my-super-secret-token-12345678901234567890'",
2424            "password = 'hardcoded123456'",
2425        ];
2426
2427        for pattern in &hardcoded_patterns {
2428            let has_secret = analyzer
2429                .secret_patterns
2430                .iter()
2431                .any(|sp| sp.pattern.is_match(pattern));
2432            assert!(has_secret, "Should detect hardcoded secret in: {}", pattern);
2433        }
2434
2435        // Test that legitimate env var usage is NOT detected as secret
2436        let legitimate_patterns = [
2437            "const apiKey = process.env.API_KEY;",
2438            "const dbUrl = process.env.DATABASE_URL || 'fallback';",
2439            "api_key = os.environ.get('API_KEY')",
2440            "let secret = env::var(\"JWT_SECRET\")?;",
2441        ];
2442
2443        for pattern in &legitimate_patterns {
2444            // These should either not match any secret pattern, or be filtered out by context detection
2445            let _matches_old_generic_pattern =
2446                pattern.to_lowercase().contains("secret") || pattern.to_lowercase().contains("key");
2447
2448            // Our new patterns should be more specific and not match env var access
2449            let matches_new_patterns = analyzer
2450                .secret_patterns
2451                .iter()
2452                .filter(|sp| sp.name.contains("Hardcoded"))
2453                .any(|sp| sp.pattern.is_match(pattern));
2454
2455            assert!(
2456                !matches_new_patterns,
2457                "Should NOT detect legitimate env var usage as hardcoded secret: {}",
2458                pattern
2459            );
2460        }
2461    }
2462
2463    #[test]
2464    fn test_context_aware_false_positive_reduction() {
2465        use tempfile::TempDir;
2466
2467        let temp_dir = TempDir::new().unwrap();
2468        let server_file = temp_dir.path().join("src/server/config.js");
2469
2470        // Create directory structure
2471        std::fs::create_dir_all(server_file.parent().unwrap()).unwrap();
2472
2473        // Write a file with legitimate environment variable usage
2474        let content = r#"
2475const config = {
2476    apiKey: process.env.RESEND_API_KEY,
2477    databaseUrl: process.env.DATABASE_URL,
2478    jwtSecret: process.env.JWT_SECRET,
2479    port: process.env.PORT || 3000
2480};
2481"#;
2482
2483        std::fs::write(&server_file, content).unwrap();
2484
2485        let analyzer = SecurityAnalyzer::new().unwrap();
2486        let findings = analyzer.analyze_file_for_secrets(&server_file).unwrap();
2487
2488        // Should have zero findings because all are legitimate env var usage
2489        assert_eq!(
2490            findings.len(),
2491            0,
2492            "Should not flag legitimate environment variable usage as security issues"
2493        );
2494    }
2495}
syncable_cli/analyzer/security_analyzer.rs

syncable_cli/analyzer/
security_analyzer.rs