repotoire 0.3.40

//! Command Injection Detector

use crate::detectors::base::{Detector, DetectorConfig};
use crate::graph::GraphStore;
use crate::models::{Finding, Severity};
use anyhow::Result;
use regex::Regex;
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use uuid::Uuid;

/// Cache for the compiled shell-execution regex; filled on the first call to `shell_exec()`.
static SHELL_EXEC: OnceLock<Regex> = OnceLock::new();
/// Cache for the compiled Go `exec.Command` regex; filled on the first call to `go_exec()`.
static GO_EXEC: OnceLock<Regex> = OnceLock::new();
/// Cache for the compiled bare-`exec(` regex; filled on the first call to `js_exec_direct()`.
static JS_EXEC_DIRECT: OnceLock<Regex> = OnceLock::new();

/// Returns the shared regex that recognises explicit shell-execution APIs.
///
/// The pattern is case-insensitive (`(?i)`) and matches exactly these spellings:
/// - Python: `os.system`, `os.popen`, `subprocess.call`, `subprocess.run`, `subprocess.Popen`
/// - Node.js: `child_process.exec` / `.spawn` / `.fork`, `execSync`, `execAsync`,
///   `spawnSync`, and `require('child_process')` with either quote style
/// - PHP: `shell_exec`, `proc_open`
///
/// A bare `exec(` or `system(` is deliberately NOT part of this pattern, so that
/// calls such as `RegExp.exec()` do not match here.
fn shell_exec() -> &'static Regex {
    const PATTERN: &str = r#"(?i)(os\.system|os\.popen|subprocess\.(call|run|Popen)|child_process\.(exec|spawn|fork)|execSync|execAsync|spawnSync|require\(['"]child_process['"]\)|shell_exec|proc_open)"#;

    // The pattern is a compile-time constant, so compilation cannot fail at runtime.
    SHELL_EXEC.get_or_init(|| Regex::new(PATTERN).unwrap())
}

/// Returns the shared regex for Go process spawning: `exec.Command(` or
/// `exec.CommandContext(`, with optional whitespace before the parenthesis.
fn go_exec() -> &'static Regex {
    const PATTERN: &str = r#"exec\.(Command|CommandContext)\s*\("#;

    // The pattern is a compile-time constant, so compilation cannot fail at runtime.
    GO_EXEC.get_or_init(|| Regex::new(PATTERN).unwrap())
}

/// Returns the shared regex for a bare JavaScript `exec(`, `execSync(` or `execAsync(` call.
///
/// The name must sit at the start of the searched text or be preceded by a character
/// that is neither `.` nor a word character, so method calls such as `pattern.exec(`
/// and longer identifiers such as `myexec(` are not matched.
fn js_exec_direct() -> &'static Regex {
    const PATTERN: &str = r#"(?:^|[^.\w])(exec|execSync|execAsync)\s*\("#;

    // The pattern is a compile-time constant, so compilation cannot fail at runtime.
    JS_EXEC_DIRECT.get_or_init(|| Regex::new(PATTERN).unwrap())
}

/// Detector for command-injection patterns in the files below a repository root.
pub struct CommandInjectionDetector {
    /// Directory that is walked by `detect` and used as the base for reported paths.
    repository_path: PathBuf,
    /// Finding count at which `detect` stops scanning further files.
    max_findings: usize,
}

impl CommandInjectionDetector {
    /// Creates a detector rooted at `repository_path` with a limit of 50 findings.
    pub fn new(repository_path: impl Into<PathBuf>) -> Self {
        let repository_path = repository_path.into();
        Self {
            repository_path,
            max_findings: 50,
        }
    }

    /// Returns `path` relative to the repository root, or `path` unchanged when it
    /// does not live under the root.
    fn relative_path(&self, path: &Path) -> PathBuf {
        match path.strip_prefix(&self.repository_path) {
            Ok(inside_repo) => inside_repo.to_path_buf(),
            Err(_) => path.to_path_buf(),
        }
    }
}

impl Detector for CommandInjectionDetector {
    fn name(&self) -> &'static str { "command-injection" }
    fn description(&self) -> &'static str { "Detects command injection vulnerabilities" }

    /// Scans supported source files under the repository root, line by line, for
    /// shell/process execution that appears to receive interpolated or
    /// user-controlled input.
    ///
    /// The graph argument is not used; detection is purely textual. At most
    /// `max_findings` findings are returned.
    ///
    /// # Errors
    /// This implementation never returns `Err`: unreadable directory entries and
    /// files whose content is unavailable from the cache are skipped.
    fn detect(&self, _graph: &GraphStore) -> Result<Vec<Finding>> {
        let mut findings = vec![];
        // hidden(false): hidden files are walked too; .gitignore rules are still honoured.
        let walker = ignore::WalkBuilder::new(&self.repository_path)
            .hidden(false)
            .git_ignore(true)
            .build();

        for entry in walker.filter_map(|e| e.ok()) {
            if findings.len() >= self.max_findings {
                break;
            }
            let path = entry.path();
            if !path.is_file() {
                continue;
            }

            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
            if !matches!(ext, "py" | "js" | "ts" | "rb" | "php" | "java" | "go" | "sh") {
                continue;
            }

            let Some(content) = crate::cache::global_cache().get_content(path) else {
                continue;
            };
            let lines: Vec<&str> = content.lines().collect();

            // First pass: variables assigned from a template literal with risky-looking
            // interpolation, e.g. const cmd = `echo ${userId}`;
            let dangerous_vars = template_built_vars(&lines);

            for (i, line) in lines.iter().copied().enumerate() {
                // Enforce the cap inside a file as well, not only between files.
                if findings.len() >= self.max_findings {
                    break;
                }

                // Set once a shell/exec finding is emitted for this line so the
                // direct-user-input check below does not report the same line twice.
                let mut exec_reported = false;

                if shell_exec().is_match(line) {
                    let has_user_input = line.contains("req.") || line.contains("request.")
                        || line.contains("params.") || line.contains("params[")
                        || line.contains("query.") || line.contains("body.")
                        || line.contains("input") || line.contains("argv")
                        || line.contains("args");

                    // String interpolation / concatenation on this line.
                    let has_interpolation = line.contains("f\"") || line.contains("${")
                        || line.contains("+ ") || line.contains(".format(");

                    // Template literal with interpolation on this line.
                    let has_template_interpolation = line.contains('`') && line.contains("${");

                    // The call mentions a variable identified by the first pass.
                    let uses_dangerous_var = dangerous_vars.iter().any(|v| line.contains(*v));

                    // Explicit shell mode (Python `shell=True`, Node `shell: true`).
                    let has_shell_true =
                        line.contains("shell=True") || line.contains("shell: true");

                    let is_risky = has_shell_true
                        || (has_user_input && has_interpolation)
                        || has_template_interpolation
                        || uses_dangerous_var;

                    if is_risky {
                        let description = if has_template_interpolation {
                            "Template literal with interpolation passed directly to shell execution. Variables are inserted unsanitized."
                        } else if uses_dangerous_var {
                            "Shell execution using a command string built from template literal. User input may flow into the command."
                        } else if has_shell_true {
                            "subprocess with shell=True allows shell injection through any unsanitized input."
                        } else {
                            "Shell command execution with potential user input."
                        };

                        findings.push(self.build_finding(path, i, &FindingText {
                            title: "Potential command injection",
                            description,
                            suggested_fix: "Use subprocess/spawn with array arguments instead of shell string. Never interpolate user input into commands.",
                            estimated_effort: "45 minutes",
                            why_it_matters: "Attackers could execute arbitrary system commands by injecting shell metacharacters.",
                        }));
                        exec_reported = true;
                    }
                }
                // Fallback for exec-like calls the shell_exec() pattern does not cover.
                else if line.contains("exec(") || line.contains("execSync(") || line.contains("execAsync(") {
                    if line.contains('`') && line.contains("${") {
                        findings.push(self.build_finding(path, i, &FindingText {
                            title: "Command injection via template literal",
                            description: "Template literal with variable interpolation passed to exec(). This is a classic command injection pattern.",
                            suggested_fix: "Use spawn() with array arguments: spawn('cmd', [arg1, arg2]) instead of exec(`cmd ${arg}`)",
                            estimated_effort: "30 minutes",
                            why_it_matters: "An attacker can inject shell commands by providing input like '; rm -rf /' or '$(malicious_command)'",
                        }));
                        exec_reported = true;
                    }
                    // The first argument is a variable built from a risky template literal.
                    else if dangerous_vars.iter().any(|v| {
                        line.contains(&format!("({})", v)) || line.contains(&format!("({},", v))
                    }) {
                        findings.push(self.build_finding(path, i, &FindingText {
                            title: "Command injection via interpolated variable",
                            description: "Shell execution using a command string that was built with template literal interpolation. User input may flow into the shell command.",
                            suggested_fix: "Use spawn() with array arguments instead of building command strings. Never interpolate user input.",
                            estimated_effort: "45 minutes",
                            why_it_matters: "The command variable was built using ${} interpolation, allowing shell injection.",
                        }));
                        exec_reported = true;
                    }
                }

                // exec(req.body.command)-style calls without any template literal.
                if !exec_reported && js_exec_direct().is_match(line) {
                    let has_direct_user_input = line.contains("req.body") || line.contains("req.query")
                        || line.contains("req.params") || line.contains("request.body")
                        || line.contains("request.query") || line.contains("request.params");

                    if has_direct_user_input {
                        findings.push(self.build_finding(path, i, &FindingText {
                            title: "Command injection via direct user input",
                            description: "User-controlled input (req.body/query/params) passed directly to shell execution function. This allows arbitrary command execution.",
                            suggested_fix: "Never pass user input directly to exec(). Use a whitelist of allowed commands, or use spawn() with a fixed command and user input only as arguments.",
                            estimated_effort: "1 hour",
                            why_it_matters: "An attacker can execute ANY system command by sending malicious input like 'rm -rf /' or 'cat /etc/passwd'.",
                        }));
                    }
                }

                // Go exec.Command / exec.CommandContext with user-looking arguments.
                if let Some(call) = go_exec().find(line) {
                    // Only the argument text is inspected: the call itself contains "c."
                    // (exec.Command) and an assignment target such as `cmd :=` would
                    // otherwise make every call look user-controlled.
                    let rest = &line[call.end()..];
                    let args = if rest.trim().is_empty() {
                        // Arguments wrapped onto the following line.
                        lines.get(i + 1).copied().unwrap_or("")
                    } else {
                        rest
                    };

                    if go_args_look_user_controlled(args) {
                        findings.push(self.build_finding(path, i, &FindingText {
                            title: "Potential command injection in Go exec.Command",
                            description: "exec.Command called with potentially user-controlled input. If the command or arguments come from user input, this allows arbitrary command execution.",
                            suggested_fix: "Validate user input against a whitelist of allowed commands. Never pass raw user input to exec.Command. Use filepath.Clean for paths.",
                            estimated_effort: "1 hour",
                            why_it_matters: "Go's exec.Command runs system commands. If user input controls the command or arguments, attackers can execute arbitrary commands.",
                        }));
                    }
                }
            }
        }

        // A single line can yield more than one finding, so clamp to the cap at the end.
        findings.truncate(self.max_findings);
        Ok(findings)
    }
}

/// Human-readable texts that differ between the kinds of finding this detector emits.
struct FindingText {
    title: &'static str,
    description: &'static str,
    suggested_fix: &'static str,
    estimated_effort: &'static str,
    why_it_matters: &'static str,
}

impl CommandInjectionDetector {
    /// Builds a critical CWE-78 finding for the zero-based `line_index` of `path`.
    fn build_finding(&self, path: &Path, line_index: usize, text: &FindingText) -> Finding {
        // Saturate instead of wrapping if the line number ever exceeded u32::MAX.
        let line_number = (line_index + 1).min(u32::MAX as usize) as u32;
        Finding {
            id: Uuid::new_v4().to_string(),
            detector: "CommandInjectionDetector".to_string(),
            severity: Severity::Critical,
            title: text.title.to_string(),
            description: text.description.to_string(),
            affected_files: vec![self.relative_path(path)],
            line_start: Some(line_number),
            line_end: Some(line_number),
            suggested_fix: Some(text.suggested_fix.to_string()),
            estimated_effort: Some(text.estimated_effort.to_string()),
            category: Some("security".to_string()),
            cwe_id: Some("CWE-78".to_string()),
            why_it_matters: Some(text.why_it_matters.to_string()),
        }
    }
}

/// Collects names of variables declared with `const`/`let`/`var` and assigned a
/// template literal whose line mentions something that looks like user input.
///
/// The name is taken as the last whitespace-separated token before the first `=`.
fn template_built_vars<'a>(lines: &[&'a str]) -> Vec<&'a str> {
    const RISKY_MARKERS: [&str; 11] = [
        "id}", "id,", "param", "input", "user", "name}", "args", "arg}", "req.", "body", "query",
    ];

    lines
        .iter()
        .copied()
        .filter(|line| {
            let declares =
                line.contains("const ") || line.contains("let ") || line.contains("var ");
            declares && line.contains('`') && line.contains("${")
        })
        .filter(|line| {
            let lower = line.to_lowercase();
            RISKY_MARKERS.iter().any(|marker| lower.contains(*marker))
        })
        .filter_map(|line| {
            let (before_eq, _) = line.split_once('=')?;
            before_eq.split_whitespace().last()
        })
        .collect()
}

/// Returns true when `text` contains `receiver` (e.g. `"r."`) at an identifier
/// boundary, i.e. not as the tail of a longer identifier such as `tar.`.
fn contains_receiver(text: &str, receiver: &str) -> bool {
    text.match_indices(receiver).any(|(pos, _)| {
        text[..pos]
            .chars()
            .next_back()
            .map_or(true, |prev| !(prev.is_alphanumeric() || prev == '_'))
    })
}

/// Heuristic: does the argument text of a Go `exec.Command` call reference
/// request objects or variables whose names suggest user input?
fn go_args_look_user_controlled(args: &str) -> bool {
    const RECEIVERS: [&str; 5] = ["r.", "req.", "request.", "c.", "ctx."];
    const MARKERS: [&str; 8] = [
        "Param", "Query", "FormValue", "PostForm", "userInput", "input", "cmd", "command",
    ];
    const RISKY_NAMES: [&str; 4] = ["userinput", "user_input", "usercmd", "user_cmd"];

    if RECEIVERS.iter().any(|receiver| contains_receiver(args, receiver)) {
        return true;
    }
    if MARKERS.iter().any(|marker| args.contains(*marker)) {
        return true;
    }
    let lower = args.to_lowercase();
    RISKY_NAMES.iter().any(|name| lower.contains(*name))
}