Skip to main content

llmtrace_security/
code_security.rs

1//! Code security analysis for LLM-generated code outputs.
2//!
//! Scans LLM response text for Markdown code blocks (fenced, or indented when
//! no fenced block is present) and analyses them for common security
//! vulnerabilities:
5//!
6//! - **SQL Injection** — string concatenation in SQL queries
7//! - **Command Injection** — `os.system()`, `eval()`, `child_process.exec()`
8//! - **Path Traversal** — `../` in file operations without sanitisation
9//! - **Hardcoded Credentials** — `password = "..."`, AWS keys in code
10//! - **Insecure Deserialization** — `pickle.loads()`, `yaml.load()` without SafeLoader
11//! - **XSS Patterns** — `innerHTML`, `document.write()`, `dangerouslySetInnerHTML`
12//! - **Insecure Crypto** — MD5/SHA1 for passwords, `Math.random()` for security
13
14use llmtrace_core::{SecurityFinding, SecuritySeverity};
15use regex::Regex;
16
17// ---------------------------------------------------------------------------
18// Language detection
19// ---------------------------------------------------------------------------
20
/// Programming language detected in a code block.
///
/// A value is derived either from a fenced block's info string or from
/// heuristics over the code itself; `Unknown` is used when neither yields a
/// match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeLanguage {
    /// Python.
    Python,
    /// JavaScript (also selected by the `node` info string).
    JavaScript,
    /// TypeScript.
    TypeScript,
    /// SQL, including the `mysql`, `postgresql` and `sqlite` info strings.
    Sql,
    /// Shell scripts (`bash`, `sh`, `shell`, `zsh`); displayed as `Shell/Bash`.
    Bash,
    /// Rust.
    Rust,
    /// Go.
    Go,
    /// Java.
    Java,
    /// C.
    C,
    /// C++; displayed as `C++`.
    Cpp,
    /// Ruby.
    Ruby,
    /// No info string or content heuristic matched.
    Unknown,
}
37
38impl std::fmt::Display for CodeLanguage {
39    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40        match self {
41            Self::Python => write!(f, "Python"),
42            Self::JavaScript => write!(f, "JavaScript"),
43            Self::TypeScript => write!(f, "TypeScript"),
44            Self::Sql => write!(f, "SQL"),
45            Self::Bash => write!(f, "Shell/Bash"),
46            Self::Rust => write!(f, "Rust"),
47            Self::Go => write!(f, "Go"),
48            Self::Java => write!(f, "Java"),
49            Self::C => write!(f, "C"),
50            Self::Cpp => write!(f, "C++"),
51            Self::Ruby => write!(f, "Ruby"),
52            Self::Unknown => write!(f, "Unknown"),
53        }
54    }
55}
56
57/// Detect the programming language from a fenced code block's info string,
58/// falling back to content heuristics.
59fn detect_language(info_string: Option<&str>, code: &str) -> CodeLanguage {
60    // 1. Check the info string (e.g. ```python)
61    if let Some(info) = info_string {
62        let lower = info.trim().to_lowercase();
63        if lower.starts_with("python") || lower == "py" {
64            return CodeLanguage::Python;
65        }
66        if lower.starts_with("javascript") || lower == "js" || lower == "node" {
67            return CodeLanguage::JavaScript;
68        }
69        if lower.starts_with("typescript") || lower == "ts" {
70            return CodeLanguage::TypeScript;
71        }
72        if lower == "sql" || lower == "mysql" || lower == "postgresql" || lower == "sqlite" {
73            return CodeLanguage::Sql;
74        }
75        if lower == "bash" || lower == "sh" || lower == "shell" || lower == "zsh" {
76            return CodeLanguage::Bash;
77        }
78        if lower == "rust" || lower == "rs" {
79            return CodeLanguage::Rust;
80        }
81        if lower == "go" || lower == "golang" {
82            return CodeLanguage::Go;
83        }
84        if lower == "java" {
85            return CodeLanguage::Java;
86        }
87        if lower == "c++" || lower == "cpp" || lower == "cxx" {
88            return CodeLanguage::Cpp;
89        }
90        if lower == "c" {
91            return CodeLanguage::C;
92        }
93        if lower == "ruby" || lower == "rb" {
94            return CodeLanguage::Ruby;
95        }
96    }
97
98    // 2. Heuristic detection from content
99    detect_language_from_content(code)
100}
101
102/// Heuristic language detection based on code content patterns.
103fn detect_language_from_content(code: &str) -> CodeLanguage {
104    let lower = code.to_lowercase();
105
106    // Python indicators
107    if lower.contains("import ") && (lower.contains("def ") || lower.contains("from "))
108        || lower.contains("print(")
109        || lower.contains("pickle.")
110        || lower.contains("subprocess.")
111    {
112        return CodeLanguage::Python;
113    }
114
115    // JavaScript/TypeScript indicators
116    if lower.contains("const ") || lower.contains("let ") || lower.contains("var ") {
117        if lower.contains(": string") || lower.contains(": number") || lower.contains("interface ")
118        {
119            return CodeLanguage::TypeScript;
120        }
121        return CodeLanguage::JavaScript;
122    }
123    if lower.contains("require(") || lower.contains("module.exports") {
124        return CodeLanguage::JavaScript;
125    }
126    if lower.contains("document.") || lower.contains("console.log") {
127        return CodeLanguage::JavaScript;
128    }
129
130    // SQL indicators
131    if lower.contains("select ") && lower.contains(" from ")
132        || lower.contains("insert into ")
133        || lower.contains("create table ")
134        || lower.contains("update ") && lower.contains(" set ")
135    {
136        return CodeLanguage::Sql;
137    }
138
139    // Bash indicators
140    if lower.starts_with("#!/bin/")
141        || lower.contains("echo ")
142        || (lower.contains("if [") && lower.contains("then"))
143    {
144        return CodeLanguage::Bash;
145    }
146
147    // Go indicators
148    if lower.contains("func main()") || lower.contains("package main") {
149        return CodeLanguage::Go;
150    }
151
152    // Java indicators
153    if lower.contains("public class ") || lower.contains("system.out.println") {
154        return CodeLanguage::Java;
155    }
156
157    // Rust indicators
158    if lower.contains("fn main()") || lower.contains("let mut ") {
159        return CodeLanguage::Rust;
160    }
161
162    // Ruby indicators
163    if lower.contains("puts ") || lower.contains("def ") && lower.contains("end") {
164        return CodeLanguage::Ruby;
165    }
166
167    CodeLanguage::Unknown
168}
169
170// ---------------------------------------------------------------------------
171// Code block extraction
172// ---------------------------------------------------------------------------
173
/// A code block extracted from text.
#[derive(Debug, Clone)]
struct CodeBlock {
    /// The code content, without the surrounding fence markers or the
    /// leading indentation that marked it as a block.
    code: String,
    /// Detected language, taken from the fence info string when recognised
    /// and from content heuristics otherwise.
    language: CodeLanguage,
}
182
183/// Extract code blocks from Markdown text.
184///
185/// Detects:
186/// 1. Fenced code blocks (``` ... ```)
187/// 2. Indented code blocks (4+ spaces or tab)
188fn extract_code_blocks(text: &str) -> Vec<CodeBlock> {
189    let mut blocks = Vec::new();
190
191    // 1. Fenced code blocks
192    let fence_re = Regex::new(r"```(\w*)\s*\n([\s\S]*?)```").expect("valid regex");
193    for cap in fence_re.captures_iter(text) {
194        let info_string = cap.get(1).map(|m| m.as_str()).filter(|s| !s.is_empty());
195        let code = cap.get(2).map_or("", |m| m.as_str());
196        if !code.trim().is_empty() {
197            let language = detect_language(info_string, code);
198            blocks.push(CodeBlock {
199                code: code.to_string(),
200                language,
201            });
202        }
203    }
204
205    // 2. Indented code blocks (4+ spaces at line start, consecutive lines)
206    // Only if no fenced blocks were found (to avoid double-counting)
207    if blocks.is_empty() {
208        let mut current_block = String::new();
209        for line in text.lines() {
210            if let Some(stripped) = line
211                .strip_prefix("    ")
212                .or_else(|| line.strip_prefix('\t'))
213            {
214                current_block.push_str(stripped);
215                current_block.push('\n');
216            } else if !current_block.is_empty() {
217                let code = current_block.trim().to_string();
218                if !code.is_empty() {
219                    let language = detect_language(None, &code);
220                    blocks.push(CodeBlock { code, language });
221                }
222                current_block.clear();
223            }
224        }
225        if !current_block.is_empty() {
226            let code = current_block.trim().to_string();
227            if !code.is_empty() {
228                let language = detect_language(None, &code);
229                blocks.push(CodeBlock { code, language });
230            }
231        }
232    }
233
234    blocks
235}
236
237// ---------------------------------------------------------------------------
238// Vulnerability patterns
239// ---------------------------------------------------------------------------
240
/// Type of code vulnerability detected.
///
/// The `Display` text of a variant is what gets stored in a finding's
/// `vulnerability_type` metadata entry.
#[derive(Debug, Clone, PartialEq, Eq)]
enum VulnerabilityType {
    /// SQL built from string concatenation, interpolation or formatting.
    SqlInjection,
    /// Shell or code execution with dynamic input (`os.system`, `eval`, ...).
    CommandInjection,
    /// File access through `../` or a potentially user-controlled path.
    PathTraversal,
    /// Passwords, API keys, tokens or connection strings embedded in code.
    HardcodedCredentials,
    /// Unsafe deserialisation such as `pickle.loads()` or `yaml.load()`.
    InsecureDeserialization,
    /// DOM sinks such as `innerHTML`, `document.write()` or
    /// `dangerouslySetInnerHTML`.
    Xss,
    /// Weak hashing, non-cryptographic randomness or ECB mode.
    InsecureCrypto,
}
252
253impl std::fmt::Display for VulnerabilityType {
254    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
255        match self {
256            Self::SqlInjection => write!(f, "SQL Injection"),
257            Self::CommandInjection => write!(f, "Command Injection"),
258            Self::PathTraversal => write!(f, "Path Traversal"),
259            Self::HardcodedCredentials => write!(f, "Hardcoded Credentials"),
260            Self::InsecureDeserialization => write!(f, "Insecure Deserialization"),
261            Self::Xss => write!(f, "Cross-Site Scripting (XSS)"),
262            Self::InsecureCrypto => write!(f, "Insecure Cryptography"),
263        }
264    }
265}
266
/// A detected code vulnerability.
///
/// One value is produced per detection pattern that matches a code block;
/// everything except `snippet` is copied from the matching pattern.
struct CodeVulnerability {
    /// Category of the vulnerability.
    vuln_type: VulnerabilityType,
    /// Severity assigned to the matching pattern.
    severity: SecuritySeverity,
    /// Human-readable explanation of the problem.
    description: String,
    /// The source line on which the match starts.
    snippet: String,
    /// Remediation advice shown alongside the finding.
    suggested_fix: String,
    /// Confidence assigned to the matching pattern (the built-in patterns
    /// use values between 0.75 and 0.9).
    confidence: f64,
}
276
277// ---------------------------------------------------------------------------
278// Vulnerability detection patterns
279// ---------------------------------------------------------------------------
280
/// Compiled vulnerability detection pattern.
///
/// Pairs a regular expression with the metadata reported when it matches.
struct VulnPattern {
    /// Expression searched for in each code block.
    regex: Regex,
    /// Category reported on a match.
    vuln_type: VulnerabilityType,
    /// Severity reported on a match.
    severity: SecuritySeverity,
    /// Human-readable explanation of the problem.
    description: &'static str,
    /// Remediation advice reported on a match.
    suggested_fix: &'static str,
    /// Confidence reported on a match.
    confidence: f64,
}
290
291/// Build all vulnerability detection patterns.
292fn build_vuln_patterns() -> Vec<VulnPattern> {
293    let definitions: Vec<(
294        &str,
295        VulnerabilityType,
296        SecuritySeverity,
297        &'static str,
298        &'static str,
299        f64,
300    )> = vec![
301        // ---------------------------------------------------------------
302        // SQL Injection (High)
303        // ---------------------------------------------------------------
304        (
305            // String concatenation in SQL: "SELECT ... " + variable
306            r#"(?i)(?:"|')(?:SELECT|INSERT|UPDATE|DELETE|DROP)\s[^"']*(?:"|')\s*(?:\+|\.format\(|%\s)"#,
307            VulnerabilityType::SqlInjection,
308            SecuritySeverity::High,
309            "SQL query built with string concatenation — vulnerable to SQL injection",
310            "Use parameterised queries (e.g., cursor.execute(\"SELECT * FROM t WHERE id = ?\", (user_id,)))",
311            0.85,
312        ),
313        (
314            // f-string SQL: f"SELECT ... {variable}"
315            r#"(?i)f\s*(?:"|')(?:SELECT|INSERT|UPDATE|DELETE|DROP)\b.*\{[^}]+\}"#,
316            VulnerabilityType::SqlInjection,
317            SecuritySeverity::High,
318            "SQL query built with f-string interpolation — vulnerable to SQL injection",
319            "Use parameterised queries instead of f-strings for SQL",
320            0.9,
321        ),
322        (
323            // execute() with string formatting: .execute("..." % ...) or .execute("..." + ...)
324            r#"(?i)\.execute\(\s*(?:f\s*)?["'][^"']*["']\s*(?:%|\+|\.format\()"#,
325            VulnerabilityType::SqlInjection,
326            SecuritySeverity::High,
327            "SQL execute() called with string formatting — vulnerable to SQL injection",
328            "Use parameterised queries: cursor.execute(\"SELECT ... WHERE id = %s\", (param,))",
329            0.9,
330        ),
331        // ---------------------------------------------------------------
332        // Command Injection (Critical)
333        // ---------------------------------------------------------------
334        (
335            // os.system() with string formatting (f-string)
336            r#"(?i)os\.system\s*\(\s*f\s*["'][^"']*\{[^}]*\}"#,
337            VulnerabilityType::CommandInjection,
338            SecuritySeverity::Critical,
339            "os.system() called with dynamic input — vulnerable to command injection",
340            "Use subprocess.run() with a list of arguments instead of os.system()",
341            0.9,
342        ),
343        (
344            // os.system() with concatenation
345            r#"(?i)os\.system\s*\(\s*["'][^"']*["']\s*\+"#,
346            VulnerabilityType::CommandInjection,
347            SecuritySeverity::Critical,
348            "os.system() called with string concatenation — vulnerable to command injection",
349            "Use subprocess.run() with a list of arguments instead of os.system()",
350            0.9,
351        ),
352        (
353            // subprocess with shell=True and string formatting
354            r"(?i)subprocess\.(?:call|run|Popen)\s*\([^)]*shell\s*=\s*True",
355            VulnerabilityType::CommandInjection,
356            SecuritySeverity::Critical,
357            "subprocess called with shell=True — vulnerable to command injection",
358            "Use subprocess.run([\"cmd\", \"arg1\", \"arg2\"]) without shell=True",
359            0.85,
360        ),
361        (
362            // eval() with non-literal argument
363            r"(?i)\beval\s*\(\s*[a-zA-Z_]",
364            VulnerabilityType::CommandInjection,
365            SecuritySeverity::Critical,
366            "eval() called with potentially dynamic input — code injection risk",
367            "Avoid eval(); use ast.literal_eval() for Python or JSON.parse() for JavaScript",
368            0.8,
369        ),
370        (
371            // child_process.exec() in Node.js with template literal or concatenation
372            r"(?i)child_process\.exec\s*\(\s*`[^`]*\$\{",
373            VulnerabilityType::CommandInjection,
374            SecuritySeverity::Critical,
375            "child_process.exec() called with template literal — vulnerable to command injection",
376            "Use child_process.execFile() or spawn() with argument arrays instead",
377            0.9,
378        ),
379        (
380            // child_process.exec() with concatenation
381            r#"(?i)(?:child_process\.exec|exec)\s*\(\s*["'][^"']*["']\s*\+"#,
382            VulnerabilityType::CommandInjection,
383            SecuritySeverity::Critical,
384            "child_process.exec() called with string concatenation — command injection risk",
385            "Use child_process.execFile() or spawn() with argument arrays instead",
386            0.85,
387        ),
388        // ---------------------------------------------------------------
389        // Path Traversal (High)
390        // ---------------------------------------------------------------
391        (
392            // open() or readFile with ../ path
393            r#"(?i)(?:open|readFile|readFileSync|read_file|fs\.read)\s*\([^)]*\.\.\/"#,
394            VulnerabilityType::PathTraversal,
395            SecuritySeverity::High,
396            "File operation with '../' path — vulnerable to path traversal",
397            "Validate and canonicalise file paths using os.path.realpath() or path.resolve()",
398            0.85,
399        ),
400        (
401            // Path concatenation with user input (Python-style)
402            r"(?i)open\s*\(\s*(?:(?:request|user_input|filename|path|file_path|params)\b[^)]*|[^)]*\+\s*(?:request|user_input|filename|path|file_path|params)\b)",
403            VulnerabilityType::PathTraversal,
404            SecuritySeverity::High,
405            "File open() with potentially user-controlled path — path traversal risk",
406            "Validate paths against an allowlist and use os.path.realpath() to resolve symlinks",
407            0.75,
408        ),
409        // ---------------------------------------------------------------
410        // Hardcoded Credentials (High)
411        // ---------------------------------------------------------------
412        (
413            // password = "..." or password = '...'
414            r#"(?i)(?:password|passwd|pwd)\s*=\s*(?:"|')[^"']{3,}(?:"|')"#,
415            VulnerabilityType::HardcodedCredentials,
416            SecuritySeverity::High,
417            "Hardcoded password detected in code",
418            "Use environment variables or a secrets manager instead of hardcoding passwords",
419            0.85,
420        ),
421        (
422            // api_key = "..." or secret = "..." or token = "..."
423            r#"(?i)(?:api[_-]?key|secret[_-]?key|auth[_-]?token|access[_-]?token|secret)\s*=\s*(?:"|')[A-Za-z0-9+/=_\-]{8,}(?:"|')"#,
424            VulnerabilityType::HardcodedCredentials,
425            SecuritySeverity::High,
426            "Hardcoded API key, secret, or token detected in code",
427            "Use environment variables or a secrets manager instead of hardcoding secrets",
428            0.85,
429        ),
430        (
431            // Connection string with password
432            r"(?i)(?:mysql|postgres|postgresql|mongodb|redis)://[^:]+:[^@]+@",
433            VulnerabilityType::HardcodedCredentials,
434            SecuritySeverity::High,
435            "Connection string with embedded password detected",
436            "Use environment variables for connection strings containing credentials",
437            0.85,
438        ),
439        (
440            // AWS key patterns in code assignment
441            r#"(?i)(?:aws_access_key_id|aws_secret_access_key)\s*=\s*["'][A-Za-z0-9/+=]{16,}["']"#,
442            VulnerabilityType::HardcodedCredentials,
443            SecuritySeverity::High,
444            "Hardcoded AWS credentials detected in code",
445            "Use IAM roles, environment variables, or AWS Secrets Manager instead",
446            0.9,
447        ),
448        // ---------------------------------------------------------------
449        // Insecure Deserialization (High)
450        // ---------------------------------------------------------------
451        (
452            // pickle.loads() or pickle.load()
453            r"(?i)pickle\.loads?\s*\(",
454            VulnerabilityType::InsecureDeserialization,
455            SecuritySeverity::High,
456            "pickle.loads() used — insecure deserialization can lead to remote code execution",
457            "Avoid pickle for untrusted data; use JSON or a safe serialisation format instead",
458            0.9,
459        ),
460        (
461            // yaml.load() without SafeLoader
462            r"(?i)yaml\.load\s*\([^)]*\)",
463            VulnerabilityType::InsecureDeserialization,
464            SecuritySeverity::High,
465            "yaml.load() without SafeLoader — can execute arbitrary code",
466            "Use yaml.safe_load() or yaml.load(data, Loader=yaml.SafeLoader)",
467            0.85,
468        ),
469        (
470            // eval(JSON.parse(...)) pattern
471            r"(?i)eval\s*\(\s*JSON\.parse",
472            VulnerabilityType::InsecureDeserialization,
473            SecuritySeverity::High,
474            "eval(JSON.parse(...)) — combining eval with parsed JSON is dangerous",
475            "Use JSON.parse() alone; never pass its result to eval()",
476            0.9,
477        ),
478        // ---------------------------------------------------------------
479        // XSS Patterns (Medium)
480        // ---------------------------------------------------------------
481        (
482            // innerHTML assignment
483            r"(?i)\.innerHTML\s*=",
484            VulnerabilityType::Xss,
485            SecuritySeverity::Medium,
486            "innerHTML assignment — potential XSS if user input is not sanitised",
487            "Use textContent or a DOM sanitisation library (e.g., DOMPurify) instead",
488            0.8,
489        ),
490        (
491            // document.write()
492            r"(?i)document\.write\s*\(",
493            VulnerabilityType::Xss,
494            SecuritySeverity::Medium,
495            "document.write() — potential XSS vector",
496            "Use DOM manipulation methods (createElement, appendChild) instead of document.write()",
497            0.8,
498        ),
499        (
500            // dangerouslySetInnerHTML
501            r"(?i)dangerouslySetInnerHTML",
502            VulnerabilityType::Xss,
503            SecuritySeverity::Medium,
504            "dangerouslySetInnerHTML in React — potential XSS if content is not sanitised",
505            "Sanitise HTML with DOMPurify before passing to dangerouslySetInnerHTML",
506            0.8,
507        ),
508        // ---------------------------------------------------------------
509        // Insecure Crypto (Medium)
510        // ---------------------------------------------------------------
511        (
512            // MD5 for passwords
513            r"(?i)(?:md5|MD5)\s*\(.*(?:password|passwd|pwd)",
514            VulnerabilityType::InsecureCrypto,
515            SecuritySeverity::Medium,
516            "MD5 used for password hashing — cryptographically broken",
517            "Use bcrypt, scrypt, or Argon2 for password hashing",
518            0.85,
519        ),
520        (
521            // hashlib.md5 or hashlib.sha1 for passwords
522            r"(?i)hashlib\.(?:md5|sha1)\s*\(.*(?:password|passwd|pwd)",
523            VulnerabilityType::InsecureCrypto,
524            SecuritySeverity::Medium,
525            "MD5/SHA1 used for password hashing — cryptographically weak",
526            "Use bcrypt, scrypt, or Argon2 for password hashing",
527            0.85,
528        ),
529        (
530            // Math.random() for security
531            r"(?i)(?:(?:token|key|secret|password|nonce|salt|iv).*Math\.random|Math\.random\s*\(\s*\).*(?:token|key|secret|password|nonce|salt|iv))",
532            VulnerabilityType::InsecureCrypto,
533            SecuritySeverity::Medium,
534            "Math.random() used for security-sensitive value — not cryptographically secure",
535            "Use crypto.getRandomValues() or crypto.randomBytes() instead",
536            0.8,
537        ),
538        (
539            // ECB mode
540            r"(?i)(?:AES|DES|Blowfish).*(?:ECB|mode_ecb|MODE_ECB)",
541            VulnerabilityType::InsecureCrypto,
542            SecuritySeverity::Medium,
543            "ECB mode encryption — does not provide semantic security",
544            "Use CBC, GCM, or another authenticated encryption mode instead of ECB",
545            0.85,
546        ),
547    ];
548
549    definitions
550        .into_iter()
551        .filter_map(
552            |(pattern, vuln_type, severity, description, suggested_fix, confidence)| {
553                Regex::new(pattern).ok().map(|regex| VulnPattern {
554                    regex,
555                    vuln_type,
556                    severity,
557                    description,
558                    suggested_fix,
559                    confidence,
560                })
561            },
562        )
563        .collect()
564}
565
566// ---------------------------------------------------------------------------
567// CodeSecurityAnalyzer
568// ---------------------------------------------------------------------------
569
/// Analyser that scans text for code blocks and checks them for security
/// vulnerabilities.
///
/// The detection patterns are compiled when the analyser is constructed and
/// reused by every call to `analyze`.
pub struct CodeSecurityAnalyzer {
    /// Compiled detection patterns, tried in order against every code block.
    patterns: Vec<VulnPattern>,
    /// Minimum severity to report (findings below this are skipped).
    severity_threshold: SecuritySeverity,
}
577
578impl CodeSecurityAnalyzer {
579    /// Create a new `CodeSecurityAnalyzer` with the default severity threshold
580    /// (`Medium`).
581    #[must_use]
582    pub fn new() -> Self {
583        Self {
584            patterns: build_vuln_patterns(),
585            severity_threshold: SecuritySeverity::Medium,
586        }
587    }
588
589    /// Create a new `CodeSecurityAnalyzer` with a custom severity threshold.
590    ///
591    /// Findings with severity below `threshold` are silently dropped.
592    #[must_use]
593    pub fn with_severity_threshold(threshold: SecuritySeverity) -> Self {
594        Self {
595            patterns: build_vuln_patterns(),
596            severity_threshold: threshold,
597        }
598    }
599
600    /// Analyse text (typically an LLM response) for code security vulnerabilities.
601    ///
602    /// Extracts code blocks, detects languages, and scans for vulnerability
603    /// patterns. Returns a list of `SecurityFinding`s tagged as
604    /// `"insecure_code"`.
605    pub fn analyze(&self, text: &str) -> Vec<SecurityFinding> {
606        let blocks = extract_code_blocks(text);
607        if blocks.is_empty() {
608            return Vec::new();
609        }
610
611        let mut findings = Vec::new();
612        for block in &blocks {
613            let vulns = self.scan_code(&block.code, &block.language);
614            for vuln in vulns {
615                if vuln.severity < self.severity_threshold {
616                    continue;
617                }
618                let description = format!(
619                    "[{}] {}: {}\n\nVulnerable code:\n  {}\n\nSuggested fix: {}",
620                    vuln.severity,
621                    vuln.vuln_type,
622                    vuln.description,
623                    vuln.snippet.trim(),
624                    vuln.suggested_fix,
625                );
626                let mut finding = SecurityFinding::new(
627                    vuln.severity,
628                    "insecure_code".to_string(),
629                    description,
630                    vuln.confidence,
631                );
632                finding
633                    .metadata
634                    .insert("vulnerability_type".to_string(), vuln.vuln_type.to_string());
635                finding
636                    .metadata
637                    .insert("language".to_string(), block.language.to_string());
638                finding
639                    .metadata
640                    .insert("code_snippet".to_string(), vuln.snippet);
641                finding
642                    .metadata
643                    .insert("suggested_fix".to_string(), vuln.suggested_fix);
644                findings.push(finding);
645            }
646        }
647
648        findings
649    }
650
651    /// Scan a single code block for vulnerability patterns.
652    fn scan_code(&self, code: &str, _language: &CodeLanguage) -> Vec<CodeVulnerability> {
653        let mut vulns = Vec::new();
654
655        for pattern in &self.patterns {
656            if let Some(mat) = pattern.regex.find(code) {
657                // Extract the line containing the match for context
658                let snippet = extract_snippet(code, mat.start());
659                vulns.push(CodeVulnerability {
660                    vuln_type: pattern.vuln_type.clone(),
661                    severity: pattern.severity.clone(),
662                    description: pattern.description.to_string(),
663                    snippet,
664                    suggested_fix: pattern.suggested_fix.to_string(),
665                    confidence: pattern.confidence,
666                });
667            }
668        }
669
670        vulns
671    }
672}
673
/// The default analyser is the one produced by `CodeSecurityAnalyzer::new`.
impl Default for CodeSecurityAnalyzer {
    /// Delegates to `Self::new()`.
    fn default() -> Self {
        Self::new()
    }
}
679
/// Return the whole line of `code` that contains byte offset `pos`, without
/// its terminating `\n`.
///
/// `pos` must lie on a character boundary inside `code` (or be equal to its
/// length); otherwise the call panics.
fn extract_snippet(code: &str, pos: usize) -> String {
    let (head, tail) = code.split_at(pos);
    // Part of the line before `pos`: everything after the last newline.
    let line_prefix = head.rsplit('\n').next().unwrap_or(head);
    // Part of the line from `pos` on: everything up to the next newline.
    let line_suffix = tail.split('\n').next().unwrap_or(tail);
    [line_prefix, line_suffix].concat()
}
688
689// ===========================================================================
690// Tests
691// ===========================================================================
692
693#[cfg(test)]
694mod tests {
695    use super::*;
696
697    // ---------------------------------------------------------------
698    // Helper
699    // ---------------------------------------------------------------
700
701    fn analyzer() -> CodeSecurityAnalyzer {
702        CodeSecurityAnalyzer::new()
703    }
704
705    fn has_vuln_type(findings: &[SecurityFinding], vuln_type: &str) -> bool {
706        findings
707            .iter()
708            .any(|f| f.metadata.get("vulnerability_type") == Some(&vuln_type.to_string()))
709    }
710
711    // ---------------------------------------------------------------
712    // Language detection
713    // ---------------------------------------------------------------
714
715    #[test]
716    fn test_detect_language_from_info_string() {
717        assert_eq!(detect_language(Some("python"), ""), CodeLanguage::Python);
718        assert_eq!(detect_language(Some("js"), ""), CodeLanguage::JavaScript);
719        assert_eq!(detect_language(Some("ts"), ""), CodeLanguage::TypeScript);
720        assert_eq!(detect_language(Some("sql"), ""), CodeLanguage::Sql);
721        assert_eq!(detect_language(Some("bash"), ""), CodeLanguage::Bash);
722        assert_eq!(detect_language(Some("rust"), ""), CodeLanguage::Rust);
723        assert_eq!(detect_language(Some("go"), ""), CodeLanguage::Go);
724        assert_eq!(detect_language(Some("java"), ""), CodeLanguage::Java);
725        assert_eq!(detect_language(Some("cpp"), ""), CodeLanguage::Cpp);
726        assert_eq!(detect_language(Some("c"), ""), CodeLanguage::C);
727        assert_eq!(detect_language(Some("ruby"), ""), CodeLanguage::Ruby);
728    }
729
730    #[test]
731    fn test_detect_language_from_content() {
732        assert_eq!(
733            detect_language_from_content("import os\ndef foo():\n    pass"),
734            CodeLanguage::Python
735        );
736        assert_eq!(
737            detect_language_from_content("const x = 5; console.log(x);"),
738            CodeLanguage::JavaScript
739        );
740        assert_eq!(
741            detect_language_from_content("SELECT * FROM users WHERE id = 1"),
742            CodeLanguage::Sql
743        );
744    }
745
746    // ---------------------------------------------------------------
747    // Code block extraction
748    // ---------------------------------------------------------------
749
750    #[test]
751    fn test_extract_fenced_code_block() {
752        let text = "Here is code:\n```python\nimport os\nprint('hello')\n```\nDone.";
753        let blocks = extract_code_blocks(text);
754        assert_eq!(blocks.len(), 1);
755        assert_eq!(blocks[0].language, CodeLanguage::Python);
756        assert!(blocks[0].code.contains("import os"));
757    }
758
759    #[test]
760    fn test_extract_indented_code_block() {
761        let text = "Example:\n    import os\n    os.system('ls')\nDone.";
762        let blocks = extract_code_blocks(text);
763        assert_eq!(blocks.len(), 1);
764        assert!(blocks[0].code.contains("import os"));
765    }
766
767    #[test]
768    fn test_extract_multiple_fenced_blocks() {
769        let text = "```python\nprint('a')\n```\nText\n```js\nconsole.log('b');\n```";
770        let blocks = extract_code_blocks(text);
771        assert_eq!(blocks.len(), 2);
772    }
773
774    // ---------------------------------------------------------------
775    // SQL Injection
776    // ---------------------------------------------------------------
777
778    #[test]
779    fn test_sql_injection_string_concat() {
780        let text = r#"```python
781query = "SELECT * FROM users WHERE id = " + user_id
782cursor.execute(query)
783```"#;
784        let findings = analyzer().analyze(text);
785        assert!(
786            has_vuln_type(&findings, "SQL Injection"),
787            "Should detect SQL injection via concatenation; findings: {:?}",
788            findings
789                .iter()
790                .map(|f| f.metadata.get("vulnerability_type"))
791                .collect::<Vec<_>>()
792        );
793    }
794
795    #[test]
796    fn test_sql_injection_fstring() {
797        let text = r#"```python
798query = f"SELECT * FROM users WHERE name = '{username}'"
799cursor.execute(query)
800```"#;
801        let findings = analyzer().analyze(text);
802        assert!(
803            has_vuln_type(&findings, "SQL Injection"),
804            "Should detect SQL injection via f-string"
805        );
806    }
807
808    #[test]
809    fn test_sql_injection_execute_format() {
810        let text = r#"```python
811cursor.execute("SELECT * FROM users WHERE id = %s" % user_id)
812```"#;
813        let findings = analyzer().analyze(text);
814        assert!(
815            has_vuln_type(&findings, "SQL Injection"),
816            "Should detect SQL injection via execute with format"
817        );
818    }
819
820    #[test]
821    fn test_sql_parameterised_query_safe() {
822        let text = r#"```python
823cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
824```"#;
825        let findings = analyzer().analyze(text);
826        assert!(
827            !has_vuln_type(&findings, "SQL Injection"),
828            "Parameterised query should NOT trigger SQL injection; findings: {:?}",
829            findings
830                .iter()
831                .map(|f| f.metadata.get("vulnerability_type"))
832                .collect::<Vec<_>>()
833        );
834    }
835
836    // ---------------------------------------------------------------
837    // Command Injection
838    // ---------------------------------------------------------------
839
840    #[test]
841    fn test_command_injection_os_system_fstring() {
842        let text = r#"```python
843import os
844os.system(f"rm -rf {user_input}")
845```"#;
846        let findings = analyzer().analyze(text);
847        assert!(
848            has_vuln_type(&findings, "Command Injection"),
849            "Should detect command injection via os.system with f-string"
850        );
851    }
852
853    #[test]
854    fn test_command_injection_os_system_concat() {
855        let text = r#"```python
856import os
857os.system("ping " + host)
858```"#;
859        let findings = analyzer().analyze(text);
860        assert!(
861            has_vuln_type(&findings, "Command Injection"),
862            "Should detect command injection via os.system with concatenation"
863        );
864    }
865
866    #[test]
867    fn test_command_injection_subprocess_shell_true() {
868        let text = r#"```python
869import subprocess
870subprocess.call("ls " + path, shell=True)
871```"#;
872        let findings = analyzer().analyze(text);
873        assert!(
874            has_vuln_type(&findings, "Command Injection"),
875            "Should detect command injection via subprocess with shell=True"
876        );
877    }
878
879    #[test]
880    fn test_command_injection_child_process_exec_template() {
881        let text = r#"```javascript
882const { exec } = require('child_process');
883child_process.exec(`ls ${userInput}`, callback);
884```"#;
885        let findings = analyzer().analyze(text);
886        assert!(
887            has_vuln_type(&findings, "Command Injection"),
888            "Should detect command injection via child_process.exec with template literal"
889        );
890    }
891
892    #[test]
893    fn test_subprocess_list_args_safe() {
894        let text = r#"```python
895import subprocess
896subprocess.run(["ls", "-la", path])
897```"#;
898        let findings = analyzer().analyze(text);
899        assert!(
900            !has_vuln_type(&findings, "Command Injection"),
901            "subprocess with list args should NOT trigger command injection"
902        );
903    }
904
905    // ---------------------------------------------------------------
906    // Path Traversal
907    // ---------------------------------------------------------------
908
909    #[test]
910    fn test_path_traversal_open_dotdot() {
911        let text = r#"```python
912with open("../../etc/passwd") as f:
913    data = f.read()
914```"#;
915        let findings = analyzer().analyze(text);
916        assert!(
917            has_vuln_type(&findings, "Path Traversal"),
918            "Should detect path traversal with ../"
919        );
920    }
921
922    #[test]
923    fn test_path_traversal_user_input() {
924        let text = r#"```python
925f = open(user_input)
926data = f.read()
927```"#;
928        let findings = analyzer().analyze(text);
929        assert!(
930            has_vuln_type(&findings, "Path Traversal"),
931            "Should detect open() with user-controlled path"
932        );
933    }
934
935    // ---------------------------------------------------------------
936    // Hardcoded Credentials
937    // ---------------------------------------------------------------
938
939    #[test]
940    fn test_hardcoded_password() {
941        let text = r#"```python
942password = "super_secret_123"
943db.connect(password=password)
944```"#;
945        let findings = analyzer().analyze(text);
946        assert!(
947            has_vuln_type(&findings, "Hardcoded Credentials"),
948            "Should detect hardcoded password"
949        );
950    }
951
952    #[test]
953    fn test_hardcoded_api_key() {
954        let text = r#"```javascript
955const api_key = "sk_live_abcdef1234567890";
956```"#;
957        let findings = analyzer().analyze(text);
958        assert!(
959            has_vuln_type(&findings, "Hardcoded Credentials"),
960            "Should detect hardcoded API key"
961        );
962    }
963
964    #[test]
965    fn test_hardcoded_connection_string() {
966        let text = r#"```python
967db_url = "postgresql://admin:password123@localhost:5432/mydb"
968```"#;
969        let findings = analyzer().analyze(text);
970        assert!(
971            has_vuln_type(&findings, "Hardcoded Credentials"),
972            "Should detect connection string with embedded password"
973        );
974    }
975
976    #[test]
977    fn test_env_var_password_safe() {
978        let text = r#"```python
979import os
980password = os.environ.get("DB_PASSWORD")
981db.connect(password=password)
982```"#;
983        let findings = analyzer().analyze(text);
984        assert!(
985            !has_vuln_type(&findings, "Hardcoded Credentials"),
986            "Password from env var should NOT trigger hardcoded credentials"
987        );
988    }
989
990    // ---------------------------------------------------------------
991    // Insecure Deserialization
992    // ---------------------------------------------------------------
993
994    #[test]
995    fn test_pickle_loads() {
996        let text = r#"```python
997import pickle
998data = pickle.loads(user_data)
999```"#;
1000        let findings = analyzer().analyze(text);
1001        assert!(
1002            has_vuln_type(&findings, "Insecure Deserialization"),
1003            "Should detect pickle.loads()"
1004        );
1005    }
1006
1007    #[test]
1008    fn test_yaml_load_unsafe() {
1009        let text = r#"```python
1010import yaml
1011data = yaml.load(content)
1012```"#;
1013        let findings = analyzer().analyze(text);
1014        assert!(
1015            has_vuln_type(&findings, "Insecure Deserialization"),
1016            "Should detect yaml.load() without SafeLoader"
1017        );
1018    }
1019
1020    #[test]
1021    fn test_eval_json_parse() {
1022        let text = r#"```javascript
1023const result = eval(JSON.parse(data));
1024```"#;
1025        let findings = analyzer().analyze(text);
1026        assert!(
1027            has_vuln_type(&findings, "Insecure Deserialization"),
1028            "Should detect eval(JSON.parse(...))"
1029        );
1030    }
1031
1032    // ---------------------------------------------------------------
1033    // XSS Patterns
1034    // ---------------------------------------------------------------
1035
1036    #[test]
1037    fn test_xss_innerhtml() {
1038        let text = r#"```javascript
1039element.innerHTML = userInput;
1040```"#;
1041        let findings = analyzer().analyze(text);
1042        assert!(
1043            has_vuln_type(&findings, "Cross-Site Scripting (XSS)"),
1044            "Should detect innerHTML assignment"
1045        );
1046    }
1047
1048    #[test]
1049    fn test_xss_document_write() {
1050        let text = r#"```javascript
1051document.write(userContent);
1052```"#;
1053        let findings = analyzer().analyze(text);
1054        assert!(
1055            has_vuln_type(&findings, "Cross-Site Scripting (XSS)"),
1056            "Should detect document.write()"
1057        );
1058    }
1059
1060    #[test]
1061    fn test_xss_dangerously_set_inner_html() {
1062        let text = r#"```javascript
1063<div dangerouslySetInnerHTML={{__html: content}} />
1064```"#;
1065        let findings = analyzer().analyze(text);
1066        assert!(
1067            has_vuln_type(&findings, "Cross-Site Scripting (XSS)"),
1068            "Should detect dangerouslySetInnerHTML"
1069        );
1070    }
1071
1072    // ---------------------------------------------------------------
1073    // Insecure Crypto
1074    // ---------------------------------------------------------------
1075
1076    #[test]
1077    fn test_md5_password() {
1078        let text = r#"```python
1079import hashlib
1080hashed = hashlib.md5(password.encode()).hexdigest()
1081```"#;
1082        let findings = analyzer().analyze(text);
1083        assert!(
1084            has_vuln_type(&findings, "Insecure Cryptography"),
1085            "Should detect MD5 for password hashing"
1086        );
1087    }
1088
1089    #[test]
1090    fn test_math_random_token() {
1091        let text = r#"```javascript
1092const token = Math.random().toString(36);
1093```"#;
1094        let findings = analyzer().analyze(text);
1095        assert!(
1096            has_vuln_type(&findings, "Insecure Cryptography"),
1097            "Should detect Math.random() for token generation"
1098        );
1099    }
1100
1101    #[test]
1102    fn test_ecb_mode() {
1103        let text = r#"```python
1104from Crypto.Cipher import AES
1105cipher = AES.new(key, AES.MODE_ECB)
1106```"#;
1107        let findings = analyzer().analyze(text);
1108        assert!(
1109            has_vuln_type(&findings, "Insecure Cryptography"),
1110            "Should detect ECB mode encryption"
1111        );
1112    }
1113
1114    // ---------------------------------------------------------------
1115    // Finding metadata
1116    // ---------------------------------------------------------------
1117
1118    #[test]
1119    fn test_findings_have_correct_type() {
1120        let text = r#"```python
1121password = "secret123"
1122```"#;
1123        let findings = analyzer().analyze(text);
1124        assert!(!findings.is_empty());
1125        for f in &findings {
1126            assert_eq!(f.finding_type, "insecure_code");
1127            assert!(f.metadata.contains_key("vulnerability_type"));
1128            assert!(f.metadata.contains_key("language"));
1129            assert!(f.metadata.contains_key("code_snippet"));
1130            assert!(f.metadata.contains_key("suggested_fix"));
1131        }
1132    }
1133
1134    #[test]
1135    fn test_findings_severity_high_or_above() {
1136        let text = r#"```python
1137os.system(f"rm {user_input}")
1138```"#;
1139        let findings = analyzer().analyze(text);
1140        assert!(!findings.is_empty());
1141        for f in &findings {
1142            assert!(f.severity >= SecuritySeverity::Medium);
1143        }
1144    }
1145
1146    // ---------------------------------------------------------------
1147    // Severity threshold filtering
1148    // ---------------------------------------------------------------
1149
1150    #[test]
1151    fn test_severity_threshold_filters_low() {
1152        // XSS is Medium severity — should be included with Medium threshold
1153        let text = r#"```javascript
1154element.innerHTML = data;
1155```"#;
1156        let analyzer = CodeSecurityAnalyzer::with_severity_threshold(SecuritySeverity::Medium);
1157        let findings = analyzer.analyze(text);
1158        assert!(
1159            !findings.is_empty(),
1160            "Medium findings should pass Medium threshold"
1161        );
1162
1163        // High threshold should filter out Medium findings
1164        let analyzer_high = CodeSecurityAnalyzer::with_severity_threshold(SecuritySeverity::High);
1165        let findings_high = analyzer_high.analyze(text);
1166        assert!(
1167            !has_vuln_type(&findings_high, "Cross-Site Scripting (XSS)"),
1168            "Medium XSS findings should be filtered by High threshold"
1169        );
1170    }
1171
1172    // ---------------------------------------------------------------
1173    // Safe code (no false positives)
1174    // ---------------------------------------------------------------
1175
1176    #[test]
1177    fn test_safe_python_code_no_findings() {
1178        let text = r#"```python
1179import json
1180
1181def get_user(user_id: int):
1182    cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
1183    return cursor.fetchone()
1184
1185data = json.loads(response.text)
1186print(data)
1187```"#;
1188        let findings = analyzer().analyze(text);
1189        assert!(
1190            findings.is_empty(),
1191            "Safe Python code should not trigger findings; got: {:?}",
1192            findings
1193                .iter()
1194                .map(|f| f.metadata.get("vulnerability_type"))
1195                .collect::<Vec<_>>()
1196        );
1197    }
1198
1199    #[test]
1200    fn test_safe_node_code_no_findings() {
1201        let text = r#"```javascript
1202const { execFile } = require('child_process');
1203execFile('ls', ['-la', dir], (error, stdout) => {
1204    console.log(stdout);
1205});
1206```"#;
1207        let findings = analyzer().analyze(text);
1208        assert!(
1209            findings.is_empty(),
1210            "Safe Node.js code should not trigger findings"
1211        );
1212    }
1213
1214    // ---------------------------------------------------------------
1215    // Plain text without code blocks
1216    // ---------------------------------------------------------------
1217
1218    #[test]
1219    fn test_no_code_blocks_returns_empty() {
1220        let text = "This is just plain text without any code blocks. SELECT * FROM users.";
1221        let findings = analyzer().analyze(text);
1222        assert!(
1223            findings.is_empty(),
1224            "Plain text without code blocks should not trigger findings"
1225        );
1226    }
1227
1228    // ---------------------------------------------------------------
1229    // Multiple vulnerabilities in one block
1230    // ---------------------------------------------------------------
1231
1232    #[test]
1233    fn test_multiple_vulns_in_one_block() {
1234        let text = r#"```python
1235password = "hardcoded_secret"
1236query = f"SELECT * FROM users WHERE name = '{name}'"
1237data = pickle.loads(user_data)
1238```"#;
1239        let findings = analyzer().analyze(text);
1240        assert!(
1241            findings.len() >= 2,
1242            "Should detect multiple vulnerabilities; got {}",
1243            findings.len()
1244        );
1245    }
1246
1247    // ---------------------------------------------------------------
1248    // Markdown code blocks vs indented
1249    // ---------------------------------------------------------------
1250
1251    #[test]
1252    fn test_indented_code_block_detection() {
1253        let text = "Here is some code:\n    password = \"secret123\"\n    db.connect()\nEnd.";
1254        let findings = analyzer().analyze(text);
1255        assert!(
1256            has_vuln_type(&findings, "Hardcoded Credentials"),
1257            "Should detect vulnerabilities in indented code blocks"
1258        );
1259    }
1260}