1use llmtrace_core::{SecurityFinding, SecuritySeverity};
15use regex::Regex;
16
/// Programming language attributed to a code block.
///
/// The enum is a plain tag with no payload, so it derives `Copy` and `Hash` in
/// addition to the comparison traits: values can be passed around by value and
/// used as `HashMap`/`HashSet` keys (for example to aggregate results per
/// language).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CodeLanguage {
    /// Python.
    Python,
    /// JavaScript.
    JavaScript,
    /// TypeScript.
    TypeScript,
    /// SQL (any dialect).
    Sql,
    /// Shell scripts (bash, sh, zsh, ...).
    Bash,
    /// Rust.
    Rust,
    /// Go.
    Go,
    /// Java.
    Java,
    /// C.
    C,
    /// C++.
    Cpp,
    /// Ruby.
    Ruby,
    /// The language could not be determined.
    Unknown,
}
37
38impl std::fmt::Display for CodeLanguage {
39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40 match self {
41 Self::Python => write!(f, "Python"),
42 Self::JavaScript => write!(f, "JavaScript"),
43 Self::TypeScript => write!(f, "TypeScript"),
44 Self::Sql => write!(f, "SQL"),
45 Self::Bash => write!(f, "Shell/Bash"),
46 Self::Rust => write!(f, "Rust"),
47 Self::Go => write!(f, "Go"),
48 Self::Java => write!(f, "Java"),
49 Self::C => write!(f, "C"),
50 Self::Cpp => write!(f, "C++"),
51 Self::Ruby => write!(f, "Ruby"),
52 Self::Unknown => write!(f, "Unknown"),
53 }
54 }
55}
56
57fn detect_language(info_string: Option<&str>, code: &str) -> CodeLanguage {
60 if let Some(info) = info_string {
62 let lower = info.trim().to_lowercase();
63 if lower.starts_with("python") || lower == "py" {
64 return CodeLanguage::Python;
65 }
66 if lower.starts_with("javascript") || lower == "js" || lower == "node" {
67 return CodeLanguage::JavaScript;
68 }
69 if lower.starts_with("typescript") || lower == "ts" {
70 return CodeLanguage::TypeScript;
71 }
72 if lower == "sql" || lower == "mysql" || lower == "postgresql" || lower == "sqlite" {
73 return CodeLanguage::Sql;
74 }
75 if lower == "bash" || lower == "sh" || lower == "shell" || lower == "zsh" {
76 return CodeLanguage::Bash;
77 }
78 if lower == "rust" || lower == "rs" {
79 return CodeLanguage::Rust;
80 }
81 if lower == "go" || lower == "golang" {
82 return CodeLanguage::Go;
83 }
84 if lower == "java" {
85 return CodeLanguage::Java;
86 }
87 if lower == "c++" || lower == "cpp" || lower == "cxx" {
88 return CodeLanguage::Cpp;
89 }
90 if lower == "c" {
91 return CodeLanguage::C;
92 }
93 if lower == "ruby" || lower == "rb" {
94 return CodeLanguage::Ruby;
95 }
96 }
97
98 detect_language_from_content(code)
100}
101
102fn detect_language_from_content(code: &str) -> CodeLanguage {
104 let lower = code.to_lowercase();
105
106 if lower.contains("import ") && (lower.contains("def ") || lower.contains("from "))
108 || lower.contains("print(")
109 || lower.contains("pickle.")
110 || lower.contains("subprocess.")
111 {
112 return CodeLanguage::Python;
113 }
114
115 if lower.contains("const ") || lower.contains("let ") || lower.contains("var ") {
117 if lower.contains(": string") || lower.contains(": number") || lower.contains("interface ")
118 {
119 return CodeLanguage::TypeScript;
120 }
121 return CodeLanguage::JavaScript;
122 }
123 if lower.contains("require(") || lower.contains("module.exports") {
124 return CodeLanguage::JavaScript;
125 }
126 if lower.contains("document.") || lower.contains("console.log") {
127 return CodeLanguage::JavaScript;
128 }
129
130 if lower.contains("select ") && lower.contains(" from ")
132 || lower.contains("insert into ")
133 || lower.contains("create table ")
134 || lower.contains("update ") && lower.contains(" set ")
135 {
136 return CodeLanguage::Sql;
137 }
138
139 if lower.starts_with("#!/bin/")
141 || lower.contains("echo ")
142 || (lower.contains("if [") && lower.contains("then"))
143 {
144 return CodeLanguage::Bash;
145 }
146
147 if lower.contains("func main()") || lower.contains("package main") {
149 return CodeLanguage::Go;
150 }
151
152 if lower.contains("public class ") || lower.contains("system.out.println") {
154 return CodeLanguage::Java;
155 }
156
157 if lower.contains("fn main()") || lower.contains("let mut ") {
159 return CodeLanguage::Rust;
160 }
161
162 if lower.contains("puts ") || lower.contains("def ") && lower.contains("end") {
164 return CodeLanguage::Ruby;
165 }
166
167 CodeLanguage::Unknown
168}
169
/// A code block extracted from analysed text, paired with its detected language.
#[derive(Debug, Clone)]
struct CodeBlock {
    /// Text of the code block.
    code: String,
    /// Language detected for the block.
    language: CodeLanguage,
}
182
183fn extract_code_blocks(text: &str) -> Vec<CodeBlock> {
189 let mut blocks = Vec::new();
190
191 let fence_re = Regex::new(r"```(\w*)\s*\n([\s\S]*?)```").expect("valid regex");
193 for cap in fence_re.captures_iter(text) {
194 let info_string = cap.get(1).map(|m| m.as_str()).filter(|s| !s.is_empty());
195 let code = cap.get(2).map_or("", |m| m.as_str());
196 if !code.trim().is_empty() {
197 let language = detect_language(info_string, code);
198 blocks.push(CodeBlock {
199 code: code.to_string(),
200 language,
201 });
202 }
203 }
204
205 if blocks.is_empty() {
208 let mut current_block = String::new();
209 for line in text.lines() {
210 if let Some(stripped) = line
211 .strip_prefix(" ")
212 .or_else(|| line.strip_prefix('\t'))
213 {
214 current_block.push_str(stripped);
215 current_block.push('\n');
216 } else if !current_block.is_empty() {
217 let code = current_block.trim().to_string();
218 if !code.is_empty() {
219 let language = detect_language(None, &code);
220 blocks.push(CodeBlock { code, language });
221 }
222 current_block.clear();
223 }
224 }
225 if !current_block.is_empty() {
226 let code = current_block.trim().to_string();
227 if !code.is_empty() {
228 let language = detect_language(None, &code);
229 blocks.push(CodeBlock { code, language });
230 }
231 }
232 }
233
234 blocks
235}
236
/// Category of vulnerability that a detection pattern reports.
#[derive(Debug, Clone, PartialEq, Eq)]
enum VulnerabilityType {
    /// SQL built from dynamic strings.
    SqlInjection,
    /// Shell commands or evaluated code built from dynamic input.
    CommandInjection,
    /// File access with `../` or a potentially user-controlled path.
    PathTraversal,
    /// Passwords, keys, tokens or connection credentials embedded in code.
    HardcodedCredentials,
    /// Unsafe deserialisation (e.g. `pickle`, `yaml.load`).
    InsecureDeserialization,
    /// Cross-site scripting sinks (e.g. `innerHTML`, `document.write`).
    Xss,
    /// Weak hashing, insecure randomness or ECB-mode encryption.
    InsecureCrypto,
}
252
253impl std::fmt::Display for VulnerabilityType {
254 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
255 match self {
256 Self::SqlInjection => write!(f, "SQL Injection"),
257 Self::CommandInjection => write!(f, "Command Injection"),
258 Self::PathTraversal => write!(f, "Path Traversal"),
259 Self::HardcodedCredentials => write!(f, "Hardcoded Credentials"),
260 Self::InsecureDeserialization => write!(f, "Insecure Deserialization"),
261 Self::Xss => write!(f, "Cross-Site Scripting (XSS)"),
262 Self::InsecureCrypto => write!(f, "Insecure Cryptography"),
263 }
264 }
265}
266
/// A single vulnerability detected in a code block, before it is converted
/// into a `SecurityFinding`.
struct CodeVulnerability {
    /// Category of the vulnerability.
    vuln_type: VulnerabilityType,
    /// Severity copied from the pattern that matched.
    severity: SecuritySeverity,
    /// Human-readable explanation of the problem.
    description: String,
    /// The source line on which the match starts.
    snippet: String,
    /// Remediation advice.
    suggested_fix: String,
    /// Confidence score copied from the pattern that matched.
    confidence: f64,
}
276
/// A compiled detection rule: a regex plus the details reported when it matches.
struct VulnPattern {
    /// Compiled regular expression that identifies the insecure construct.
    regex: Regex,
    /// Category reported on a match.
    vuln_type: VulnerabilityType,
    /// Severity reported on a match.
    severity: SecuritySeverity,
    /// Explanation reported on a match.
    description: &'static str,
    /// Remediation advice reported on a match.
    suggested_fix: &'static str,
    /// Confidence score reported on a match.
    confidence: f64,
}
290
291fn build_vuln_patterns() -> Vec<VulnPattern> {
293 let definitions: Vec<(
294 &str,
295 VulnerabilityType,
296 SecuritySeverity,
297 &'static str,
298 &'static str,
299 f64,
300 )> = vec![
301 (
305 r#"(?i)(?:"|')(?:SELECT|INSERT|UPDATE|DELETE|DROP)\s[^"']*(?:"|')\s*(?:\+|\.format\(|%\s)"#,
307 VulnerabilityType::SqlInjection,
308 SecuritySeverity::High,
309 "SQL query built with string concatenation — vulnerable to SQL injection",
310 "Use parameterised queries (e.g., cursor.execute(\"SELECT * FROM t WHERE id = ?\", (user_id,)))",
311 0.85,
312 ),
313 (
314 r#"(?i)f\s*(?:"|')(?:SELECT|INSERT|UPDATE|DELETE|DROP)\b.*\{[^}]+\}"#,
316 VulnerabilityType::SqlInjection,
317 SecuritySeverity::High,
318 "SQL query built with f-string interpolation — vulnerable to SQL injection",
319 "Use parameterised queries instead of f-strings for SQL",
320 0.9,
321 ),
322 (
323 r#"(?i)\.execute\(\s*(?:f\s*)?["'][^"']*["']\s*(?:%|\+|\.format\()"#,
325 VulnerabilityType::SqlInjection,
326 SecuritySeverity::High,
327 "SQL execute() called with string formatting — vulnerable to SQL injection",
328 "Use parameterised queries: cursor.execute(\"SELECT ... WHERE id = %s\", (param,))",
329 0.9,
330 ),
331 (
335 r#"(?i)os\.system\s*\(\s*f\s*["'][^"']*\{[^}]*\}"#,
337 VulnerabilityType::CommandInjection,
338 SecuritySeverity::Critical,
339 "os.system() called with dynamic input — vulnerable to command injection",
340 "Use subprocess.run() with a list of arguments instead of os.system()",
341 0.9,
342 ),
343 (
344 r#"(?i)os\.system\s*\(\s*["'][^"']*["']\s*\+"#,
346 VulnerabilityType::CommandInjection,
347 SecuritySeverity::Critical,
348 "os.system() called with string concatenation — vulnerable to command injection",
349 "Use subprocess.run() with a list of arguments instead of os.system()",
350 0.9,
351 ),
352 (
353 r"(?i)subprocess\.(?:call|run|Popen)\s*\([^)]*shell\s*=\s*True",
355 VulnerabilityType::CommandInjection,
356 SecuritySeverity::Critical,
357 "subprocess called with shell=True — vulnerable to command injection",
358 "Use subprocess.run([\"cmd\", \"arg1\", \"arg2\"]) without shell=True",
359 0.85,
360 ),
361 (
362 r"(?i)\beval\s*\(\s*[a-zA-Z_]",
364 VulnerabilityType::CommandInjection,
365 SecuritySeverity::Critical,
366 "eval() called with potentially dynamic input — code injection risk",
367 "Avoid eval(); use ast.literal_eval() for Python or JSON.parse() for JavaScript",
368 0.8,
369 ),
370 (
371 r"(?i)child_process\.exec\s*\(\s*`[^`]*\$\{",
373 VulnerabilityType::CommandInjection,
374 SecuritySeverity::Critical,
375 "child_process.exec() called with template literal — vulnerable to command injection",
376 "Use child_process.execFile() or spawn() with argument arrays instead",
377 0.9,
378 ),
379 (
380 r#"(?i)(?:child_process\.exec|exec)\s*\(\s*["'][^"']*["']\s*\+"#,
382 VulnerabilityType::CommandInjection,
383 SecuritySeverity::Critical,
384 "child_process.exec() called with string concatenation — command injection risk",
385 "Use child_process.execFile() or spawn() with argument arrays instead",
386 0.85,
387 ),
388 (
392 r#"(?i)(?:open|readFile|readFileSync|read_file|fs\.read)\s*\([^)]*\.\.\/"#,
394 VulnerabilityType::PathTraversal,
395 SecuritySeverity::High,
396 "File operation with '../' path — vulnerable to path traversal",
397 "Validate and canonicalise file paths using os.path.realpath() or path.resolve()",
398 0.85,
399 ),
400 (
401 r"(?i)open\s*\(\s*(?:(?:request|user_input|filename|path|file_path|params)\b[^)]*|[^)]*\+\s*(?:request|user_input|filename|path|file_path|params)\b)",
403 VulnerabilityType::PathTraversal,
404 SecuritySeverity::High,
405 "File open() with potentially user-controlled path — path traversal risk",
406 "Validate paths against an allowlist and use os.path.realpath() to resolve symlinks",
407 0.75,
408 ),
409 (
413 r#"(?i)(?:password|passwd|pwd)\s*=\s*(?:"|')[^"']{3,}(?:"|')"#,
415 VulnerabilityType::HardcodedCredentials,
416 SecuritySeverity::High,
417 "Hardcoded password detected in code",
418 "Use environment variables or a secrets manager instead of hardcoding passwords",
419 0.85,
420 ),
421 (
422 r#"(?i)(?:api[_-]?key|secret[_-]?key|auth[_-]?token|access[_-]?token|secret)\s*=\s*(?:"|')[A-Za-z0-9+/=_\-]{8,}(?:"|')"#,
424 VulnerabilityType::HardcodedCredentials,
425 SecuritySeverity::High,
426 "Hardcoded API key, secret, or token detected in code",
427 "Use environment variables or a secrets manager instead of hardcoding secrets",
428 0.85,
429 ),
430 (
431 r"(?i)(?:mysql|postgres|postgresql|mongodb|redis)://[^:]+:[^@]+@",
433 VulnerabilityType::HardcodedCredentials,
434 SecuritySeverity::High,
435 "Connection string with embedded password detected",
436 "Use environment variables for connection strings containing credentials",
437 0.85,
438 ),
439 (
440 r#"(?i)(?:aws_access_key_id|aws_secret_access_key)\s*=\s*["'][A-Za-z0-9/+=]{16,}["']"#,
442 VulnerabilityType::HardcodedCredentials,
443 SecuritySeverity::High,
444 "Hardcoded AWS credentials detected in code",
445 "Use IAM roles, environment variables, or AWS Secrets Manager instead",
446 0.9,
447 ),
448 (
452 r"(?i)pickle\.loads?\s*\(",
454 VulnerabilityType::InsecureDeserialization,
455 SecuritySeverity::High,
456 "pickle.loads() used — insecure deserialization can lead to remote code execution",
457 "Avoid pickle for untrusted data; use JSON or a safe serialisation format instead",
458 0.9,
459 ),
460 (
461 r"(?i)yaml\.load\s*\([^)]*\)",
463 VulnerabilityType::InsecureDeserialization,
464 SecuritySeverity::High,
465 "yaml.load() without SafeLoader — can execute arbitrary code",
466 "Use yaml.safe_load() or yaml.load(data, Loader=yaml.SafeLoader)",
467 0.85,
468 ),
469 (
470 r"(?i)eval\s*\(\s*JSON\.parse",
472 VulnerabilityType::InsecureDeserialization,
473 SecuritySeverity::High,
474 "eval(JSON.parse(...)) — combining eval with parsed JSON is dangerous",
475 "Use JSON.parse() alone; never pass its result to eval()",
476 0.9,
477 ),
478 (
482 r"(?i)\.innerHTML\s*=",
484 VulnerabilityType::Xss,
485 SecuritySeverity::Medium,
486 "innerHTML assignment — potential XSS if user input is not sanitised",
487 "Use textContent or a DOM sanitisation library (e.g., DOMPurify) instead",
488 0.8,
489 ),
490 (
491 r"(?i)document\.write\s*\(",
493 VulnerabilityType::Xss,
494 SecuritySeverity::Medium,
495 "document.write() — potential XSS vector",
496 "Use DOM manipulation methods (createElement, appendChild) instead of document.write()",
497 0.8,
498 ),
499 (
500 r"(?i)dangerouslySetInnerHTML",
502 VulnerabilityType::Xss,
503 SecuritySeverity::Medium,
504 "dangerouslySetInnerHTML in React — potential XSS if content is not sanitised",
505 "Sanitise HTML with DOMPurify before passing to dangerouslySetInnerHTML",
506 0.8,
507 ),
508 (
512 r"(?i)(?:md5|MD5)\s*\(.*(?:password|passwd|pwd)",
514 VulnerabilityType::InsecureCrypto,
515 SecuritySeverity::Medium,
516 "MD5 used for password hashing — cryptographically broken",
517 "Use bcrypt, scrypt, or Argon2 for password hashing",
518 0.85,
519 ),
520 (
521 r"(?i)hashlib\.(?:md5|sha1)\s*\(.*(?:password|passwd|pwd)",
523 VulnerabilityType::InsecureCrypto,
524 SecuritySeverity::Medium,
525 "MD5/SHA1 used for password hashing — cryptographically weak",
526 "Use bcrypt, scrypt, or Argon2 for password hashing",
527 0.85,
528 ),
529 (
530 r"(?i)(?:(?:token|key|secret|password|nonce|salt|iv).*Math\.random|Math\.random\s*\(\s*\).*(?:token|key|secret|password|nonce|salt|iv))",
532 VulnerabilityType::InsecureCrypto,
533 SecuritySeverity::Medium,
534 "Math.random() used for security-sensitive value — not cryptographically secure",
535 "Use crypto.getRandomValues() or crypto.randomBytes() instead",
536 0.8,
537 ),
538 (
539 r"(?i)(?:AES|DES|Blowfish).*(?:ECB|mode_ecb|MODE_ECB)",
541 VulnerabilityType::InsecureCrypto,
542 SecuritySeverity::Medium,
543 "ECB mode encryption — does not provide semantic security",
544 "Use CBC, GCM, or another authenticated encryption mode instead of ECB",
545 0.85,
546 ),
547 ];
548
549 definitions
550 .into_iter()
551 .filter_map(
552 |(pattern, vuln_type, severity, description, suggested_fix, confidence)| {
553 Regex::new(pattern).ok().map(|regex| VulnPattern {
554 regex,
555 vuln_type,
556 severity,
557 description,
558 suggested_fix,
559 confidence,
560 })
561 },
562 )
563 .collect()
564}
565
/// Scans code blocks found in text for insecure coding patterns and reports
/// them as `SecurityFinding`s.
pub struct CodeSecurityAnalyzer {
    /// Compiled detection rules, produced by `build_vuln_patterns`.
    patterns: Vec<VulnPattern>,
    /// Minimum severity to report; `analyze` skips vulnerabilities below it.
    severity_threshold: SecuritySeverity,
}
577
578impl CodeSecurityAnalyzer {
579 #[must_use]
582 pub fn new() -> Self {
583 Self {
584 patterns: build_vuln_patterns(),
585 severity_threshold: SecuritySeverity::Medium,
586 }
587 }
588
589 #[must_use]
593 pub fn with_severity_threshold(threshold: SecuritySeverity) -> Self {
594 Self {
595 patterns: build_vuln_patterns(),
596 severity_threshold: threshold,
597 }
598 }
599
600 pub fn analyze(&self, text: &str) -> Vec<SecurityFinding> {
606 let blocks = extract_code_blocks(text);
607 if blocks.is_empty() {
608 return Vec::new();
609 }
610
611 let mut findings = Vec::new();
612 for block in &blocks {
613 let vulns = self.scan_code(&block.code, &block.language);
614 for vuln in vulns {
615 if vuln.severity < self.severity_threshold {
616 continue;
617 }
618 let description = format!(
619 "[{}] {}: {}\n\nVulnerable code:\n {}\n\nSuggested fix: {}",
620 vuln.severity,
621 vuln.vuln_type,
622 vuln.description,
623 vuln.snippet.trim(),
624 vuln.suggested_fix,
625 );
626 let mut finding = SecurityFinding::new(
627 vuln.severity,
628 "insecure_code".to_string(),
629 description,
630 vuln.confidence,
631 );
632 finding
633 .metadata
634 .insert("vulnerability_type".to_string(), vuln.vuln_type.to_string());
635 finding
636 .metadata
637 .insert("language".to_string(), block.language.to_string());
638 finding
639 .metadata
640 .insert("code_snippet".to_string(), vuln.snippet);
641 finding
642 .metadata
643 .insert("suggested_fix".to_string(), vuln.suggested_fix);
644 findings.push(finding);
645 }
646 }
647
648 findings
649 }
650
651 fn scan_code(&self, code: &str, _language: &CodeLanguage) -> Vec<CodeVulnerability> {
653 let mut vulns = Vec::new();
654
655 for pattern in &self.patterns {
656 if let Some(mat) = pattern.regex.find(code) {
657 let snippet = extract_snippet(code, mat.start());
659 vulns.push(CodeVulnerability {
660 vuln_type: pattern.vuln_type.clone(),
661 severity: pattern.severity.clone(),
662 description: pattern.description.to_string(),
663 snippet,
664 suggested_fix: pattern.suggested_fix.to_string(),
665 confidence: pattern.confidence,
666 });
667 }
668 }
669
670 vulns
671 }
672}
673
674impl Default for CodeSecurityAnalyzer {
675 fn default() -> Self {
676 Self::new()
677 }
678}
679
/// Returns the full line of `code` that contains byte offset `pos`, without
/// its trailing newline.
///
/// `pos` is normally the start of a regex match. Offsets past the end of
/// `code`, or inside a multi-byte character, are clamped back to the nearest
/// valid position instead of panicking.
fn extract_snippet(code: &str, pos: usize) -> String {
    // Clamp to the end, then step back to a char boundary so slicing is safe.
    // Offset 0 is always a boundary, so the loop terminates.
    let mut pos = pos.min(code.len());
    while !code.is_char_boundary(pos) {
        pos -= 1;
    }

    let line_start = code[..pos].rfind('\n').map_or(0, |i| i + 1);
    let line_end = code[pos..].find('\n').map_or(code.len(), |i| pos + i);
    code[line_start..line_end].to_string()
}
688
// Unit tests for language detection, code-block extraction, each vulnerability
// category, finding structure, threshold filtering and false-positive checks.
#[cfg(test)]
mod tests {
    use super::*;

    // Analyzer with the default (Medium) severity threshold.
    fn analyzer() -> CodeSecurityAnalyzer {
        CodeSecurityAnalyzer::new()
    }

    // True when any finding carries `vuln_type` under the "vulnerability_type"
    // metadata key.
    fn has_vuln_type(findings: &[SecurityFinding], vuln_type: &str) -> bool {
        findings
            .iter()
            .any(|f| f.metadata.get("vulnerability_type") == Some(&vuln_type.to_string()))
    }

    // ---------------------------------------------------------------
    // Language detection
    // ---------------------------------------------------------------

    // Each recognised info-string tag maps to its language even with empty code.
    #[test]
    fn test_detect_language_from_info_string() {
        assert_eq!(detect_language(Some("python"), ""), CodeLanguage::Python);
        assert_eq!(detect_language(Some("js"), ""), CodeLanguage::JavaScript);
        assert_eq!(detect_language(Some("ts"), ""), CodeLanguage::TypeScript);
        assert_eq!(detect_language(Some("sql"), ""), CodeLanguage::Sql);
        assert_eq!(detect_language(Some("bash"), ""), CodeLanguage::Bash);
        assert_eq!(detect_language(Some("rust"), ""), CodeLanguage::Rust);
        assert_eq!(detect_language(Some("go"), ""), CodeLanguage::Go);
        assert_eq!(detect_language(Some("java"), ""), CodeLanguage::Java);
        assert_eq!(detect_language(Some("cpp"), ""), CodeLanguage::Cpp);
        assert_eq!(detect_language(Some("c"), ""), CodeLanguage::C);
        assert_eq!(detect_language(Some("ruby"), ""), CodeLanguage::Ruby);
    }

    // Content heuristics classify representative Python, JavaScript and SQL.
    #[test]
    fn test_detect_language_from_content() {
        assert_eq!(
            detect_language_from_content("import os\ndef foo():\n pass"),
            CodeLanguage::Python
        );
        assert_eq!(
            detect_language_from_content("const x = 5; console.log(x);"),
            CodeLanguage::JavaScript
        );
        assert_eq!(
            detect_language_from_content("SELECT * FROM users WHERE id = 1"),
            CodeLanguage::Sql
        );
    }

    // ---------------------------------------------------------------
    // Code block extraction
    // ---------------------------------------------------------------

    // A fenced block with an info string is extracted and tagged with it.
    #[test]
    fn test_extract_fenced_code_block() {
        let text = "Here is code:\n```python\nimport os\nprint('hello')\n```\nDone.";
        let blocks = extract_code_blocks(text);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].language, CodeLanguage::Python);
        assert!(blocks[0].code.contains("import os"));
    }

    // Without fences, consecutive indented lines form a single block.
    #[test]
    fn test_extract_indented_code_block() {
        let text = "Example:\n import os\n os.system('ls')\nDone.";
        let blocks = extract_code_blocks(text);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].code.contains("import os"));
    }

    // Every fenced block in the text is returned.
    #[test]
    fn test_extract_multiple_fenced_blocks() {
        let text = "```python\nprint('a')\n```\nText\n```js\nconsole.log('b');\n```";
        let blocks = extract_code_blocks(text);
        assert_eq!(blocks.len(), 2);
    }

    // ---------------------------------------------------------------
    // SQL injection
    // ---------------------------------------------------------------

    // Query string concatenated with a variable.
    #[test]
    fn test_sql_injection_string_concat() {
        let text = r#"```python
query = "SELECT * FROM users WHERE id = " + user_id
cursor.execute(query)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "SQL Injection"),
            "Should detect SQL injection via concatenation; findings: {:?}",
            findings
                .iter()
                .map(|f| f.metadata.get("vulnerability_type"))
                .collect::<Vec<_>>()
        );
    }

    // Query built with an f-string placeholder.
    #[test]
    fn test_sql_injection_fstring() {
        let text = r#"```python
query = f"SELECT * FROM users WHERE name = '{username}'"
cursor.execute(query)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "SQL Injection"),
            "Should detect SQL injection via f-string"
        );
    }

    // execute() whose query string is formatted with `%`.
    #[test]
    fn test_sql_injection_execute_format() {
        let text = r#"```python
cursor.execute("SELECT * FROM users WHERE id = %s" % user_id)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "SQL Injection"),
            "Should detect SQL injection via execute with format"
        );
    }

    // Negative case: parameters passed separately must not be flagged.
    #[test]
    fn test_sql_parameterised_query_safe() {
        let text = r#"```python
cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            !has_vuln_type(&findings, "SQL Injection"),
            "Parameterised query should NOT trigger SQL injection; findings: {:?}",
            findings
                .iter()
                .map(|f| f.metadata.get("vulnerability_type"))
                .collect::<Vec<_>>()
        );
    }

    // ---------------------------------------------------------------
    // Command injection
    // ---------------------------------------------------------------

    // os.system() with an f-string argument.
    #[test]
    fn test_command_injection_os_system_fstring() {
        let text = r#"```python
import os
os.system(f"rm -rf {user_input}")
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Command Injection"),
            "Should detect command injection via os.system with f-string"
        );
    }

    // os.system() with a concatenated argument.
    #[test]
    fn test_command_injection_os_system_concat() {
        let text = r#"```python
import os
os.system("ping " + host)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Command Injection"),
            "Should detect command injection via os.system with concatenation"
        );
    }

    // subprocess call with shell=True.
    #[test]
    fn test_command_injection_subprocess_shell_true() {
        let text = r#"```python
import subprocess
subprocess.call("ls " + path, shell=True)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Command Injection"),
            "Should detect command injection via subprocess with shell=True"
        );
    }

    // child_process.exec() with an interpolated template literal.
    #[test]
    fn test_command_injection_child_process_exec_template() {
        let text = r#"```javascript
const { exec } = require('child_process');
child_process.exec(`ls ${userInput}`, callback);
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Command Injection"),
            "Should detect command injection via child_process.exec with template literal"
        );
    }

    // Negative case: argument-list form without shell=True must not be flagged.
    #[test]
    fn test_subprocess_list_args_safe() {
        let text = r#"```python
import subprocess
subprocess.run(["ls", "-la", path])
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            !has_vuln_type(&findings, "Command Injection"),
            "subprocess with list args should NOT trigger command injection"
        );
    }

    // ---------------------------------------------------------------
    // Path traversal
    // ---------------------------------------------------------------

    // open() with a literal "../" path.
    #[test]
    fn test_path_traversal_open_dotdot() {
        let text = r#"```python
with open("../../etc/passwd") as f:
 data = f.read()
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Path Traversal"),
            "Should detect path traversal with ../"
        );
    }

    // open() on a variable whose name suggests user input.
    #[test]
    fn test_path_traversal_user_input() {
        let text = r#"```python
f = open(user_input)
data = f.read()
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Path Traversal"),
            "Should detect open() with user-controlled path"
        );
    }

    // ---------------------------------------------------------------
    // Hardcoded credentials
    // ---------------------------------------------------------------

    // Password assigned from a string literal.
    #[test]
    fn test_hardcoded_password() {
        let text = r#"```python
password = "super_secret_123"
db.connect(password=password)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Hardcoded Credentials"),
            "Should detect hardcoded password"
        );
    }

    // API key assigned from a string literal.
    #[test]
    fn test_hardcoded_api_key() {
        let text = r#"```javascript
const api_key = "sk_live_abcdef1234567890";
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Hardcoded Credentials"),
            "Should detect hardcoded API key"
        );
    }

    // Database URL with user:password embedded.
    #[test]
    fn test_hardcoded_connection_string() {
        let text = r#"```python
db_url = "postgresql://admin:password123@localhost:5432/mydb"
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Hardcoded Credentials"),
            "Should detect connection string with embedded password"
        );
    }

    // Negative case: password read from the environment must not be flagged.
    #[test]
    fn test_env_var_password_safe() {
        let text = r#"```python
import os
password = os.environ.get("DB_PASSWORD")
db.connect(password=password)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            !has_vuln_type(&findings, "Hardcoded Credentials"),
            "Password from env var should NOT trigger hardcoded credentials"
        );
    }

    // ---------------------------------------------------------------
    // Insecure deserialisation
    // ---------------------------------------------------------------

    // pickle.loads() call.
    #[test]
    fn test_pickle_loads() {
        let text = r#"```python
import pickle
data = pickle.loads(user_data)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Insecure Deserialization"),
            "Should detect pickle.loads()"
        );
    }

    // yaml.load() with a single argument.
    #[test]
    fn test_yaml_load_unsafe() {
        let text = r#"```python
import yaml
data = yaml.load(content)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Insecure Deserialization"),
            "Should detect yaml.load() without SafeLoader"
        );
    }

    // eval() wrapped around JSON.parse().
    #[test]
    fn test_eval_json_parse() {
        let text = r#"```javascript
const result = eval(JSON.parse(data));
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Insecure Deserialization"),
            "Should detect eval(JSON.parse(...))"
        );
    }

    // ---------------------------------------------------------------
    // Cross-site scripting
    // ---------------------------------------------------------------

    // Assignment to innerHTML.
    #[test]
    fn test_xss_innerhtml() {
        let text = r#"```javascript
element.innerHTML = userInput;
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Cross-Site Scripting (XSS)"),
            "Should detect innerHTML assignment"
        );
    }

    // document.write() call.
    #[test]
    fn test_xss_document_write() {
        let text = r#"```javascript
document.write(userContent);
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Cross-Site Scripting (XSS)"),
            "Should detect document.write()"
        );
    }

    // React dangerouslySetInnerHTML attribute.
    #[test]
    fn test_xss_dangerously_set_inner_html() {
        let text = r#"```javascript
<div dangerouslySetInnerHTML={{__html: content}} />
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Cross-Site Scripting (XSS)"),
            "Should detect dangerouslySetInnerHTML"
        );
    }

    // ---------------------------------------------------------------
    // Insecure cryptography
    // ---------------------------------------------------------------

    // hashlib.md5() applied to a password.
    #[test]
    fn test_md5_password() {
        let text = r#"```python
import hashlib
hashed = hashlib.md5(password.encode()).hexdigest()
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Insecure Cryptography"),
            "Should detect MD5 for password hashing"
        );
    }

    // Math.random() used to build a token.
    #[test]
    fn test_math_random_token() {
        let text = r#"```javascript
const token = Math.random().toString(36);
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Insecure Cryptography"),
            "Should detect Math.random() for token generation"
        );
    }

    // AES in ECB mode.
    #[test]
    fn test_ecb_mode() {
        let text = r#"```python
from Crypto.Cipher import AES
cipher = AES.new(key, AES.MODE_ECB)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Insecure Cryptography"),
            "Should detect ECB mode encryption"
        );
    }

    // ---------------------------------------------------------------
    // Finding structure
    // ---------------------------------------------------------------

    // Every finding has type "insecure_code" and the four metadata keys.
    #[test]
    fn test_findings_have_correct_type() {
        let text = r#"```python
password = "secret123"
```"#;
        let findings = analyzer().analyze(text);
        assert!(!findings.is_empty());
        for f in &findings {
            assert_eq!(f.finding_type, "insecure_code");
            assert!(f.metadata.contains_key("vulnerability_type"));
            assert!(f.metadata.contains_key("language"));
            assert!(f.metadata.contains_key("code_snippet"));
            assert!(f.metadata.contains_key("suggested_fix"));
        }
    }

    // NOTE(review): the name says "high or above", but the assertion only
    // requires severity >= Medium (the default threshold).
    #[test]
    fn test_findings_severity_high_or_above() {
        let text = r#"```python
os.system(f"rm {user_input}")
```"#;
        let findings = analyzer().analyze(text);
        assert!(!findings.is_empty());
        for f in &findings {
            assert!(f.severity >= SecuritySeverity::Medium);
        }
    }

    // ---------------------------------------------------------------
    // Severity threshold
    // ---------------------------------------------------------------

    // NOTE(review): despite the name, this checks that a Medium-severity XSS
    // finding passes a Medium threshold and is filtered by a High threshold.
    #[test]
    fn test_severity_threshold_filters_low() {
        let text = r#"```javascript
element.innerHTML = data;
```"#;
        let analyzer = CodeSecurityAnalyzer::with_severity_threshold(SecuritySeverity::Medium);
        let findings = analyzer.analyze(text);
        assert!(
            !findings.is_empty(),
            "Medium findings should pass Medium threshold"
        );

        let analyzer_high = CodeSecurityAnalyzer::with_severity_threshold(SecuritySeverity::High);
        let findings_high = analyzer_high.analyze(text);
        assert!(
            !has_vuln_type(&findings_high, "Cross-Site Scripting (XSS)"),
            "Medium XSS findings should be filtered by High threshold"
        );
    }

    // ---------------------------------------------------------------
    // False-positive checks on safe code
    // ---------------------------------------------------------------

    // Parameterised SQL plus json.loads() must produce no findings at all.
    #[test]
    fn test_safe_python_code_no_findings() {
        let text = r#"```python
import json

def get_user(user_id: int):
 cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
 return cursor.fetchone()

data = json.loads(response.text)
print(data)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            findings.is_empty(),
            "Safe Python code should not trigger findings; got: {:?}",
            findings
                .iter()
                .map(|f| f.metadata.get("vulnerability_type"))
                .collect::<Vec<_>>()
        );
    }

    // execFile() with an argument array must produce no findings.
    #[test]
    fn test_safe_node_code_no_findings() {
        let text = r#"```javascript
const { execFile } = require('child_process');
execFile('ls', ['-la', dir], (error, stdout) => {
 console.log(stdout);
});
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            findings.is_empty(),
            "Safe Node.js code should not trigger findings"
        );
    }

    // ---------------------------------------------------------------
    // Text without code blocks
    // ---------------------------------------------------------------

    // Prose that merely mentions SQL is not scanned.
    #[test]
    fn test_no_code_blocks_returns_empty() {
        let text = "This is just plain text without any code blocks. SELECT * FROM users.";
        let findings = analyzer().analyze(text);
        assert!(
            findings.is_empty(),
            "Plain text without code blocks should not trigger findings"
        );
    }

    // ---------------------------------------------------------------
    // Multiple vulnerabilities
    // ---------------------------------------------------------------

    // One block containing three different problems yields at least two findings.
    #[test]
    fn test_multiple_vulns_in_one_block() {
        let text = r#"```python
password = "hardcoded_secret"
query = f"SELECT * FROM users WHERE name = '{name}'"
data = pickle.loads(user_data)
```"#;
        let findings = analyzer().analyze(text);
        assert!(
            findings.len() >= 2,
            "Should detect multiple vulnerabilities; got {}",
            findings.len()
        );
    }

    // ---------------------------------------------------------------
    // Indented code blocks
    // ---------------------------------------------------------------

    // Vulnerabilities are also found in indented (non-fenced) blocks.
    #[test]
    fn test_indented_code_block_detection() {
        let text = "Here is some code:\n password = \"secret123\"\n db.connect()\nEnd.";
        let findings = analyzer().analyze(text);
        assert!(
            has_vuln_type(&findings, "Hardcoded Credentials"),
            "Should detect vulnerabilities in indented code blocks"
        );
    }
}