1use std::sync::Arc;
6use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
7use regex::Regex;
8use ahash::AHashMap;
9use log::debug;
10
11use super::{TurboConfig, SecurityError};
12use crate::analyzer::security::{SecuritySeverity, SecurityCategory};
13
/// Metadata describing one detection rule.
///
/// Instances are shared behind `Arc` between the engine's lookup tables and
/// every `PatternMatch` they produce.
#[derive(Debug, Clone)]
pub struct CompiledPattern {
    /// Rule identifier (e.g. `"openai-api-key"`); used to select confidence
    /// thresholds and pattern-specific confidence adjustments.
    pub id: String,
    /// Human-readable rule name.
    pub name: String,
    /// Severity reported for findings of this rule.
    pub severity: SecuritySeverity,
    /// Category reported for findings of this rule.
    pub category: SecurityCategory,
    /// Short description of what the rule detects.
    pub description: String,
    /// Suggested remediation steps.
    pub remediation: Vec<String>,
    /// Reference links for the rule (may be empty).
    pub references: Vec<String>,
    /// Associated CWE identifier, e.g. `"CWE-798"`, when one applies.
    pub cwe_id: Option<String>,
    /// Keywords that each raise confidence when found (case-insensitively)
    /// anywhere in the scanned content.
    pub confidence_boost_keywords: Vec<String>,
    /// Keywords that each lower confidence when found (case-insensitively)
    /// on the matched line.
    pub false_positive_keywords: Vec<String>,
}
28
/// A single finding produced by `PatternEngine::scan_content`.
#[derive(Debug, Clone)]
pub struct PatternMatch {
    /// The rule that produced this finding.
    pub pattern: Arc<CompiledPattern>,
    /// 1-based line number of the match.
    pub line_number: usize,
    /// 1-based column of the match start, counted in bytes.
    pub column_number: usize,
    /// The matched line with the matched span replaced by `*` characters.
    pub evidence: String,
    /// Confidence score, clamped to `0.0..=1.0`.
    pub confidence: f32,
}
38
/// Secret-detection engine combining Aho-Corasick literal matchers with
/// per-line regular expressions.
pub struct PatternEngine {
    /// Literal matcher for secret markers (e.g. `-----BEGIN`).
    secret_matcher: AhoCorasick,
    /// Literal matcher for environment-variable patterns.
    /// NOTE(review): no loader in this file currently adds env-var patterns.
    env_var_matcher: AhoCorasick,
    /// Literal matcher for API-key prefixes (e.g. `sk-`, `AKIA`, `AIza`).
    api_key_matcher: AhoCorasick,

    /// Maps a matcher pattern index to its rule, for `secret_matcher`.
    secret_patterns: AHashMap<usize, Arc<CompiledPattern>>,
    /// Maps a matcher pattern index to its rule, for `env_var_matcher`.
    env_var_patterns: AHashMap<usize, Arc<CompiledPattern>>,
    /// Maps a matcher pattern index to its rule, for `api_key_matcher`.
    api_key_patterns: AHashMap<usize, Arc<CompiledPattern>>,

    /// Regex-based rules, each evaluated against every line.
    complex_patterns: Vec<(Regex, Arc<CompiledPattern>)>,

    /// Number of rules loaded at construction time (literal + regex).
    total_patterns: usize,
}
57
58impl PatternEngine {
59 pub fn new(config: &TurboConfig) -> Result<Self, SecurityError> {
60 debug!("Initializing pattern engine with pattern sets: {:?}", config.pattern_sets);
61
62 let (secret_patterns, env_var_patterns, api_key_patterns, complex_patterns) =
64 Self::load_patterns(&config.pattern_sets)?;
65
66 let secret_matcher = Self::build_matcher(&secret_patterns)?;
68 let env_var_matcher = Self::build_matcher(&env_var_patterns)?;
69 let api_key_matcher = Self::build_matcher(&api_key_patterns)?;
70
71 let total_patterns = secret_patterns.len() + env_var_patterns.len() +
72 api_key_patterns.len() + complex_patterns.len();
73
74 debug!("Pattern engine initialized with {} total patterns", total_patterns);
75
76 Ok(Self {
77 secret_matcher,
78 env_var_matcher,
79 api_key_matcher,
80 secret_patterns: Self::create_pattern_map(secret_patterns),
81 env_var_patterns: Self::create_pattern_map(env_var_patterns),
82 api_key_patterns: Self::create_pattern_map(api_key_patterns),
83 complex_patterns,
84 total_patterns,
85 })
86 }
87
    /// Returns the number of patterns (literal and regex) loaded when the
    /// engine was constructed.
    pub fn pattern_count(&self) -> usize {
        self.total_patterns
    }
92
93 pub fn scan_content(&self, content: &str, quick_reject: bool, file_meta: &super::file_discovery::FileMetadata) -> Vec<PatternMatch> {
95 if quick_reject && !self.quick_contains_secrets(content) {
97 return Vec::new();
98 }
99
100 let mut matches = Vec::new();
101
102 let lines: Vec<&str> = content.lines().collect();
104 let mut line_offsets = vec![0];
105 let mut offset = 0;
106
107 for line in &lines {
108 offset += line.len() + 1; line_offsets.push(offset);
110 }
111
112 matches.extend(self.run_matcher(&self.secret_matcher, content, &self.secret_patterns, &lines, &line_offsets, file_meta));
114 matches.extend(self.run_matcher(&self.env_var_matcher, content, &self.env_var_patterns, &lines, &line_offsets, file_meta));
115 matches.extend(self.run_matcher(&self.api_key_matcher, content, &self.api_key_patterns, &lines, &line_offsets, file_meta));
116
117 for (line_num, line) in lines.iter().enumerate() {
119 for (regex, pattern) in &self.complex_patterns {
120 if let Some(mat) = regex.find(line) {
121 let confidence = self.calculate_confidence(line, content, &pattern, file_meta);
122
123 matches.push(PatternMatch {
124 pattern: Arc::clone(pattern),
125 line_number: line_num + 1,
126 column_number: mat.start() + 1,
127 evidence: self.extract_evidence(line, mat.start(), mat.end()),
128 confidence,
129 });
130 }
131 }
132 }
133
134 matches.retain(|m| {
136 let threshold = match m.pattern.id.as_str() {
137 id if id.contains("aws-access-key") => 0.4, id if id.contains("openai-api-key") => 0.4, id if id.contains("jwt-token") => 0.6, id if id.contains("database-url") => 0.5, id if id.contains("bearer-token") => 0.7, id if id.contains("generic") => 0.8, id if id.contains("long-secret-value") => 0.7, _ => 0.7, };
146 m.confidence > threshold
147 });
148
149 matches
150 }
151
152 fn quick_contains_secrets(&self, content: &str) -> bool {
154 if self.is_likely_false_positive_content(content) {
156 return false;
157 }
158
159 const QUICK_PATTERNS: &[&str] = &[
161 "api", "key", "secret", "token", "password", "credential",
162 "auth", "private", "-----BEGIN", "sk_", "pk_", "eyJ",
163 ];
164
165 let content_lower = content.to_lowercase();
166 QUICK_PATTERNS.iter().any(|&pattern| content_lower.contains(pattern))
167 }
168
169 fn is_likely_false_positive_content(&self, content: &str) -> bool {
171 let content_len = content.len();
172
173 if content_len < 10 {
175 return true;
176 }
177
178 if content.contains("data:image/") || content.contains("data:font/") {
180 return true;
181 }
182
183 let lines: Vec<&str> = content.lines().collect();
185 if lines.len() < 5 && lines.iter().any(|line| line.len() > 500 && line.matches(' ').count() < line.len() / 50) {
186 return true;
187 }
188
189 let base64_chars = content.chars().filter(|c| c.is_alphanumeric() || *c == '+' || *c == '/' || *c == '=').count();
191 let base64_ratio = base64_chars as f32 / content_len as f32;
192
193 if base64_ratio > 0.8 && !content.contains("eyJ") && content_len > 1000 {
195 return true;
196 }
197
198 if content.contains("<svg") || content.contains("xmlns=\"http://www.w3.org/2000/svg\"") {
200 return true;
201 }
202
203 if content.contains("@media") || content.contains("@import") ||
205 (content.contains("{") && content.contains("}") && content.contains(":")) {
206 return true;
207 }
208
209 false
210 }
211
212 fn run_matcher(
214 &self,
215 matcher: &AhoCorasick,
216 content: &str,
217 patterns: &AHashMap<usize, Arc<CompiledPattern>>,
218 lines: &[&str],
219 line_offsets: &[usize],
220 file_meta: &super::file_discovery::FileMetadata,
221 ) -> Vec<PatternMatch> {
222 let mut matches = Vec::new();
223
224 for mat in matcher.find_iter(content) {
225 let pattern_id = mat.pattern().as_usize();
226 if let Some(pattern) = patterns.get(&pattern_id) {
227 let (line_num, col_num) = self.offset_to_line_col(mat.start(), line_offsets);
229 let line = lines.get(line_num.saturating_sub(1)).unwrap_or(&"");
230
231 let confidence = self.calculate_confidence(line, content, pattern, file_meta);
232
233 matches.push(PatternMatch {
234 pattern: Arc::clone(pattern),
235 line_number: line_num,
236 column_number: col_num,
237 evidence: self.extract_evidence(line, mat.start(), mat.end()),
238 confidence,
239 });
240 }
241 }
242
243 matches
244 }
245
246 fn offset_to_line_col(&self, offset: usize, line_offsets: &[usize]) -> (usize, usize) {
248 let line_num = line_offsets.binary_search(&offset)
249 .unwrap_or_else(|i| i.saturating_sub(1));
250
251 let line_start = line_offsets.get(line_num).copied().unwrap_or(0);
252 let col_num = offset - line_start + 1;
253
254 (line_num + 1, col_num)
255 }
256
257 fn calculate_confidence(&self, line: &str, content: &str, pattern: &CompiledPattern, file_meta: &super::file_discovery::FileMetadata) -> f32 {
259 let mut confidence: f32 = 0.6;
260
261 let _line_lower = line.to_lowercase();
262 let _content_lower = content.to_lowercase();
263
264 if self.is_obvious_false_positive(line, content, file_meta) {
266 return 0.0;
267 }
268
269 confidence = self.adjust_confidence_for_context(confidence, line, content, pattern);
271
272 confidence = self.adjust_confidence_for_pattern(confidence, line, content, pattern);
274
275 confidence.clamp(0.0, 1.0)
276 }
277
278 fn is_obvious_false_positive(&self, line: &str, content: &str, file_meta: &super::file_discovery::FileMetadata) -> bool {
280 let line_lower = line.to_lowercase();
281
282 if line_lower.trim_start().starts_with("//") ||
284 line_lower.trim_start().starts_with("#") ||
285 line_lower.trim_start().starts_with("*") ||
286 line_lower.trim_start().starts_with("<!--") {
287 return true;
288 }
289
290 if self.is_safe_dependency_metadata(line, file_meta) {
292 return true;
293 }
294
295 if line.contains("${") && line.contains("}") {
297 return true;
298 }
299
300 if line.contains("${selectedApiKey") || line.contains("${apiKey") ||
302 line.contains("${key") || line.contains("${token") {
303 return true;
304 }
305
306 if self.is_in_code_generation_context(content) && self.looks_like_template_code(line) {
308 return true;
309 }
310
311 let false_positive_patterns = [
313 "example", "placeholder", "your_", "todo", "fixme", "xxx",
314 "xxxxxxxx", "12345", "abcdef", "test", "demo", "sample",
315 "lorem", "ipsum", "change_me", "replace_me", "insert_",
316 "enter_your", "add_your", "put_your", "use_your",
317 "props.", "state.", "this.", "component",
319 ];
320
321 if false_positive_patterns.iter().any(|&pattern| line_lower.contains(pattern)) {
322 return true;
323 }
324
325 if line_lower.contains("@example") || line_lower.contains("@param") ||
327 line_lower.contains("interface") || line_lower.contains("type ") {
328 return true;
329 }
330
331 if line.contains("data:image/") || line.contains("data:font/") ||
333 line.contains("data:application/") {
334 return true;
335 }
336
337 if (line.contains("http://") || line.contains("https://")) && self.is_in_array_or_list(content) {
339 return true;
340 }
341
342 if self.is_command_line_script(line) {
345 return true;
346 }
347
348 if self.is_env_var_interpolation(line, file_meta) {
350 return true;
351 }
352
353 if line.len() > 200 && line.matches(' ').count() < line.len() / 20 {
355 return true;
356 }
357
358 if line.contains("return `") || line.contains("const ") && line.contains(" = `") {
360 return true;
361 }
362
363 false
364 }
365
366 fn is_in_array_or_list(&self, content: &str) -> bool {
368 let content_lower = content.to_lowercase();
369 let array_patterns = [
371 "const ", "let ", "var ", "export const ", "export let ",
372 "authorized_parties", "allowed_origins", "authorized_domains",
373 "hosts", "urls", "uris", "endpoints", "domains",
374 "redirect_uris", "allowed_hosts", "cors_origins",
375 "trusted_sources",
376 ];
377
378 array_patterns.iter().any(|p| content_lower.contains(p)) &&
379 (content.contains("[") && content.contains("]")) || (content.contains("(") && content.contains(")")) || (content.contains("{") && content.contains("}")) }
383
384 fn is_command_line_script(&self, line: &str) -> bool {
388 if !line.contains("--") {
390 return false;
391 }
392
393 let line_lower = line.to_lowercase();
394
395 let command_keywords = [
398 "run", "exec", "build", "start", "test", "deploy", "gen", "generate",
400 "get", "set", "create", "delete", "update", "push", "pull", "watch",
401 "serve", "lint", "format",
402
403 "client", "server", "output", "input", "file", "env", "environment",
405 "config", "path", "dir", "port", "host", "watch", "prod", "dev",
406
407 "npm", "yarn", "pnpm", "npx", "node", "python", "pip", "go", "cargo",
409 "docker", "aws", "gcloud", "az", "kubectl", "terraform", "encore", "bun", "bunx",
410 "maven", "gradle", "gradlew", "gradlew.bat", "gradlew.sh", "gradlew.jar", "gradlew.zip",
411 "mvn", "pipx", "pipenv", "poetry", "ruff", "black", "isort", "flake8", "mypy", "pytest",
412 "jest", "mocha", "jasmine", "cypress", "playwright", "selenium", "puppeteer", "webdriver",
413 "puppeteer-extra", "puppeteer-extra-plugin-stealth", "puppeteer-extra-plugin-recaptcha"
414 ];
415
416 if command_keywords.iter().any(|&kw| line_lower.contains(kw)) {
418 return true;
419 }
420
421 if line.contains("--") && (line.contains('/') || line.contains('\\') || line.contains('=')) {
423 return true;
424 }
425
426 false
427 }
428
429 fn is_in_code_generation_context(&self, content: &str) -> bool {
431 let content_lower = content.to_lowercase();
432
433 let code_gen_patterns = [
435 "getcode", "generatecode", "codecomponent", "apicodedialog",
436 "const getcode", "function getcode", "const code", "function code",
437 "codesnippet", "codeexample", "template", "example code",
438 "code generator", "api example", "curl example",
439 "codeblock", "copyblock", "syntax highlight"
441 ];
442
443 code_gen_patterns.iter().any(|&pattern| content_lower.contains(pattern))
444 }
445
446 fn looks_like_template_code(&self, line: &str) -> bool {
448 if line.contains("return `") || line.contains("= `") {
450 return true;
451 }
452
453 if line.contains("API_URL") || line.contains("/api/v1/") || line.contains("/prediction/") {
455 return true;
456 }
457
458 if line.contains("requests.post") || line.contains("fetch(") ||
460 line.contains("curl ") || line.contains("import requests") {
461 return true;
462 }
463
464 if line.contains("Authorization:") || line.contains("Bearer ") {
466 return true;
467 }
468
469 false
470 }
471
472 fn adjust_confidence_for_context(&self, mut confidence: f32, line: &str, content: &str, _pattern: &CompiledPattern) -> f32 {
474 let line_lower = line.to_lowercase();
475 let content_lower = content.to_lowercase();
476
477 if line.contains("=") || line.contains(":") {
479 confidence += 0.2;
480 }
481
482 if line_lower.contains("export ") || line_lower.contains("process.env") {
484 confidence += 0.3;
485 }
486
487 if line_lower.contains("import") && (line_lower.contains("api") || line_lower.contains("key")) {
489 confidence += 0.1;
490 }
491
492 if content_lower.contains("package.json") || content_lower.contains("node_modules") {
494 confidence -= 0.2;
495 }
496
497 if content_lower.contains("/test/") || content_lower.contains("__test__") ||
499 content_lower.contains(".test.") || content_lower.contains(".spec.") {
500 confidence -= 0.3;
501 }
502
503 if content_lower.contains("readme") || content_lower.contains("documentation") ||
505 content_lower.contains("docs/") {
506 confidence -= 0.4;
507 }
508
509 confidence
510 }
511
512 fn adjust_confidence_for_pattern(&self, mut confidence: f32, line: &str, content: &str, pattern: &CompiledPattern) -> f32 {
514 let line_lower = line.to_lowercase();
515 let content_lower = content.to_lowercase();
516
517 if self.is_in_code_generation_context(content) {
519 confidence -= 0.6;
520 }
521
522 for keyword in &pattern.confidence_boost_keywords {
524 if content_lower.contains(&keyword.to_lowercase()) {
525 confidence += 0.1;
526 }
527 }
528
529 for keyword in &pattern.false_positive_keywords {
531 if line_lower.contains(&keyword.to_lowercase()) {
532 confidence -= 0.4;
533 }
534 }
535
536 match pattern.id.as_str() {
538 "jwt-token" => {
539 if !line.contains("eyJ") || line.split('.').count() != 3 {
541 confidence -= 0.3;
542 }
543 if line_lower.contains("example") || line_lower.contains("jwt") {
545 confidence -= 0.2;
546 }
547 if line.contains("${") {
549 confidence -= 0.8;
550 }
551 }
552 "openai-api-key" => {
553 if !line.contains("sk-") {
555 confidence -= 0.5;
556 }
557 if line_lower.contains("openai") || line_lower.contains("gpt") {
559 confidence += 0.2;
560 }
561 if line.contains("${") || line.contains("selectedApiKey") {
563 confidence -= 0.9;
564 }
565 }
566 "database-url-with-creds" => {
567 if !line.contains("://") || line.contains("example.com") {
569 confidence -= 0.4;
570 }
571
572 let placeholder_creds = [
574 "user:pass", "user:password", "admin:admin", "admin:password",
575 "username:password", "test:test", "root:root", "postgres:postgres",
576 ];
577 if placeholder_creds.iter().any(|p| line.contains(p)) {
578 confidence -= 0.8; }
580
581 if line.contains("${") {
583 confidence -= 0.7;
584 }
585 }
586 "long-secret-value" | "generic-api-key" => {
587 if line.contains("${") || line.contains("selectedApiKey") ||
589 line.contains("apiKey") && line.contains("?") {
590 confidence -= 0.8;
591 }
592 if line.contains("Bearer ") && line.contains("${") {
594 confidence -= 0.9;
595 }
596 }
597 _ => {
598 if line.contains("${") {
600 confidence -= 0.6;
601 }
602 }
603 }
604
605 if content_lower.contains("react") || content_lower.contains("jsx") ||
607 content_lower.contains("component") {
608 if line.contains("${") || line.contains("props.") || line.contains("state.") {
609 confidence -= 0.5;
610 }
611 }
612
613 confidence
614 }
615
616 fn extract_evidence(&self, line: &str, start: usize, end: usize) -> String {
618 let prefix = &line[..start.min(line.len())];
620 let suffix = &line[end.min(line.len())..];
621 let masked = "*".repeat((end - start).min(20));
622
623 format!("{}{}{}", prefix, masked, suffix).trim().to_string()
624 }
625
626 fn build_matcher(patterns: &[(String, Arc<CompiledPattern>)]) -> Result<AhoCorasick, SecurityError> {
628 let strings: Vec<&str> = patterns.iter().map(|(s, _)| s.as_str()).collect();
629
630 let matcher = AhoCorasickBuilder::new()
631 .match_kind(MatchKind::LeftmostFirst)
632 .ascii_case_insensitive(true)
633 .build(&strings)
634 .map_err(|e| SecurityError::PatternEngine(format!("Failed to build matcher: {}", e)))?;
635
636 Ok(matcher)
637 }
638
639 fn create_pattern_map(patterns: Vec<(String, Arc<CompiledPattern>)>) -> AHashMap<usize, Arc<CompiledPattern>> {
641 patterns.into_iter()
642 .enumerate()
643 .map(|(id, (_, pattern))| (id, pattern))
644 .collect()
645 }
646
647 fn load_patterns(pattern_sets: &[String]) -> Result<(
649 Vec<(String, Arc<CompiledPattern>)>,
650 Vec<(String, Arc<CompiledPattern>)>,
651 Vec<(String, Arc<CompiledPattern>)>,
652 Vec<(Regex, Arc<CompiledPattern>)>,
653 ), SecurityError> {
654 let mut secret_patterns = Vec::new();
655 let mut env_var_patterns = Vec::new();
656 let mut api_key_patterns = Vec::new();
657 let mut complex_patterns = Vec::new();
658
659 if pattern_sets.contains(&"default".to_string()) {
661 Self::load_default_patterns(&mut secret_patterns, &mut env_var_patterns,
662 &mut api_key_patterns, &mut complex_patterns)?;
663 }
664
665 for set in pattern_sets {
667 match set.as_str() {
668 "aws" => Self::load_aws_patterns(&mut api_key_patterns)?,
669 "gcp" => Self::load_gcp_patterns(&mut api_key_patterns)?,
670 "azure" => Self::load_azure_patterns(&mut api_key_patterns)?,
671 "crypto" => Self::load_crypto_patterns(&mut secret_patterns)?,
672 _ => {}
673 }
674 }
675
676 Ok((secret_patterns, env_var_patterns, api_key_patterns, complex_patterns))
677 }
678
679 fn load_default_patterns(
681 secret_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
682 _env_var_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
683 api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
684 complex_patterns: &mut Vec<(Regex, Arc<CompiledPattern>)>,
685 ) -> Result<(), SecurityError> {
686 api_key_patterns.push((
690 "sk-".to_string(),
691 Arc::new(CompiledPattern {
692 id: "openai-api-key".to_string(),
693 name: "OpenAI API Key".to_string(),
694 severity: SecuritySeverity::Critical,
695 category: SecurityCategory::SecretsExposure,
696 description: "OpenAI API key detected".to_string(),
697 remediation: vec![
698 "Remove API key from source code".to_string(),
699 "Use environment variables".to_string(),
700 ],
701 references: vec!["https://platform.openai.com/docs/api-reference".to_string()],
702 cwe_id: Some("CWE-798".to_string()),
703 confidence_boost_keywords: vec!["openai".to_string(), "gpt".to_string()],
704 false_positive_keywords: vec![
705 "sk-xxxxxxxx".to_string(), "sk-...".to_string(), "sk_test".to_string(),
706 "example".to_string(), "placeholder".to_string(), "your_".to_string(),
707 "TODO".to_string(), "FIXME".to_string(), "XXX".to_string(),
708 ],
709 }),
710 ));
711
712 complex_patterns.push((
714 Regex::new(r#"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token)\s*[:=]\s*['"]([a-zA-Z0-9+/=]{32,})['"]"#)
716 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
717 Arc::new(CompiledPattern {
718 id: "long-secret-value".to_string(),
719 name: "Hardcoded Secret Value".to_string(),
720 severity: SecuritySeverity::Critical,
721 category: SecurityCategory::SecretsExposure,
722 description: "Long secret value hardcoded in source code".to_string(),
723 remediation: vec![
724 "Use environment variables for secrets".to_string(),
725 "Implement proper secret management".to_string(),
726 ],
727 references: vec![],
728 cwe_id: Some("CWE-798".to_string()),
729 confidence_boost_keywords: vec!["bearer".to_string(), "auth".to_string()],
730 false_positive_keywords: vec![
731 "process.env".to_string(), "getenv".to_string(), "example".to_string(),
732 "placeholder".to_string(), "your_".to_string(), "TODO".to_string(),
733 "test".to_string(), "demo".to_string(), "fake".to_string(),
734 ],
735 }),
736 ));
737
738 complex_patterns.push((
740 Regex::new(r#"\beyJ[a-zA-Z0-9+/=]{100,}\b"#)
741 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
742 Arc::new(CompiledPattern {
743 id: "jwt-token".to_string(),
744 name: "JWT Token".to_string(),
745 severity: SecuritySeverity::High,
746 category: SecurityCategory::SecretsExposure,
747 description: "JWT token detected in source code".to_string(),
748 remediation: vec![
749 "Never hardcode JWT tokens".to_string(),
750 "Use secure token storage".to_string(),
751 ],
752 references: vec![],
753 cwe_id: Some("CWE-798".to_string()),
754 confidence_boost_keywords: vec!["bearer".to_string(), "authorization".to_string()],
755 false_positive_keywords: vec!["example".to_string(), "demo".to_string()],
756 }),
757 ));
758
759 complex_patterns.push((
761 Regex::new(r#"(?i)(?:postgres|postgresql|mysql|mongodb|redis|mariadb)://[^:\s]+:[^@\s]+@[^/\s]+/[^\s]*"#)
762 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
763 Arc::new(CompiledPattern {
764 id: "database-url-with-creds".to_string(),
765 name: "Database URL with Credentials".to_string(),
766 severity: SecuritySeverity::Critical,
767 category: SecurityCategory::SecretsExposure,
768 description: "Database connection string with embedded credentials".to_string(),
769 remediation: vec![
770 "Use environment variables for database credentials".to_string(),
771 "Use connection string without embedded passwords".to_string(),
772 ],
773 references: vec![],
774 cwe_id: Some("CWE-798".to_string()),
775 confidence_boost_keywords: vec!["connection".to_string(), "database".to_string()],
776 false_positive_keywords: vec![
777 "example.com".to_string(), "localhost".to_string(), "placeholder".to_string(),
778 "your_".to_string(), "user:pass".to_string(),
779 ],
780 }),
781 ));
782
783 secret_patterns.push((
785 "-----BEGIN".to_string(),
786 Arc::new(CompiledPattern {
787 id: "private-key-header".to_string(),
788 name: "Private Key".to_string(),
789 severity: SecuritySeverity::Critical,
790 category: SecurityCategory::SecretsExposure,
791 description: "Private key detected".to_string(),
792 remediation: vec![
793 "Never commit private keys to version control".to_string(),
794 "Use secure key storage solutions".to_string(),
795 ],
796 references: vec![],
797 cwe_id: Some("CWE-321".to_string()),
798 confidence_boost_keywords: vec!["PRIVATE".to_string(), "RSA".to_string(), "DSA".to_string()],
799 false_positive_keywords: vec!["PUBLIC".to_string(), "CERTIFICATE".to_string()],
800 }),
801 ));
802
803 Ok(())
804 }
805
806 fn load_aws_patterns(api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
808 api_key_patterns.push((
809 "AKIA".to_string(),
810 Arc::new(CompiledPattern {
811 id: "aws-access-key".to_string(),
812 name: "AWS Access Key".to_string(),
813 severity: SecuritySeverity::Critical,
814 category: SecurityCategory::SecretsExposure,
815 description: "AWS Access Key ID detected".to_string(),
816 remediation: vec![
817 "Remove AWS credentials from source code".to_string(),
818 "Use IAM roles or environment variables".to_string(),
819 "Rotate the exposed key immediately".to_string(),
820 ],
821 references: vec!["https://docs.aws.amazon.com/security/".to_string()],
822 cwe_id: Some("CWE-798".to_string()),
823 confidence_boost_keywords: vec!["aws".to_string(), "s3".to_string(), "ec2".to_string()],
824 false_positive_keywords: vec!["AKIA00000000".to_string()],
825 }),
826 ));
827
828 Ok(())
829 }
830
831 fn load_gcp_patterns(api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
833 api_key_patterns.push((
834 "AIza".to_string(),
835 Arc::new(CompiledPattern {
836 id: "gcp-api-key".to_string(),
837 name: "Google Cloud API Key".to_string(),
838 severity: SecuritySeverity::High,
839 category: SecurityCategory::SecretsExposure,
840 description: "Google Cloud API key detected".to_string(),
841 remediation: vec![
842 "Use service accounts instead of API keys".to_string(),
843 "Restrict API key usage by IP/referrer".to_string(),
844 ],
845 references: vec!["https://cloud.google.com/security/".to_string()],
846 cwe_id: Some("CWE-798".to_string()),
847 confidence_boost_keywords: vec!["google".to_string(), "gcp".to_string(), "firebase".to_string()],
848 false_positive_keywords: vec![],
849 }),
850 ));
851
852 Ok(())
853 }
854
    /// Loader for the `"azure"` pattern set.
    ///
    /// Currently a no-op: no rules are added and `Ok(())` is always returned.
    fn load_azure_patterns(_api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
        Ok(())
    }
860
861 fn load_crypto_patterns(secret_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
863 secret_patterns.push((
864 "-----BEGIN".to_string(),
865 Arc::new(CompiledPattern {
866 id: "private-key".to_string(),
867 name: "Private Key".to_string(),
868 severity: SecuritySeverity::Critical,
869 category: SecurityCategory::SecretsExposure,
870 description: "Private key detected".to_string(),
871 remediation: vec![
872 "Never commit private keys to version control".to_string(),
873 "Use secure key storage solutions".to_string(),
874 ],
875 references: vec![],
876 cwe_id: Some("CWE-321".to_string()),
877 confidence_boost_keywords: vec!["RSA".to_string(), "PRIVATE".to_string()],
878 false_positive_keywords: vec!["PUBLIC".to_string()],
879 }),
880 ));
881
882 Ok(())
883 }
884
885 fn is_safe_dependency_metadata(&self, line: &str, file_meta: &super::file_discovery::FileMetadata) -> bool {
887 let filename = file_meta.path.file_name().and_then(|s| s.to_str()).unwrap_or("");
888 let line_trimmed = line.trim();
889
890 match filename {
891 "package.json" => {
892 let safe_keys = [
894 "\"name\"", "\"version\"", "\"description\"", "\"main\"", "\"module\"",
895 "\"type\"", "\"private\"", "\"license\"", "\"author\"", "\"homepage\"",
896 "\"repository\"", "\"bugs\"", "\"keywords\"", "\"workspaces\"",
897 ];
898 safe_keys.iter().any(|key| line_trimmed.starts_with(key))
899 }
900 "Cargo.toml" | "pyproject.toml" => {
901 let safe_keys = [
903 "name =", "version =", "description =", "edition =", "license =",
904 "authors =", "homepage =", "repository =", "documentation =", "keywords =",
905 ];
906 safe_keys.iter().any(|key| line_trimmed.starts_with(key))
907 }
908 "go.mod" => {
909 line_trimmed.starts_with("module ") || line_trimmed.starts_with("go ")
910 }
911 "pom.xml" => {
912 let safe_tags = ["<groupId>", "<artifactId>", "<version>", "<name>", "<description>", "<url>", "<license>"];
914 safe_tags.iter().any(|tag| line_trimmed.contains(tag))
915 }
916 "build.gradle" | "build.gradle.kts" => {
917 let safe_assignments = ["rootProject.name =", "group =", "version ="];
918 safe_assignments.iter().any(|s| line_trimmed.starts_with(s))
919 }
920 _ => false,
921 }
922 }
923
924 fn is_env_var_interpolation(&self, line: &str, file_meta: &super::file_discovery::FileMetadata) -> bool {
926 let filename = file_meta.path.file_name().and_then(|s| s.to_str()).unwrap_or("");
927
928 if line.contains("\"$env\"") {
930 return true;
931 }
932
933 if line.contains('$') {
935 if line.contains("${") && line.contains("}") {
937 let is_config_file = matches!(
938 filename,
939 "docker-compose.yml"
940 | "docker-compose.yaml"
941 | "Dockerfile"
942 | "Jenkinsfile"
943 | "Makefile"
944 ) || filename.ends_with(".env")
945 || filename.ends_with(".sh")
946 || filename.ends_with(".yml")
947 || filename.ends_with(".yaml");
948
949 if is_config_file {
950 return true;
951 }
952
953 let line_lower = line.to_lowercase();
955 let env_context_keywords = ["environment:", "command:", "entrypoint:", "value:", "args:"];
956 if env_context_keywords.iter().any(|kw| line_lower.contains(kw)) {
957 return true;
958 }
959 }
960 }
961
962 false
963 }
964}
965
/// Unit tests for `PatternEngine`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analyzer::security::turbo::file_discovery::{FileMetadata, PriorityHints};
    use std::path::PathBuf;
    use std::time::SystemTime;

    /// Builds minimal `FileMetadata` for `path`; only the path and the
    /// extension derived from it vary between tests.
    fn dummy_metadata(path: &str) -> FileMetadata {
        FileMetadata {
            path: PathBuf::from(path),
            size: 100,
            // A path without an extension yields `Some("")`, not `None`.
            extension: Some(
                PathBuf::from(path)
                    .extension()
                    .and_then(|s| s.to_str())
                    .unwrap_or("")
                    .to_string(),
            ),
            is_gitignored: false,
            modified: SystemTime::now(),
            priority_hints: PriorityHints::default(),
        }
    }

    /// The engine builds from the default config and loads at least one pattern.
    #[test]
    fn test_pattern_engine_creation() {
        let config = TurboConfig::default();
        let engine = PatternEngine::new(&config);
        assert!(engine.is_ok());

        let engine = engine.unwrap();
        assert!(engine.pattern_count() > 0);
    }

    /// A hardcoded `sk-` key in JavaScript source is reported.
    #[test]
    fn test_pattern_matching() {
        let config = TurboConfig::default();
        let engine = PatternEngine::new(&config).unwrap();
        let meta = dummy_metadata("test.js");

        let content = r#"
 const apiKey = "sk-1234567890abcdef1234567890abcdef12345678";
 password = "super_secret_password_that_is_long_enough";
 process.env.DATABASE_URL
 "#;

        let matches = engine.scan_content(content, false, &meta);
        assert!(!matches.is_empty());

        assert!(matches.iter().any(|m| m.pattern.id.contains("openai") || m.pattern.id.contains("secret")));
    }

    /// Template-literal interpolation of an API key yields at most one finding.
    #[test]
    fn test_template_literal_filtering() {
        let config = TurboConfig::default();
        let engine = PatternEngine::new(&config).unwrap();
        let meta = dummy_metadata("test.js");

        let template_content = r#"
 const getCode = () => {
 return `Authorization: "Bearer ${selectedApiKey?.apiKey}"`;
 }

 function generateExample() {
 return "Bearer " + apiKey;
 }
 "#;

        let matches = engine.scan_content(template_content, false, &meta);
        assert!(matches.len() <= 1, "Template literals should be filtered out");
    }

    /// Code that generates example snippets produces no findings, or only
    /// findings with confidence below 0.3.
    #[test]
    fn test_code_generation_context() {
        let config = TurboConfig::default();
        let engine = PatternEngine::new(&config).unwrap();
        let meta = dummy_metadata("APICodeDialog.jsx");

        let code_gen_content = r#"
 import { CopyBlock } from 'react-code-blocks';

 const APICodeDialog = () => {
 const getCodeWithAuthorization = () => {
 return `
 headers: {
 Authorization: "Bearer ${selectedApiKey?.apiKey}",
 "Content-Type": "application/json"
 }
 `;
 };

 return <CopyBlock text={getCodeWithAuthorization()} />;
 };
 "#;

        let matches = engine.scan_content(code_gen_content, false, &meta);
        assert!(matches.is_empty() || matches.iter().all(|m| m.confidence < 0.3),
            "Code generation context should have very low confidence");
    }

    /// With quick-reject enabled, content without any trigger keyword is
    /// skipped and yields no findings.
    #[test]
    fn test_quick_reject() {
        let config = TurboConfig::default();
        let engine = PatternEngine::new(&config).unwrap();
        let meta = dummy_metadata("main.rs");

        let safe_content = "fn main() { println!(\"Hello, world!\"); }";
        let matches = engine.scan_content(safe_content, true, &meta);
        assert!(matches.is_empty());
    }

    /// Well-known `package.json` keys are never reported, even with a very
    /// permissive extra regex pattern installed.
    #[test]
    fn test_package_json_filtering() {
        let config = TurboConfig::default();
        let engine = PatternEngine::new(&config).unwrap();
        let meta = dummy_metadata("package.json");

        let content = r#"
 {
 "name": "my-cool-package-with-a-long-name-that-could-be-a-secret",
 "version": "1.0.0-beta.this.is.a.very.long.version.string.that.is.not.a.key",
 "description": "a string that is not a secret"
 }
 "#;

        // Add a catch-all pattern that matches any long token, so the test
        // exercises the metadata filter rather than the default patterns.
        let mut test_engine = engine;
        test_engine.complex_patterns.push((
            Regex::new(r#"[a-zA-Z0-9-]{20,}"#).unwrap(),
            Arc::new(CompiledPattern {
                id: "generic-long-string".to_string(),
                name: "Generic Long String".to_string(),
                severity: SecuritySeverity::High,
                category: SecurityCategory::SecretsExposure,
                description: "A generic long string.".to_string(),
                remediation: vec![],
                references: vec![],
                cwe_id: None,
                confidence_boost_keywords: vec![],
                false_positive_keywords: vec![],
            }),
        ));

        let matches = test_engine.scan_content(content, false, &meta);
        assert!(matches.is_empty(), "Should not find secrets in safe package.json keys");
    }
}
1117}