1use ahash::AHashMap;
6use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
7use log::debug;
8use regex::Regex;
9use std::sync::Arc;
10
11use super::{SecurityError, TurboConfig};
12use crate::analyzer::security::{SecurityCategory, SecuritySeverity};
13
14#[derive(Debug, Clone)]
16pub struct CompiledPattern {
17 pub id: String,
18 pub name: String,
19 pub severity: SecuritySeverity,
20 pub category: SecurityCategory,
21 pub description: String,
22 pub remediation: Vec<String>,
23 pub references: Vec<String>,
24 pub cwe_id: Option<String>,
25 pub confidence_boost_keywords: Vec<String>,
26 pub false_positive_keywords: Vec<String>,
27}
28
29#[derive(Debug, Clone)]
31pub struct PatternMatch {
32 pub pattern: Arc<CompiledPattern>,
33 pub line_number: usize,
34 pub column_number: usize,
35 pub evidence: String,
36 pub confidence: f32,
37}
38
39pub struct PatternEngine {
41 secret_matcher: AhoCorasick,
43 env_var_matcher: AhoCorasick,
44 api_key_matcher: AhoCorasick,
45
46 secret_patterns: AHashMap<usize, Arc<CompiledPattern>>,
48 env_var_patterns: AHashMap<usize, Arc<CompiledPattern>>,
49 api_key_patterns: AHashMap<usize, Arc<CompiledPattern>>,
50
51 complex_patterns: Vec<(Regex, Arc<CompiledPattern>)>,
53
54 total_patterns: usize,
56}
57
58impl PatternEngine {
59 pub fn new(config: &TurboConfig) -> Result<Self, SecurityError> {
60 debug!(
61 "Initializing pattern engine with pattern sets: {:?}",
62 config.pattern_sets
63 );
64
65 let (secret_patterns, env_var_patterns, api_key_patterns, complex_patterns) =
67 Self::load_patterns(&config.pattern_sets)?;
68
69 let secret_matcher = Self::build_matcher(&secret_patterns)?;
71 let env_var_matcher = Self::build_matcher(&env_var_patterns)?;
72 let api_key_matcher = Self::build_matcher(&api_key_patterns)?;
73
74 let total_patterns = secret_patterns.len()
75 + env_var_patterns.len()
76 + api_key_patterns.len()
77 + complex_patterns.len();
78
79 debug!(
80 "Pattern engine initialized with {} total patterns",
81 total_patterns
82 );
83
84 Ok(Self {
85 secret_matcher,
86 env_var_matcher,
87 api_key_matcher,
88 secret_patterns: Self::create_pattern_map(secret_patterns),
89 env_var_patterns: Self::create_pattern_map(env_var_patterns),
90 api_key_patterns: Self::create_pattern_map(api_key_patterns),
91 complex_patterns,
92 total_patterns,
93 })
94 }
95
96 pub fn pattern_count(&self) -> usize {
98 self.total_patterns
99 }
100
101 pub fn scan_content(
103 &self,
104 content: &str,
105 quick_reject: bool,
106 file_meta: &super::file_discovery::FileMetadata,
107 ) -> Vec<PatternMatch> {
108 if quick_reject && !self.quick_contains_secrets(content) {
110 return Vec::new();
111 }
112
113 let mut matches = Vec::new();
114
115 let lines: Vec<&str> = content.lines().collect();
117 let mut line_offsets = vec![0];
118 let mut offset = 0;
119
120 for line in &lines {
121 offset += line.len() + 1; line_offsets.push(offset);
123 }
124
125 matches.extend(self.run_matcher(
127 &self.secret_matcher,
128 content,
129 &self.secret_patterns,
130 &lines,
131 &line_offsets,
132 file_meta,
133 ));
134 matches.extend(self.run_matcher(
135 &self.env_var_matcher,
136 content,
137 &self.env_var_patterns,
138 &lines,
139 &line_offsets,
140 file_meta,
141 ));
142 matches.extend(self.run_matcher(
143 &self.api_key_matcher,
144 content,
145 &self.api_key_patterns,
146 &lines,
147 &line_offsets,
148 file_meta,
149 ));
150
151 for (line_num, line) in lines.iter().enumerate() {
153 for (regex, pattern) in &self.complex_patterns {
154 if let Some(mat) = regex.find(line) {
155 let confidence = self.calculate_confidence(line, content, pattern, file_meta);
156
157 matches.push(PatternMatch {
158 pattern: Arc::clone(pattern),
159 line_number: line_num + 1,
160 column_number: mat.start() + 1,
161 evidence: self.extract_evidence(line, mat.start(), mat.end()),
162 confidence,
163 });
164 }
165 }
166 }
167
168 matches.retain(|m| {
170 let threshold = match m.pattern.id.as_str() {
171 id if id.contains("aws-access-key") => 0.4, id if id.contains("openai-api-key") => 0.4, id if id.contains("jwt-token") => 0.6, id if id.contains("database-url") => 0.5, id if id.contains("bearer-token") => 0.7, id if id.contains("generic") => 0.8, id if id.contains("long-secret-value") => 0.7, _ => 0.7, };
180 m.confidence > threshold
181 });
182
183 matches
184 }
185
186 fn quick_contains_secrets(&self, content: &str) -> bool {
188 if self.is_likely_false_positive_content(content) {
190 return false;
191 }
192
193 const QUICK_PATTERNS: &[&str] = &[
195 "api",
196 "key",
197 "secret",
198 "token",
199 "password",
200 "credential",
201 "auth",
202 "private",
203 "-----BEGIN",
204 "sk_",
205 "pk_",
206 "eyJ",
207 ];
208
209 let content_lower = content.to_lowercase();
210 QUICK_PATTERNS
211 .iter()
212 .any(|&pattern| content_lower.contains(pattern))
213 }
214
215 fn is_likely_false_positive_content(&self, content: &str) -> bool {
217 let content_len = content.len();
218
219 if content_len < 10 {
221 return true;
222 }
223
224 if content.contains("data:image/") || content.contains("data:font/") {
226 return true;
227 }
228
229 let lines: Vec<&str> = content.lines().collect();
231 if lines.len() < 5
232 && lines
233 .iter()
234 .any(|line| line.len() > 500 && line.matches(' ').count() < line.len() / 50)
235 {
236 return true;
237 }
238
239 let base64_chars = content
241 .chars()
242 .filter(|c| c.is_alphanumeric() || *c == '+' || *c == '/' || *c == '=')
243 .count();
244 let base64_ratio = base64_chars as f32 / content_len as f32;
245
246 if base64_ratio > 0.8 && !content.contains("eyJ") && content_len > 1000 {
248 return true;
249 }
250
251 if content.contains("<svg") || content.contains("xmlns=\"http://www.w3.org/2000/svg\"") {
253 return true;
254 }
255
256 if content.contains("@media")
258 || content.contains("@import")
259 || (content.contains("{") && content.contains("}") && content.contains(":"))
260 {
261 return true;
262 }
263
264 false
265 }
266
267 fn run_matcher(
269 &self,
270 matcher: &AhoCorasick,
271 content: &str,
272 patterns: &AHashMap<usize, Arc<CompiledPattern>>,
273 lines: &[&str],
274 line_offsets: &[usize],
275 file_meta: &super::file_discovery::FileMetadata,
276 ) -> Vec<PatternMatch> {
277 let mut matches = Vec::new();
278
279 for mat in matcher.find_iter(content) {
280 let pattern_id = mat.pattern().as_usize();
281 if let Some(pattern) = patterns.get(&pattern_id) {
282 let (line_num, col_num) = self.offset_to_line_col(mat.start(), line_offsets);
284 let line = lines.get(line_num.saturating_sub(1)).unwrap_or(&"");
285
286 let confidence = self.calculate_confidence(line, content, pattern, file_meta);
287
288 matches.push(PatternMatch {
289 pattern: Arc::clone(pattern),
290 line_number: line_num,
291 column_number: col_num,
292 evidence: self.extract_evidence(line, mat.start(), mat.end()),
293 confidence,
294 });
295 }
296 }
297
298 matches
299 }
300
301 fn offset_to_line_col(&self, offset: usize, line_offsets: &[usize]) -> (usize, usize) {
303 let line_num = line_offsets
304 .binary_search(&offset)
305 .unwrap_or_else(|i| i.saturating_sub(1));
306
307 let line_start = line_offsets.get(line_num).copied().unwrap_or(0);
308 let col_num = offset - line_start + 1;
309
310 (line_num + 1, col_num)
311 }
312
313 fn calculate_confidence(
315 &self,
316 line: &str,
317 content: &str,
318 pattern: &CompiledPattern,
319 file_meta: &super::file_discovery::FileMetadata,
320 ) -> f32 {
321 let mut confidence: f32 = 0.6;
322
323 let _line_lower = line.to_lowercase();
324 let _content_lower = content.to_lowercase();
325
326 if self.is_obvious_false_positive(line, content, file_meta) {
328 return 0.0;
329 }
330
331 confidence = self.adjust_confidence_for_context(confidence, line, content, pattern);
333
334 confidence = self.adjust_confidence_for_pattern(confidence, line, content, pattern);
336
337 confidence.clamp(0.0, 1.0)
338 }
339
340 fn is_obvious_false_positive(
342 &self,
343 line: &str,
344 content: &str,
345 file_meta: &super::file_discovery::FileMetadata,
346 ) -> bool {
347 let line_lower = line.to_lowercase();
348
349 if line_lower.trim_start().starts_with("//")
351 || line_lower.trim_start().starts_with("#")
352 || line_lower.trim_start().starts_with("*")
353 || line_lower.trim_start().starts_with("<!--")
354 {
355 return true;
356 }
357
358 if self.is_safe_dependency_metadata(line, file_meta) {
360 return true;
361 }
362
363 if line.contains("${") && line.contains("}") {
365 return true;
366 }
367
368 if line.contains("${selectedApiKey")
370 || line.contains("${apiKey")
371 || line.contains("${key")
372 || line.contains("${token")
373 {
374 return true;
375 }
376
377 if self.is_in_code_generation_context(content) && self.looks_like_template_code(line) {
379 return true;
380 }
381
382 let false_positive_patterns = [
384 "example",
385 "placeholder",
386 "your_",
387 "todo",
388 "fixme",
389 "xxx",
390 "xxxxxxxx",
391 "12345",
392 "abcdef",
393 "test",
394 "demo",
395 "sample",
396 "lorem",
397 "ipsum",
398 "change_me",
399 "replace_me",
400 "insert_",
401 "enter_your",
402 "add_your",
403 "put_your",
404 "use_your",
405 "props.",
407 "state.",
408 "this.",
409 "component",
410 ];
411
412 if false_positive_patterns
413 .iter()
414 .any(|&pattern| line_lower.contains(pattern))
415 {
416 return true;
417 }
418
419 if line_lower.contains("@example")
421 || line_lower.contains("@param")
422 || line_lower.contains("interface")
423 || line_lower.contains("type ")
424 {
425 return true;
426 }
427
428 if line.contains("data:image/")
430 || line.contains("data:font/")
431 || line.contains("data:application/")
432 {
433 return true;
434 }
435
436 if (line.contains("http://") || line.contains("https://"))
438 && self.is_in_array_or_list(content)
439 {
440 return true;
441 }
442
443 if self.is_command_line_script(line) {
446 return true;
447 }
448
449 if self.is_env_var_interpolation(line, file_meta) {
451 return true;
452 }
453
454 if line.len() > 200 && line.matches(' ').count() < line.len() / 20 {
456 return true;
457 }
458
459 if line.contains("return `") || line.contains("const ") && line.contains(" = `") {
461 return true;
462 }
463
464 false
465 }
466
467 fn is_in_array_or_list(&self, content: &str) -> bool {
469 let content_lower = content.to_lowercase();
470 let array_patterns = [
472 "const ",
473 "let ",
474 "var ",
475 "export const ",
476 "export let ",
477 "authorized_parties",
478 "allowed_origins",
479 "authorized_domains",
480 "hosts",
481 "urls",
482 "uris",
483 "endpoints",
484 "domains",
485 "redirect_uris",
486 "allowed_hosts",
487 "cors_origins",
488 "trusted_sources",
489 ];
490
491 array_patterns.iter().any(|p| content_lower.contains(p)) &&
492 (content.contains("[") && content.contains("]")) || (content.contains("(") && content.contains(")")) || (content.contains("{") && content.contains("}")) }
496
497 fn is_command_line_script(&self, line: &str) -> bool {
501 if !line.contains("--") {
503 return false;
504 }
505
506 let line_lower = line.to_lowercase();
507
508 let command_keywords = [
511 "run",
513 "exec",
514 "build",
515 "start",
516 "test",
517 "deploy",
518 "gen",
519 "generate",
520 "get",
521 "set",
522 "create",
523 "delete",
524 "update",
525 "push",
526 "pull",
527 "watch",
528 "serve",
529 "lint",
530 "format",
531 "client",
533 "server",
534 "output",
535 "input",
536 "file",
537 "env",
538 "environment",
539 "config",
540 "path",
541 "dir",
542 "port",
543 "host",
544 "watch",
545 "prod",
546 "dev",
547 "npm",
549 "yarn",
550 "pnpm",
551 "npx",
552 "node",
553 "python",
554 "pip",
555 "go",
556 "cargo",
557 "docker",
558 "aws",
559 "gcloud",
560 "az",
561 "kubectl",
562 "terraform",
563 "encore",
564 "bun",
565 "bunx",
566 "maven",
567 "gradle",
568 "gradlew",
569 "gradlew.bat",
570 "gradlew.sh",
571 "gradlew.jar",
572 "gradlew.zip",
573 "mvn",
574 "pipx",
575 "pipenv",
576 "poetry",
577 "ruff",
578 "black",
579 "isort",
580 "flake8",
581 "mypy",
582 "pytest",
583 "jest",
584 "mocha",
585 "jasmine",
586 "cypress",
587 "playwright",
588 "selenium",
589 "puppeteer",
590 "webdriver",
591 "puppeteer-extra",
592 "puppeteer-extra-plugin-stealth",
593 "puppeteer-extra-plugin-recaptcha",
594 ];
595
596 if command_keywords.iter().any(|&kw| line_lower.contains(kw)) {
598 return true;
599 }
600
601 if line.contains("--") && (line.contains('/') || line.contains('\\') || line.contains('='))
603 {
604 return true;
605 }
606
607 false
608 }
609
610 fn is_in_code_generation_context(&self, content: &str) -> bool {
612 let content_lower = content.to_lowercase();
613
614 let code_gen_patterns = [
616 "getcode",
617 "generatecode",
618 "codecomponent",
619 "apicodedialog",
620 "const getcode",
621 "function getcode",
622 "const code",
623 "function code",
624 "codesnippet",
625 "codeexample",
626 "template",
627 "example code",
628 "code generator",
629 "api example",
630 "curl example",
631 "codeblock",
633 "copyblock",
634 "syntax highlight",
635 ];
636
637 code_gen_patterns
638 .iter()
639 .any(|&pattern| content_lower.contains(pattern))
640 }
641
642 fn looks_like_template_code(&self, line: &str) -> bool {
644 if line.contains("return `") || line.contains("= `") {
646 return true;
647 }
648
649 if line.contains("API_URL") || line.contains("/api/v1/") || line.contains("/prediction/") {
651 return true;
652 }
653
654 if line.contains("requests.post")
656 || line.contains("fetch(")
657 || line.contains("curl ")
658 || line.contains("import requests")
659 {
660 return true;
661 }
662
663 if line.contains("Authorization:") || line.contains("Bearer ") {
665 return true;
666 }
667
668 false
669 }
670
671 fn adjust_confidence_for_context(
673 &self,
674 mut confidence: f32,
675 line: &str,
676 content: &str,
677 _pattern: &CompiledPattern,
678 ) -> f32 {
679 let line_lower = line.to_lowercase();
680 let content_lower = content.to_lowercase();
681
682 if line.contains("=") || line.contains(":") {
684 confidence += 0.2;
685 }
686
687 if line_lower.contains("export ") || line_lower.contains("process.env") {
689 confidence += 0.3;
690 }
691
692 if line_lower.contains("import")
694 && (line_lower.contains("api") || line_lower.contains("key"))
695 {
696 confidence += 0.1;
697 }
698
699 if content_lower.contains("package.json") || content_lower.contains("node_modules") {
701 confidence -= 0.2;
702 }
703
704 if content_lower.contains("/test/")
706 || content_lower.contains("__test__")
707 || content_lower.contains(".test.")
708 || content_lower.contains(".spec.")
709 {
710 confidence -= 0.3;
711 }
712
713 if content_lower.contains("readme")
715 || content_lower.contains("documentation")
716 || content_lower.contains("docs/")
717 {
718 confidence -= 0.4;
719 }
720
721 confidence
722 }
723
724 fn adjust_confidence_for_pattern(
726 &self,
727 mut confidence: f32,
728 line: &str,
729 content: &str,
730 pattern: &CompiledPattern,
731 ) -> f32 {
732 let line_lower = line.to_lowercase();
733 let content_lower = content.to_lowercase();
734
735 if self.is_in_code_generation_context(content) {
737 confidence -= 0.6;
738 }
739
740 for keyword in &pattern.confidence_boost_keywords {
742 if content_lower.contains(&keyword.to_lowercase()) {
743 confidence += 0.1;
744 }
745 }
746
747 for keyword in &pattern.false_positive_keywords {
749 if line_lower.contains(&keyword.to_lowercase()) {
750 confidence -= 0.4;
751 }
752 }
753
754 match pattern.id.as_str() {
756 "jwt-token" => {
757 if !line.contains("eyJ") || line.split('.').count() != 3 {
759 confidence -= 0.3;
760 }
761 if line_lower.contains("example") || line_lower.contains("jwt") {
763 confidence -= 0.2;
764 }
765 if line.contains("${") {
767 confidence -= 0.8;
768 }
769 }
770 "openai-api-key" => {
771 if !line.contains("sk-") {
773 confidence -= 0.5;
774 }
775 if line_lower.contains("openai") || line_lower.contains("gpt") {
777 confidence += 0.2;
778 }
779 if line.contains("${") || line.contains("selectedApiKey") {
781 confidence -= 0.9;
782 }
783 }
784 "database-url-with-creds" => {
785 if !line.contains("://") || line.contains("example.com") {
787 confidence -= 0.4;
788 }
789
790 let placeholder_creds = [
792 "user:pass",
793 "user:password",
794 "admin:admin",
795 "admin:password",
796 "username:password",
797 "test:test",
798 "root:root",
799 "postgres:postgres",
800 ];
801 if placeholder_creds.iter().any(|p| line.contains(p)) {
802 confidence -= 0.8; }
804
805 if line.contains("${") {
807 confidence -= 0.7;
808 }
809 }
810 "long-secret-value" | "generic-api-key" => {
811 if line.contains("${")
813 || line.contains("selectedApiKey")
814 || line.contains("apiKey") && line.contains("?")
815 {
816 confidence -= 0.8;
817 }
818 if line.contains("Bearer ") && line.contains("${") {
820 confidence -= 0.9;
821 }
822 }
823 _ => {
824 if line.contains("${") {
826 confidence -= 0.6;
827 }
828 }
829 }
830
831 if (content_lower.contains("react")
833 || content_lower.contains("jsx")
834 || content_lower.contains("component"))
835 && (line.contains("${") || line.contains("props.") || line.contains("state."))
836 {
837 confidence -= 0.5;
838 }
839
840 confidence
841 }
842
843 fn extract_evidence(&self, line: &str, start: usize, end: usize) -> String {
845 let prefix = &line[..start.min(line.len())];
847 let suffix = &line[end.min(line.len())..];
848 let masked = "*".repeat((end - start).min(20));
849
850 format!("{}{}{}", prefix, masked, suffix).trim().to_string()
851 }
852
853 fn build_matcher(
855 patterns: &[(String, Arc<CompiledPattern>)],
856 ) -> Result<AhoCorasick, SecurityError> {
857 let strings: Vec<&str> = patterns.iter().map(|(s, _)| s.as_str()).collect();
858
859 let matcher = AhoCorasickBuilder::new()
860 .match_kind(MatchKind::LeftmostFirst)
861 .ascii_case_insensitive(true)
862 .build(&strings)
863 .map_err(|e| SecurityError::PatternEngine(format!("Failed to build matcher: {}", e)))?;
864
865 Ok(matcher)
866 }
867
868 fn create_pattern_map(
870 patterns: Vec<(String, Arc<CompiledPattern>)>,
871 ) -> AHashMap<usize, Arc<CompiledPattern>> {
872 patterns
873 .into_iter()
874 .enumerate()
875 .map(|(id, (_, pattern))| (id, pattern))
876 .collect()
877 }
878
879 fn load_patterns(
881 pattern_sets: &[String],
882 ) -> Result<
883 (
884 Vec<(String, Arc<CompiledPattern>)>,
885 Vec<(String, Arc<CompiledPattern>)>,
886 Vec<(String, Arc<CompiledPattern>)>,
887 Vec<(Regex, Arc<CompiledPattern>)>,
888 ),
889 SecurityError,
890 > {
891 let mut secret_patterns = Vec::new();
892 let mut env_var_patterns = Vec::new();
893 let mut api_key_patterns = Vec::new();
894 let mut complex_patterns = Vec::new();
895
896 if pattern_sets.contains(&"default".to_string()) {
898 Self::load_default_patterns(
899 &mut secret_patterns,
900 &mut env_var_patterns,
901 &mut api_key_patterns,
902 &mut complex_patterns,
903 )?;
904 }
905
906 for set in pattern_sets {
908 match set.as_str() {
909 "aws" => Self::load_aws_patterns(&mut api_key_patterns)?,
910 "gcp" => Self::load_gcp_patterns(&mut api_key_patterns)?,
911 "azure" => Self::load_azure_patterns(&mut api_key_patterns)?,
912 "crypto" => Self::load_crypto_patterns(&mut secret_patterns)?,
913 _ => {}
914 }
915 }
916
917 Ok((
918 secret_patterns,
919 env_var_patterns,
920 api_key_patterns,
921 complex_patterns,
922 ))
923 }
924
925 fn load_default_patterns(
927 secret_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
928 _env_var_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
929 api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
930 complex_patterns: &mut Vec<(Regex, Arc<CompiledPattern>)>,
931 ) -> Result<(), SecurityError> {
932 api_key_patterns.push((
936 "sk-".to_string(),
937 Arc::new(CompiledPattern {
938 id: "openai-api-key".to_string(),
939 name: "OpenAI API Key".to_string(),
940 severity: SecuritySeverity::Critical,
941 category: SecurityCategory::SecretsExposure,
942 description: "OpenAI API key detected".to_string(),
943 remediation: vec![
944 "Remove API key from source code".to_string(),
945 "Use environment variables".to_string(),
946 ],
947 references: vec!["https://platform.openai.com/docs/api-reference".to_string()],
948 cwe_id: Some("CWE-798".to_string()),
949 confidence_boost_keywords: vec!["openai".to_string(), "gpt".to_string()],
950 false_positive_keywords: vec![
951 "sk-xxxxxxxx".to_string(),
952 "sk-...".to_string(),
953 "sk_test".to_string(),
954 "example".to_string(),
955 "placeholder".to_string(),
956 "your_".to_string(),
957 "TODO".to_string(),
958 "FIXME".to_string(),
959 "XXX".to_string(),
960 ],
961 }),
962 ));
963
964 complex_patterns.push((
966 Regex::new(r#"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token)\s*[:=]\s*['"]([a-zA-Z0-9+/=]{32,})['"]"#)
968 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
969 Arc::new(CompiledPattern {
970 id: "long-secret-value".to_string(),
971 name: "Hardcoded Secret Value".to_string(),
972 severity: SecuritySeverity::Critical,
973 category: SecurityCategory::SecretsExposure,
974 description: "Long secret value hardcoded in source code".to_string(),
975 remediation: vec![
976 "Use environment variables for secrets".to_string(),
977 "Implement proper secret management".to_string(),
978 ],
979 references: vec![],
980 cwe_id: Some("CWE-798".to_string()),
981 confidence_boost_keywords: vec!["bearer".to_string(), "auth".to_string()],
982 false_positive_keywords: vec![
983 "process.env".to_string(), "getenv".to_string(), "example".to_string(),
984 "placeholder".to_string(), "your_".to_string(), "TODO".to_string(),
985 "test".to_string(), "demo".to_string(), "fake".to_string(),
986 ],
987 }),
988 ));
989
990 complex_patterns.push((
992 Regex::new(r#"\beyJ[a-zA-Z0-9+/=]{100,}\b"#)
993 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
994 Arc::new(CompiledPattern {
995 id: "jwt-token".to_string(),
996 name: "JWT Token".to_string(),
997 severity: SecuritySeverity::High,
998 category: SecurityCategory::SecretsExposure,
999 description: "JWT token detected in source code".to_string(),
1000 remediation: vec![
1001 "Never hardcode JWT tokens".to_string(),
1002 "Use secure token storage".to_string(),
1003 ],
1004 references: vec![],
1005 cwe_id: Some("CWE-798".to_string()),
1006 confidence_boost_keywords: vec!["bearer".to_string(), "authorization".to_string()],
1007 false_positive_keywords: vec!["example".to_string(), "demo".to_string()],
1008 }),
1009 ));
1010
1011 complex_patterns.push((
1013 Regex::new(r#"(?i)(?:postgres|postgresql|mysql|mongodb|redis|mariadb)://[^:\s]+:[^@\s]+@[^/\s]+/[^\s]*"#)
1014 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
1015 Arc::new(CompiledPattern {
1016 id: "database-url-with-creds".to_string(),
1017 name: "Database URL with Credentials".to_string(),
1018 severity: SecuritySeverity::Critical,
1019 category: SecurityCategory::SecretsExposure,
1020 description: "Database connection string with embedded credentials".to_string(),
1021 remediation: vec![
1022 "Use environment variables for database credentials".to_string(),
1023 "Use connection string without embedded passwords".to_string(),
1024 ],
1025 references: vec![],
1026 cwe_id: Some("CWE-798".to_string()),
1027 confidence_boost_keywords: vec!["connection".to_string(), "database".to_string()],
1028 false_positive_keywords: vec![
1029 "example.com".to_string(), "localhost".to_string(), "placeholder".to_string(),
1030 "your_".to_string(), "user:pass".to_string(),
1031 ],
1032 }),
1033 ));
1034
1035 secret_patterns.push((
1037 "-----BEGIN".to_string(),
1038 Arc::new(CompiledPattern {
1039 id: "private-key-header".to_string(),
1040 name: "Private Key".to_string(),
1041 severity: SecuritySeverity::Critical,
1042 category: SecurityCategory::SecretsExposure,
1043 description: "Private key detected".to_string(),
1044 remediation: vec![
1045 "Never commit private keys to version control".to_string(),
1046 "Use secure key storage solutions".to_string(),
1047 ],
1048 references: vec![],
1049 cwe_id: Some("CWE-321".to_string()),
1050 confidence_boost_keywords: vec![
1051 "PRIVATE".to_string(),
1052 "RSA".to_string(),
1053 "DSA".to_string(),
1054 ],
1055 false_positive_keywords: vec!["PUBLIC".to_string(), "CERTIFICATE".to_string()],
1056 }),
1057 ));
1058
1059 Ok(())
1060 }
1061
1062 fn load_aws_patterns(
1064 api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
1065 ) -> Result<(), SecurityError> {
1066 api_key_patterns.push((
1067 "AKIA".to_string(),
1068 Arc::new(CompiledPattern {
1069 id: "aws-access-key".to_string(),
1070 name: "AWS Access Key".to_string(),
1071 severity: SecuritySeverity::Critical,
1072 category: SecurityCategory::SecretsExposure,
1073 description: "AWS Access Key ID detected".to_string(),
1074 remediation: vec![
1075 "Remove AWS credentials from source code".to_string(),
1076 "Use IAM roles or environment variables".to_string(),
1077 "Rotate the exposed key immediately".to_string(),
1078 ],
1079 references: vec!["https://docs.aws.amazon.com/security/".to_string()],
1080 cwe_id: Some("CWE-798".to_string()),
1081 confidence_boost_keywords: vec![
1082 "aws".to_string(),
1083 "s3".to_string(),
1084 "ec2".to_string(),
1085 ],
1086 false_positive_keywords: vec!["AKIA00000000".to_string()],
1087 }),
1088 ));
1089
1090 Ok(())
1091 }
1092
1093 fn load_gcp_patterns(
1095 api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
1096 ) -> Result<(), SecurityError> {
1097 api_key_patterns.push((
1098 "AIza".to_string(),
1099 Arc::new(CompiledPattern {
1100 id: "gcp-api-key".to_string(),
1101 name: "Google Cloud API Key".to_string(),
1102 severity: SecuritySeverity::High,
1103 category: SecurityCategory::SecretsExposure,
1104 description: "Google Cloud API key detected".to_string(),
1105 remediation: vec![
1106 "Use service accounts instead of API keys".to_string(),
1107 "Restrict API key usage by IP/referrer".to_string(),
1108 ],
1109 references: vec!["https://cloud.google.com/security/".to_string()],
1110 cwe_id: Some("CWE-798".to_string()),
1111 confidence_boost_keywords: vec![
1112 "google".to_string(),
1113 "gcp".to_string(),
1114 "firebase".to_string(),
1115 ],
1116 false_positive_keywords: vec![],
1117 }),
1118 ));
1119
1120 Ok(())
1121 }
1122
1123 fn load_azure_patterns(
1125 _api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
1126 ) -> Result<(), SecurityError> {
1127 Ok(())
1129 }
1130
1131 fn load_crypto_patterns(
1133 secret_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
1134 ) -> Result<(), SecurityError> {
1135 secret_patterns.push((
1136 "-----BEGIN".to_string(),
1137 Arc::new(CompiledPattern {
1138 id: "private-key".to_string(),
1139 name: "Private Key".to_string(),
1140 severity: SecuritySeverity::Critical,
1141 category: SecurityCategory::SecretsExposure,
1142 description: "Private key detected".to_string(),
1143 remediation: vec![
1144 "Never commit private keys to version control".to_string(),
1145 "Use secure key storage solutions".to_string(),
1146 ],
1147 references: vec![],
1148 cwe_id: Some("CWE-321".to_string()),
1149 confidence_boost_keywords: vec!["RSA".to_string(), "PRIVATE".to_string()],
1150 false_positive_keywords: vec!["PUBLIC".to_string()],
1151 }),
1152 ));
1153
1154 Ok(())
1155 }
1156
1157 fn is_safe_dependency_metadata(
1159 &self,
1160 line: &str,
1161 file_meta: &super::file_discovery::FileMetadata,
1162 ) -> bool {
1163 let filename = file_meta
1164 .path
1165 .file_name()
1166 .and_then(|s| s.to_str())
1167 .unwrap_or("");
1168 let line_trimmed = line.trim();
1169
1170 match filename {
1171 "package.json" => {
1172 let safe_keys = [
1174 "\"name\"",
1175 "\"version\"",
1176 "\"description\"",
1177 "\"main\"",
1178 "\"module\"",
1179 "\"type\"",
1180 "\"private\"",
1181 "\"license\"",
1182 "\"author\"",
1183 "\"homepage\"",
1184 "\"repository\"",
1185 "\"bugs\"",
1186 "\"keywords\"",
1187 "\"workspaces\"",
1188 ];
1189 safe_keys.iter().any(|key| line_trimmed.starts_with(key))
1190 }
1191 "Cargo.toml" | "pyproject.toml" => {
1192 let safe_keys = [
1194 "name =",
1195 "version =",
1196 "description =",
1197 "edition =",
1198 "license =",
1199 "authors =",
1200 "homepage =",
1201 "repository =",
1202 "documentation =",
1203 "keywords =",
1204 ];
1205 safe_keys.iter().any(|key| line_trimmed.starts_with(key))
1206 }
1207 "go.mod" => line_trimmed.starts_with("module ") || line_trimmed.starts_with("go "),
1208 "pom.xml" => {
1209 let safe_tags = [
1211 "<groupId>",
1212 "<artifactId>",
1213 "<version>",
1214 "<name>",
1215 "<description>",
1216 "<url>",
1217 "<license>",
1218 ];
1219 safe_tags.iter().any(|tag| line_trimmed.contains(tag))
1220 }
1221 "build.gradle" | "build.gradle.kts" => {
1222 let safe_assignments = ["rootProject.name =", "group =", "version ="];
1223 safe_assignments.iter().any(|s| line_trimmed.starts_with(s))
1224 }
1225 _ => false,
1226 }
1227 }
1228
1229 fn is_env_var_interpolation(
1231 &self,
1232 line: &str,
1233 file_meta: &super::file_discovery::FileMetadata,
1234 ) -> bool {
1235 let filename = file_meta
1236 .path
1237 .file_name()
1238 .and_then(|s| s.to_str())
1239 .unwrap_or("");
1240
1241 if line.contains("\"$env\"") {
1243 return true;
1244 }
1245
1246 if line.contains('$') {
1248 if line.contains("${") && line.contains("}") {
1250 let is_config_file = matches!(
1251 filename,
1252 "docker-compose.yml"
1253 | "docker-compose.yaml"
1254 | "Dockerfile"
1255 | "Jenkinsfile"
1256 | "Makefile"
1257 ) || filename.ends_with(".env")
1258 || filename.ends_with(".sh")
1259 || filename.ends_with(".yml")
1260 || filename.ends_with(".yaml");
1261
1262 if is_config_file {
1263 return true;
1264 }
1265
1266 let line_lower = line.to_lowercase();
1268 let env_context_keywords =
1269 ["environment:", "command:", "entrypoint:", "value:", "args:"];
1270 if env_context_keywords
1271 .iter()
1272 .any(|kw| line_lower.contains(kw))
1273 {
1274 return true;
1275 }
1276 }
1277 }
1278
1279 false
1280 }
1281}
1282
1283#[cfg(test)]
1284mod tests {
1285 use super::*;
1286 use crate::analyzer::security::turbo::file_discovery::{FileMetadata, PriorityHints};
1287 use std::path::PathBuf;
1288 use std::time::SystemTime;
1289
1290 fn dummy_metadata(path: &str) -> FileMetadata {
1291 FileMetadata {
1292 path: PathBuf::from(path),
1293 size: 100,
1294 extension: Some(
1295 PathBuf::from(path)
1296 .extension()
1297 .and_then(|s| s.to_str())
1298 .unwrap_or("")
1299 .to_string(),
1300 ),
1301 is_gitignored: false,
1302 modified: SystemTime::now(),
1303 priority_hints: PriorityHints::default(),
1304 }
1305 }
1306
/// The default configuration must yield a working engine with at least one
/// rule loaded.
#[test]
fn test_pattern_engine_creation() {
    let result = PatternEngine::new(&TurboConfig::default());
    assert!(result.is_ok());

    let pattern_count = result.unwrap().pattern_count();
    assert!(pattern_count > 0);
}
1316
/// Hardcoded credentials in a JS snippet should produce at least one
/// OpenAI or secret finding. Currently ignored.
#[test]
#[ignore]
fn test_pattern_matching() {
    let engine = PatternEngine::new(&TurboConfig::default()).unwrap();
    let meta = dummy_metadata("test.js");

    let content = r#"
        const apiKey = "sk-1234567890abcdef1234567890abcdef12345678";
        password = "super_secret_password_that_is_long_enough";
        process.env.DATABASE_URL
    "#;

    let matches = engine.scan_content(content, false, &meta);
    assert!(!matches.is_empty());

    let found_expected_rule = matches.iter().any(|m| {
        ["openai", "secret"]
            .iter()
            .any(|&fragment| m.pattern.id.contains(fragment))
    });
    assert!(found_expected_rule);
}
1340
/// Template literals that interpolate an API key variable must not be
/// reported as hardcoded secrets (at most one finding is tolerated).
#[test]
fn test_template_literal_filtering() {
    let engine = PatternEngine::new(&TurboConfig::default()).unwrap();
    let meta = dummy_metadata("test.js");

    let template_content = r#"
        const getCode = () => {
            return `Authorization: "Bearer ${selectedApiKey?.apiKey}"`;
        }

        function generateExample() {
            return "Bearer " + apiKey;
        }
    "#;

    let findings = engine.scan_content(template_content, false, &meta);
    assert!(
        findings.len() <= 1,
        "Template literals should be filtered out"
    );
}
1365
/// A component that renders example code with an interpolated bearer token
/// must yield no findings, or only very low-confidence ones.
#[test]
fn test_code_generation_context() {
    let engine = PatternEngine::new(&TurboConfig::default()).unwrap();
    let meta = dummy_metadata("APICodeDialog.jsx");

    let code_gen_content = r#"
        import { CopyBlock } from 'react-code-blocks';

        const APICodeDialog = () => {
            const getCodeWithAuthorization = () => {
                return `
                    headers: {
                        Authorization: "Bearer ${selectedApiKey?.apiKey}",
                        "Content-Type": "application/json"
                    }
                `;
            };

            return <CopyBlock text={getCodeWithAuthorization()} />;
        };
    "#;

    let findings = engine.scan_content(code_gen_content, false, &meta);
    let only_low_confidence = findings.iter().all(|m| m.confidence < 0.3);
    assert!(
        findings.is_empty() || only_low_confidence,
        "Code generation context should have very low confidence"
    );
}
1397
/// With quick-reject enabled, content without any secret indicators must
/// produce no findings.
#[test]
fn test_quick_reject() {
    let engine = PatternEngine::new(&TurboConfig::default()).unwrap();
    let meta = dummy_metadata("main.rs");

    let safe_content = "fn main() { println!(\"Hello, world!\"); }";

    assert!(engine.scan_content(safe_content, true, &meta).is_empty());
}
1408
/// Long strings under safe `package.json` keys must not be reported, even
/// with a deliberately greedy regex rule installed.
#[test]
fn test_package_json_filtering() {
    let mut engine = PatternEngine::new(&TurboConfig::default()).unwrap();
    let meta = dummy_metadata("package.json");

    // Install a rule that flags any 20+ character alphanumeric/dash run.
    let greedy_rule = CompiledPattern {
        id: "generic-long-string".to_string(),
        name: "Generic Long String".to_string(),
        severity: SecuritySeverity::High,
        category: SecurityCategory::SecretsExposure,
        description: "A generic long string.".to_string(),
        remediation: vec![],
        references: vec![],
        cwe_id: None,
        confidence_boost_keywords: vec![],
        false_positive_keywords: vec![],
    };
    engine.complex_patterns.push((
        Regex::new(r#"[a-zA-Z0-9-]{20,}"#).unwrap(),
        Arc::new(greedy_rule),
    ));

    let content = r#"
    {
        "name": "my-cool-package-with-a-long-name-that-could-be-a-secret",
        "version": "1.0.0-beta.this.is.a.very.long.version.string.that.is.not.a.key",
        "description": "a string that is not a secret"
    }
    "#;

    let findings = engine.scan_content(content, false, &meta);
    assert!(
        findings.is_empty(),
        "Should not find secrets in safe package.json keys"
    );
}
1447}