1use ahash::AHashMap;
6use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
7use log::debug;
8use regex::Regex;
9use std::sync::Arc;
10
11use super::{SecurityError, TurboConfig};
12use crate::analyzer::security::{SecurityCategory, SecuritySeverity};
13
14#[derive(Debug, Clone)]
16pub struct CompiledPattern {
17 pub id: String,
18 pub name: String,
19 pub severity: SecuritySeverity,
20 pub category: SecurityCategory,
21 pub description: String,
22 pub remediation: Vec<String>,
23 pub references: Vec<String>,
24 pub cwe_id: Option<String>,
25 pub confidence_boost_keywords: Vec<String>,
26 pub false_positive_keywords: Vec<String>,
27}
28
29#[derive(Debug, Clone)]
31pub struct PatternMatch {
32 pub pattern: Arc<CompiledPattern>,
33 pub line_number: usize,
34 pub column_number: usize,
35 pub evidence: String,
36 pub confidence: f32,
37}
38
39pub struct PatternEngine {
41 secret_matcher: AhoCorasick,
43 env_var_matcher: AhoCorasick,
44 api_key_matcher: AhoCorasick,
45
46 secret_patterns: AHashMap<usize, Arc<CompiledPattern>>,
48 env_var_patterns: AHashMap<usize, Arc<CompiledPattern>>,
49 api_key_patterns: AHashMap<usize, Arc<CompiledPattern>>,
50
51 complex_patterns: Vec<(Regex, Arc<CompiledPattern>)>,
53
54 total_patterns: usize,
56}
57
58impl PatternEngine {
59 pub fn new(config: &TurboConfig) -> Result<Self, SecurityError> {
60 debug!(
61 "Initializing pattern engine with pattern sets: {:?}",
62 config.pattern_sets
63 );
64
65 let (secret_patterns, env_var_patterns, api_key_patterns, complex_patterns) =
67 Self::load_patterns(&config.pattern_sets)?;
68
69 let secret_matcher = Self::build_matcher(&secret_patterns)?;
71 let env_var_matcher = Self::build_matcher(&env_var_patterns)?;
72 let api_key_matcher = Self::build_matcher(&api_key_patterns)?;
73
74 let total_patterns = secret_patterns.len()
75 + env_var_patterns.len()
76 + api_key_patterns.len()
77 + complex_patterns.len();
78
79 debug!(
80 "Pattern engine initialized with {} total patterns",
81 total_patterns
82 );
83
84 Ok(Self {
85 secret_matcher,
86 env_var_matcher,
87 api_key_matcher,
88 secret_patterns: Self::create_pattern_map(secret_patterns),
89 env_var_patterns: Self::create_pattern_map(env_var_patterns),
90 api_key_patterns: Self::create_pattern_map(api_key_patterns),
91 complex_patterns,
92 total_patterns,
93 })
94 }
95
96 pub fn pattern_count(&self) -> usize {
98 self.total_patterns
99 }
100
101 pub fn scan_content(
103 &self,
104 content: &str,
105 quick_reject: bool,
106 file_meta: &super::file_discovery::FileMetadata,
107 ) -> Vec<PatternMatch> {
108 if quick_reject && !self.quick_contains_secrets(content) {
110 return Vec::new();
111 }
112
113 let mut matches = Vec::new();
114
115 let lines: Vec<&str> = content.lines().collect();
117 let mut line_offsets = vec![0];
118 let mut offset = 0;
119
120 for line in &lines {
121 offset += line.len() + 1; line_offsets.push(offset);
123 }
124
125 matches.extend(self.run_matcher(
127 &self.secret_matcher,
128 content,
129 &self.secret_patterns,
130 &lines,
131 &line_offsets,
132 file_meta,
133 ));
134 matches.extend(self.run_matcher(
135 &self.env_var_matcher,
136 content,
137 &self.env_var_patterns,
138 &lines,
139 &line_offsets,
140 file_meta,
141 ));
142 matches.extend(self.run_matcher(
143 &self.api_key_matcher,
144 content,
145 &self.api_key_patterns,
146 &lines,
147 &line_offsets,
148 file_meta,
149 ));
150
151 for (line_num, line) in lines.iter().enumerate() {
153 for (regex, pattern) in &self.complex_patterns {
154 if let Some(mat) = regex.find(line) {
155 let confidence = self.calculate_confidence(line, content, &pattern, file_meta);
156
157 matches.push(PatternMatch {
158 pattern: Arc::clone(pattern),
159 line_number: line_num + 1,
160 column_number: mat.start() + 1,
161 evidence: self.extract_evidence(line, mat.start(), mat.end()),
162 confidence,
163 });
164 }
165 }
166 }
167
168 matches.retain(|m| {
170 let threshold = match m.pattern.id.as_str() {
171 id if id.contains("aws-access-key") => 0.4, id if id.contains("openai-api-key") => 0.4, id if id.contains("jwt-token") => 0.6, id if id.contains("database-url") => 0.5, id if id.contains("bearer-token") => 0.7, id if id.contains("generic") => 0.8, id if id.contains("long-secret-value") => 0.7, _ => 0.7, };
180 m.confidence > threshold
181 });
182
183 matches
184 }
185
186 fn quick_contains_secrets(&self, content: &str) -> bool {
188 if self.is_likely_false_positive_content(content) {
190 return false;
191 }
192
193 const QUICK_PATTERNS: &[&str] = &[
195 "api",
196 "key",
197 "secret",
198 "token",
199 "password",
200 "credential",
201 "auth",
202 "private",
203 "-----BEGIN",
204 "sk_",
205 "pk_",
206 "eyJ",
207 ];
208
209 let content_lower = content.to_lowercase();
210 QUICK_PATTERNS
211 .iter()
212 .any(|&pattern| content_lower.contains(pattern))
213 }
214
215 fn is_likely_false_positive_content(&self, content: &str) -> bool {
217 let content_len = content.len();
218
219 if content_len < 10 {
221 return true;
222 }
223
224 if content.contains("data:image/") || content.contains("data:font/") {
226 return true;
227 }
228
229 let lines: Vec<&str> = content.lines().collect();
231 if lines.len() < 5
232 && lines
233 .iter()
234 .any(|line| line.len() > 500 && line.matches(' ').count() < line.len() / 50)
235 {
236 return true;
237 }
238
239 let base64_chars = content
241 .chars()
242 .filter(|c| c.is_alphanumeric() || *c == '+' || *c == '/' || *c == '=')
243 .count();
244 let base64_ratio = base64_chars as f32 / content_len as f32;
245
246 if base64_ratio > 0.8 && !content.contains("eyJ") && content_len > 1000 {
248 return true;
249 }
250
251 if content.contains("<svg") || content.contains("xmlns=\"http://www.w3.org/2000/svg\"") {
253 return true;
254 }
255
256 if content.contains("@media")
258 || content.contains("@import")
259 || (content.contains("{") && content.contains("}") && content.contains(":"))
260 {
261 return true;
262 }
263
264 false
265 }
266
267 fn run_matcher(
269 &self,
270 matcher: &AhoCorasick,
271 content: &str,
272 patterns: &AHashMap<usize, Arc<CompiledPattern>>,
273 lines: &[&str],
274 line_offsets: &[usize],
275 file_meta: &super::file_discovery::FileMetadata,
276 ) -> Vec<PatternMatch> {
277 let mut matches = Vec::new();
278
279 for mat in matcher.find_iter(content) {
280 let pattern_id = mat.pattern().as_usize();
281 if let Some(pattern) = patterns.get(&pattern_id) {
282 let (line_num, col_num) = self.offset_to_line_col(mat.start(), line_offsets);
284 let line = lines.get(line_num.saturating_sub(1)).unwrap_or(&"");
285
286 let confidence = self.calculate_confidence(line, content, pattern, file_meta);
287
288 matches.push(PatternMatch {
289 pattern: Arc::clone(pattern),
290 line_number: line_num,
291 column_number: col_num,
292 evidence: self.extract_evidence(line, mat.start(), mat.end()),
293 confidence,
294 });
295 }
296 }
297
298 matches
299 }
300
301 fn offset_to_line_col(&self, offset: usize, line_offsets: &[usize]) -> (usize, usize) {
303 let line_num = line_offsets
304 .binary_search(&offset)
305 .unwrap_or_else(|i| i.saturating_sub(1));
306
307 let line_start = line_offsets.get(line_num).copied().unwrap_or(0);
308 let col_num = offset - line_start + 1;
309
310 (line_num + 1, col_num)
311 }
312
313 fn calculate_confidence(
315 &self,
316 line: &str,
317 content: &str,
318 pattern: &CompiledPattern,
319 file_meta: &super::file_discovery::FileMetadata,
320 ) -> f32 {
321 let mut confidence: f32 = 0.6;
322
323 let _line_lower = line.to_lowercase();
324 let _content_lower = content.to_lowercase();
325
326 if self.is_obvious_false_positive(line, content, file_meta) {
328 return 0.0;
329 }
330
331 confidence = self.adjust_confidence_for_context(confidence, line, content, pattern);
333
334 confidence = self.adjust_confidence_for_pattern(confidence, line, content, pattern);
336
337 confidence.clamp(0.0, 1.0)
338 }
339
340 fn is_obvious_false_positive(
342 &self,
343 line: &str,
344 content: &str,
345 file_meta: &super::file_discovery::FileMetadata,
346 ) -> bool {
347 let line_lower = line.to_lowercase();
348
349 if line_lower.trim_start().starts_with("//")
351 || line_lower.trim_start().starts_with("#")
352 || line_lower.trim_start().starts_with("*")
353 || line_lower.trim_start().starts_with("<!--")
354 {
355 return true;
356 }
357
358 if self.is_safe_dependency_metadata(line, file_meta) {
360 return true;
361 }
362
363 if line.contains("${") && line.contains("}") {
365 return true;
366 }
367
368 if line.contains("${selectedApiKey")
370 || line.contains("${apiKey")
371 || line.contains("${key")
372 || line.contains("${token")
373 {
374 return true;
375 }
376
377 if self.is_in_code_generation_context(content) && self.looks_like_template_code(line) {
379 return true;
380 }
381
382 let false_positive_patterns = [
384 "example",
385 "placeholder",
386 "your_",
387 "todo",
388 "fixme",
389 "xxx",
390 "xxxxxxxx",
391 "12345",
392 "abcdef",
393 "test",
394 "demo",
395 "sample",
396 "lorem",
397 "ipsum",
398 "change_me",
399 "replace_me",
400 "insert_",
401 "enter_your",
402 "add_your",
403 "put_your",
404 "use_your",
405 "props.",
407 "state.",
408 "this.",
409 "component",
410 ];
411
412 if false_positive_patterns
413 .iter()
414 .any(|&pattern| line_lower.contains(pattern))
415 {
416 return true;
417 }
418
419 if line_lower.contains("@example")
421 || line_lower.contains("@param")
422 || line_lower.contains("interface")
423 || line_lower.contains("type ")
424 {
425 return true;
426 }
427
428 if line.contains("data:image/")
430 || line.contains("data:font/")
431 || line.contains("data:application/")
432 {
433 return true;
434 }
435
436 if (line.contains("http://") || line.contains("https://"))
438 && self.is_in_array_or_list(content)
439 {
440 return true;
441 }
442
443 if self.is_command_line_script(line) {
446 return true;
447 }
448
449 if self.is_env_var_interpolation(line, file_meta) {
451 return true;
452 }
453
454 if line.len() > 200 && line.matches(' ').count() < line.len() / 20 {
456 return true;
457 }
458
459 if line.contains("return `") || line.contains("const ") && line.contains(" = `") {
461 return true;
462 }
463
464 false
465 }
466
467 fn is_in_array_or_list(&self, content: &str) -> bool {
469 let content_lower = content.to_lowercase();
470 let array_patterns = [
472 "const ",
473 "let ",
474 "var ",
475 "export const ",
476 "export let ",
477 "authorized_parties",
478 "allowed_origins",
479 "authorized_domains",
480 "hosts",
481 "urls",
482 "uris",
483 "endpoints",
484 "domains",
485 "redirect_uris",
486 "allowed_hosts",
487 "cors_origins",
488 "trusted_sources",
489 ];
490
491 array_patterns.iter().any(|p| content_lower.contains(p)) &&
492 (content.contains("[") && content.contains("]")) || (content.contains("(") && content.contains(")")) || (content.contains("{") && content.contains("}")) }
496
497 fn is_command_line_script(&self, line: &str) -> bool {
501 if !line.contains("--") {
503 return false;
504 }
505
506 let line_lower = line.to_lowercase();
507
508 let command_keywords = [
511 "run",
513 "exec",
514 "build",
515 "start",
516 "test",
517 "deploy",
518 "gen",
519 "generate",
520 "get",
521 "set",
522 "create",
523 "delete",
524 "update",
525 "push",
526 "pull",
527 "watch",
528 "serve",
529 "lint",
530 "format",
531 "client",
533 "server",
534 "output",
535 "input",
536 "file",
537 "env",
538 "environment",
539 "config",
540 "path",
541 "dir",
542 "port",
543 "host",
544 "watch",
545 "prod",
546 "dev",
547 "npm",
549 "yarn",
550 "pnpm",
551 "npx",
552 "node",
553 "python",
554 "pip",
555 "go",
556 "cargo",
557 "docker",
558 "aws",
559 "gcloud",
560 "az",
561 "kubectl",
562 "terraform",
563 "encore",
564 "bun",
565 "bunx",
566 "maven",
567 "gradle",
568 "gradlew",
569 "gradlew.bat",
570 "gradlew.sh",
571 "gradlew.jar",
572 "gradlew.zip",
573 "mvn",
574 "pipx",
575 "pipenv",
576 "poetry",
577 "ruff",
578 "black",
579 "isort",
580 "flake8",
581 "mypy",
582 "pytest",
583 "jest",
584 "mocha",
585 "jasmine",
586 "cypress",
587 "playwright",
588 "selenium",
589 "puppeteer",
590 "webdriver",
591 "puppeteer-extra",
592 "puppeteer-extra-plugin-stealth",
593 "puppeteer-extra-plugin-recaptcha",
594 ];
595
596 if command_keywords.iter().any(|&kw| line_lower.contains(kw)) {
598 return true;
599 }
600
601 if line.contains("--") && (line.contains('/') || line.contains('\\') || line.contains('='))
603 {
604 return true;
605 }
606
607 false
608 }
609
610 fn is_in_code_generation_context(&self, content: &str) -> bool {
612 let content_lower = content.to_lowercase();
613
614 let code_gen_patterns = [
616 "getcode",
617 "generatecode",
618 "codecomponent",
619 "apicodedialog",
620 "const getcode",
621 "function getcode",
622 "const code",
623 "function code",
624 "codesnippet",
625 "codeexample",
626 "template",
627 "example code",
628 "code generator",
629 "api example",
630 "curl example",
631 "codeblock",
633 "copyblock",
634 "syntax highlight",
635 ];
636
637 code_gen_patterns
638 .iter()
639 .any(|&pattern| content_lower.contains(pattern))
640 }
641
642 fn looks_like_template_code(&self, line: &str) -> bool {
644 if line.contains("return `") || line.contains("= `") {
646 return true;
647 }
648
649 if line.contains("API_URL") || line.contains("/api/v1/") || line.contains("/prediction/") {
651 return true;
652 }
653
654 if line.contains("requests.post")
656 || line.contains("fetch(")
657 || line.contains("curl ")
658 || line.contains("import requests")
659 {
660 return true;
661 }
662
663 if line.contains("Authorization:") || line.contains("Bearer ") {
665 return true;
666 }
667
668 false
669 }
670
671 fn adjust_confidence_for_context(
673 &self,
674 mut confidence: f32,
675 line: &str,
676 content: &str,
677 _pattern: &CompiledPattern,
678 ) -> f32 {
679 let line_lower = line.to_lowercase();
680 let content_lower = content.to_lowercase();
681
682 if line.contains("=") || line.contains(":") {
684 confidence += 0.2;
685 }
686
687 if line_lower.contains("export ") || line_lower.contains("process.env") {
689 confidence += 0.3;
690 }
691
692 if line_lower.contains("import")
694 && (line_lower.contains("api") || line_lower.contains("key"))
695 {
696 confidence += 0.1;
697 }
698
699 if content_lower.contains("package.json") || content_lower.contains("node_modules") {
701 confidence -= 0.2;
702 }
703
704 if content_lower.contains("/test/")
706 || content_lower.contains("__test__")
707 || content_lower.contains(".test.")
708 || content_lower.contains(".spec.")
709 {
710 confidence -= 0.3;
711 }
712
713 if content_lower.contains("readme")
715 || content_lower.contains("documentation")
716 || content_lower.contains("docs/")
717 {
718 confidence -= 0.4;
719 }
720
721 confidence
722 }
723
724 fn adjust_confidence_for_pattern(
726 &self,
727 mut confidence: f32,
728 line: &str,
729 content: &str,
730 pattern: &CompiledPattern,
731 ) -> f32 {
732 let line_lower = line.to_lowercase();
733 let content_lower = content.to_lowercase();
734
735 if self.is_in_code_generation_context(content) {
737 confidence -= 0.6;
738 }
739
740 for keyword in &pattern.confidence_boost_keywords {
742 if content_lower.contains(&keyword.to_lowercase()) {
743 confidence += 0.1;
744 }
745 }
746
747 for keyword in &pattern.false_positive_keywords {
749 if line_lower.contains(&keyword.to_lowercase()) {
750 confidence -= 0.4;
751 }
752 }
753
754 match pattern.id.as_str() {
756 "jwt-token" => {
757 if !line.contains("eyJ") || line.split('.').count() != 3 {
759 confidence -= 0.3;
760 }
761 if line_lower.contains("example") || line_lower.contains("jwt") {
763 confidence -= 0.2;
764 }
765 if line.contains("${") {
767 confidence -= 0.8;
768 }
769 }
770 "openai-api-key" => {
771 if !line.contains("sk-") {
773 confidence -= 0.5;
774 }
775 if line_lower.contains("openai") || line_lower.contains("gpt") {
777 confidence += 0.2;
778 }
779 if line.contains("${") || line.contains("selectedApiKey") {
781 confidence -= 0.9;
782 }
783 }
784 "database-url-with-creds" => {
785 if !line.contains("://") || line.contains("example.com") {
787 confidence -= 0.4;
788 }
789
790 let placeholder_creds = [
792 "user:pass",
793 "user:password",
794 "admin:admin",
795 "admin:password",
796 "username:password",
797 "test:test",
798 "root:root",
799 "postgres:postgres",
800 ];
801 if placeholder_creds.iter().any(|p| line.contains(p)) {
802 confidence -= 0.8; }
804
805 if line.contains("${") {
807 confidence -= 0.7;
808 }
809 }
810 "long-secret-value" | "generic-api-key" => {
811 if line.contains("${")
813 || line.contains("selectedApiKey")
814 || line.contains("apiKey") && line.contains("?")
815 {
816 confidence -= 0.8;
817 }
818 if line.contains("Bearer ") && line.contains("${") {
820 confidence -= 0.9;
821 }
822 }
823 _ => {
824 if line.contains("${") {
826 confidence -= 0.6;
827 }
828 }
829 }
830
831 if content_lower.contains("react")
833 || content_lower.contains("jsx")
834 || content_lower.contains("component")
835 {
836 if line.contains("${") || line.contains("props.") || line.contains("state.") {
837 confidence -= 0.5;
838 }
839 }
840
841 confidence
842 }
843
844 fn extract_evidence(&self, line: &str, start: usize, end: usize) -> String {
846 let prefix = &line[..start.min(line.len())];
848 let suffix = &line[end.min(line.len())..];
849 let masked = "*".repeat((end - start).min(20));
850
851 format!("{}{}{}", prefix, masked, suffix).trim().to_string()
852 }
853
854 fn build_matcher(
856 patterns: &[(String, Arc<CompiledPattern>)],
857 ) -> Result<AhoCorasick, SecurityError> {
858 let strings: Vec<&str> = patterns.iter().map(|(s, _)| s.as_str()).collect();
859
860 let matcher = AhoCorasickBuilder::new()
861 .match_kind(MatchKind::LeftmostFirst)
862 .ascii_case_insensitive(true)
863 .build(&strings)
864 .map_err(|e| SecurityError::PatternEngine(format!("Failed to build matcher: {}", e)))?;
865
866 Ok(matcher)
867 }
868
869 fn create_pattern_map(
871 patterns: Vec<(String, Arc<CompiledPattern>)>,
872 ) -> AHashMap<usize, Arc<CompiledPattern>> {
873 patterns
874 .into_iter()
875 .enumerate()
876 .map(|(id, (_, pattern))| (id, pattern))
877 .collect()
878 }
879
880 fn load_patterns(
882 pattern_sets: &[String],
883 ) -> Result<
884 (
885 Vec<(String, Arc<CompiledPattern>)>,
886 Vec<(String, Arc<CompiledPattern>)>,
887 Vec<(String, Arc<CompiledPattern>)>,
888 Vec<(Regex, Arc<CompiledPattern>)>,
889 ),
890 SecurityError,
891 > {
892 let mut secret_patterns = Vec::new();
893 let mut env_var_patterns = Vec::new();
894 let mut api_key_patterns = Vec::new();
895 let mut complex_patterns = Vec::new();
896
897 if pattern_sets.contains(&"default".to_string()) {
899 Self::load_default_patterns(
900 &mut secret_patterns,
901 &mut env_var_patterns,
902 &mut api_key_patterns,
903 &mut complex_patterns,
904 )?;
905 }
906
907 for set in pattern_sets {
909 match set.as_str() {
910 "aws" => Self::load_aws_patterns(&mut api_key_patterns)?,
911 "gcp" => Self::load_gcp_patterns(&mut api_key_patterns)?,
912 "azure" => Self::load_azure_patterns(&mut api_key_patterns)?,
913 "crypto" => Self::load_crypto_patterns(&mut secret_patterns)?,
914 _ => {}
915 }
916 }
917
918 Ok((
919 secret_patterns,
920 env_var_patterns,
921 api_key_patterns,
922 complex_patterns,
923 ))
924 }
925
926 fn load_default_patterns(
928 secret_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
929 _env_var_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
930 api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
931 complex_patterns: &mut Vec<(Regex, Arc<CompiledPattern>)>,
932 ) -> Result<(), SecurityError> {
933 api_key_patterns.push((
937 "sk-".to_string(),
938 Arc::new(CompiledPattern {
939 id: "openai-api-key".to_string(),
940 name: "OpenAI API Key".to_string(),
941 severity: SecuritySeverity::Critical,
942 category: SecurityCategory::SecretsExposure,
943 description: "OpenAI API key detected".to_string(),
944 remediation: vec![
945 "Remove API key from source code".to_string(),
946 "Use environment variables".to_string(),
947 ],
948 references: vec!["https://platform.openai.com/docs/api-reference".to_string()],
949 cwe_id: Some("CWE-798".to_string()),
950 confidence_boost_keywords: vec!["openai".to_string(), "gpt".to_string()],
951 false_positive_keywords: vec![
952 "sk-xxxxxxxx".to_string(),
953 "sk-...".to_string(),
954 "sk_test".to_string(),
955 "example".to_string(),
956 "placeholder".to_string(),
957 "your_".to_string(),
958 "TODO".to_string(),
959 "FIXME".to_string(),
960 "XXX".to_string(),
961 ],
962 }),
963 ));
964
965 complex_patterns.push((
967 Regex::new(r#"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token)\s*[:=]\s*['"]([a-zA-Z0-9+/=]{32,})['"]"#)
969 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
970 Arc::new(CompiledPattern {
971 id: "long-secret-value".to_string(),
972 name: "Hardcoded Secret Value".to_string(),
973 severity: SecuritySeverity::Critical,
974 category: SecurityCategory::SecretsExposure,
975 description: "Long secret value hardcoded in source code".to_string(),
976 remediation: vec![
977 "Use environment variables for secrets".to_string(),
978 "Implement proper secret management".to_string(),
979 ],
980 references: vec![],
981 cwe_id: Some("CWE-798".to_string()),
982 confidence_boost_keywords: vec!["bearer".to_string(), "auth".to_string()],
983 false_positive_keywords: vec![
984 "process.env".to_string(), "getenv".to_string(), "example".to_string(),
985 "placeholder".to_string(), "your_".to_string(), "TODO".to_string(),
986 "test".to_string(), "demo".to_string(), "fake".to_string(),
987 ],
988 }),
989 ));
990
991 complex_patterns.push((
993 Regex::new(r#"\beyJ[a-zA-Z0-9+/=]{100,}\b"#)
994 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
995 Arc::new(CompiledPattern {
996 id: "jwt-token".to_string(),
997 name: "JWT Token".to_string(),
998 severity: SecuritySeverity::High,
999 category: SecurityCategory::SecretsExposure,
1000 description: "JWT token detected in source code".to_string(),
1001 remediation: vec![
1002 "Never hardcode JWT tokens".to_string(),
1003 "Use secure token storage".to_string(),
1004 ],
1005 references: vec![],
1006 cwe_id: Some("CWE-798".to_string()),
1007 confidence_boost_keywords: vec!["bearer".to_string(), "authorization".to_string()],
1008 false_positive_keywords: vec!["example".to_string(), "demo".to_string()],
1009 }),
1010 ));
1011
1012 complex_patterns.push((
1014 Regex::new(r#"(?i)(?:postgres|postgresql|mysql|mongodb|redis|mariadb)://[^:\s]+:[^@\s]+@[^/\s]+/[^\s]*"#)
1015 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
1016 Arc::new(CompiledPattern {
1017 id: "database-url-with-creds".to_string(),
1018 name: "Database URL with Credentials".to_string(),
1019 severity: SecuritySeverity::Critical,
1020 category: SecurityCategory::SecretsExposure,
1021 description: "Database connection string with embedded credentials".to_string(),
1022 remediation: vec![
1023 "Use environment variables for database credentials".to_string(),
1024 "Use connection string without embedded passwords".to_string(),
1025 ],
1026 references: vec![],
1027 cwe_id: Some("CWE-798".to_string()),
1028 confidence_boost_keywords: vec!["connection".to_string(), "database".to_string()],
1029 false_positive_keywords: vec![
1030 "example.com".to_string(), "localhost".to_string(), "placeholder".to_string(),
1031 "your_".to_string(), "user:pass".to_string(),
1032 ],
1033 }),
1034 ));
1035
1036 secret_patterns.push((
1038 "-----BEGIN".to_string(),
1039 Arc::new(CompiledPattern {
1040 id: "private-key-header".to_string(),
1041 name: "Private Key".to_string(),
1042 severity: SecuritySeverity::Critical,
1043 category: SecurityCategory::SecretsExposure,
1044 description: "Private key detected".to_string(),
1045 remediation: vec![
1046 "Never commit private keys to version control".to_string(),
1047 "Use secure key storage solutions".to_string(),
1048 ],
1049 references: vec![],
1050 cwe_id: Some("CWE-321".to_string()),
1051 confidence_boost_keywords: vec![
1052 "PRIVATE".to_string(),
1053 "RSA".to_string(),
1054 "DSA".to_string(),
1055 ],
1056 false_positive_keywords: vec!["PUBLIC".to_string(), "CERTIFICATE".to_string()],
1057 }),
1058 ));
1059
1060 Ok(())
1061 }
1062
1063 fn load_aws_patterns(
1065 api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
1066 ) -> Result<(), SecurityError> {
1067 api_key_patterns.push((
1068 "AKIA".to_string(),
1069 Arc::new(CompiledPattern {
1070 id: "aws-access-key".to_string(),
1071 name: "AWS Access Key".to_string(),
1072 severity: SecuritySeverity::Critical,
1073 category: SecurityCategory::SecretsExposure,
1074 description: "AWS Access Key ID detected".to_string(),
1075 remediation: vec![
1076 "Remove AWS credentials from source code".to_string(),
1077 "Use IAM roles or environment variables".to_string(),
1078 "Rotate the exposed key immediately".to_string(),
1079 ],
1080 references: vec!["https://docs.aws.amazon.com/security/".to_string()],
1081 cwe_id: Some("CWE-798".to_string()),
1082 confidence_boost_keywords: vec![
1083 "aws".to_string(),
1084 "s3".to_string(),
1085 "ec2".to_string(),
1086 ],
1087 false_positive_keywords: vec!["AKIA00000000".to_string()],
1088 }),
1089 ));
1090
1091 Ok(())
1092 }
1093
1094 fn load_gcp_patterns(
1096 api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
1097 ) -> Result<(), SecurityError> {
1098 api_key_patterns.push((
1099 "AIza".to_string(),
1100 Arc::new(CompiledPattern {
1101 id: "gcp-api-key".to_string(),
1102 name: "Google Cloud API Key".to_string(),
1103 severity: SecuritySeverity::High,
1104 category: SecurityCategory::SecretsExposure,
1105 description: "Google Cloud API key detected".to_string(),
1106 remediation: vec![
1107 "Use service accounts instead of API keys".to_string(),
1108 "Restrict API key usage by IP/referrer".to_string(),
1109 ],
1110 references: vec!["https://cloud.google.com/security/".to_string()],
1111 cwe_id: Some("CWE-798".to_string()),
1112 confidence_boost_keywords: vec![
1113 "google".to_string(),
1114 "gcp".to_string(),
1115 "firebase".to_string(),
1116 ],
1117 false_positive_keywords: vec![],
1118 }),
1119 ));
1120
1121 Ok(())
1122 }
1123
1124 fn load_azure_patterns(
1126 _api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
1127 ) -> Result<(), SecurityError> {
1128 Ok(())
1130 }
1131
1132 fn load_crypto_patterns(
1134 secret_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
1135 ) -> Result<(), SecurityError> {
1136 secret_patterns.push((
1137 "-----BEGIN".to_string(),
1138 Arc::new(CompiledPattern {
1139 id: "private-key".to_string(),
1140 name: "Private Key".to_string(),
1141 severity: SecuritySeverity::Critical,
1142 category: SecurityCategory::SecretsExposure,
1143 description: "Private key detected".to_string(),
1144 remediation: vec![
1145 "Never commit private keys to version control".to_string(),
1146 "Use secure key storage solutions".to_string(),
1147 ],
1148 references: vec![],
1149 cwe_id: Some("CWE-321".to_string()),
1150 confidence_boost_keywords: vec!["RSA".to_string(), "PRIVATE".to_string()],
1151 false_positive_keywords: vec!["PUBLIC".to_string()],
1152 }),
1153 ));
1154
1155 Ok(())
1156 }
1157
1158 fn is_safe_dependency_metadata(
1160 &self,
1161 line: &str,
1162 file_meta: &super::file_discovery::FileMetadata,
1163 ) -> bool {
1164 let filename = file_meta
1165 .path
1166 .file_name()
1167 .and_then(|s| s.to_str())
1168 .unwrap_or("");
1169 let line_trimmed = line.trim();
1170
1171 match filename {
1172 "package.json" => {
1173 let safe_keys = [
1175 "\"name\"",
1176 "\"version\"",
1177 "\"description\"",
1178 "\"main\"",
1179 "\"module\"",
1180 "\"type\"",
1181 "\"private\"",
1182 "\"license\"",
1183 "\"author\"",
1184 "\"homepage\"",
1185 "\"repository\"",
1186 "\"bugs\"",
1187 "\"keywords\"",
1188 "\"workspaces\"",
1189 ];
1190 safe_keys.iter().any(|key| line_trimmed.starts_with(key))
1191 }
1192 "Cargo.toml" | "pyproject.toml" => {
1193 let safe_keys = [
1195 "name =",
1196 "version =",
1197 "description =",
1198 "edition =",
1199 "license =",
1200 "authors =",
1201 "homepage =",
1202 "repository =",
1203 "documentation =",
1204 "keywords =",
1205 ];
1206 safe_keys.iter().any(|key| line_trimmed.starts_with(key))
1207 }
1208 "go.mod" => line_trimmed.starts_with("module ") || line_trimmed.starts_with("go "),
1209 "pom.xml" => {
1210 let safe_tags = [
1212 "<groupId>",
1213 "<artifactId>",
1214 "<version>",
1215 "<name>",
1216 "<description>",
1217 "<url>",
1218 "<license>",
1219 ];
1220 safe_tags.iter().any(|tag| line_trimmed.contains(tag))
1221 }
1222 "build.gradle" | "build.gradle.kts" => {
1223 let safe_assignments = ["rootProject.name =", "group =", "version ="];
1224 safe_assignments.iter().any(|s| line_trimmed.starts_with(s))
1225 }
1226 _ => false,
1227 }
1228 }
1229
1230 fn is_env_var_interpolation(
1232 &self,
1233 line: &str,
1234 file_meta: &super::file_discovery::FileMetadata,
1235 ) -> bool {
1236 let filename = file_meta
1237 .path
1238 .file_name()
1239 .and_then(|s| s.to_str())
1240 .unwrap_or("");
1241
1242 if line.contains("\"$env\"") {
1244 return true;
1245 }
1246
1247 if line.contains('$') {
1249 if line.contains("${") && line.contains("}") {
1251 let is_config_file = matches!(
1252 filename,
1253 "docker-compose.yml"
1254 | "docker-compose.yaml"
1255 | "Dockerfile"
1256 | "Jenkinsfile"
1257 | "Makefile"
1258 ) || filename.ends_with(".env")
1259 || filename.ends_with(".sh")
1260 || filename.ends_with(".yml")
1261 || filename.ends_with(".yaml");
1262
1263 if is_config_file {
1264 return true;
1265 }
1266
1267 let line_lower = line.to_lowercase();
1269 let env_context_keywords =
1270 ["environment:", "command:", "entrypoint:", "value:", "args:"];
1271 if env_context_keywords
1272 .iter()
1273 .any(|kw| line_lower.contains(kw))
1274 {
1275 return true;
1276 }
1277 }
1278 }
1279
1280 false
1281 }
1282}
1283
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analyzer::security::turbo::file_discovery::{FileMetadata, PriorityHints};
    use std::path::PathBuf;
    use std::time::SystemTime;

    /// Builds throwaway file metadata for `path`; only the path-derived
    /// fields vary between tests.
    fn dummy_metadata(path: &str) -> FileMetadata {
        let path = PathBuf::from(path);
        let extension = path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("")
            .to_string();

        FileMetadata {
            path,
            size: 100,
            extension: Some(extension),
            is_gitignored: false,
            modified: SystemTime::now(),
            priority_hints: PriorityHints::default(),
        }
    }

    /// Engine built from the default configuration.
    fn default_engine() -> PatternEngine {
        PatternEngine::new(&TurboConfig::default()).unwrap()
    }

    #[test]
    fn test_pattern_engine_creation() {
        let built = PatternEngine::new(&TurboConfig::default());
        assert!(built.is_ok());

        let engine = built.unwrap();
        assert!(engine.pattern_count() > 0);
    }

    #[test]
    #[ignore]
    fn test_pattern_matching() {
        let engine = default_engine();
        let meta = dummy_metadata("test.js");

        let content = r#"
            const apiKey = "sk-1234567890abcdef1234567890abcdef12345678";
            password = "super_secret_password_that_is_long_enough";
            process.env.DATABASE_URL
        "#;

        let matches = engine.scan_content(content, false, &meta);
        assert!(!matches.is_empty());

        // At least one finding should come from the OpenAI or secret rules.
        let found_expected_rule = matches
            .iter()
            .any(|m| m.pattern.id.contains("openai") || m.pattern.id.contains("secret"));
        assert!(found_expected_rule);
    }

    #[test]
    fn test_template_literal_filtering() {
        let engine = default_engine();
        let meta = dummy_metadata("test.js");

        let template_content = r#"
            const getCode = () => {
                return `Authorization: "Bearer ${selectedApiKey?.apiKey}"`;
            }

            function generateExample() {
                return "Bearer " + apiKey;
            }
        "#;

        let matches = engine.scan_content(template_content, false, &meta);
        assert!(
            matches.len() <= 1,
            "Template literals should be filtered out"
        );
    }

    #[test]
    fn test_code_generation_context() {
        let engine = default_engine();
        let meta = dummy_metadata("APICodeDialog.jsx");

        let code_gen_content = r#"
            import { CopyBlock } from 'react-code-blocks';

            const APICodeDialog = () => {
                const getCodeWithAuthorization = () => {
                    return `
                        headers: {
                            Authorization: "Bearer ${selectedApiKey?.apiKey}",
                            "Content-Type": "application/json"
                        }
                    `;
                };

                return <CopyBlock text={getCodeWithAuthorization()} />;
            };
        "#;

        let matches = engine.scan_content(code_gen_content, false, &meta);
        let all_low_confidence = matches.iter().all(|m| m.confidence < 0.3);
        assert!(
            matches.is_empty() || all_low_confidence,
            "Code generation context should have very low confidence"
        );
    }

    #[test]
    fn test_quick_reject() {
        let engine = default_engine();
        let meta = dummy_metadata("main.rs");

        let safe_content = "fn main() { println!(\"Hello, world!\"); }";
        let matches = engine.scan_content(safe_content, true, &meta);
        assert!(matches.is_empty());
    }

    #[test]
    fn test_package_json_filtering() {
        let mut test_engine = default_engine();
        let meta = dummy_metadata("package.json");

        let content = r#"
            {
                "name": "my-cool-package-with-a-long-name-that-could-be-a-secret",
                "version": "1.0.0-beta.this.is.a.very.long.version.string.that.is.not.a.key",
                "description": "a string that is not a secret"
            }
        "#;

        // Add a deliberately greedy rule so that the safe-key filter is the
        // only thing keeping these lines from being reported.
        let greedy_rule = CompiledPattern {
            id: "generic-long-string".to_string(),
            name: "Generic Long String".to_string(),
            severity: SecuritySeverity::High,
            category: SecurityCategory::SecretsExposure,
            description: "A generic long string.".to_string(),
            remediation: vec![],
            references: vec![],
            cwe_id: None,
            confidence_boost_keywords: vec![],
            false_positive_keywords: vec![],
        };
        test_engine.complex_patterns.push((
            Regex::new(r#"[a-zA-Z0-9-]{20,}"#).unwrap(),
            Arc::new(greedy_rule),
        ));

        let matches = test_engine.scan_content(content, false, &meta);
        assert!(
            matches.is_empty(),
            "Should not find secrets in safe package.json keys"
        );
    }
}