1use std::sync::Arc;
6use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
7use regex::Regex;
8use ahash::AHashMap;
9use log::debug;
10
11use super::{TurboConfig, SecurityError};
12use crate::analyzer::security::{SecuritySeverity, SecurityCategory};
13
14#[derive(Debug, Clone)]
16pub struct CompiledPattern {
17 pub id: String,
18 pub name: String,
19 pub severity: SecuritySeverity,
20 pub category: SecurityCategory,
21 pub description: String,
22 pub remediation: Vec<String>,
23 pub references: Vec<String>,
24 pub cwe_id: Option<String>,
25 pub confidence_boost_keywords: Vec<String>,
26 pub false_positive_keywords: Vec<String>,
27}
28
29#[derive(Debug, Clone)]
31pub struct PatternMatch {
32 pub pattern: Arc<CompiledPattern>,
33 pub line_number: usize,
34 pub column_number: usize,
35 pub evidence: String,
36 pub confidence: f32,
37}
38
39pub struct PatternEngine {
41 secret_matcher: AhoCorasick,
43 env_var_matcher: AhoCorasick,
44 api_key_matcher: AhoCorasick,
45
46 secret_patterns: AHashMap<usize, Arc<CompiledPattern>>,
48 env_var_patterns: AHashMap<usize, Arc<CompiledPattern>>,
49 api_key_patterns: AHashMap<usize, Arc<CompiledPattern>>,
50
51 complex_patterns: Vec<(Regex, Arc<CompiledPattern>)>,
53
54 total_patterns: usize,
56}
57
58impl PatternEngine {
59 pub fn new(config: &TurboConfig) -> Result<Self, SecurityError> {
60 debug!("Initializing pattern engine with pattern sets: {:?}", config.pattern_sets);
61
62 let (secret_patterns, env_var_patterns, api_key_patterns, complex_patterns) =
64 Self::load_patterns(&config.pattern_sets)?;
65
66 let secret_matcher = Self::build_matcher(&secret_patterns)?;
68 let env_var_matcher = Self::build_matcher(&env_var_patterns)?;
69 let api_key_matcher = Self::build_matcher(&api_key_patterns)?;
70
71 let total_patterns = secret_patterns.len() + env_var_patterns.len() +
72 api_key_patterns.len() + complex_patterns.len();
73
74 debug!("Pattern engine initialized with {} total patterns", total_patterns);
75
76 Ok(Self {
77 secret_matcher,
78 env_var_matcher,
79 api_key_matcher,
80 secret_patterns: Self::create_pattern_map(secret_patterns),
81 env_var_patterns: Self::create_pattern_map(env_var_patterns),
82 api_key_patterns: Self::create_pattern_map(api_key_patterns),
83 complex_patterns,
84 total_patterns,
85 })
86 }
87
88 pub fn pattern_count(&self) -> usize {
90 self.total_patterns
91 }
92
93 pub fn scan_content(&self, content: &str, quick_reject: bool) -> Vec<PatternMatch> {
95 if quick_reject && !self.quick_contains_secrets(content) {
97 return Vec::new();
98 }
99
100 let mut matches = Vec::new();
101
102 let lines: Vec<&str> = content.lines().collect();
104 let mut line_offsets = vec![0];
105 let mut offset = 0;
106
107 for line in &lines {
108 offset += line.len() + 1; line_offsets.push(offset);
110 }
111
112 matches.extend(self.run_matcher(&self.secret_matcher, content, &self.secret_patterns, &lines, &line_offsets));
114 matches.extend(self.run_matcher(&self.env_var_matcher, content, &self.env_var_patterns, &lines, &line_offsets));
115 matches.extend(self.run_matcher(&self.api_key_matcher, content, &self.api_key_patterns, &lines, &line_offsets));
116
117 for (line_num, line) in lines.iter().enumerate() {
119 for (regex, pattern) in &self.complex_patterns {
120 if let Some(mat) = regex.find(line) {
121 let confidence = self.calculate_confidence(line, content, &pattern);
122
123 matches.push(PatternMatch {
124 pattern: Arc::clone(pattern),
125 line_number: line_num + 1,
126 column_number: mat.start() + 1,
127 evidence: self.extract_evidence(line, mat.start(), mat.end()),
128 confidence,
129 });
130 }
131 }
132 }
133
134 matches.retain(|m| {
136 let threshold = match m.pattern.id.as_str() {
137 id if id.contains("aws-access-key") => 0.4, id if id.contains("openai-api-key") => 0.4, id if id.contains("jwt-token") => 0.6, id if id.contains("database-url") => 0.5, id if id.contains("bearer-token") => 0.7, id if id.contains("generic") => 0.8, id if id.contains("long-secret-value") => 0.7, _ => 0.7, };
146 m.confidence > threshold
147 });
148
149 matches
150 }
151
152 fn quick_contains_secrets(&self, content: &str) -> bool {
154 if self.is_likely_false_positive_content(content) {
156 return false;
157 }
158
159 const QUICK_PATTERNS: &[&str] = &[
161 "api", "key", "secret", "token", "password", "credential",
162 "auth", "private", "-----BEGIN", "sk_", "pk_", "eyJ",
163 ];
164
165 let content_lower = content.to_lowercase();
166 QUICK_PATTERNS.iter().any(|&pattern| content_lower.contains(pattern))
167 }
168
169 fn is_likely_false_positive_content(&self, content: &str) -> bool {
171 let content_len = content.len();
172
173 if content_len < 10 {
175 return true;
176 }
177
178 if content.contains("data:image/") || content.contains("data:font/") {
180 return true;
181 }
182
183 let lines: Vec<&str> = content.lines().collect();
185 if lines.len() < 5 && lines.iter().any(|line| line.len() > 500 && line.matches(' ').count() < line.len() / 50) {
186 return true;
187 }
188
189 let base64_chars = content.chars().filter(|c| c.is_alphanumeric() || *c == '+' || *c == '/' || *c == '=').count();
191 let base64_ratio = base64_chars as f32 / content_len as f32;
192
193 if base64_ratio > 0.8 && !content.contains("eyJ") && content_len > 1000 {
195 return true;
196 }
197
198 if content.contains("<svg") || content.contains("xmlns=\"http://www.w3.org/2000/svg\"") {
200 return true;
201 }
202
203 if content.contains("@media") || content.contains("@import") ||
205 (content.contains("{") && content.contains("}") && content.contains(":")) {
206 return true;
207 }
208
209 false
210 }
211
212 fn run_matcher(
214 &self,
215 matcher: &AhoCorasick,
216 content: &str,
217 patterns: &AHashMap<usize, Arc<CompiledPattern>>,
218 lines: &[&str],
219 line_offsets: &[usize],
220 ) -> Vec<PatternMatch> {
221 let mut matches = Vec::new();
222
223 for mat in matcher.find_iter(content) {
224 let pattern_id = mat.pattern().as_usize();
225 if let Some(pattern) = patterns.get(&pattern_id) {
226 let (line_num, col_num) = self.offset_to_line_col(mat.start(), line_offsets);
228 let line = lines.get(line_num.saturating_sub(1)).unwrap_or(&"");
229
230 let confidence = self.calculate_confidence(line, content, pattern);
231
232 matches.push(PatternMatch {
233 pattern: Arc::clone(pattern),
234 line_number: line_num,
235 column_number: col_num,
236 evidence: self.extract_evidence(line, mat.start(), mat.end()),
237 confidence,
238 });
239 }
240 }
241
242 matches
243 }
244
245 fn offset_to_line_col(&self, offset: usize, line_offsets: &[usize]) -> (usize, usize) {
247 let line_num = line_offsets.binary_search(&offset)
248 .unwrap_or_else(|i| i.saturating_sub(1));
249
250 let line_start = line_offsets.get(line_num).copied().unwrap_or(0);
251 let col_num = offset - line_start + 1;
252
253 (line_num + 1, col_num)
254 }
255
256 fn calculate_confidence(&self, line: &str, content: &str, pattern: &CompiledPattern) -> f32 {
258 let mut confidence: f32 = 0.6;
259
260 let line_lower = line.to_lowercase();
261 let content_lower = content.to_lowercase();
262
263 if self.is_obvious_false_positive(line, content) {
265 return 0.0;
266 }
267
268 confidence = self.adjust_confidence_for_context(confidence, line, content, pattern);
270
271 confidence = self.adjust_confidence_for_pattern(confidence, line, content, pattern);
273
274 confidence.clamp(0.0, 1.0)
275 }
276
277 fn is_obvious_false_positive(&self, line: &str, content: &str) -> bool {
279 let line_lower = line.to_lowercase();
280
281 if line_lower.trim_start().starts_with("//") ||
283 line_lower.trim_start().starts_with("#") ||
284 line_lower.trim_start().starts_with("*") ||
285 line_lower.trim_start().starts_with("<!--") {
286 return true;
287 }
288
289 if line.contains("${") && line.contains("}") {
291 return true;
292 }
293
294 if line.contains("${selectedApiKey") || line.contains("${apiKey") ||
296 line.contains("${key") || line.contains("${token") {
297 return true;
298 }
299
300 if self.is_in_code_generation_context(content) && self.looks_like_template_code(line) {
302 return true;
303 }
304
305 let false_positive_patterns = [
307 "example", "placeholder", "your_", "todo", "fixme", "xxx",
308 "xxxxxxxx", "12345", "abcdef", "test", "demo", "sample",
309 "lorem", "ipsum", "change_me", "replace_me", "insert_",
310 "enter_your", "add_your", "put_your", "use_your",
311 "props.", "state.", "this.", "component",
313 ];
314
315 if false_positive_patterns.iter().any(|&pattern| line_lower.contains(pattern)) {
316 return true;
317 }
318
319 if line_lower.contains("@example") || line_lower.contains("@param") ||
321 line_lower.contains("interface") || line_lower.contains("type ") {
322 return true;
323 }
324
325 if line.contains("data:image/") || line.contains("data:font/") ||
327 line.contains("data:application/") {
328 return true;
329 }
330
331 if line.len() > 200 && line.matches(' ').count() < line.len() / 20 {
333 return true;
334 }
335
336 if line.contains("return `") || line.contains("const ") && line.contains(" = `") {
338 return true;
339 }
340
341 false
342 }
343
344 fn is_in_code_generation_context(&self, content: &str) -> bool {
346 let content_lower = content.to_lowercase();
347
348 let code_gen_patterns = [
350 "getcode", "generatecode", "codecomponent", "apicodedialog",
351 "const getcode", "function getcode", "const code", "function code",
352 "codesnippet", "codeexample", "template", "example code",
353 "code generator", "api example", "curl example",
354 "codeblock", "copyblock", "syntax highlight"
356 ];
357
358 code_gen_patterns.iter().any(|&pattern| content_lower.contains(pattern))
359 }
360
361 fn looks_like_template_code(&self, line: &str) -> bool {
363 if line.contains("return `") || line.contains("= `") {
365 return true;
366 }
367
368 if line.contains("API_URL") || line.contains("/api/v1/") || line.contains("/prediction/") {
370 return true;
371 }
372
373 if line.contains("requests.post") || line.contains("fetch(") ||
375 line.contains("curl ") || line.contains("import requests") {
376 return true;
377 }
378
379 if line.contains("Authorization:") || line.contains("Bearer ") {
381 return true;
382 }
383
384 false
385 }
386
387 fn adjust_confidence_for_context(&self, mut confidence: f32, line: &str, content: &str, _pattern: &CompiledPattern) -> f32 {
389 let line_lower = line.to_lowercase();
390 let content_lower = content.to_lowercase();
391
392 if line.contains("=") || line.contains(":") {
394 confidence += 0.2;
395 }
396
397 if line_lower.contains("export ") || line_lower.contains("process.env") {
399 confidence += 0.3;
400 }
401
402 if line_lower.contains("import") && (line_lower.contains("api") || line_lower.contains("key")) {
404 confidence += 0.1;
405 }
406
407 if content_lower.contains("package.json") || content_lower.contains("node_modules") {
409 confidence -= 0.2;
410 }
411
412 if content_lower.contains("/test/") || content_lower.contains("__test__") ||
414 content_lower.contains(".test.") || content_lower.contains(".spec.") {
415 confidence -= 0.3;
416 }
417
418 if content_lower.contains("readme") || content_lower.contains("documentation") ||
420 content_lower.contains("docs/") {
421 confidence -= 0.4;
422 }
423
424 confidence
425 }
426
427 fn adjust_confidence_for_pattern(&self, mut confidence: f32, line: &str, content: &str, pattern: &CompiledPattern) -> f32 {
429 let line_lower = line.to_lowercase();
430 let content_lower = content.to_lowercase();
431
432 if self.is_in_code_generation_context(content) {
434 confidence -= 0.6;
435 }
436
437 for keyword in &pattern.confidence_boost_keywords {
439 if content_lower.contains(&keyword.to_lowercase()) {
440 confidence += 0.1;
441 }
442 }
443
444 for keyword in &pattern.false_positive_keywords {
446 if line_lower.contains(&keyword.to_lowercase()) {
447 confidence -= 0.4;
448 }
449 }
450
451 match pattern.id.as_str() {
453 "jwt-token" => {
454 if !line.contains("eyJ") || line.split('.').count() != 3 {
456 confidence -= 0.3;
457 }
458 if line_lower.contains("example") || line_lower.contains("jwt") {
460 confidence -= 0.2;
461 }
462 if line.contains("${") {
464 confidence -= 0.8;
465 }
466 }
467 "openai-api-key" => {
468 if !line.contains("sk-") {
470 confidence -= 0.5;
471 }
472 if line_lower.contains("openai") || line_lower.contains("gpt") {
474 confidence += 0.2;
475 }
476 if line.contains("${") || line.contains("selectedApiKey") {
478 confidence -= 0.9;
479 }
480 }
481 "database-url-with-creds" => {
482 if !line.contains("://") || line.contains("example.com") {
484 confidence -= 0.4;
485 }
486 if line.contains("${") {
488 confidence -= 0.7;
489 }
490 }
491 "long-secret-value" | "generic-api-key" => {
492 if line.contains("${") || line.contains("selectedApiKey") ||
494 line.contains("apiKey") && line.contains("?") {
495 confidence -= 0.8;
496 }
497 if line.contains("Bearer ") && line.contains("${") {
499 confidence -= 0.9;
500 }
501 }
502 _ => {
503 if line.contains("${") {
505 confidence -= 0.6;
506 }
507 }
508 }
509
510 if content_lower.contains("react") || content_lower.contains("jsx") ||
512 content_lower.contains("component") {
513 if line.contains("${") || line.contains("props.") || line.contains("state.") {
514 confidence -= 0.5;
515 }
516 }
517
518 confidence
519 }
520
521 fn extract_evidence(&self, line: &str, start: usize, end: usize) -> String {
523 let prefix = &line[..start.min(line.len())];
525 let suffix = &line[end.min(line.len())..];
526 let masked = "*".repeat((end - start).min(20));
527
528 format!("{}{}{}", prefix, masked, suffix).trim().to_string()
529 }
530
531 fn build_matcher(patterns: &[(String, Arc<CompiledPattern>)]) -> Result<AhoCorasick, SecurityError> {
533 let strings: Vec<&str> = patterns.iter().map(|(s, _)| s.as_str()).collect();
534
535 let matcher = AhoCorasickBuilder::new()
536 .match_kind(MatchKind::LeftmostFirst)
537 .ascii_case_insensitive(true)
538 .build(&strings)
539 .map_err(|e| SecurityError::PatternEngine(format!("Failed to build matcher: {}", e)))?;
540
541 Ok(matcher)
542 }
543
544 fn create_pattern_map(patterns: Vec<(String, Arc<CompiledPattern>)>) -> AHashMap<usize, Arc<CompiledPattern>> {
546 patterns.into_iter()
547 .enumerate()
548 .map(|(id, (_, pattern))| (id, pattern))
549 .collect()
550 }
551
552 fn load_patterns(pattern_sets: &[String]) -> Result<(
554 Vec<(String, Arc<CompiledPattern>)>,
555 Vec<(String, Arc<CompiledPattern>)>,
556 Vec<(String, Arc<CompiledPattern>)>,
557 Vec<(Regex, Arc<CompiledPattern>)>,
558 ), SecurityError> {
559 let mut secret_patterns = Vec::new();
560 let mut env_var_patterns = Vec::new();
561 let mut api_key_patterns = Vec::new();
562 let mut complex_patterns = Vec::new();
563
564 if pattern_sets.contains(&"default".to_string()) {
566 Self::load_default_patterns(&mut secret_patterns, &mut env_var_patterns,
567 &mut api_key_patterns, &mut complex_patterns)?;
568 }
569
570 for set in pattern_sets {
572 match set.as_str() {
573 "aws" => Self::load_aws_patterns(&mut api_key_patterns)?,
574 "gcp" => Self::load_gcp_patterns(&mut api_key_patterns)?,
575 "azure" => Self::load_azure_patterns(&mut api_key_patterns)?,
576 "crypto" => Self::load_crypto_patterns(&mut secret_patterns)?,
577 _ => {}
578 }
579 }
580
581 Ok((secret_patterns, env_var_patterns, api_key_patterns, complex_patterns))
582 }
583
584 fn load_default_patterns(
586 secret_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
587 env_var_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
588 api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
589 complex_patterns: &mut Vec<(Regex, Arc<CompiledPattern>)>,
590 ) -> Result<(), SecurityError> {
591 api_key_patterns.push((
595 "sk-".to_string(),
596 Arc::new(CompiledPattern {
597 id: "openai-api-key".to_string(),
598 name: "OpenAI API Key".to_string(),
599 severity: SecuritySeverity::Critical,
600 category: SecurityCategory::SecretsExposure,
601 description: "OpenAI API key detected".to_string(),
602 remediation: vec![
603 "Remove API key from source code".to_string(),
604 "Use environment variables".to_string(),
605 ],
606 references: vec!["https://platform.openai.com/docs/api-reference".to_string()],
607 cwe_id: Some("CWE-798".to_string()),
608 confidence_boost_keywords: vec!["openai".to_string(), "gpt".to_string()],
609 false_positive_keywords: vec![
610 "sk-xxxxxxxx".to_string(), "sk-...".to_string(), "sk_test".to_string(),
611 "example".to_string(), "placeholder".to_string(), "your_".to_string(),
612 "TODO".to_string(), "FIXME".to_string(), "XXX".to_string(),
613 ],
614 }),
615 ));
616
617 complex_patterns.push((
619 Regex::new(r#"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token)\s*[:=]\s*['"]([a-zA-Z0-9+/=]{32,})['"]"#)
621 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
622 Arc::new(CompiledPattern {
623 id: "long-secret-value".to_string(),
624 name: "Hardcoded Secret Value".to_string(),
625 severity: SecuritySeverity::Critical,
626 category: SecurityCategory::SecretsExposure,
627 description: "Long secret value hardcoded in source code".to_string(),
628 remediation: vec![
629 "Use environment variables for secrets".to_string(),
630 "Implement proper secret management".to_string(),
631 ],
632 references: vec![],
633 cwe_id: Some("CWE-798".to_string()),
634 confidence_boost_keywords: vec!["bearer".to_string(), "auth".to_string()],
635 false_positive_keywords: vec![
636 "process.env".to_string(), "getenv".to_string(), "example".to_string(),
637 "placeholder".to_string(), "your_".to_string(), "TODO".to_string(),
638 "test".to_string(), "demo".to_string(), "fake".to_string(),
639 ],
640 }),
641 ));
642
643 complex_patterns.push((
645 Regex::new(r#"\beyJ[a-zA-Z0-9+/=]{100,}\b"#)
646 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
647 Arc::new(CompiledPattern {
648 id: "jwt-token".to_string(),
649 name: "JWT Token".to_string(),
650 severity: SecuritySeverity::High,
651 category: SecurityCategory::SecretsExposure,
652 description: "JWT token detected in source code".to_string(),
653 remediation: vec![
654 "Never hardcode JWT tokens".to_string(),
655 "Use secure token storage".to_string(),
656 ],
657 references: vec![],
658 cwe_id: Some("CWE-798".to_string()),
659 confidence_boost_keywords: vec!["bearer".to_string(), "authorization".to_string()],
660 false_positive_keywords: vec!["example".to_string(), "demo".to_string()],
661 }),
662 ));
663
664 complex_patterns.push((
666 Regex::new(r#"(?i)(?:postgres|mysql|mongodb)://[^:\s]+:[^@\s]+@[^/\s]+/[^\s]*"#)
667 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
668 Arc::new(CompiledPattern {
669 id: "database-url-with-creds".to_string(),
670 name: "Database URL with Credentials".to_string(),
671 severity: SecuritySeverity::Critical,
672 category: SecurityCategory::SecretsExposure,
673 description: "Database connection string with embedded credentials".to_string(),
674 remediation: vec![
675 "Use environment variables for database credentials".to_string(),
676 "Use connection string without embedded passwords".to_string(),
677 ],
678 references: vec![],
679 cwe_id: Some("CWE-798".to_string()),
680 confidence_boost_keywords: vec!["connection".to_string(), "database".to_string()],
681 false_positive_keywords: vec![
682 "example.com".to_string(), "localhost".to_string(), "placeholder".to_string(),
683 "your_".to_string(), "user:pass".to_string(),
684 ],
685 }),
686 ));
687
688 secret_patterns.push((
690 "-----BEGIN".to_string(),
691 Arc::new(CompiledPattern {
692 id: "private-key-header".to_string(),
693 name: "Private Key".to_string(),
694 severity: SecuritySeverity::Critical,
695 category: SecurityCategory::SecretsExposure,
696 description: "Private key detected".to_string(),
697 remediation: vec![
698 "Never commit private keys to version control".to_string(),
699 "Use secure key storage solutions".to_string(),
700 ],
701 references: vec![],
702 cwe_id: Some("CWE-321".to_string()),
703 confidence_boost_keywords: vec!["PRIVATE".to_string(), "RSA".to_string(), "DSA".to_string()],
704 false_positive_keywords: vec!["PUBLIC".to_string(), "CERTIFICATE".to_string()],
705 }),
706 ));
707
708 Ok(())
709 }
710
711 fn load_aws_patterns(api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
713 api_key_patterns.push((
714 "AKIA".to_string(),
715 Arc::new(CompiledPattern {
716 id: "aws-access-key".to_string(),
717 name: "AWS Access Key".to_string(),
718 severity: SecuritySeverity::Critical,
719 category: SecurityCategory::SecretsExposure,
720 description: "AWS Access Key ID detected".to_string(),
721 remediation: vec![
722 "Remove AWS credentials from source code".to_string(),
723 "Use IAM roles or environment variables".to_string(),
724 "Rotate the exposed key immediately".to_string(),
725 ],
726 references: vec!["https://docs.aws.amazon.com/security/".to_string()],
727 cwe_id: Some("CWE-798".to_string()),
728 confidence_boost_keywords: vec!["aws".to_string(), "s3".to_string(), "ec2".to_string()],
729 false_positive_keywords: vec!["AKIA00000000".to_string()],
730 }),
731 ));
732
733 Ok(())
734 }
735
736 fn load_gcp_patterns(api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
738 api_key_patterns.push((
739 "AIza".to_string(),
740 Arc::new(CompiledPattern {
741 id: "gcp-api-key".to_string(),
742 name: "Google Cloud API Key".to_string(),
743 severity: SecuritySeverity::High,
744 category: SecurityCategory::SecretsExposure,
745 description: "Google Cloud API key detected".to_string(),
746 remediation: vec![
747 "Use service accounts instead of API keys".to_string(),
748 "Restrict API key usage by IP/referrer".to_string(),
749 ],
750 references: vec!["https://cloud.google.com/security/".to_string()],
751 cwe_id: Some("CWE-798".to_string()),
752 confidence_boost_keywords: vec!["google".to_string(), "gcp".to_string(), "firebase".to_string()],
753 false_positive_keywords: vec![],
754 }),
755 ));
756
757 Ok(())
758 }
759
760 fn load_azure_patterns(_api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
762 Ok(())
764 }
765
766 fn load_crypto_patterns(secret_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
768 secret_patterns.push((
769 "-----BEGIN".to_string(),
770 Arc::new(CompiledPattern {
771 id: "private-key".to_string(),
772 name: "Private Key".to_string(),
773 severity: SecuritySeverity::Critical,
774 category: SecurityCategory::SecretsExposure,
775 description: "Private key detected".to_string(),
776 remediation: vec![
777 "Never commit private keys to version control".to_string(),
778 "Use secure key storage solutions".to_string(),
779 ],
780 references: vec![],
781 cwe_id: Some("CWE-321".to_string()),
782 confidence_boost_keywords: vec!["RSA".to_string(), "PRIVATE".to_string()],
783 false_positive_keywords: vec!["PUBLIC".to_string()],
784 }),
785 ));
786
787 Ok(())
788 }
789}
790
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an engine from the default configuration, panicking on failure.
    fn default_engine() -> PatternEngine {
        let config = TurboConfig::default();
        PatternEngine::new(&config).unwrap()
    }

    /// The default configuration yields a usable engine with at least one rule.
    #[test]
    fn test_pattern_engine_creation() {
        let config = TurboConfig::default();
        let built = PatternEngine::new(&config);
        assert!(built.is_ok());

        let engine = built.unwrap();
        assert!(engine.pattern_count() > 0);
    }

    /// Hardcoded credentials are reported by at least one secret-related rule.
    #[test]
    fn test_pattern_matching() {
        let engine = default_engine();

        let content = r#"
            const apiKey = "sk-1234567890abcdef1234567890abcdef12345678";
            password = "super_secret_password_that_is_long_enough";
            process.env.DATABASE_URL
        "#;

        let matches = engine.scan_content(content, false);
        assert!(!matches.is_empty());

        let has_expected_rule = matches
            .iter()
            .any(|m| m.pattern.id.contains("openai") || m.pattern.id.contains("secret"));
        assert!(has_expected_rule);
    }

    /// Template literals that interpolate a key variable are filtered out.
    #[test]
    fn test_template_literal_filtering() {
        let engine = default_engine();

        let template_content = r#"
            const getCode = () => {
                return `Authorization: "Bearer ${selectedApiKey?.apiKey}"`;
            }

            function generateExample() {
                return "Bearer " + apiKey;
            }
        "#;

        let matches = engine.scan_content(template_content, false);
        assert!(matches.len() <= 1, "Template literals should be filtered out");
    }

    /// Findings inside code-generation components carry very low confidence.
    #[test]
    fn test_code_generation_context() {
        let engine = default_engine();

        let code_gen_content = r#"
            import { CopyBlock } from 'react-code-blocks';

            const APICodeDialog = () => {
                const getCodeWithAuthorization = () => {
                    return `
                        headers: {
                            Authorization: "Bearer ${selectedApiKey?.apiKey}",
                            "Content-Type": "application/json"
                        }
                    `;
                };

                return <CopyBlock text={getCodeWithAuthorization()} />;
            };
        "#;

        let matches = engine.scan_content(code_gen_content, false);
        let all_low_confidence = matches.iter().all(|m| m.confidence < 0.3);
        assert!(matches.is_empty() || all_low_confidence,
            "Code generation context should have very low confidence");
    }

    /// Content without secret indicators yields no findings under quick reject.
    #[test]
    fn test_quick_reject() {
        let engine = default_engine();

        let safe_content = "fn main() { println!(\"Hello, world!\"); }";
        let matches = engine.scan_content(safe_content, true);
        assert!(matches.is_empty());
    }
}