1use once_cell::sync::Lazy;
4use regex::Regex;
5use std::collections::HashSet;
6
7static RE_EXAMPLE_WORD: Lazy<Regex> = Lazy::new(|| {
9 Regex::new(r"(?i)(?:^|[^a-zA-Z0-9.])example(?:[^a-zA-Z0-9.]|$)")
20 .expect("RE_EXAMPLE_WORD: invalid regex pattern")
21});
22
23static RE_AWS_KEY: Lazy<Regex> =
25 Lazy::new(|| Regex::new(r"AKIA[0-9A-Z]{16}").expect("RE_AWS_KEY: invalid regex pattern"));
26static RE_AWS_SECRET: Lazy<Regex> = Lazy::new(|| {
27 Regex::new(r#"(?i)aws[_-]?secret[_-]?access[_-]?key['"]?\s*[:=]\s*['"]?([A-Za-z0-9/+=]{40})"#)
28 .expect("RE_AWS_SECRET: invalid regex pattern")
29});
30static RE_GITHUB_PAT: Lazy<Regex> =
32 Lazy::new(|| Regex::new(r"ghp_[A-Za-z0-9]{36}").expect("RE_GITHUB_PAT: invalid regex pattern"));
33static RE_GITHUB_FINE_PAT: Lazy<Regex> = Lazy::new(|| {
35 Regex::new(r"github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}")
36 .expect("RE_GITHUB_FINE_PAT: invalid regex pattern")
37});
38static RE_GITHUB_OTHER_TOKENS: Lazy<Regex> = Lazy::new(|| {
40 Regex::new(r"gh[ours]_[A-Za-z0-9]{36,}").expect("RE_GITHUB_OTHER_TOKENS: invalid regex pattern")
41});
42static RE_PRIVATE_KEY: Lazy<Regex> = Lazy::new(|| {
43 Regex::new(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----")
44 .expect("RE_PRIVATE_KEY: invalid regex pattern")
45});
46static RE_API_KEY: Lazy<Regex> = Lazy::new(|| {
47 Regex::new(r#"(?i)(?:api[_-]?key|apikey)['"]?\s*[:=]\s*['"]?([A-Za-z0-9_-]{20,})"#)
48 .expect("RE_API_KEY: invalid regex pattern")
49});
50static RE_SECRET_TOKEN: Lazy<Regex> = Lazy::new(|| {
51 Regex::new(r#"(?i)(?:secret|token)['"]?\s*[:=]\s*['"]?([A-Za-z0-9_-]{20,})"#)
52 .expect("RE_SECRET_TOKEN: invalid regex pattern")
53});
54static RE_PASSWORD: Lazy<Regex> = Lazy::new(|| {
55 Regex::new(r#"(?i)password['"]?\s*[:=]\s*['"]?([^'"\s]{8,})"#)
56 .expect("RE_PASSWORD: invalid regex pattern")
57});
58static RE_CONN_STRING: Lazy<Regex> = Lazy::new(|| {
59 Regex::new(
61 r#"(?i)(?:mongodb|postgres(?:ql)?|mysql|redis|mariadb|cockroachdb|mssql)://[^\s'"]+"#,
62 )
63 .expect("RE_CONN_STRING: invalid regex pattern")
64});
65static RE_JWT: Lazy<Regex> = Lazy::new(|| {
66 Regex::new(r"eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*")
67 .expect("RE_JWT: invalid regex pattern")
68});
69static RE_SLACK: Lazy<Regex> = Lazy::new(|| {
70 Regex::new(r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}")
71 .expect("RE_SLACK: invalid regex pattern")
72});
73static RE_STRIPE: Lazy<Regex> = Lazy::new(|| {
74 Regex::new(r"(?:sk|pk)_(?:test|live)_[A-Za-z0-9]{24,}")
75 .expect("RE_STRIPE: invalid regex pattern")
76});
77static RE_OPENAI: Lazy<Regex> =
79 Lazy::new(|| Regex::new(r"sk-[A-Za-z0-9]{32,}").expect("RE_OPENAI: invalid regex pattern"));
80static RE_ANTHROPIC: Lazy<Regex> = Lazy::new(|| {
82 Regex::new(r"sk-ant-[A-Za-z0-9-]{40,}").expect("RE_ANTHROPIC: invalid regex pattern")
83});
84
85#[derive(Debug, Clone)]
87pub struct SecretFinding {
88 pub kind: SecretKind,
90 pub file: String,
92 pub line: u32,
94 pub pattern: String,
96 pub severity: Severity,
98 pub in_comment: bool,
100}
101
/// Category of a detected secret.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SecretKind {
    /// API key (provider-specific or a generic `api_key = ...` assignment).
    ApiKey,
    /// Access token such as a Slack token or a JWT.
    AccessToken,
    /// PEM private-key block.
    PrivateKey,
    /// Password assignment.
    Password,
    /// Database connection URL.
    ConnectionString,
    /// AWS access key ID or secret access key.
    AwsCredential,
    /// GitHub token.
    GitHubToken,
    /// Anything else, including matches of user-supplied patterns.
    Generic,
}

impl SecretKind {
    /// Returns the human-readable label for this kind.
    pub fn name(&self) -> &'static str {
        match *self {
            SecretKind::ApiKey => "API Key",
            SecretKind::AccessToken => "Access Token",
            SecretKind::PrivateKey => "Private Key",
            SecretKind::Password => "Password",
            SecretKind::ConnectionString => "Connection String",
            SecretKind::AwsCredential => "AWS Credential",
            SecretKind::GitHubToken => "GitHub Token",
            SecretKind::Generic => "Generic Secret",
        }
    }
}
138
/// How serious a finding is.
///
/// Variants are declared from least to most severe; the derived `Ord` relies
/// on that order, so comparisons such as `severity >= Severity::High` work.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Least severe level.
    Low,
    /// Above `Low`, below `High`.
    Medium,
    /// Above `Medium`, below `Critical`.
    High,
    /// Most severe level.
    Critical,
}
147
148pub struct SecurityScanner {
150 patterns: Vec<SecretPattern>,
151 custom_patterns: Vec<CustomSecretPattern>,
152 allowlist: HashSet<String>,
153}
154
155struct SecretPattern {
156 kind: SecretKind,
157 regex: &'static Lazy<Regex>,
158 severity: Severity,
159}
160
161struct CustomSecretPattern {
163 regex: Regex,
164 severity: Severity,
165}
166
167impl Default for SecurityScanner {
168 fn default() -> Self {
169 Self::new()
170 }
171}
172
173impl SecurityScanner {
174 pub fn new() -> Self {
181 let patterns = vec![
182 SecretPattern {
185 kind: SecretKind::AwsCredential,
186 regex: &RE_AWS_KEY,
187 severity: Severity::Critical,
188 },
189 SecretPattern {
190 kind: SecretKind::AwsCredential,
191 regex: &RE_AWS_SECRET,
192 severity: Severity::Critical,
193 },
194 SecretPattern {
196 kind: SecretKind::GitHubToken,
197 regex: &RE_GITHUB_PAT,
198 severity: Severity::Critical,
199 },
200 SecretPattern {
201 kind: SecretKind::GitHubToken,
202 regex: &RE_GITHUB_FINE_PAT,
203 severity: Severity::Critical,
204 },
205 SecretPattern {
206 kind: SecretKind::GitHubToken,
207 regex: &RE_GITHUB_OTHER_TOKENS,
208 severity: Severity::Critical,
209 },
210 SecretPattern {
212 kind: SecretKind::PrivateKey,
213 regex: &RE_PRIVATE_KEY,
214 severity: Severity::Critical,
215 },
216 SecretPattern {
218 kind: SecretKind::ApiKey,
219 regex: &RE_ANTHROPIC,
220 severity: Severity::Critical,
221 },
222 SecretPattern {
224 kind: SecretKind::ApiKey,
225 regex: &RE_OPENAI,
226 severity: Severity::Critical,
227 },
228 SecretPattern {
230 kind: SecretKind::ApiKey,
231 regex: &RE_STRIPE,
232 severity: Severity::Critical,
233 },
234 SecretPattern {
237 kind: SecretKind::AccessToken,
238 regex: &RE_SLACK,
239 severity: Severity::High,
240 },
241 SecretPattern {
243 kind: SecretKind::AccessToken,
244 regex: &RE_JWT,
245 severity: Severity::High,
246 },
247 SecretPattern {
249 kind: SecretKind::ConnectionString,
250 regex: &RE_CONN_STRING,
251 severity: Severity::High,
252 },
253 SecretPattern {
256 kind: SecretKind::ApiKey,
257 regex: &RE_API_KEY,
258 severity: Severity::High,
259 },
260 SecretPattern {
262 kind: SecretKind::Generic,
263 regex: &RE_SECRET_TOKEN,
264 severity: Severity::High,
265 },
266 SecretPattern {
268 kind: SecretKind::Password,
269 regex: &RE_PASSWORD,
270 severity: Severity::High,
271 },
272 ];
273
274 Self { patterns, custom_patterns: Vec::new(), allowlist: HashSet::new() }
275 }
276
277 pub fn allowlist(&mut self, pattern: &str) {
279 self.allowlist.insert(pattern.to_owned());
280 }
281
282 pub fn add_custom_pattern(&mut self, pattern: &str) {
295 if let Ok(regex) = Regex::new(pattern) {
296 self.custom_patterns
297 .push(CustomSecretPattern { regex, severity: Severity::High });
298 }
299 }
300
301 pub fn add_custom_patterns(&mut self, patterns: &[String]) {
303 for pattern in patterns {
304 self.add_custom_pattern(pattern);
305 }
306 }
307
308 pub fn scan(&self, content: &str, file_path: &str) -> Vec<SecretFinding> {
310 let mut findings = Vec::new();
311
312 for (line_num, line) in content.lines().enumerate() {
313 let trimmed = line.trim();
314
315 let is_jsdoc_continuation =
318 trimmed.starts_with("* ") && !trimmed.contains('=') && !trimmed.contains(':');
319 let is_comment = trimmed.starts_with("//")
320 || trimmed.starts_with('#')
321 || trimmed.starts_with("/*")
322 || trimmed.starts_with("*")
323 || is_jsdoc_continuation;
324
325 let is_obvious_false_positive = is_comment
327 || RE_EXAMPLE_WORD.is_match(trimmed)
328 || trimmed.to_lowercase().contains("placeholder")
329 || trimmed.contains("xxxxx");
330
331 if is_obvious_false_positive {
332 continue;
333 }
334
335 for pattern in &self.patterns {
336 for m in pattern.regex.find_iter(line) {
338 let matched = m.as_str();
339
340 if self.allowlist.iter().any(|a| matched.contains(a)) {
342 continue;
343 }
344
345 findings.push(SecretFinding {
346 kind: pattern.kind,
347 file: file_path.to_owned(),
348 line: (line_num + 1) as u32,
349 pattern: redact(matched),
350 severity: pattern.severity,
351 in_comment: false, });
353 }
354 }
355
356 for custom in &self.custom_patterns {
358 for m in custom.regex.find_iter(line) {
359 let matched = m.as_str();
360
361 if self.allowlist.iter().any(|a| matched.contains(a)) {
363 continue;
364 }
365
366 findings.push(SecretFinding {
367 kind: SecretKind::Generic,
368 file: file_path.to_owned(),
369 line: (line_num + 1) as u32,
370 pattern: redact(matched),
371 severity: custom.severity,
372 in_comment: false,
373 });
374 }
375 }
376 }
377
378 findings
379 }
380
381 pub fn is_safe(&self, content: &str, file_path: &str) -> bool {
383 let findings = self.scan(content, file_path);
384 findings.iter().all(|f| f.severity < Severity::High)
385 }
386
387 pub fn summarize(findings: &[SecretFinding]) -> String {
389 if findings.is_empty() {
390 return "No secrets detected".to_owned();
391 }
392
393 let critical = findings
394 .iter()
395 .filter(|f| f.severity == Severity::Critical)
396 .count();
397 let high = findings
398 .iter()
399 .filter(|f| f.severity == Severity::High)
400 .count();
401
402 format!(
403 "Found {} potential secrets ({} critical, {} high severity)",
404 findings.len(),
405 critical,
406 high
407 )
408 }
409
410 pub fn redact_content(&self, content: &str, _file_path: &str) -> String {
413 let mut result = content.to_owned();
414
415 for (line_num, line) in content.lines().enumerate() {
416 let trimmed = line.trim();
417
418 let is_obvious_false_positive = RE_EXAMPLE_WORD.is_match(trimmed)
420 || trimmed.to_lowercase().contains("placeholder")
421 || trimmed.contains("xxxxx");
422
423 if is_obvious_false_positive {
424 continue;
425 }
426
427 for pattern in &self.patterns {
428 for m in pattern.regex.find_iter(line) {
430 let matched = m.as_str();
431
432 if self.allowlist.iter().any(|a| matched.contains(a)) {
434 continue;
435 }
436
437 if pattern.severity >= Severity::High {
439 let redacted = redact(matched);
440 let line_start = result
442 .lines()
443 .take(line_num)
444 .map(|l| l.len() + 1)
445 .sum::<usize>();
446 if let Some(pos) = result[line_start..].find(matched) {
447 let abs_pos = line_start + pos;
448 result.replace_range(abs_pos..abs_pos + matched.len(), &redacted);
449 }
450 }
451 }
452 }
453
454 for custom in &self.custom_patterns {
456 for m in custom.regex.find_iter(line) {
457 let matched = m.as_str();
458
459 if self.allowlist.iter().any(|a| matched.contains(a)) {
461 continue;
462 }
463
464 if custom.severity >= Severity::High {
466 let redacted = redact(matched);
467 let line_start = result
468 .lines()
469 .take(line_num)
470 .map(|l| l.len() + 1)
471 .sum::<usize>();
472 if let Some(pos) = result[line_start..].find(matched) {
473 let abs_pos = line_start + pos;
474 result.replace_range(abs_pos..abs_pos + matched.len(), &redacted);
475 }
476 }
477 }
478 }
479 }
480
481 result
482 }
483
484 #[must_use = "security findings should be reviewed"]
495 pub fn scan_and_redact(&self, content: &str, file_path: &str) -> (String, Vec<SecretFinding>) {
496 let findings = self.scan(content, file_path);
497 let redacted = self.redact_content(content, file_path);
498 (redacted, findings)
499 }
500}
501
/// Masks `s` for display, counting in characters rather than bytes.
///
/// Strings of 8 characters or fewer are replaced entirely by `*`. Longer
/// strings keep `min(4, len / 4)` characters at each end and replace
/// everything in between with `*`, so the character count is preserved.
fn redact(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let total = chars.len();

    if total <= 8 {
        return "*".repeat(total);
    }

    // Same number of characters is kept on both sides. `keep <= total / 4`,
    // so `total - 2 * keep` cannot underflow.
    let keep = (total / 4).min(4);

    let mut masked = String::with_capacity(s.len());
    masked.extend(&chars[..keep]);
    masked.extend(std::iter::repeat('*').take(total - 2 * keep));
    masked.extend(&chars[total - keep..]);
    masked
}
526
#[cfg(test)]
mod tests {
    use super::*;

    /// A line whose only secret is an AWS-style access key ID.
    const AWS_KEY_LINE: &str = r#"id = "AKIAIOSFODNN7ABCDEFG""#;
    /// `AWS_KEY_LINE` after redaction.
    const AWS_KEY_LINE_REDACTED: &str = r#"id = "AKIA************DEFG""#;

    #[test]
    fn test_aws_key_detection() {
        let scanner = SecurityScanner::new();
        let content = r#"AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE""#;

        let findings = scanner.scan(content, "config.py");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::AwsCredential));
    }

    #[test]
    fn test_github_token_detection() {
        let scanner = SecurityScanner::new();
        let content = r#"GITHUB_TOKEN = "ghp_abcdefghijklmnopqrstuvwxyz1234567890""#;

        let findings = scanner.scan(content, ".env");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::GitHubToken));
    }

    #[test]
    fn test_private_key_detection() {
        let scanner = SecurityScanner::new();
        let content = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpA...";

        let findings = scanner.scan(content, "key.pem");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::PrivateKey));
    }

    #[test]
    fn test_allowlist() {
        let mut scanner = SecurityScanner::new();
        scanner.allowlist("EXAMPLE");

        let content = r#"api_key = "AKIAIOSFODNN7EXAMPLE""#;
        let findings = scanner.scan(content, "test.py");

        assert!(findings.is_empty());
    }

    #[test]
    fn test_redact() {
        assert_eq!(redact("AKIAIOSFODNN7EXAMPLE"), "AKIA************MPLE");
        assert_eq!(redact("short"), "*****");
    }

    #[test]
    fn test_redact_unicode_safety() {
        // Redaction counts characters, not bytes: multi-byte input must not
        // panic, must be masked, and must keep its character count.
        for secret in ["密钥ABCDEFGHIJKLMNOP密钥", "🔑ABCDEFGHIJKLMNOP🔒", "абвгдежзийклмноп"] {
            let result = redact(secret);
            assert!(result.contains('*'), "{secret:?} should be masked");
            assert_eq!(result.chars().count(), secret.chars().count());
        }

        // Short multi-byte input is masked completely.
        assert_eq!(redact("密钥"), "**");
    }

    #[test]
    fn test_redact_edge_cases() {
        assert_eq!(redact(""), "");
        assert_eq!(redact("x"), "*");

        // Eight characters is the last length that is masked completely.
        assert_eq!(redact("12345678"), "********");

        // Nine characters keep 9 / 4 = 2 characters on each side.
        assert_eq!(redact("123456789"), "12*****89");
    }

    #[test]
    fn test_comments_are_skipped() {
        let scanner = SecurityScanner::new();
        let content = "# api_key = 'some_secret_key_12345678901234567890'";

        let findings = scanner.scan(content, "test.py");

        assert!(findings.is_empty(), "Secrets in comments should be skipped");
    }

    #[test]
    fn test_non_comment_detected() {
        let scanner = SecurityScanner::new();
        let content = "api_key = 'some_secret_key_12345678901234567890'";

        let findings = scanner.scan(content, "test.py");

        assert!(!findings.is_empty(), "Secrets in non-comments should be detected");
        assert!(
            findings.iter().all(|f| !f.in_comment),
            "in_comment should be false for non-comment lines"
        );
    }

    #[test]
    fn test_custom_pattern() {
        let mut scanner = SecurityScanner::new();
        scanner.add_custom_pattern(r"CUSTOM_SECRET_[A-Z0-9]{16}");

        let content = "my_secret = CUSTOM_SECRET_ABCD1234EFGH5678";
        let findings = scanner.scan(content, "test.py");

        assert!(!findings.is_empty(), "Custom pattern should be detected");
        assert!(findings.iter().any(|f| f.kind == SecretKind::Generic));
    }

    #[test]
    fn test_custom_patterns_multiple() {
        let mut scanner = SecurityScanner::new();
        scanner.add_custom_patterns(&[
            r"MYAPP_KEY_[a-f0-9]{32}".to_owned(),
            r"MYAPP_TOKEN_[A-Z]{20}".to_owned(),
        ]);

        let content = "key = MYAPP_KEY_0123456789abcdef0123456789abcdef";
        let findings = scanner.scan(content, "test.py");

        assert!(!findings.is_empty(), "Custom patterns should be detected");
    }

    #[test]
    fn test_invalid_custom_pattern_ignored() {
        let mut scanner = SecurityScanner::new();
        scanner.add_custom_pattern(r"INVALID_[PATTERN");

        assert!(
            scanner.custom_patterns.is_empty(),
            "A pattern that does not compile must not be registered"
        );

        let findings = scanner.scan("INVALID_[PATTERN here", "test.py");
        assert!(findings.is_empty(), "An ignored pattern must not produce findings");
    }

    #[test]
    fn test_redact_content_masks_secret_and_keeps_layout() {
        let scanner = SecurityScanner::new();

        let input = format!("first line\n{AWS_KEY_LINE}\nlast line\n");
        let expected = format!("first line\n{AWS_KEY_LINE_REDACTED}\nlast line\n");

        assert_eq!(scanner.redact_content(&input, "config.py"), expected);
    }

    #[test]
    fn test_redact_content_preserves_crlf() {
        let scanner = SecurityScanner::new();

        let input = format!("first line\r\n{AWS_KEY_LINE}\r\nlast line\r\n");
        let expected = format!("first line\r\n{AWS_KEY_LINE_REDACTED}\r\nlast line\r\n");

        assert_eq!(scanner.redact_content(&input, "config.py"), expected);
    }

    #[test]
    fn test_is_safe() {
        let scanner = SecurityScanner::new();

        assert!(scanner.is_safe("let answer = 42;", "main.rs"));
        assert!(!scanner.is_safe(AWS_KEY_LINE, "main.rs"));
    }

    #[test]
    fn test_summarize() {
        assert_eq!(SecurityScanner::summarize(&[]), "No secrets detected");

        let findings = SecurityScanner::new().scan(AWS_KEY_LINE, "main.rs");
        assert_eq!(
            SecurityScanner::summarize(&findings),
            "Found 1 potential secrets (1 critical, 0 high severity)"
        );
    }
}