1use once_cell::sync::Lazy;
4use regex::Regex;
5use std::collections::HashSet;
6
/// Case-insensitive match for the standalone word "example".
///
/// The word must be bounded on each side either by the start/end of the text or
/// by a character that is not an ASCII letter, digit, or `.`. So `EXAMPLE_KEY`
/// matches, while `7EXAMPLE` and `example.com` do not. The scanner treats any
/// line matching this as an obvious false positive and skips it.
static RE_EXAMPLE_WORD: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?i)(?:^|[^a-zA-Z0-9.])example(?:[^a-zA-Z0-9.]|$)").unwrap()
});
21
// Built-in detection patterns. Each regex is compiled lazily on first use; the
// `unwrap()` calls can only fail if a literal below is not a valid regex.
//
// Several patterns contain a capture group around the secret value, but the
// scanner works on the whole match (`find_iter` + `as_str`), so the reported
// and redacted text includes the key name and separator, not just the value.

/// `AKIA` followed by exactly 16 uppercase letters or digits.
static RE_AWS_KEY: Lazy<Regex> = Lazy::new(|| Regex::new(r"AKIA[0-9A-Z]{16}").unwrap());
/// Case-insensitive `aws_secret_access_key` (with `_`, `-`, or no separators),
/// then `:` or `=`, optional quotes, and a 40-character value drawn from
/// `[A-Za-z0-9/+=]`.
static RE_AWS_SECRET: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?i)aws[_-]?secret[_-]?access[_-]?key['"]?\s*[:=]\s*['"]?([A-Za-z0-9/+=]{40})"#)
        .unwrap()
});
/// `ghp_` followed by 36 ASCII alphanumerics.
static RE_GITHUB_PAT: Lazy<Regex> = Lazy::new(|| Regex::new(r"ghp_[A-Za-z0-9]{36}").unwrap());
/// `github_pat_`, 22 alphanumerics, `_`, then 59 alphanumerics.
static RE_GITHUB_FINE_PAT: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}").unwrap());
/// `gho_`, `ghu_`, `ghr_`, or `ghs_` followed by at least 36 alphanumerics.
static RE_GITHUB_OTHER_TOKENS: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"gh[ours]_[A-Za-z0-9]{36,}").unwrap());
/// PEM header `-----BEGIN PRIVATE KEY-----`, optionally qualified with `RSA`,
/// `EC`, `DSA`, or `OPENSSH`. Only the header line is matched, not the key body.
static RE_PRIVATE_KEY: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----").unwrap());
/// Case-insensitive `api_key` / `api-key` / `apikey`, then `:` or `=`, optional
/// quotes, and a value of at least 20 characters from `[A-Za-z0-9_-]`.
static RE_API_KEY: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?i)(?:api[_-]?key|apikey)['"]?\s*[:=]\s*['"]?([A-Za-z0-9_-]{20,})"#).unwrap()
});
/// Case-insensitive `secret` or `token`, then `:` or `=`, optional quotes, and a
/// value of at least 20 characters from `[A-Za-z0-9_-]`. The keyword is not
/// anchored, so it also matches as the tail of a longer identifier.
static RE_SECRET_TOKEN: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?i)(?:secret|token)['"]?\s*[:=]\s*['"]?([A-Za-z0-9_-]{20,})"#).unwrap()
});
/// Case-insensitive `password`, then `:` or `=`, optional quotes, and at least
/// 8 characters that are not quotes or whitespace (non-ASCII is allowed).
static RE_PASSWORD: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"(?i)password['"]?\s*[:=]\s*['"]?([^'"\s]{8,})"#).unwrap());
/// A URL whose scheme is one of the listed database names (case-insensitive),
/// running up to the next whitespace or quote. Matches whether or not the URL
/// actually embeds credentials.
static RE_CONN_STRING: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?i)(?:mongodb|postgres(?:ql)?|mysql|redis|mariadb|cockroachdb|mssql)://[^\s'"]+"#)
        .unwrap()
});
/// Three dot-separated base64url segments where the first two start with `eyJ`
/// (the base64 encoding of `{"`). Segment lengths are unconstrained.
static RE_JWT: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*").unwrap());
/// `xoxb-`/`xoxa-`/`xoxp-`/`xoxr-`/`xoxs-`, two 10–13 digit groups, then 24
/// alphanumerics, all separated by `-`.
static RE_SLACK: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}").unwrap());
/// `sk_` or `pk_`, then `test_` or `live_`, then at least 24 alphanumerics.
static RE_STRIPE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?:sk|pk)_(?:test|live)_[A-Za-z0-9]{24,}").unwrap());
/// `sk-` followed by at least 32 alphanumerics (no `-` or `_` in the body).
static RE_OPENAI: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"sk-[A-Za-z0-9]{32,}").unwrap());
/// `sk-ant-` followed by at least 40 characters from `[A-Za-z0-9-]`.
static RE_ANTHROPIC: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"sk-ant-[A-Za-z0-9-]{40,}").unwrap());
63
/// One potential secret reported by the scanner.
#[derive(Debug, Clone)]
pub struct SecretFinding {
    /// Category of the pattern that matched.
    pub kind: SecretKind,
    /// The `file_path` the caller passed to the scan, copied verbatim.
    pub file: String,
    /// 1-based line number of the match within the scanned content.
    pub line: u32,
    /// The matched text after redaction; the raw secret is never stored here.
    pub pattern: String,
    /// Severity assigned to the pattern that matched.
    pub severity: Severity,
    /// Whether the match was found on a comment line. The scanner in this file
    /// skips comment lines entirely, so it always sets this to `false`.
    pub in_comment: bool,
}
80
/// Category a detected secret is reported under.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecretKind {
    /// Service API keys and generic `api_key = ...` assignments.
    ApiKey,
    /// Bearer-style tokens such as Slack tokens and JWTs.
    AccessToken,
    /// PEM private-key headers.
    PrivateKey,
    /// `password = ...` assignments.
    Password,
    /// Database connection URLs.
    ConnectionString,
    /// AWS access key IDs and secret access keys.
    AwsCredential,
    /// GitHub personal-access and other `gh?_` tokens.
    GitHubToken,
    /// Anything else: generic `secret`/`token` assignments and custom patterns.
    Generic,
}

impl SecretKind {
    /// Human-readable label for this kind, suitable for reports.
    pub fn name(&self) -> &'static str {
        match *self {
            SecretKind::ApiKey => "API Key",
            SecretKind::AccessToken => "Access Token",
            SecretKind::PrivateKey => "Private Key",
            SecretKind::Password => "Password",
            SecretKind::ConnectionString => "Connection String",
            SecretKind::AwsCredential => "AWS Credential",
            SecretKind::GitHubToken => "GitHub Token",
            SecretKind::Generic => "Generic Secret",
        }
    }
}
117
/// How serious a finding is.
///
/// Variants are declared in ascending order, so the derived `Ord` gives
/// `Low < Medium < High < Critical`. Code in this file compares severities with
/// `<` and `>=`; do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    Low,
    Medium,
    High,
    Critical,
}
126
/// Line-oriented scanner that looks for hard-coded secrets in text.
pub struct SecurityScanner {
    /// Built-in patterns, tried in order on every scanned line.
    patterns: Vec<SecretPattern>,
    /// User-supplied patterns, tried after the built-in ones.
    custom_patterns: Vec<CustomSecretPattern>,
    /// Substrings that suppress a match: any match containing one of these
    /// entries is ignored.
    allowlist: HashSet<String>,
}
133
/// A built-in detection rule backed by one of the lazily compiled statics above.
struct SecretPattern {
    /// Category reported for matches of this rule.
    kind: SecretKind,
    /// Reference to the shared, lazily compiled regex.
    regex: &'static Lazy<Regex>,
    /// Severity reported for matches of this rule.
    severity: Severity,
}
139
/// A user-supplied detection rule. Matches are always reported as
/// `SecretKind::Generic`.
struct CustomSecretPattern {
    /// Regex compiled from the user's pattern string; owned by the scanner.
    regex: Regex,
    /// Severity reported for matches of this rule.
    severity: Severity,
}
145
146impl Default for SecurityScanner {
147 fn default() -> Self {
148 Self::new()
149 }
150}
151
152impl SecurityScanner {
153 pub fn new() -> Self {
160 let patterns = vec![
161 SecretPattern {
164 kind: SecretKind::AwsCredential,
165 regex: &RE_AWS_KEY,
166 severity: Severity::Critical,
167 },
168 SecretPattern {
169 kind: SecretKind::AwsCredential,
170 regex: &RE_AWS_SECRET,
171 severity: Severity::Critical,
172 },
173 SecretPattern {
175 kind: SecretKind::GitHubToken,
176 regex: &RE_GITHUB_PAT,
177 severity: Severity::Critical,
178 },
179 SecretPattern {
180 kind: SecretKind::GitHubToken,
181 regex: &RE_GITHUB_FINE_PAT,
182 severity: Severity::Critical,
183 },
184 SecretPattern {
185 kind: SecretKind::GitHubToken,
186 regex: &RE_GITHUB_OTHER_TOKENS,
187 severity: Severity::Critical,
188 },
189 SecretPattern {
191 kind: SecretKind::PrivateKey,
192 regex: &RE_PRIVATE_KEY,
193 severity: Severity::Critical,
194 },
195 SecretPattern {
197 kind: SecretKind::ApiKey,
198 regex: &RE_ANTHROPIC,
199 severity: Severity::Critical,
200 },
201 SecretPattern {
203 kind: SecretKind::ApiKey,
204 regex: &RE_OPENAI,
205 severity: Severity::Critical,
206 },
207 SecretPattern {
209 kind: SecretKind::ApiKey,
210 regex: &RE_STRIPE,
211 severity: Severity::Critical,
212 },
213 SecretPattern {
216 kind: SecretKind::AccessToken,
217 regex: &RE_SLACK,
218 severity: Severity::High,
219 },
220 SecretPattern {
222 kind: SecretKind::AccessToken,
223 regex: &RE_JWT,
224 severity: Severity::High,
225 },
226 SecretPattern {
228 kind: SecretKind::ConnectionString,
229 regex: &RE_CONN_STRING,
230 severity: Severity::High,
231 },
232 SecretPattern {
235 kind: SecretKind::ApiKey,
236 regex: &RE_API_KEY,
237 severity: Severity::High,
238 },
239 SecretPattern {
241 kind: SecretKind::Generic,
242 regex: &RE_SECRET_TOKEN,
243 severity: Severity::High,
244 },
245 SecretPattern {
247 kind: SecretKind::Password,
248 regex: &RE_PASSWORD,
249 severity: Severity::High,
250 },
251 ];
252
253 Self { patterns, custom_patterns: Vec::new(), allowlist: HashSet::new() }
254 }
255
256 pub fn allowlist(&mut self, pattern: &str) {
258 self.allowlist.insert(pattern.to_owned());
259 }
260
261 pub fn add_custom_pattern(&mut self, pattern: &str) {
274 if let Ok(regex) = Regex::new(pattern) {
275 self.custom_patterns
276 .push(CustomSecretPattern { regex, severity: Severity::High });
277 }
278 }
279
280 pub fn add_custom_patterns(&mut self, patterns: &[String]) {
282 for pattern in patterns {
283 self.add_custom_pattern(pattern);
284 }
285 }
286
287 pub fn scan(&self, content: &str, file_path: &str) -> Vec<SecretFinding> {
289 let mut findings = Vec::new();
290
291 for (line_num, line) in content.lines().enumerate() {
292 let trimmed = line.trim();
293
294 let is_jsdoc_continuation =
297 trimmed.starts_with("* ") && !trimmed.contains('=') && !trimmed.contains(':');
298 let is_comment = trimmed.starts_with("//")
299 || trimmed.starts_with('#')
300 || trimmed.starts_with("/*")
301 || trimmed.starts_with("*")
302 || is_jsdoc_continuation;
303
304 let is_obvious_false_positive = is_comment
306 || RE_EXAMPLE_WORD.is_match(trimmed)
307 || trimmed.to_lowercase().contains("placeholder")
308 || trimmed.contains("xxxxx");
309
310 if is_obvious_false_positive {
311 continue;
312 }
313
314 for pattern in &self.patterns {
315 for m in pattern.regex.find_iter(line) {
317 let matched = m.as_str();
318
319 if self.allowlist.iter().any(|a| matched.contains(a)) {
321 continue;
322 }
323
324 findings.push(SecretFinding {
325 kind: pattern.kind,
326 file: file_path.to_owned(),
327 line: (line_num + 1) as u32,
328 pattern: redact(matched),
329 severity: pattern.severity,
330 in_comment: false, });
332 }
333 }
334
335 for custom in &self.custom_patterns {
337 for m in custom.regex.find_iter(line) {
338 let matched = m.as_str();
339
340 if self.allowlist.iter().any(|a| matched.contains(a)) {
342 continue;
343 }
344
345 findings.push(SecretFinding {
346 kind: SecretKind::Generic,
347 file: file_path.to_owned(),
348 line: (line_num + 1) as u32,
349 pattern: redact(matched),
350 severity: custom.severity,
351 in_comment: false,
352 });
353 }
354 }
355 }
356
357 findings
358 }
359
360 pub fn is_safe(&self, content: &str, file_path: &str) -> bool {
362 let findings = self.scan(content, file_path);
363 findings.iter().all(|f| f.severity < Severity::High)
364 }
365
366 pub fn summarize(findings: &[SecretFinding]) -> String {
368 if findings.is_empty() {
369 return "No secrets detected".to_owned();
370 }
371
372 let critical = findings
373 .iter()
374 .filter(|f| f.severity == Severity::Critical)
375 .count();
376 let high = findings
377 .iter()
378 .filter(|f| f.severity == Severity::High)
379 .count();
380
381 format!(
382 "Found {} potential secrets ({} critical, {} high severity)",
383 findings.len(),
384 critical,
385 high
386 )
387 }
388
389 pub fn redact_content(&self, content: &str, _file_path: &str) -> String {
392 let mut result = content.to_owned();
393
394 for (line_num, line) in content.lines().enumerate() {
395 let trimmed = line.trim();
396
397 let is_obvious_false_positive = RE_EXAMPLE_WORD.is_match(trimmed)
399 || trimmed.to_lowercase().contains("placeholder")
400 || trimmed.contains("xxxxx");
401
402 if is_obvious_false_positive {
403 continue;
404 }
405
406 for pattern in &self.patterns {
407 for m in pattern.regex.find_iter(line) {
409 let matched = m.as_str();
410
411 if self.allowlist.iter().any(|a| matched.contains(a)) {
413 continue;
414 }
415
416 if pattern.severity >= Severity::High {
418 let redacted = redact(matched);
419 let line_start = result
421 .lines()
422 .take(line_num)
423 .map(|l| l.len() + 1)
424 .sum::<usize>();
425 if let Some(pos) = result[line_start..].find(matched) {
426 let abs_pos = line_start + pos;
427 result.replace_range(abs_pos..abs_pos + matched.len(), &redacted);
428 }
429 }
430 }
431 }
432
433 for custom in &self.custom_patterns {
435 for m in custom.regex.find_iter(line) {
436 let matched = m.as_str();
437
438 if self.allowlist.iter().any(|a| matched.contains(a)) {
440 continue;
441 }
442
443 if custom.severity >= Severity::High {
445 let redacted = redact(matched);
446 let line_start = result
447 .lines()
448 .take(line_num)
449 .map(|l| l.len() + 1)
450 .sum::<usize>();
451 if let Some(pos) = result[line_start..].find(matched) {
452 let abs_pos = line_start + pos;
453 result.replace_range(abs_pos..abs_pos + matched.len(), &redacted);
454 }
455 }
456 }
457 }
458 }
459
460 result
461 }
462
463 #[must_use = "security findings should be reviewed"]
474 pub fn scan_and_redact(&self, content: &str, file_path: &str) -> (String, Vec<SecretFinding>) {
475 let findings = self.scan(content, file_path);
476 let redacted = self.redact_content(content, file_path);
477 (redacted, findings)
478 }
479}
480
/// Masks the middle of `s` so a match can be reported without leaking it.
///
/// Strings of at most 8 characters are replaced entirely by `*`. Longer strings
/// keep `min(4, len / 4)` characters at each end, and every character in
/// between becomes one `*`.
///
/// Lengths are measured in characters rather than bytes, so input containing
/// multi-byte UTF-8 sequences is never sliced mid-character (which would panic).
/// For pure-ASCII input the output is the same as byte-based masking.
fn redact(s: &str) -> String {
    let char_count = s.chars().count();
    if char_count <= 8 {
        return "*".repeat(char_count);
    }

    // char_count >= 9 here, so `keep` is at least 2 and `hidden` at least 1.
    let keep = 4.min(char_count / 4);
    let hidden = char_count - 2 * keep;

    let mut masked = String::with_capacity(s.len());
    masked.extend(s.chars().take(keep));
    masked.extend(std::iter::repeat('*').take(hidden));
    masked.extend(s.chars().skip(keep + hidden));
    masked
}
497
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_aws_key_detection() {
        let scanner = SecurityScanner::new();
        let content = r#"AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE""#;

        let findings = scanner.scan(content, "config.py");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::AwsCredential));
    }

    #[test]
    fn test_github_token_detection() {
        let scanner = SecurityScanner::new();
        let content = r#"GITHUB_TOKEN = "ghp_abcdefghijklmnopqrstuvwxyz1234567890""#;

        let findings = scanner.scan(content, ".env");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::GitHubToken));
    }

    #[test]
    fn test_private_key_detection() {
        let scanner = SecurityScanner::new();
        let content = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpA...";

        let findings = scanner.scan(content, "key.pem");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::PrivateKey));
    }

    #[test]
    fn test_allowlist() {
        let mut scanner = SecurityScanner::new();
        scanner.allowlist("EXAMPLE");

        let content = r#"api_key = "AKIAIOSFODNN7EXAMPLE""#;
        let findings = scanner.scan(content, "test.py");

        assert!(findings.is_empty());
    }

    #[test]
    fn test_redact() {
        assert_eq!(redact("AKIAIOSFODNN7EXAMPLE"), "AKIA************MPLE");
        assert_eq!(redact("short"), "*****");
        // Boundary between "mask everything" and "keep both ends".
        assert_eq!(redact("12345678"), "********");
        assert_eq!(redact("123456789"), "12*****89");
    }

    #[test]
    fn test_comments_are_skipped() {
        let scanner = SecurityScanner::new();
        let content = "# api_key = 'some_secret_key_12345678901234567890'";

        let findings = scanner.scan(content, "test.py");

        assert!(findings.is_empty(), "Secrets in comments should be skipped");
    }

    #[test]
    fn test_non_comment_detected() {
        let scanner = SecurityScanner::new();
        let content = "api_key = 'some_secret_key_12345678901234567890'";

        let findings = scanner.scan(content, "test.py");

        assert!(!findings.is_empty(), "Secrets in non-comments should be detected");
        assert!(
            findings.iter().all(|f| !f.in_comment),
            "in_comment should be false for non-comment lines"
        );
    }

    #[test]
    fn test_custom_pattern() {
        let mut scanner = SecurityScanner::new();
        scanner.add_custom_pattern(r"CUSTOM_SECRET_[A-Z0-9]{16}");

        let content = "my_secret = CUSTOM_SECRET_ABCD1234EFGH5678";
        let findings = scanner.scan(content, "test.py");

        assert!(!findings.is_empty(), "Custom pattern should be detected");
        assert!(findings.iter().any(|f| f.kind == SecretKind::Generic));
    }

    #[test]
    fn test_custom_patterns_multiple() {
        let mut scanner = SecurityScanner::new();
        scanner.add_custom_patterns(&[
            r"MYAPP_KEY_[a-f0-9]{32}".to_owned(),
            r"MYAPP_TOKEN_[A-Z]{20}".to_owned(),
        ]);

        let content = "key = MYAPP_KEY_0123456789abcdef0123456789abcdef";
        let findings = scanner.scan(content, "test.py");

        assert!(!findings.is_empty(), "Custom patterns should be detected");
    }

    #[test]
    fn test_invalid_custom_pattern_ignored() {
        let mut scanner = SecurityScanner::new();
        // Unclosed character class: not a valid regex.
        scanner.add_custom_pattern(r"INVALID_[PATTERN");

        assert!(
            scanner.custom_patterns.is_empty(),
            "An invalid pattern must not be registered"
        );

        // Scanning must still work, and nothing here matches a built-in pattern.
        let content = "INVALID_[PATTERN here";
        let findings = scanner.scan(content, "test.py");

        assert!(findings.is_empty(), "An invalid pattern must not produce findings");
    }
}