1use once_cell::sync::Lazy;
4use regex::Regex;
5use std::collections::HashSet;
6
/// Matches the word `example` (case-insensitive) when it is not directly
/// adjacent to an ASCII letter, digit, or `.` on either side.
///
/// The scanner uses this to skip lines that look like sample values. Because
/// `.` counts as an adjoining character, `example.com` does not match, and
/// neither does an embedded occurrence such as `7EXAMPLE`.
static RE_EXAMPLE_WORD: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?i)(?:^|[^a-zA-Z0-9.])example(?:[^a-zA-Z0-9.]|$)").unwrap()
});
21
// Built-in detection patterns. Each regex is compiled once on first use.
// Several patterns contain a capture group around the secret value, but the
// scanner in this file only ever uses the whole match (`m.as_str()`), so the
// key name and separator are part of what gets reported and masked.

/// `AKIA` followed by exactly 16 uppercase ASCII letters or digits.
/// Registered as an AWS credential pattern.
static RE_AWS_KEY: Lazy<Regex> = Lazy::new(|| Regex::new(r"AKIA[0-9A-Z]{16}").unwrap());
/// A case-insensitive `aws_secret_access_key`-style name (the `_`/`-`
/// separators are optional), then `:` or `=`, then a 40-character value drawn
/// from `[A-Za-z0-9/+=]`, with optional quotes around name and value.
static RE_AWS_SECRET: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?i)aws[_-]?secret[_-]?access[_-]?key['"]?\s*[:=]\s*['"]?([A-Za-z0-9/+=]{40})"#)
        .unwrap()
});
/// `ghp_` followed by exactly 36 ASCII alphanumeric characters.
static RE_GITHUB_PAT: Lazy<Regex> = Lazy::new(|| Regex::new(r"ghp_[A-Za-z0-9]{36}").unwrap());
/// `github_pat_`, 22 alphanumerics, `_`, then 59 alphanumerics.
static RE_GITHUB_FINE_PAT: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}").unwrap());
/// A PEM `-----BEGIN … PRIVATE KEY-----` header, either untyped or prefixed
/// with `RSA`, `EC`, `DSA`, or `OPENSSH`. Only the header line is matched.
static RE_PRIVATE_KEY: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----").unwrap());
/// A case-insensitive `api_key` / `api-key` / `apikey` name assigned (via `:`
/// or `=`) a value of at least 20 characters from `[A-Za-z0-9_-]`.
static RE_API_KEY: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?i)(?:api[_-]?key|apikey)['"]?\s*[:=]\s*['"]?([A-Za-z0-9_-]{20,})"#).unwrap()
});
/// A case-insensitive `secret` or `token` (also as the tail of a longer
/// identifier, since nothing anchors the start) assigned a value of at least
/// 20 characters from `[A-Za-z0-9_-]`.
static RE_SECRET_TOKEN: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?i)(?:secret|token)['"]?\s*[:=]\s*['"]?([A-Za-z0-9_-]{20,})"#).unwrap()
});
/// A case-insensitive `password` assigned a value of at least 8 characters
/// that contains no quotes or whitespace. The value class is not restricted
/// to ASCII, so matches may contain multi-byte characters.
static RE_PASSWORD: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"(?i)password['"]?\s*[:=]\s*['"]?([^'"\s]{8,})"#).unwrap());
/// A `mongodb://`, `postgres://`, `mysql://`, or `redis://` URL
/// (case-insensitive scheme), running up to the next whitespace or quote.
static RE_CONN_STRING: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"(?i)(?:mongodb|postgres|mysql|redis)://[^\s'"]+"#).unwrap());
/// Three dot-separated base64url-alphabet segments where the first two both
/// start with `eyJ`. Every segment body may be empty because of the `*`
/// quantifiers.
static RE_JWT: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*").unwrap());
/// `xox` plus one of `b`, `a`, `p`, `r`, `s`, then two 10–13 digit groups and
/// a 24-character alphanumeric group, all separated by `-`.
static RE_SLACK: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}").unwrap());
/// `sk_` or `pk_`, then `test_` or `live_`, then at least 24 alphanumerics.
static RE_STRIPE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?:sk|pk)_(?:test|live)_[A-Za-z0-9]{24,}").unwrap());
49
/// A single potential secret reported by [`SecurityScanner::scan`].
#[derive(Debug, Clone)]
pub struct SecretFinding {
    /// Category the matching pattern is registered under.
    pub kind: SecretKind,
    /// The `file_path` argument passed to the scan, stored verbatim.
    pub file: String,
    /// 1-based number of the line on which the match was found.
    pub line: u32,
    /// The matched text after masking by `redact` (only a short prefix and
    /// suffix of the match stay readable).
    pub pattern: String,
    /// Severity assigned to the pattern that matched.
    pub severity: Severity,
    /// Always `false` for findings produced by `scan` in this file, because
    /// that method skips comment lines instead of reporting them.
    pub in_comment: bool,
}
66
/// Category assigned to a detected secret.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecretKind {
    /// API keys: the generic `api_key = …` assignment and Stripe-style keys.
    ApiKey,
    /// Bearer-style tokens: Slack tokens and JWTs.
    AccessToken,
    /// PEM private-key headers.
    PrivateKey,
    /// `password = …` assignments.
    Password,
    /// Database / cache connection URLs.
    ConnectionString,
    /// AWS access key IDs and secret access keys.
    AwsCredential,
    /// GitHub personal access tokens (classic and fine-grained).
    GitHubToken,
    /// Generic `secret`/`token` assignments and every custom pattern.
    Generic,
}

impl SecretKind {
    /// Returns the human-readable label for this category.
    pub fn name(&self) -> &'static str {
        match *self {
            SecretKind::ApiKey => "API Key",
            SecretKind::AccessToken => "Access Token",
            SecretKind::PrivateKey => "Private Key",
            SecretKind::Password => "Password",
            SecretKind::ConnectionString => "Connection String",
            SecretKind::AwsCredential => "AWS Credential",
            SecretKind::GitHubToken => "GitHub Token",
            SecretKind::Generic => "Generic Secret",
        }
    }
}
103
/// How serious a finding is.
///
/// The derived ordering follows declaration order, so
/// `Low < Medium < High < Critical`; the scanner relies on this when it
/// compares against `Severity::High`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Lowest level. No pattern defined in this file uses it.
    Low,
    /// Below the `High` threshold. No pattern defined in this file uses it.
    Medium,
    /// Assigned to the generic built-in patterns and to every custom pattern.
    High,
    /// Assigned to the provider-specific built-in patterns (AWS, GitHub,
    /// private keys, Stripe).
    Critical,
}
112
/// Line-oriented scanner that reports and masks text matching known
/// secret patterns.
pub struct SecurityScanner {
    /// Built-in patterns, populated by `new` and tried in that order.
    patterns: Vec<SecretPattern>,
    /// Caller-supplied patterns added through `add_custom_pattern`.
    custom_patterns: Vec<CustomSecretPattern>,
    /// Substrings that suppress a match: a match whose text contains any
    /// entry is neither reported nor masked.
    allowlist: HashSet<String>,
}
119
/// A built-in detection rule backed by one of the static regexes in this file.
struct SecretPattern {
    /// Category reported for matches of this rule.
    kind: SecretKind,
    /// Reference to the lazily compiled static regex.
    regex: &'static Lazy<Regex>,
    /// Severity reported for matches of this rule.
    severity: Severity,
}
125
/// A caller-supplied detection rule. Matches are reported as
/// `SecretKind::Generic`.
struct CustomSecretPattern {
    /// Regex compiled from the caller's pattern string.
    regex: Regex,
    /// Severity reported for matches of this rule.
    severity: Severity,
}
131
132impl Default for SecurityScanner {
133 fn default() -> Self {
134 Self::new()
135 }
136}
137
138impl SecurityScanner {
139 pub fn new() -> Self {
146 let patterns = vec![
147 SecretPattern {
150 kind: SecretKind::AwsCredential,
151 regex: &RE_AWS_KEY,
152 severity: Severity::Critical,
153 },
154 SecretPattern {
155 kind: SecretKind::AwsCredential,
156 regex: &RE_AWS_SECRET,
157 severity: Severity::Critical,
158 },
159 SecretPattern {
161 kind: SecretKind::GitHubToken,
162 regex: &RE_GITHUB_PAT,
163 severity: Severity::Critical,
164 },
165 SecretPattern {
166 kind: SecretKind::GitHubToken,
167 regex: &RE_GITHUB_FINE_PAT,
168 severity: Severity::Critical,
169 },
170 SecretPattern {
172 kind: SecretKind::PrivateKey,
173 regex: &RE_PRIVATE_KEY,
174 severity: Severity::Critical,
175 },
176 SecretPattern {
178 kind: SecretKind::ApiKey,
179 regex: &RE_STRIPE,
180 severity: Severity::Critical,
181 },
182 SecretPattern {
185 kind: SecretKind::AccessToken,
186 regex: &RE_SLACK,
187 severity: Severity::High,
188 },
189 SecretPattern {
191 kind: SecretKind::AccessToken,
192 regex: &RE_JWT,
193 severity: Severity::High,
194 },
195 SecretPattern {
197 kind: SecretKind::ConnectionString,
198 regex: &RE_CONN_STRING,
199 severity: Severity::High,
200 },
201 SecretPattern {
204 kind: SecretKind::ApiKey,
205 regex: &RE_API_KEY,
206 severity: Severity::High,
207 },
208 SecretPattern {
210 kind: SecretKind::Generic,
211 regex: &RE_SECRET_TOKEN,
212 severity: Severity::High,
213 },
214 SecretPattern {
216 kind: SecretKind::Password,
217 regex: &RE_PASSWORD,
218 severity: Severity::High,
219 },
220 ];
221
222 Self { patterns, custom_patterns: Vec::new(), allowlist: HashSet::new() }
223 }
224
225 pub fn allowlist(&mut self, pattern: &str) {
227 self.allowlist.insert(pattern.to_owned());
228 }
229
230 pub fn add_custom_pattern(&mut self, pattern: &str) {
243 if let Ok(regex) = Regex::new(pattern) {
244 self.custom_patterns
245 .push(CustomSecretPattern { regex, severity: Severity::High });
246 }
247 }
248
249 pub fn add_custom_patterns(&mut self, patterns: &[String]) {
251 for pattern in patterns {
252 self.add_custom_pattern(pattern);
253 }
254 }
255
256 pub fn scan(&self, content: &str, file_path: &str) -> Vec<SecretFinding> {
258 let mut findings = Vec::new();
259
260 for (line_num, line) in content.lines().enumerate() {
261 let trimmed = line.trim();
262
263 let is_jsdoc_continuation =
266 trimmed.starts_with("* ") && !trimmed.contains('=') && !trimmed.contains(':');
267 let is_comment = trimmed.starts_with("//")
268 || trimmed.starts_with('#')
269 || trimmed.starts_with("/*")
270 || trimmed.starts_with("*")
271 || is_jsdoc_continuation;
272
273 let is_obvious_false_positive = is_comment
275 || RE_EXAMPLE_WORD.is_match(trimmed)
276 || trimmed.to_lowercase().contains("placeholder")
277 || trimmed.contains("xxxxx");
278
279 if is_obvious_false_positive {
280 continue;
281 }
282
283 for pattern in &self.patterns {
284 for m in pattern.regex.find_iter(line) {
286 let matched = m.as_str();
287
288 if self.allowlist.iter().any(|a| matched.contains(a)) {
290 continue;
291 }
292
293 findings.push(SecretFinding {
294 kind: pattern.kind,
295 file: file_path.to_owned(),
296 line: (line_num + 1) as u32,
297 pattern: redact(matched),
298 severity: pattern.severity,
299 in_comment: false, });
301 }
302 }
303
304 for custom in &self.custom_patterns {
306 for m in custom.regex.find_iter(line) {
307 let matched = m.as_str();
308
309 if self.allowlist.iter().any(|a| matched.contains(a)) {
311 continue;
312 }
313
314 findings.push(SecretFinding {
315 kind: SecretKind::Generic,
316 file: file_path.to_owned(),
317 line: (line_num + 1) as u32,
318 pattern: redact(matched),
319 severity: custom.severity,
320 in_comment: false,
321 });
322 }
323 }
324 }
325
326 findings
327 }
328
329 pub fn is_safe(&self, content: &str, file_path: &str) -> bool {
331 let findings = self.scan(content, file_path);
332 findings.iter().all(|f| f.severity < Severity::High)
333 }
334
335 pub fn summarize(findings: &[SecretFinding]) -> String {
337 if findings.is_empty() {
338 return "No secrets detected".to_owned();
339 }
340
341 let critical = findings
342 .iter()
343 .filter(|f| f.severity == Severity::Critical)
344 .count();
345 let high = findings
346 .iter()
347 .filter(|f| f.severity == Severity::High)
348 .count();
349
350 format!(
351 "Found {} potential secrets ({} critical, {} high severity)",
352 findings.len(),
353 critical,
354 high
355 )
356 }
357
358 pub fn redact_content(&self, content: &str, _file_path: &str) -> String {
361 let mut result = content.to_owned();
362
363 for (line_num, line) in content.lines().enumerate() {
364 let trimmed = line.trim();
365
366 let is_obvious_false_positive = RE_EXAMPLE_WORD.is_match(trimmed)
368 || trimmed.to_lowercase().contains("placeholder")
369 || trimmed.contains("xxxxx");
370
371 if is_obvious_false_positive {
372 continue;
373 }
374
375 for pattern in &self.patterns {
376 for m in pattern.regex.find_iter(line) {
378 let matched = m.as_str();
379
380 if self.allowlist.iter().any(|a| matched.contains(a)) {
382 continue;
383 }
384
385 if pattern.severity >= Severity::High {
387 let redacted = redact(matched);
388 let line_start = result
390 .lines()
391 .take(line_num)
392 .map(|l| l.len() + 1)
393 .sum::<usize>();
394 if let Some(pos) = result[line_start..].find(matched) {
395 let abs_pos = line_start + pos;
396 result.replace_range(abs_pos..abs_pos + matched.len(), &redacted);
397 }
398 }
399 }
400 }
401
402 for custom in &self.custom_patterns {
404 for m in custom.regex.find_iter(line) {
405 let matched = m.as_str();
406
407 if self.allowlist.iter().any(|a| matched.contains(a)) {
409 continue;
410 }
411
412 if custom.severity >= Severity::High {
414 let redacted = redact(matched);
415 let line_start = result
416 .lines()
417 .take(line_num)
418 .map(|l| l.len() + 1)
419 .sum::<usize>();
420 if let Some(pos) = result[line_start..].find(matched) {
421 let abs_pos = line_start + pos;
422 result.replace_range(abs_pos..abs_pos + matched.len(), &redacted);
423 }
424 }
425 }
426 }
427 }
428
429 result
430 }
431
432 #[must_use = "security findings should be reviewed"]
443 pub fn scan_and_redact(&self, content: &str, file_path: &str) -> (String, Vec<SecretFinding>) {
444 let findings = self.scan(content, file_path);
445 let redacted = self.redact_content(content, file_path);
446 (redacted, findings)
447 }
448}
449
/// Masks `s` for display, keeping only a short prefix and suffix readable.
///
/// Lengths are counted in characters, not bytes, so input containing
/// multi-byte UTF-8 characters is never sliced in the middle of a character.
///
/// - Up to 8 characters: every character is replaced by `*`.
/// - Longer input: the first and last `min(4, len / 4)` characters are kept
///   and everything in between is replaced by `*`.
///
/// The result always has the same number of characters as the input.
fn redact(s: &str) -> String {
    let total = s.chars().count();
    if total <= 8 {
        return "*".repeat(total);
    }

    // Same amount kept on both ends; at most a quarter of the text per side.
    let keep = 4.min(total / 4);

    let mut masked = String::with_capacity(s.len());
    masked.extend(s.chars().take(keep));
    masked.push_str(&"*".repeat(total - 2 * keep));
    masked.extend(s.chars().skip(total - keep));
    masked
}
466
#[cfg(test)]
mod tests {
    use super::*;

    /// An `AKIA…` literal in an assignment is reported as an AWS credential.
    /// The trailing `EXAMPLE` is preceded by a digit, so the example-word
    /// filter does not skip the line.
    #[test]
    fn test_aws_key_detection() {
        let scanner = SecurityScanner::new();
        let content = r#"AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE""#;

        let findings = scanner.scan(content, "config.py");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::AwsCredential));
    }

    /// A classic `ghp_` token (36 alphanumerics) is reported as a GitHub token.
    #[test]
    fn test_github_token_detection() {
        let scanner = SecurityScanner::new();
        let content = r#"GITHUB_TOKEN = "ghp_abcdefghijklmnopqrstuvwxyz1234567890""#;

        let findings = scanner.scan(content, ".env");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::GitHubToken));
    }

    /// A PEM private-key header line is enough to produce a finding.
    #[test]
    fn test_private_key_detection() {
        let scanner = SecurityScanner::new();
        let content = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpA...";

        let findings = scanner.scan(content, "key.pem");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::PrivateKey));
    }

    /// Every match on the line contains the allowlisted substring, so
    /// nothing is reported.
    #[test]
    fn test_allowlist() {
        let mut scanner = SecurityScanner::new();
        scanner.allowlist("EXAMPLE");

        let content = r#"api_key = "AKIAIOSFODNN7EXAMPLE""#;
        let findings = scanner.scan(content, "test.py");

        assert!(findings.is_empty());
    }

    /// Long input keeps four characters on each side; short input is fully masked.
    #[test]
    fn test_redact() {
        assert_eq!(redact("AKIAIOSFODNN7EXAMPLE"), "AKIA************MPLE");
        assert_eq!(redact("short"), "*****");
    }

    /// Lines starting with `#` are treated as comments and skipped.
    #[test]
    fn test_comments_are_skipped() {
        let scanner = SecurityScanner::new();
        let content = "# api_key = 'some_secret_key_12345678901234567890'";

        let findings = scanner.scan(content, "test.py");

        assert!(findings.is_empty(), "Secrets in comments should be skipped");
    }

    /// The same assignment outside a comment is reported, with `in_comment` unset.
    #[test]
    fn test_non_comment_detected() {
        let scanner = SecurityScanner::new();
        let content = "api_key = 'some_secret_key_12345678901234567890'";

        let findings = scanner.scan(content, "test.py");

        assert!(!findings.is_empty(), "Secrets in non-comments should be detected");
        assert!(
            findings.iter().all(|f| !f.in_comment),
            "in_comment should be false for non-comment lines"
        );
    }

    /// Matches of a custom pattern are reported as `SecretKind::Generic`.
    #[test]
    fn test_custom_pattern() {
        let mut scanner = SecurityScanner::new();
        scanner.add_custom_pattern(r"CUSTOM_SECRET_[A-Z0-9]{16}");

        let content = "my_secret = CUSTOM_SECRET_ABCD1234EFGH5678";
        let findings = scanner.scan(content, "test.py");

        assert!(!findings.is_empty(), "Custom pattern should be detected");
        assert!(findings.iter().any(|f| f.kind == SecretKind::Generic));
    }

    /// Several custom patterns can be registered in one call.
    #[test]
    fn test_custom_patterns_multiple() {
        let mut scanner = SecurityScanner::new();
        scanner.add_custom_patterns(&[
            r"MYAPP_KEY_[a-f0-9]{32}".to_owned(),
            r"MYAPP_TOKEN_[A-Z]{20}".to_owned(),
        ]);

        let content = "key = MYAPP_KEY_0123456789abcdef0123456789abcdef";
        let findings = scanner.scan(content, "test.py");

        assert!(!findings.is_empty(), "Custom patterns should be detected");
    }

    /// A pattern that does not compile (unclosed character class) is dropped:
    /// it is not registered, scanning still works, and it yields no findings.
    #[test]
    fn test_invalid_custom_pattern_ignored() {
        let mut scanner = SecurityScanner::new();
        scanner.add_custom_pattern(r"INVALID_[PATTERN");

        assert!(
            scanner.custom_patterns.is_empty(),
            "Invalid custom pattern should not be registered"
        );

        let content = "INVALID_[PATTERN here";
        let findings = scanner.scan(content, "test.py");

        assert!(
            findings.is_empty(),
            "Invalid custom pattern should not produce findings"
        );
    }
}