use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashSet;
/// Matches the word `example` (case-insensitive) when it is not directly adjacent to an
/// ASCII letter, digit or `.`. The scanner uses it to skip lines that look like samples.
static RE_EXAMPLE_WORD: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(?:^|[^a-zA-Z0-9.])example(?:[^a-zA-Z0-9.]|$)")
.expect("RE_EXAMPLE_WORD: invalid regex pattern")
});
/// Matches `AKIA` followed by 16 uppercase ASCII letters or digits (AWS access key ID shape).
static RE_AWS_KEY: Lazy<Regex> =
Lazy::new(|| Regex::new(r"AKIA[0-9A-Z]{16}").expect("RE_AWS_KEY: invalid regex pattern"));
/// Matches an `aws_secret_access_key` assignment (case-insensitive, `_`/`-` separators
/// optional, `:` or `=` as the assignment sign, optional quotes). Capture group 1 is the
/// 40-character value drawn from `[A-Za-z0-9/+=]`.
static RE_AWS_SECRET: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)aws[_-]?secret[_-]?access[_-]?key['"]?\s*[:=]\s*['"]?([A-Za-z0-9/+=]{40})"#)
.expect("RE_AWS_SECRET: invalid regex pattern")
});
/// Matches `ghp_` followed by 36 ASCII alphanumerics.
static RE_GITHUB_PAT: Lazy<Regex> =
Lazy::new(|| Regex::new(r"ghp_[A-Za-z0-9]{36}").expect("RE_GITHUB_PAT: invalid regex pattern"));
/// Matches `github_pat_`, 22 ASCII alphanumerics, `_`, then 59 ASCII alphanumerics.
static RE_GITHUB_FINE_PAT: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}")
.expect("RE_GITHUB_FINE_PAT: invalid regex pattern")
});
/// Matches `gho_`, `ghu_`, `ghr_` or `ghs_` followed by at least 36 ASCII alphanumerics.
static RE_GITHUB_OTHER_TOKENS: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"gh[ours]_[A-Za-z0-9]{36,}").expect("RE_GITHUB_OTHER_TOKENS: invalid regex pattern")
});
/// Matches a `-----BEGIN ... PRIVATE KEY-----` header, optionally qualified with
/// `RSA`, `EC`, `DSA` or `OPENSSH`. Only the header line is matched, not the key body.
static RE_PRIVATE_KEY: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----")
.expect("RE_PRIVATE_KEY: invalid regex pattern")
});
/// Matches an `api_key` / `api-key` / `apikey` assignment (case-insensitive). Capture
/// group 1 is the value: at least 20 characters from `[A-Za-z0-9_-]`.
static RE_API_KEY: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)(?:api[_-]?key|apikey)['"]?\s*[:=]\s*['"]?([A-Za-z0-9_-]{20,})"#)
.expect("RE_API_KEY: invalid regex pattern")
});
/// Matches a `secret` or `token` assignment (case-insensitive). Capture group 1 is the
/// value: at least 20 characters from `[A-Za-z0-9_-]`.
static RE_SECRET_TOKEN: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)(?:secret|token)['"]?\s*[:=]\s*['"]?([A-Za-z0-9_-]{20,})"#)
.expect("RE_SECRET_TOKEN: invalid regex pattern")
});
/// Matches a `password` assignment (case-insensitive). Capture group 1 is the value:
/// at least 8 characters that are neither quotes nor whitespace.
static RE_PASSWORD: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)password['"]?\s*[:=]\s*['"]?([^'"\s]{8,})"#)
.expect("RE_PASSWORD: invalid regex pattern")
});
/// Matches a URL whose scheme is one of the listed database names (case-insensitive),
/// running up to the next whitespace or quote character.
static RE_CONN_STRING: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r#"(?i)(?:mongodb|postgres(?:ql)?|mysql|redis|mariadb|cockroachdb|mssql)://[^\s'"]+"#,
)
.expect("RE_CONN_STRING: invalid regex pattern")
});
/// Matches three `.`-separated runs of `[A-Za-z0-9_-]` where the first two start with
/// `eyJ` (the shape of a JWT). The third segment may be empty.
static RE_JWT: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*")
.expect("RE_JWT: invalid regex pattern")
});
/// Matches `xoxb-`/`xoxa-`/`xoxp-`/`xoxr-`/`xoxs-`, two groups of 10-13 digits and a
/// final group of 24 ASCII alphanumerics, each separated by `-`.
static RE_SLACK: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}")
.expect("RE_SLACK: invalid regex pattern")
});
/// Matches `sk_` or `pk_`, then `test_` or `live_`, then at least 24 ASCII alphanumerics.
static RE_STRIPE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?:sk|pk)_(?:test|live)_[A-Za-z0-9]{24,}")
.expect("RE_STRIPE: invalid regex pattern")
});
/// Matches `sk-` followed by at least 32 characters from `[A-Za-z0-9_-]`.
static RE_OPENAI: Lazy<Regex> =
Lazy::new(|| Regex::new(r"sk-[A-Za-z0-9_-]{32,}").expect("RE_OPENAI: invalid regex pattern"));
/// Matches `sk-ant-` followed by at least 40 characters from `[A-Za-z0-9-]`
/// (note: `_` is not in the class, so a match stops at the first underscore).
static RE_ANTHROPIC: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"sk-ant-[A-Za-z0-9-]{40,}").expect("RE_ANTHROPIC: invalid regex pattern")
});
/// Errors raised while configuring the scanner.
#[derive(Debug, Clone)]
pub enum SecurityError {
    /// A user-supplied pattern could not be compiled into a regular expression.
    InvalidPattern {
        /// The pattern text exactly as the caller supplied it.
        pattern: String,
        /// Human-readable description of why compilation failed.
        message: String,
    },
}

impl std::fmt::Display for SecurityError {
    /// Renders the error as `Invalid regex pattern '<pattern>': <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The enum has a single variant, so this destructuring is irrefutable; adding a
        // variant later turns it into a compile error rather than a silent fallthrough.
        let Self::InvalidPattern { pattern, message } = self;
        f.write_fmt(format_args!(
            "Invalid regex pattern '{}': {}",
            pattern, message
        ))
    }
}

impl std::error::Error for SecurityError {}
/// One potential secret reported by `SecurityScanner::scan`.
#[derive(Debug, Clone)]
pub struct SecretFinding {
/// Category assigned by the detector that matched.
pub kind: SecretKind,
/// The `file_path` argument that was passed to the scan, copied verbatim.
pub file: String,
/// 1-based line number within the scanned content.
pub line: u32,
/// Redacted form of the matched text (not the regex source).
pub pattern: String,
/// Severity of the detector that matched.
pub severity: Severity,
/// Whether the match was on a comment line. `scan` skips comment lines before
/// matching, so every finding it produces has this set to `false`.
pub in_comment: bool,
}
/// Category of secret a finding belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecretKind {
    /// Provider or generic API key.
    ApiKey,
    /// Bearer-style access token (the built-in Slack and JWT detectors use this).
    AccessToken,
    /// PEM private-key header.
    PrivateKey,
    /// Password assignment.
    Password,
    /// Database connection URL.
    ConnectionString,
    /// AWS access key ID or secret access key.
    AwsCredential,
    /// GitHub token of any supported prefix.
    GitHubToken,
    /// Anything else, including every custom pattern.
    Generic,
}

impl SecretKind {
    /// Returns the human-readable label for this kind.
    pub fn name(&self) -> &'static str {
        match *self {
            SecretKind::ApiKey => "API Key",
            SecretKind::AccessToken => "Access Token",
            SecretKind::PrivateKey => "Private Key",
            SecretKind::Password => "Password",
            SecretKind::ConnectionString => "Connection String",
            SecretKind::AwsCredential => "AWS Credential",
            SecretKind::GitHubToken => "GitHub Token",
            SecretKind::Generic => "Generic Secret",
        }
    }
}
/// How serious a finding is.
///
/// The derived `Ord` follows declaration order, so
/// `Low < Medium < High < Critical`; do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
/// Lowest level; no built-in detector uses it.
Low,
/// No built-in detector uses it.
Medium,
/// Used by the generic built-in detectors and by every custom pattern.
High,
/// Used by the provider-specific built-in detectors and the private-key detector.
Critical,
}
/// Line-oriented secret scanner combining built-in detectors, user-supplied
/// regex patterns and an allowlist of substrings to ignore.
pub struct SecurityScanner {
/// Built-in detectors, evaluated in insertion order.
patterns: Vec<SecretPattern>,
/// Detectors compiled from caller-supplied regex patterns.
custom_patterns: Vec<CustomSecretPattern>,
/// Substrings; any match containing one of them is ignored.
allowlist: HashSet<String>,
}
/// A built-in detector: a lazily compiled static regex plus its classification.
struct SecretPattern {
/// Category reported for matches of this detector.
kind: SecretKind,
/// Reference to one of the module-level lazily initialised regexes.
regex: &'static Lazy<Regex>,
/// Severity reported for matches of this detector.
severity: Severity,
}
/// A detector compiled from a caller-supplied pattern; its matches are reported
/// as `SecretKind::Generic`.
struct CustomSecretPattern {
/// The compiled caller-supplied expression.
regex: Regex,
/// Severity reported for matches of this detector.
severity: Severity,
}
impl Default for SecurityScanner {
fn default() -> Self {
Self::new()
}
}
impl SecurityScanner {
pub fn new() -> Self {
let patterns = vec![
SecretPattern {
kind: SecretKind::AwsCredential,
regex: &RE_AWS_KEY,
severity: Severity::Critical,
},
SecretPattern {
kind: SecretKind::AwsCredential,
regex: &RE_AWS_SECRET,
severity: Severity::Critical,
},
SecretPattern {
kind: SecretKind::GitHubToken,
regex: &RE_GITHUB_PAT,
severity: Severity::Critical,
},
SecretPattern {
kind: SecretKind::GitHubToken,
regex: &RE_GITHUB_FINE_PAT,
severity: Severity::Critical,
},
SecretPattern {
kind: SecretKind::GitHubToken,
regex: &RE_GITHUB_OTHER_TOKENS,
severity: Severity::Critical,
},
SecretPattern {
kind: SecretKind::PrivateKey,
regex: &RE_PRIVATE_KEY,
severity: Severity::Critical,
},
SecretPattern {
kind: SecretKind::ApiKey,
regex: &RE_ANTHROPIC,
severity: Severity::Critical,
},
SecretPattern {
kind: SecretKind::ApiKey,
regex: &RE_OPENAI,
severity: Severity::Critical,
},
SecretPattern {
kind: SecretKind::ApiKey,
regex: &RE_STRIPE,
severity: Severity::Critical,
},
SecretPattern {
kind: SecretKind::AccessToken,
regex: &RE_SLACK,
severity: Severity::High,
},
SecretPattern {
kind: SecretKind::AccessToken,
regex: &RE_JWT,
severity: Severity::High,
},
SecretPattern {
kind: SecretKind::ConnectionString,
regex: &RE_CONN_STRING,
severity: Severity::High,
},
SecretPattern {
kind: SecretKind::ApiKey,
regex: &RE_API_KEY,
severity: Severity::High,
},
SecretPattern {
kind: SecretKind::Generic,
regex: &RE_SECRET_TOKEN,
severity: Severity::High,
},
SecretPattern {
kind: SecretKind::Password,
regex: &RE_PASSWORD,
severity: Severity::High,
},
];
Self { patterns, custom_patterns: Vec::new(), allowlist: HashSet::new() }
}
pub fn allowlist(&mut self, pattern: &str) {
self.allowlist.insert(pattern.to_owned());
}
pub fn add_custom_pattern(&mut self, pattern: &str) -> Result<(), SecurityError> {
let regex = Regex::new(pattern).map_err(|e| SecurityError::InvalidPattern {
pattern: pattern.to_owned(),
message: e.to_string(),
})?;
self.custom_patterns
.push(CustomSecretPattern { regex, severity: Severity::High });
Ok(())
}
pub fn add_custom_pattern_unchecked(&mut self, pattern: &str) {
let _ = self.add_custom_pattern(pattern);
}
pub fn add_custom_patterns(&mut self, patterns: &[String]) -> Result<(), SecurityError> {
for pattern in patterns {
self.add_custom_pattern(pattern)?;
}
Ok(())
}
pub fn add_custom_patterns_unchecked(&mut self, patterns: &[String]) {
for pattern in patterns {
self.add_custom_pattern_unchecked(pattern);
}
}
pub fn scan(&self, content: &str, file_path: &str) -> Vec<SecretFinding> {
let mut findings = Vec::new();
for (line_num, line) in content.lines().enumerate() {
let trimmed = line.trim();
let is_jsdoc_continuation =
trimmed.starts_with("* ") && !trimmed.contains('=') && !trimmed.contains(':');
let is_comment = trimmed.starts_with("//")
|| trimmed.starts_with('#')
|| trimmed.starts_with("/*")
|| trimmed.starts_with('*')
|| is_jsdoc_continuation;
let is_obvious_false_positive = is_comment
|| RE_EXAMPLE_WORD.is_match(trimmed)
|| trimmed.to_lowercase().contains("placeholder")
|| trimmed.contains("xxxxx");
if is_obvious_false_positive {
continue;
}
for pattern in &self.patterns {
for m in pattern.regex.find_iter(line) {
let matched = m.as_str();
if self.allowlist.iter().any(|a| matched.contains(a)) {
continue;
}
findings.push(SecretFinding {
kind: pattern.kind,
file: file_path.to_owned(),
line: (line_num + 1) as u32,
pattern: redact(matched),
severity: pattern.severity,
in_comment: false, });
}
}
for custom in &self.custom_patterns {
for m in custom.regex.find_iter(line) {
let matched = m.as_str();
if self.allowlist.iter().any(|a| matched.contains(a)) {
continue;
}
findings.push(SecretFinding {
kind: SecretKind::Generic,
file: file_path.to_owned(),
line: (line_num + 1) as u32,
pattern: redact(matched),
severity: custom.severity,
in_comment: false,
});
}
}
}
findings
}
pub fn is_safe(&self, content: &str, file_path: &str) -> bool {
let findings = self.scan(content, file_path);
findings.iter().all(|f| f.severity < Severity::High)
}
pub fn summarize(findings: &[SecretFinding]) -> String {
if findings.is_empty() {
return "No secrets detected".to_owned();
}
let critical = findings
.iter()
.filter(|f| f.severity == Severity::Critical)
.count();
let high = findings
.iter()
.filter(|f| f.severity == Severity::High)
.count();
format!(
"Found {} potential secrets ({} critical, {} high severity)",
findings.len(),
critical,
high
)
}
pub fn redact_content(&self, content: &str, _file_path: &str) -> String {
let mut replacements: Vec<(usize, usize, String)> = Vec::new();
let mut current_byte_offset = 0usize;
for line in content.lines() {
let trimmed = line.trim();
let is_obvious_false_positive = RE_EXAMPLE_WORD.is_match(trimmed)
|| trimmed.to_lowercase().contains("placeholder")
|| trimmed.contains("xxxxx");
if !is_obvious_false_positive {
for pattern in &self.patterns {
if pattern.severity >= Severity::High {
for m in pattern.regex.find_iter(line) {
let matched = m.as_str();
if self.allowlist.iter().any(|a| matched.contains(a)) {
continue;
}
let start = current_byte_offset + m.start();
let end = current_byte_offset + m.end();
replacements.push((start, end, redact(matched)));
}
}
}
for custom in &self.custom_patterns {
if custom.severity >= Severity::High {
for m in custom.regex.find_iter(line) {
let matched = m.as_str();
if self.allowlist.iter().any(|a| matched.contains(a)) {
continue;
}
let start = current_byte_offset + m.start();
let end = current_byte_offset + m.end();
replacements.push((start, end, redact(matched)));
}
}
}
}
current_byte_offset += line.len() + 1;
}
replacements.sort_by(|a, b| {
let a_len = a.1 - a.0;
let b_len = b.1 - b.0;
a_len.cmp(&b_len).then(a.0.cmp(&b.0))
});
let mut filtered: Vec<(usize, usize, String)> = Vec::new();
for replacement in replacements {
let overlaps = filtered.iter().any(|(start, end, _)| {
replacement.0 < *end && *start < replacement.1
});
if !overlaps {
filtered.push(replacement);
}
}
let mut result = content.to_owned();
for (start, end, redacted) in filtered.into_iter().rev() {
if end <= result.len() {
result.replace_range(start..end, &redacted);
}
}
result
}
#[must_use = "security findings should be reviewed"]
pub fn scan_and_redact(&self, content: &str, file_path: &str) -> (String, Vec<SecretFinding>) {
let findings = self.scan(content, file_path);
let redacted = self.redact_content(content, file_path);
(redacted, findings)
}
}
/// Masks `s` for display, counting in characters rather than bytes.
///
/// Strings of eight characters or fewer are replaced entirely by `*`. Longer
/// strings keep `min(4, len / 4)` characters at each end and mask the middle, so
/// the result always has the same number of characters as the input.
fn redact(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let total = chars.len();
    if total <= 8 {
        return "*".repeat(total);
    }

    // Same number of characters is revealed at both ends.
    let keep = (total / 4).min(4);
    let masked = total.saturating_sub(keep * 2);

    let mut out = String::with_capacity(s.len());
    out.extend(chars[..keep].iter());
    out.push_str(&"*".repeat(masked));
    out.extend(chars[total - keep..].iter());
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    // A bare AWS access key ID is reported as an AWS credential.
    #[test]
    fn test_aws_key_detection() {
        let scanner = SecurityScanner::new();
        let content = r#"AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE""#;
        let findings = scanner.scan(content, "config.py");
        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::AwsCredential));
    }

    // A classic `ghp_` personal access token is reported as a GitHub token.
    #[test]
    fn test_github_token_detection() {
        let scanner = SecurityScanner::new();
        let content = r#"GITHUB_TOKEN = "ghp_abcdefghijklmnopqrstuvwxyz1234567890""#;
        let findings = scanner.scan(content, ".env");
        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::GitHubToken));
    }

    // Only the PEM header line needs to be present for a private-key finding.
    #[test]
    fn test_private_key_detection() {
        let scanner = SecurityScanner::new();
        let content = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpA...";
        let findings = scanner.scan(content, "key.pem");
        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.kind == SecretKind::PrivateKey));
    }

    // Both detectors that fire on this line produce matches containing the
    // allowlisted substring, so nothing is reported.
    #[test]
    fn test_allowlist() {
        let mut scanner = SecurityScanner::new();
        scanner.allowlist("EXAMPLE");
        let content = r#"api_key = "AKIAIOSFODNN7EXAMPLE""#;
        let findings = scanner.scan(content, "test.py");
        assert!(findings.is_empty());
    }

    #[test]
    fn test_redact() {
        assert_eq!(redact("AKIAIOSFODNN7EXAMPLE"), "AKIA************MPLE");
        assert_eq!(redact("short"), "*****");
    }

    // `redact` must count characters, not bytes. Exact expectations are pinned here
    // because a UTF-8 validity check on a `String` can never fail.
    #[test]
    fn test_redact_unicode_safety() {
        let chinese_secret = "密钥ABCDEFGHIJKLMNOP密钥";
        assert_eq!(redact(chinese_secret), "密钥AB************OP密钥");

        let emoji_secret = "🔑ABCDEFGHIJKLMNOP🔒";
        assert_eq!(redact(emoji_secret), "🔑ABC**********NOP🔒");

        let mixed_secret = "абвгдежзийклмноп";
        assert_eq!(redact(mixed_secret), "абвг********мноп");

        let short_chinese = "密钥";
        assert_eq!(redact(short_chinese), "**");
    }

    #[test]
    fn test_redact_edge_cases() {
        assert_eq!(redact(""), "");
        assert_eq!(redact("x"), "*");
        assert_eq!(redact("12345678"), "********");
        // Nine characters is the first length that reveals anything: 9 / 4 = 2 per side.
        assert_eq!(redact("123456789"), "12*****89");
    }

    #[test]
    fn test_comments_are_skipped() {
        let scanner = SecurityScanner::new();
        let content = "# api_key = 'some_secret_key_12345678901234567890'";
        let findings = scanner.scan(content, "test.py");
        assert!(findings.is_empty(), "Secrets in comments should be skipped");
    }

    #[test]
    fn test_non_comment_detected() {
        let scanner = SecurityScanner::new();
        let content = "api_key = 'some_secret_key_12345678901234567890'";
        let findings = scanner.scan(content, "test.py");
        assert!(!findings.is_empty(), "Secrets in non-comments should be detected");
        assert!(
            findings.iter().all(|f| !f.in_comment),
            "in_comment should be false for non-comment lines"
        );
    }

    #[test]
    fn test_custom_pattern() {
        let mut scanner = SecurityScanner::new();
        scanner
            .add_custom_pattern(r"CUSTOM_SECRET_[A-Z0-9]{16}")
            .unwrap();
        let content = "my_secret = CUSTOM_SECRET_ABCD1234EFGH5678";
        let findings = scanner.scan(content, "test.py");
        assert!(!findings.is_empty(), "Custom pattern should be detected");
        assert!(findings.iter().any(|f| f.kind == SecretKind::Generic));
    }

    #[test]
    fn test_custom_patterns_multiple() {
        let mut scanner = SecurityScanner::new();
        scanner
            .add_custom_patterns(&[
                r"MYAPP_KEY_[a-f0-9]{32}".to_owned(),
                r"MYAPP_TOKEN_[A-Z]{20}".to_owned(),
            ])
            .unwrap();
        let content = "key = MYAPP_KEY_0123456789abcdef0123456789abcdef";
        let findings = scanner.scan(content, "test.py");
        assert!(!findings.is_empty(), "Custom patterns should be detected");
    }

    #[test]
    fn test_invalid_custom_pattern_returns_error() {
        let mut scanner = SecurityScanner::new();
        let result = scanner.add_custom_pattern(r"INVALID_[PATTERN");
        assert!(result.is_err(), "Invalid regex should return error");
        let err = result.unwrap_err();
        match err {
            SecurityError::InvalidPattern { pattern, message } => {
                assert_eq!(pattern, r"INVALID_[PATTERN");
                assert!(!message.is_empty(), "Error message should not be empty");
            },
        }
    }

    // The unchecked variant must neither panic nor register the broken pattern.
    #[test]
    fn test_invalid_custom_pattern_unchecked() {
        let mut scanner = SecurityScanner::new();
        scanner.add_custom_pattern_unchecked(r"INVALID_[PATTERN");
        assert!(
            scanner.custom_patterns.is_empty(),
            "Invalid pattern should not be registered"
        );
        let content = "INVALID_[PATTERN here";
        let findings = scanner.scan(content, "test.py");
        assert!(findings.is_empty(), "No detector should fire on this line");
    }

    #[test]
    fn test_multiple_secrets_same_line() {
        let scanner = SecurityScanner::new();
        let content = r#"TOKEN1="ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" TOKEN2="ghp_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb""#;
        let findings = scanner.scan(content, "test.env");
        assert_eq!(findings.len(), 2, "Should detect both tokens on the same line");
        let (redacted, _) = scanner.scan_and_redact(content, "test.env");
        assert!(
            !redacted.contains("ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
            "First token should be redacted"
        );
        assert!(
            !redacted.contains("ghp_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
            "Second token should be redacted"
        );
        assert!(redacted.contains('*'), "Redacted content should contain asterisks");
    }

    #[test]
    fn test_redaction_preserves_structure() {
        let scanner = SecurityScanner::new();
        let content = "line1\napi_key = 'secret_key_12345678901234567890'\nline3";
        let (redacted, _) = scanner.scan_and_redact(content, "test.py");
        let lines: Vec<&str> = redacted.lines().collect();
        assert_eq!(lines.len(), 3, "Should preserve line count");
        assert_eq!(lines[0], "line1");
        assert_eq!(lines[2], "line3");
    }
}