use crate::error::Result;
use crate::schema::{ScanResult, SecretFinding, SourceLocation};
use once_cell::sync::Lazy;
use regex::Regex;
/// Labelled regular expressions for well-known credential formats.
///
/// Each entry pairs a secret-type label with its compiled pattern. The list is
/// built the first time it is dereferenced and reused afterwards. Entries keep
/// the order of the table below, which is the order `scan_file` tries them in.
///
/// Notes:
/// - The `openai_key` pattern also matches `sk-ant-…` keys, so such a key can
///   be reported under both `openai_key` and `anthropic_key`.
/// - `private_key` matches only the `-----BEGIN … PRIVATE KEY-----` header line.
static SECRET_PATTERNS: Lazy<Vec<(&str, Regex)>> = Lazy::new(|| {
    // (label, regex source) pairs, compiled below in this exact order.
    const PATTERN_SOURCES: &[(&str, &str)] = &[
        ("openai_key", r#"sk-[a-zA-Z0-9-]{20,}"#),
        ("anthropic_key", r#"sk-ant-[a-zA-Z0-9-]{50,}"#),
        ("aws_access_key", r#"\bAKIA[0-9A-Z]{16}\b"#),
        ("github_token", r#"\bghp_[a-zA-Z0-9]{36}\b"#),
        ("github_oauth", r#"\bgho_[a-zA-Z0-9]{36}\b"#),
        ("stripe_key", r#"sk_live_[a-zA-Z0-9]{24,}"#),
        (
            "jwt_token",
            r#"eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+"#,
        ),
        ("google_oauth", r#"ya29\.[0-9A-Za-z_-]+"#),
        (
            "slack_token",
            r#"xox[baprs]-[0-9]{10,12}-[0-9]{10,12}-[a-zA-Z0-9]{24,}"#,
        ),
        (
            "connection_string",
            r#"(?i)(postgresql|mysql|mongodb|redis)://[^\s'"]+:[^\s'"]+@[^\s'"]+"#,
        ),
        ("private_key", r#"-----BEGIN [A-Z ]+PRIVATE KEY-----"#),
    ];

    PATTERN_SOURCES
        .iter()
        // The sources are fixed literals, so a compile failure is a programming error.
        .map(|&(label, source)| (label, Regex::new(source).unwrap()))
        .collect::<Vec<_>>()
});
/// Variable-name fragments that suggest the assigned value is a secret.
///
/// `check_sensitive_variables` lower-cases the assigned variable's name and
/// tests whether it contains any of these fragments as a substring.
static SENSITIVE_VAR_NAMES: &[&str] = &[
    // API keys and generic secrets
    "api_key", "apikey", "api_secret", "secret_key", "secret",
    // Passwords
    "password", "passwd", "pwd",
    // Tokens
    "token", "access_token", "refresh_token", "auth_token", "bearer_token",
    // Credentials and private keys
    "credential", "credentials", "private_key", "privatekey",
    // Cloud and database settings
    "aws_access_key_id", "aws_secret_access_key", "database_password", "db_password",
    // Other key material
    "encryption_key", "master_key", "client_secret",
];
/// Lower-case fragments that mark a value as a placeholder rather than a real secret.
///
/// `is_false_positive` lower-cases a candidate value and treats it as a false
/// positive when it contains any of these fragments.
static FALSE_POSITIVE_PATTERNS: &[&str] = &[
    // "Fill this in" style template text
    "your_key_here", "insert_key_here", "replace_with", "placeholder",
    // Obvious dummy values
    "changeme", "secret_goes_here", "example_password", "test_password",
];
/// Scans every file listed in `result.manifest.files` for hardcoded secrets.
///
/// Files that cannot be read into a `String` (I/O failure or invalid UTF-8)
/// are skipped without reporting an error.
///
/// # Errors
///
/// Propagates any error returned by `scan_file`.
pub fn analyze(result: &ScanResult) -> Result<Vec<SecretFinding>> {
    let mut collected = Vec::new();
    for path in &result.manifest.files {
        match std::fs::read_to_string(path) {
            Ok(text) => collected.extend(scan_file(path, &text)?),
            // Best-effort scan: unreadable files are ignored.
            Err(_) => continue,
        }
    }
    Ok(collected)
}
/// Scans the text of one file, line by line, for hardcoded secrets.
///
/// For each line, every entry of `SECRET_PATTERNS` is tried in order. Only the
/// first match of each pattern on a line is considered, and matches rejected
/// by `is_false_positive` are dropped. After the pattern pass, the line is
/// also checked by `check_sensitive_variables`.
///
/// Reported line numbers are 1-based. The matched text is stored only in
/// masked form. In test builds the raw match is echoed to stderr for debugging.
///
/// # Errors
///
/// Propagates any error returned by `check_sensitive_variables`.
fn scan_file(file_path: &str, content: &str) -> Result<Vec<SecretFinding>> {
    let mut findings = Vec::new();

    for (index, line) in content.lines().enumerate() {
        let line_number = (index + 1) as u32;

        // Lazily yields (label, matched text) for each pattern that hits this line.
        let hits = SECRET_PATTERNS.iter().filter_map(|(secret_type, pattern)| {
            pattern
                .find(line)
                .map(|found| (*secret_type, found.as_str()))
        });

        for (secret_type, matched_value) in hits {
            #[cfg(test)]
            eprintln!("Found {secret_type} pattern: {matched_value} in line: {line}");

            if is_false_positive(matched_value) {
                #[cfg(test)]
                eprintln!(" Skipped as false positive");
                continue;
            }

            let id = format!(
                "secret_{}_{}",
                file_path.replace(['/', '.'], "_"),
                line_number
            );
            let message = format!(
                "Potential {} detected in source code",
                secret_type.replace('_', " ")
            );
            let location = SourceLocation {
                file: file_path.to_string(),
                line: line_number,
                end_line: Some(line_number),
                function: None,
            };

            findings.push(SecretFinding {
                id,
                secret_type: secret_type.to_string(),
                location,
                severity: determine_severity(secret_type),
                message,
                matched_pattern: Some(mask_secret(matched_value)),
            });
        }

        let variable_findings = check_sensitive_variables(file_path, line, line_number)?;
        findings.extend(variable_findings);
    }

    Ok(findings)
}
fn check_sensitive_variables(
file_path: &str,
line: &str,
line_number: u32,
) -> Result<Vec<SecretFinding>> {
let mut findings = Vec::new();
let assignment_pattern = Regex::new(r#"(\w+)\s*=\s*["']([^"']+)["']"#).unwrap();
if let Some(captures) = assignment_pattern.captures(line) {
let var_name = captures.get(1).map(|m| m.as_str()).unwrap_or("");
let var_value = captures.get(2).map(|m| m.as_str()).unwrap_or("");
let var_name_lower = var_name.to_lowercase();
if SENSITIVE_VAR_NAMES
.iter()
.any(|sensitive| var_name_lower.contains(sensitive))
{
if is_false_positive(var_value) {
return Ok(findings);
}
let entropy = calculate_shannon_entropy(var_value);
if entropy > 3.5 {
findings.push(SecretFinding {
id: format!(
"secret_var_{}_{}",
file_path.replace(['/', '.'], "_"),
line_number
),
secret_type: "sensitive_variable".to_string(),
location: SourceLocation {
file: file_path.to_string(),
line: line_number,
end_line: Some(line_number),
function: None,
},
severity: "high".to_string(),
message: format!(
"Sensitive variable '{var_name}' assigned with hardcoded value"
),
matched_pattern: Some(format!("{} = {}", var_name, mask_secret(var_value))),
});
}
}
}
Ok(findings)
}
/// Computes the Shannon entropy of `s` in bits per character.
///
/// Each distinct `char` is one symbol and its probability is its count
/// divided by the total number of characters. The divisor is the character
/// count, not the byte length, so multi-byte UTF-8 characters do not skew
/// the result.
///
/// Returns `0.0` for an empty string.
fn calculate_shannon_entropy(s: &str) -> f64 {
    if s.is_empty() {
        return 0.0;
    }

    let mut char_counts: std::collections::HashMap<char, usize> =
        std::collections::HashMap::new();
    let mut total_chars = 0usize;
    for c in s.chars() {
        *char_counts.entry(c).or_insert(0) += 1;
        total_chars += 1;
    }

    // Probabilities must be relative to the number of symbols counted above.
    let total = total_chars as f64;
    char_counts
        .values()
        .map(|&count| {
            let p = count as f64 / total;
            -p * p.log2()
        })
        .sum()
}
/// Reports whether `s` looks like a placeholder rather than a real secret.
///
/// The check is case-insensitive: `s` is lower-cased and tested for any entry
/// of `FALSE_POSITIVE_PATTERNS` as a substring. An exact match counts, since
/// a string contains itself.
fn is_false_positive(s: &str) -> bool {
    let lowered = s.to_lowercase();
    for placeholder in FALSE_POSITIVE_PATTERNS.iter() {
        if lowered.contains(placeholder) {
            return true;
        }
    }
    false
}
/// Returns a masked rendering of `s` that is safe to include in a report.
///
/// - Strings of 8 characters or fewer are replaced entirely by `*`, one per
///   character.
/// - Longer strings keep a short prefix of `min(4, len / 4)` characters,
///   followed by `...` and one `*` for each remaining character.
///
/// All lengths are counted in characters rather than bytes, so the prefix is
/// never cut in the middle of a multi-byte UTF-8 character. Byte-offset
/// slicing would panic in that case.
fn mask_secret(s: &str) -> String {
    let char_count = s.chars().count();
    if char_count <= 8 {
        return "*".repeat(char_count);
    }

    let visible_chars = 4.min(char_count / 4);
    let prefix: String = s.chars().take(visible_chars).collect();
    format!("{}...{}", prefix, "*".repeat(char_count - visible_chars))
}
/// Maps a secret-type label to a severity string.
///
/// Returns one of `"critical"`, `"high"`, `"medium"` or `"low"`. Labels with
/// no explicit arm fall back to `"low"`.
///
/// Every label defined in `SECRET_PATTERNS` has an explicit arm here, so a
/// recognised credential is never downgraded to the fallback by omission:
/// `github_oauth` is rated like `github_token`, and `google_oauth` like the
/// other bearer-style tokens.
fn determine_severity(secret_type: &str) -> String {
    let level = match secret_type {
        "openai_key" | "anthropic_key" | "aws_access_key" | "private_key" => "critical",
        "github_token" | "github_oauth" | "stripe_key" | "connection_string" => "high",
        "jwt_token" | "google_oauth" | "slack_token" | "azure_token" => "medium",
        _ => "low",
    };
    level.to_string()
}
// Unit tests for the secret scanner. Most tests feed a one-line snippet to
// `scan_file` and assert on the `secret_type` of the returned findings.
#[cfg(test)]
mod tests {
use super::*;
// Entropy sanity checks: a single repeated character scores below 1 bit,
// and a string of 16 distinct characters exceeds the 3.5 threshold.
#[test]
fn test_shannon_entropy() {
assert!(calculate_shannon_entropy("aaaaaaa") < 1.0);
assert!(calculate_shannon_entropy("sk-1234567890abcdef") > 3.0);
let entropy = calculate_shannon_entropy("xK9$mP2!qL7#vN4@");
assert!(entropy > 3.5); }
// An `sk-proj-…` style key must be reported as `openai_key`.
#[test]
fn test_openai_key_pattern() {
let code = r#"api_key = "sk-proj-1234567890abcdefghijklmnopqrstuvwxyz123456""#;
let findings = scan_file("test.py", code).unwrap();
eprintln!("Findings: {findings:?}");
assert!(!findings.is_empty(), "Expected to find openai_key pattern");
assert!(findings.iter().any(|f| f.secret_type == "openai_key"));
}
// An `sk-ant-…` key passed as a call argument must be reported as `anthropic_key`.
#[test]
fn test_anthropic_key_pattern() {
let code = r#"client = Anthropic(api_key="sk-ant-api03-1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdef")"#;
let findings = scan_file("test.py", code).unwrap();
assert!(!findings.is_empty());
assert!(findings.iter().any(|f| f.secret_type == "anthropic_key"));
}
// An `AKIA` + 16 uppercase/digit characters value must be reported as `aws_access_key`.
#[test]
fn test_aws_access_key_pattern() {
let code = r#"AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE""#;
let findings = scan_file("test.py", code).unwrap();
assert!(!findings.is_empty());
assert!(findings.iter().any(|f| f.secret_type == "aws_access_key"));
}
// A `ghp_` prefix followed by 36 alphanumerics must be reported as `github_token`.
#[test]
fn test_github_token_pattern() {
let code = r#"token = "ghp_1234567890abcdefghijklmnopqrstuvwxyz""#;
let findings = scan_file("test.py", code).unwrap();
assert!(!findings.is_empty());
assert!(findings.iter().any(|f| f.secret_type == "github_token"));
}
// A database URL with embedded `user:password@` must be reported as `connection_string`.
#[test]
fn test_connection_string_pattern() {
let code = r#"db_url = "postgresql://user:password@localhost:5432/mydb""#;
let findings = scan_file("test.py", code).unwrap();
assert!(!findings.is_empty());
assert!(findings
.iter()
.any(|f| f.secret_type == "connection_string"));
}
// A high-entropy literal assigned to `api_key` must yield at least one finding.
// The value has no known key prefix, so this exercises the variable-name check.
#[test]
fn test_sensitive_variable_detection() {
let code = r#"api_key = "sk9mP2qL7vN4xK1wR8tY3""#;
let findings = scan_file("test.py", code).unwrap();
assert!(!findings.is_empty());
}
// A placeholder value from `FALSE_POSITIVE_PATTERNS` must produce no findings.
#[test]
fn test_false_positive_filtering() {
let code = r#"api_key = "your_key_here""#;
let findings = scan_file("test.py", code).unwrap();
assert!(findings.is_empty());
}
// An example password value must likewise be filtered out.
#[test]
fn test_example_values_filtered() {
let code = r#"password = "example_password""#;
let findings = scan_file("test.py", code).unwrap();
assert!(findings.is_empty());
}
// A three-segment `eyJ….eyJ….…` token must be reported as `jwt_token`.
#[test]
fn test_jwt_token_pattern() {
let code = r#"token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c""#;
let findings = scan_file("test.py", code).unwrap();
assert!(!findings.is_empty());
assert!(findings.iter().any(|f| f.secret_type == "jwt_token"));
}
// A multi-line PEM block must be reported as `private_key` with critical severity.
// The fixture spans three lines; the BEGIN header is on the first.
#[test]
fn test_private_key_pattern() {
let code = r#"key = """-----BEGIN RSA PRIVATE KEY-----
MIIEpAIBAAKCAQEA...
-----END RSA PRIVATE KEY-----""""#;
let findings = scan_file("test.py", code).unwrap();
assert!(!findings.is_empty());
assert!(findings.iter().any(|f| f.secret_type == "private_key"));
assert!(findings.iter().any(|f| f.severity == "critical"));
}
// One representative label per severity tier; an unknown label falls back to "low".
#[test]
fn test_severity_levels() {
assert_eq!(determine_severity("openai_key"), "critical");
assert_eq!(determine_severity("github_token"), "high");
assert_eq!(determine_severity("jwt_token"), "medium");
assert_eq!(determine_severity("generic_api_key"), "low");
}
// Long values keep a 4-character prefix; values of 8 characters or fewer are fully starred.
#[test]
fn test_secret_masking() {
assert_eq!(mask_secret("sk-1234567890abcdef"), "sk-1...***************");
assert_eq!(mask_secret("short"), "*****");
}
}