use std::path::Path;
use anyhow::Result;
use once_cell::sync::Lazy;
use regex::Regex;
use walkdir::WalkDir;
use crate::finding::{redact, Category, Finding, Severity};
use crate::scanner::{ScanContext, Scanner};
/// One detection rule: a compiled regular expression together with the
/// metadata attached to every finding it produces.
struct SecretPattern {
    /// Human-readable credential type; interpolated into the finding's title
    /// and description.
    name: &'static str,

    /// Severity assigned to findings raised by this rule.
    severity: Severity,

    /// Passed as the second argument to `redact` when building the evidence
    /// string. Presumably the number of leading characters left visible
    /// (the values line up with each key's fixed prefix) — confirm against
    /// `redact`.
    keep: usize,

    /// Compiled expression. When capture group 1 participates in a match it is
    /// treated as the secret; otherwise the whole match is.
    re: Regex,
}
static PATTERNS: Lazy<Vec<SecretPattern>> = Lazy::new(|| {
vec![
SecretPattern {
name: "Anthropic API key",
severity: Severity::Critical,
keep: 10,
re: Regex::new(r"sk-ant-[a-z]{2,8}\d{0,6}-[A-Za-z0-9_-]{20,}").unwrap(),
},
SecretPattern {
name: "OpenAI API key",
severity: Severity::Critical,
keep: 7,
re: Regex::new(r"sk-(?:proj-)?[A-Za-z0-9_-]{20}T3BlbkFJ[A-Za-z0-9_-]{20}").unwrap(),
},
SecretPattern {
name: "OpenAI project key",
severity: Severity::Critical,
keep: 8,
re: Regex::new(r"sk-proj-[A-Za-z0-9_-]{48,}").unwrap(),
},
SecretPattern {
name: "xAI / Grok API key",
severity: Severity::Critical,
keep: 4,
re: Regex::new(r"xai-[A-Za-z0-9_-]{32,}").unwrap(),
},
SecretPattern {
name: "OpenRouter API key",
severity: Severity::Critical,
keep: 9,
re: Regex::new(r"sk-or-v1-[A-Za-z0-9_-]{48,}").unwrap(),
},
SecretPattern {
name: "Google AI / Gemini key",
severity: Severity::Critical,
keep: 4,
re: Regex::new(r"AIza[0-9A-Za-z\-_]{35}").unwrap(),
},
SecretPattern {
name: "Hugging Face token",
severity: Severity::High,
keep: 3,
re: Regex::new(r"hf_[A-Za-z0-9]{34}").unwrap(),
},
SecretPattern {
name: "AWS access key ID",
severity: Severity::Critical,
keep: 4,
re: Regex::new(r"(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}").unwrap(),
},
SecretPattern {
name: "GitHub personal access token",
severity: Severity::High,
keep: 4,
re: Regex::new(r"ghp_[0-9a-zA-Z]{36}").unwrap(),
},
SecretPattern {
name: "GitHub OAuth token",
severity: Severity::High,
keep: 4,
re: Regex::new(r"gho_[0-9a-zA-Z]{36}").unwrap(),
},
SecretPattern {
name: "GitHub fine-grained PAT",
severity: Severity::High,
keep: 11,
re: Regex::new(r"github_pat_[0-9a-zA-Z_]{82}").unwrap(),
},
SecretPattern {
name: "GitLab personal access token",
severity: Severity::High,
keep: 7,
re: Regex::new(r"glpat-[0-9a-zA-Z\-_]{20}").unwrap(),
},
SecretPattern {
name: "PEM private key",
severity: Severity::Critical,
keep: 11,
re: Regex::new(r"-----BEGIN (?:RSA |DSA |EC |OPENSSH )?PRIVATE KEY").unwrap(),
},
SecretPattern {
name: "JWT token",
severity: Severity::High,
keep: 5,
re: Regex::new(r"eyJ[A-Za-z0-9_-]{4,}\.eyJ[A-Za-z0-9_-]{4,}\.[A-Za-z0-9_-]{4,}").unwrap(),
},
SecretPattern {
name: "Database connection string with credentials",
severity: Severity::High,
keep: 8,
re: Regex::new(r"(?i)(?:postgres|mysql|mongodb|redis)://[^:@\s]{1,64}:[^@\s]{1,64}@").unwrap(),
},
SecretPattern {
name: "Generic high-entropy secret",
severity: Severity::Medium,
keep: 6,
re: Regex::new(
r#"(?i)(?:api[_-]?key|secret[_-]?key?|auth[_-]?token|access[_-]?token|password|passwd|pwd)\s*[=:]\s*['"]?([A-Za-z0-9/+!@#$%^&*]{32,})['"]?"#,
).unwrap(),
},
]
});
/// Lowercase file extensions (without the leading dot) that `scan_dir` skips
/// without reading the file.
const SKIP_EXTENSIONS: &[&str] = &[
    // Images
    "png", "jpg", "jpeg", "gif", "bmp", "ico", "webp", "svg",
    // Audio and video
    "mp3", "mp4", "wav", "ogg", "flac",
    // Archives
    "zip", "gz", "bz2", "tar", "xz", "7z",
    // Office documents
    "pdf", "doc", "docx", "xls", "xlsx",
    // Executables and compiled artifacts
    "bin", "exe", "dll", "so", "dylib", "wasm", "class",
];

/// Upper bound, in bytes, on the size of a file that will be read (8 MiB).
const MAX_FILE_SIZE: u64 = 8 << 20;
/// Scanner that searches the files under the scan root for hard-coded
/// credentials matching the built-in secret patterns.
pub struct SecretsScanner;
impl Scanner for SecretsScanner {
    /// Returns the fixed name of this scanner, `"secrets"`.
    fn name(&self) -> &'static str {
        "secrets"
    }

    /// Walks `ctx.root` and returns every secret finding collected from it.
    ///
    /// The walk itself is best-effort (see `scan_dir`), so this always
    /// returns `Ok`.
    fn scan(&self, ctx: &ScanContext) -> Result<Vec<Finding>> {
        let mut collected: Vec<Finding> = Vec::new();
        scan_dir(&ctx.root, &mut collected);
        Ok(collected)
    }
}
/// Recursively scans every regular file under `root` and appends any secrets
/// found to `findings`.
///
/// The walk is best-effort: directory entries that cannot be read and files
/// that cannot be opened are silently skipped. Symbolic links are not
/// followed. Files whose extension is listed in `SKIP_EXTENSIONS`
/// (case-insensitively) or whose size exceeds `MAX_FILE_SIZE` are not read.
fn scan_dir(root: &Path, findings: &mut Vec<Finding>) {
    let entries = WalkDir::new(root).follow_links(false).into_iter();

    // `flatten` drops the `Err` entries, keeping only readable ones.
    for entry in entries.flatten() {
        if !entry.file_type().is_file() {
            continue;
        }
        let file = entry.path();

        // Extensions that are not valid UTF-8 are treated as "not skipped".
        let has_skipped_extension = file
            .extension()
            .and_then(|raw| raw.to_str())
            .map_or(false, |ext| {
                SKIP_EXTENSIONS.contains(&ext.to_lowercase().as_str())
            });
        if has_skipped_extension {
            continue;
        }

        // If the size cannot be determined the file is still attempted.
        if matches!(entry.metadata(), Ok(meta) if meta.len() > MAX_FILE_SIZE) {
            continue;
        }

        let Ok(raw) = std::fs::read(file) else {
            continue;
        };
        // Lossy decoding lets files with stray non-UTF-8 bytes be scanned.
        let text = String::from_utf8_lossy(&raw);
        scan_content(&text, file, findings);
    }
}
/// Scans `content` line by line and appends one [`Finding`] to `findings` for
/// each distinct secret detected.
///
/// Every rule in `PATTERNS` is applied to every line, in order, and every
/// match of a rule is considered — so several credentials on a single line
/// (common in minified JSON or JSONL logs) are all reported. To avoid
/// duplicate reports, a match is dropped when an earlier-reported match on the
/// same line has identical text or an overlapping byte range; because rules
/// are tried in order, the more specific rule wins.
///
/// * `content` — full text of the file.
/// * `path` — file the text came from; used in the finding's description and
///   location.
/// * `findings` — output collection; only ever appended to.
///
/// Line numbers attached to findings are 1-based. The evidence attached to a
/// finding is the matched secret passed through `redact`.
fn scan_content(content: &str, path: &Path, findings: &mut Vec<Finding>) {
    for (line_no, line) in content.lines().enumerate() {
        // Byte range and text of each secret already reported on this line.
        let mut reported: Vec<(std::ops::Range<usize>, &str)> = Vec::new();

        for pattern in PATTERNS.iter() {
            for caps in pattern.re.captures_iter(line) {
                // Prefer capture group 1 (the bare value) when the rule
                // defines one; otherwise use the whole match.
                let Some(m) = caps.get(1).or_else(|| caps.get(0)) else {
                    continue;
                };

                let already_reported = reported.iter().any(|(span, text)| {
                    *text == m.as_str() || (span.start < m.end() && m.start() < span.end)
                });
                if already_reported {
                    continue;
                }
                reported.push((m.range(), m.as_str()));

                let evidence = redact(m.as_str(), pattern.keep);
                findings.push(
                    Finding::new(
                        pattern.severity,
                        Category::SecretDetection,
                        format!("{} detected", pattern.name),
                        format!(
                            "A {} was found in '{}'. This credential may have been \
                             pasted into a conversation or written by an agent and \
                             is now stored in plain text.",
                            pattern.name,
                            path.display()
                        ),
                        path,
                        "Rotate this credential immediately. Remove the file or \
                         redact the line. Consider running `ocls` again after \
                         rotation to verify the credential no longer appears.",
                    )
                    .with_line(line_no + 1)
                    .with_evidence(evidence),
                );
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Runs `scan_content` over `content` as if it came from a fixed dummy
    /// path and returns the findings.
    fn findings_for(content: &str) -> Vec<Finding> {
        let mut findings = Vec::new();
        scan_content(content, &PathBuf::from("/test/file.json"), &mut findings);
        findings
    }

    #[test]
    fn detects_anthropic_key() {
        let content =
            r#"{"token": "sk-ant-api03-abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGH"}"#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect Anthropic key");
        assert_eq!(findings[0].severity, Severity::Critical);
        assert!(findings[0].title.contains("Anthropic"));
    }

    #[test]
    fn detects_openai_key() {
        let content =
            r#"api_key = "sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz""#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect OpenAI project key");
        assert_eq!(findings[0].severity, Severity::Critical);
    }

    #[test]
    fn detects_xai_key() {
        let content = r#"key = "xai-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefgh""#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect xAI key");
        assert_eq!(findings[0].severity, Severity::Critical);
    }

    #[test]
    fn detects_openrouter_key() {
        let content =
            r#"key = "sk-or-v1-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz12345678""#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect OpenRouter key");
    }

    #[test]
    fn detects_google_ai_key() {
        let content = r#"key = "AIzaABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghi""#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect Google AI key");
    }

    #[test]
    fn detects_aws_key() {
        let content = "access_key = AKIAIOSFODNN7EXAMPLE";
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect AWS key");
        assert_eq!(findings[0].severity, Severity::Critical);
    }

    #[test]
    fn detects_github_pat() {
        let content = "token = ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect GitHub PAT");
        assert_eq!(findings[0].severity, Severity::High);
    }

    #[test]
    fn detects_gitlab_token() {
        let content = "token = glpat-abcdefghijklmnopqrst";
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect GitLab token");
    }

    #[test]
    fn detects_private_key_header() {
        let content = "-----BEGIN RSA PRIVATE KEY-----\nMIIEo...";
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect PEM private key");
        assert_eq!(findings[0].severity, Severity::Critical);
    }

    #[test]
    fn detects_database_url_with_credentials() {
        let content = r#"DATABASE_URL=postgres://admin:supersecret@localhost:5432/mydb"#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect DB connection string");
    }

    #[test]
    fn no_false_positive_on_empty_line() {
        assert!(findings_for("").is_empty());
    }

    #[test]
    fn no_false_positive_on_safe_json() {
        let content = r#"{"model": "claude-3-5-sonnet-20241022", "max_tokens": 4096}"#;
        assert!(findings_for(content).is_empty());
    }

    #[test]
    fn no_false_positive_on_env_variable_reference() {
        // `${...}` is a placeholder, not a secret: the braces fall outside the
        // generic rule's value character class, so nothing should match.
        let content = r#"api_key = "${OPENAI_API_KEY}""#;
        let findings = findings_for(content);
        assert!(
            findings.is_empty(),
            "env variable reference must not be reported as a secret"
        );
    }

    #[test]
    fn evidence_is_redacted() {
        let content = "token = AKIAIOSFODNN7EXAMPLE";
        let findings = findings_for(content);
        // Fail loudly if nothing was found or no evidence was attached, rather
        // than silently skipping the redaction checks.
        let ev = findings
            .first()
            .and_then(|f| f.evidence.as_deref())
            .expect("AWS key finding should carry evidence");
        assert!(ev.contains("****"), "evidence must be redacted: {}", ev);
        assert!(
            !ev.contains("EXAMPLE"),
            "evidence must not contain full secret"
        );
    }

    #[test]
    fn finding_has_correct_line_number() {
        let content = "normal line\ntoken = AKIAIOSFODNN7EXAMPLE\nanother line";
        let findings = findings_for(content);
        assert!(!findings.is_empty());
        assert_eq!(findings[0].line, Some(2));
    }
}