use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::Path;
use anyhow::{Context, Result};
use chrono::Utc;
use regex::Regex;
use walkdir::{DirEntry, WalkDir};
use crate::model::{
CiSecretReference, FindingKind, ObservedEnvRead, ScanFinding, ScanReport, ScanSummary,
Severity, ATTEST_VERSION, SCAN_SCHEMA,
};
use crate::redact;
/// Exact variable names that `scan_file` probes for on every line: each name is
/// handed to `extract_assigned_value`, which looks for `NAME = value` or
/// `NAME: value` on that line.
///
/// Matching against these names is case-sensitive (plain substring search).
const KNOWN_SECRET_NAMES: &[&str] = &[
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY",
"STRIPE_SECRET_KEY",
"GITHUB_TOKEN",
"GH_TOKEN",
"DATABASE_URL",
"API_TOKEN",
"SECRET_KEY",
"PRIVATE_KEY",
"JWT_SECRET",
"OPENAI_API_KEY",
"ANTHROPIC_API_KEY",
"AZURE_CLIENT_SECRET",
"PROD_DEPLOY_KEY",
];
/// Variable names that `is_sensitive_env_name` and `is_high_risk_env_name`
/// accept by exact match after upper-casing the candidate name.
///
/// Compared with `KNOWN_SECRET_NAMES`, this list omits `DATABASE_URL`,
/// `API_TOKEN` and `SECRET_KEY`.
const HIGH_RISK_ENV_NAMES: &[&str] = &[
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY",
"STRIPE_SECRET_KEY",
"GITHUB_TOKEN",
"GH_TOKEN",
"PRIVATE_KEY",
"JWT_SECRET",
"OPENAI_API_KEY",
"ANTHROPIC_API_KEY",
"AZURE_CLIENT_SECRET",
"PROD_DEPLOY_KEY",
];
/// Reports whether an environment variable name looks like it carries a secret.
///
/// The name is upper-cased (ASCII) first, so the check is case-insensitive.
/// A name qualifies when it is listed in `HIGH_RISK_ENV_NAMES`, contains one
/// of a fixed set of secret-ish fragments, or ends in `_KEY` / `_PWD`.
pub fn is_sensitive_env_name(name: &str) -> bool {
    const FRAGMENTS: [&str; 7] = [
        "SECRET",
        "TOKEN",
        "CREDENTIAL",
        "CREDS",
        "PASSWORD",
        "PASSWD",
        "PRIVATE_KEY",
    ];
    const SUFFIXES: [&str; 2] = ["_KEY", "_PWD"];

    let normalized = name.to_ascii_uppercase();
    if HIGH_RISK_ENV_NAMES.contains(&normalized.as_str()) {
        return true;
    }
    FRAGMENTS
        .iter()
        .any(|&fragment| normalized.contains(fragment))
        || SUFFIXES.iter().any(|&suffix| normalized.ends_with(suffix))
}
/// Reports whether an environment variable name belongs to the high-risk set.
///
/// The name is upper-cased (ASCII) first. It is high-risk when it appears in
/// `HIGH_RISK_ENV_NAMES` or starts with one of the vendor prefixes below.
pub fn is_high_risk_env_name(name: &str) -> bool {
    const VENDOR_PREFIXES: [&str; 5] = ["AWS_", "GH_", "GITHUB_", "STRIPE_", "AZURE_"];

    let normalized = name.to_ascii_uppercase();
    let listed = HIGH_RISK_ENV_NAMES
        .iter()
        .any(|&known| known == normalized);
    listed
        || VENDOR_PREFIXES
            .iter()
            .any(|&prefix| normalized.starts_with(prefix))
}
pub fn scan_repo(repo: &Path) -> Result<ScanReport> {
let repo =
fs::canonicalize(repo).with_context(|| format!("repo not found: {}", repo.display()))?;
let mut findings = Vec::new();
let mut observed_env_reads = Vec::new();
let mut ci_secret_references = Vec::new();
let mut observed_seen = HashSet::new();
for entry in WalkDir::new(&repo)
.into_iter()
.filter_entry(|entry| !is_ignored_entry(entry))
{
let entry = entry?;
if !entry.file_type().is_file() || !is_scannable_file(entry.path(), &repo) {
continue;
}
let relative = relative_path(entry.path(), &repo);
if entry
.metadata()
.map(|m| m.len() > 5 * 1024 * 1024)
.unwrap_or(false)
{
continue;
}
let Ok(content) = fs::read_to_string(entry.path()) else {
continue;
};
scan_file(
&relative,
&content,
&mut findings,
&mut observed_env_reads,
&mut observed_seen,
&mut ci_secret_references,
)?;
}
renumber_findings(&mut findings);
let summary = summarize(&findings);
Ok(ScanReport {
schema: SCAN_SCHEMA.to_string(),
repo_path: repo.display().to_string(),
repo_commit: current_commit(&repo),
scanned_at: Utc::now(),
scanner_version: ATTEST_VERSION.to_string(),
findings,
observed_env_reads,
ci_secret_references,
summary,
})
}
/// Best-effort lookup of the commit id `HEAD` points at, read directly from
/// the `.git` directory under `repo` (no `git` process is spawned).
///
/// Resolution order:
/// 1. A detached `HEAD` holds the commit id itself.
/// 2. A symbolic `HEAD` (`ref: refs/heads/…`) is resolved through the loose
///    ref file of that name.
/// 3. If the loose ref is missing or empty, `.git/packed-refs` is searched,
///    since git moves refs there when packing them.
///
/// Returns `None` when `HEAD` cannot be read, is empty, or the ref cannot be
/// resolved by either route.
fn current_commit(repo: &Path) -> Option<String> {
    let git_dir = repo.join(".git");
    let head_content = fs::read_to_string(git_dir.join("HEAD")).ok()?;
    let head = head_content.trim();

    let Some(reference) = head.strip_prefix("ref: ") else {
        // Detached HEAD; an empty file is reported as "unknown", not as "".
        return (!head.is_empty()).then(|| head.to_string());
    };
    let reference = reference.trim();

    if let Ok(loose) = fs::read_to_string(git_dir.join(reference)) {
        let sha = loose.trim();
        if !sha.is_empty() {
            return Some(sha.to_string());
        }
    }

    // packed-refs lines are "<sha> <refname>". Header lines ("# …") and peeled
    // tag lines ("^<sha>") never have the ref name as their second field.
    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed.lines().find_map(|line| {
        let mut fields = line.split_whitespace();
        let sha = fields.next()?;
        let name = fields.next()?;
        (name == reference).then(|| sha.to_string())
    })
}
pub fn write_scan(report: &ScanReport, output: &Path) -> Result<()> {
let json = serde_json::to_string_pretty(report)?;
ensure_parent_dir(output)?;
fs::write(output, json).with_context(|| format!("write scan report: {}", output.display()))
}
/// Prints a human-readable overview of `report` to standard output: repo
/// path, commit (or "unknown"), one row per finding, and the risk score.
pub fn print_summary(report: &ScanReport) {
    let commit = report.repo_commit.as_deref().unwrap_or("unknown");

    println!("tsafe attest scan complete");
    println!("Repo: {}", report.repo_path);
    println!("Commit: {commit}");
    println!("Findings:");
    for finding in &report.findings {
        let location = format!("{}:{}", finding.file, finding.line);
        // Findings without a name show "-" in the name position.
        let label = finding.name.as_deref().unwrap_or("-");
        let description = format!("{label} {}", finding.message);
        println!(
            " {:<8} {:<32} {}",
            finding.severity.label(),
            location,
            description.trim()
        );
    }
    println!("Risk score: {}/100", report.summary.risk_score);
}
/// Scans the text of one file line by line and appends everything it detects
/// to the shared output collections.
///
/// * `file` - path string used for all file-type checks and recorded on each
///   finding (`scan_repo` passes the repo-relative path).
/// * `content` - full text of the file.
/// * `findings` - receives one `ScanFinding` per detection.
/// * `observed_env_reads` / `observed_seen` - environment reads, de-duplicated
///   on `(name, file, line)`.
/// * `ci_secret_references` - receives GitHub Actions `secrets.NAME` references.
///
/// Several detectors downgrade a hit to `FindingKind::SecretPlaceholder` with
/// `Severity::Info` when the file path looks like an example/fixture, the line
/// is inside a Python triple-quoted string, or the line is a comment.
///
/// # Errors
///
/// Returns an error only if one of the regular expressions fails to compile.
#[allow(clippy::too_many_arguments)]
fn scan_file(
file: &str,
content: &str,
findings: &mut Vec<ScanFinding>,
observed_env_reads: &mut Vec<ObservedEnvRead>,
observed_seen: &mut HashSet<(String, String, usize)>,
ci_secret_references: &mut Vec<CiSecretReference>,
) -> Result<()> {
// Detection patterns.
// NOTE(review): every pattern below is recompiled on each call, i.e. once per
// scanned file; hoisting them into lazily-initialised statics would avoid that.
let env_assign = Regex::new(r#"^\s*([A-Z0-9_]+)\s*=\s*("?[^"\n]*"?|'?[^'\n]*'?)\s*$"#)?;
let private_key = Regex::new(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")?;
let connection = Regex::new(r#"(?i)(postgres|mysql|mongodb|redis)://[^ \n'"]+"#)?;
let generic_secret = Regex::new(
r#"(?i)(api[_-]?key|secret|token|password|passwd|pwd)\s*[:=]\s*['"]?([A-Za-z0-9_\-./+=]{16,})['"]?"#,
)?;
let gha_secret = Regex::new(r#"\$\{\{\s*secrets\.([A-Z0-9_]+)\s*\}\}"#)?;
let aws_object_key_id = Regex::new(
r#"(?i)['"]?\b(?:aws[_-]?access[_-]?key[_-]?id|accessKeyId)\b['"]?\s*[:=]\s*['"]([A-Z0-9]{16,})['"]"#,
)?;
let aws_object_secret = Regex::new(
r#"(?i)['"]?\b(?:aws[_-]?secret[_-]?access[_-]?key|secretAccessKey)\b['"]?\s*[:=]\s*['"]([A-Za-z0-9+/=]{20,})['"]"#,
)?;
let aws_akia_literal = Regex::new(r#"\b(AKIA[0-9A-Z]{16})\b"#)?;
let js_full_env = Regex::new(r"\bprocess\.env\b")?;
let js_dot = Regex::new(r"process\.env\.([A-Z0-9_]+)")?;
let js_index = Regex::new(r#"process\.env\[['"]([A-Z0-9_]+)['"]\]"#)?;
let js_env_alias = Regex::new(
r#"\b(?:const|let|var)\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*=\s*process\.env(?:\.([A-Z0-9_]+)|\[['"]([A-Z0-9_]+)['"]\])"#,
)?;
let py_environ = Regex::new(r#"os\.environ\[['"]([A-Z0-9_]+)['"]\]"#)?;
let py_getenv = Regex::new(r#"os\.getenv\(['"]([A-Z0-9_]+)['"]\)"#)?;
let rust_env = Regex::new(r#"(?:std::env::var|env::var)\(["']([A-Z0-9_]+)["']\)"#)?;
let shell_export = Regex::new(r"export\s+([A-Z0-9_]+)=")?;
let shell_var = Regex::new(r#"\$\{?([A-Z][A-Z0-9_]{2,})\}?"#)?;
let env_interpolation = Regex::new(r#"\$\{([A-Z][A-Z0-9_]{2,})(?::[-?][^}]*)?\}"#)?;
let compose_bare_env = Regex::new(r"^\s*-\s*([A-Z][A-Z0-9_]{2,})\s*(?:#.*)?$")?;
let docker_bare_arg = Regex::new(r"^\s*ARG\s+([A-Z][A-Z0-9_]{2,})\s*(?:#.*)?$")?;
// Maps a JS variable name to the env var it was assigned from
// (`const x = process.env.NAME`), accumulated over the lines seen so far.
let mut env_aliases: HashMap<String, String> = HashMap::new();
// Per-line flags from `compute_docstring_lines`, indexed by 0-based line index.
let docstring_lines: Vec<bool> = compute_docstring_lines(file, content);
let file_path_is_placeholder = is_placeholder_file_path(file);
for (index, line) in content.lines().enumerate() {
// Reported line numbers are 1-based.
let line_number = index + 1;
let in_docstring = *docstring_lines.get(index).unwrap_or(&false);
// --- .env-style files: inspect `NAME=value` assignments ---
if is_env_file(file) {
if let Some(caps) = env_assign.captures(line) {
let name = caps.get(1).map(|m| m.as_str()).unwrap_or_default();
let value = caps
.get(2)
.map(|m| m.as_str().trim().trim_matches(['"', '\'']))
.unwrap_or_default();
// Flag either a sensitive-looking name or a connection URL as the value.
let secretish = is_sensitive_env_name(name) || connection.is_match(value);
if secretish {
// Example/fixture paths and `${VAR}` pass-through values are downgraded.
let placeholder_match =
file_path_is_placeholder || is_env_interpolation_value(value);
let kind = if placeholder_match {
FindingKind::SecretPlaceholder
} else {
FindingKind::EnvFile
};
let severity = if placeholder_match {
Severity::Info
} else {
env_file_severity(file, value)
};
let message = if placeholder_match {
".env example/placeholder value (suppressed)"
} else {
".env file contains a likely secret-bearing variable"
};
push_secret_finding(
findings,
SecretFindingInput {
kind,
severity,
confidence: confidence_for_value(value, 0.85),
location: FindingLocation {
file,
line: line_number,
column: column_of(line, name),
},
secret_type: "generic-env",
name,
value,
message,
},
);
} else if file.ends_with(".example") {
// Non-sensitive variables in `.example` files are still recorded, at Info level.
findings.push(base_finding(
FindingKind::SecretPlaceholder,
Severity::Info,
0.35,
FindingLocation {
file,
line: line_number,
column: 1,
},
Some(name.to_string()),
".env example variable present (suppressed)".to_string(),
));
}
}
}
// --- PEM private key header ---
if private_key.is_match(line) {
let placeholder_match =
file_path_is_placeholder || in_docstring || is_doc_or_comment_line(file, line);
let kind = if placeholder_match {
FindingKind::SecretPlaceholder
} else {
FindingKind::PrivateKey
};
let severity = if placeholder_match {
Severity::Info
} else {
Severity::Critical
};
let message = if placeholder_match {
"Private key block in placeholder/comment context (suppressed)".to_string()
} else {
"Private key block committed".to_string()
};
findings.push(base_finding(
kind,
severity,
0.90,
FindingLocation {
file,
line: line_number,
column: 1,
},
Some("PRIVATE_KEY".to_string()),
message,
));
}
// --- Known secret names assigned a literal value ---
for secret_name in KNOWN_SECRET_NAMES {
let Some(value) = extract_assigned_value(line, secret_name) else {
continue;
};
// Env files are handled above; lines that read the value from the environment are skipped.
if should_skip_hardcoded_secret(file, line, value) {
continue;
}
let placeholder_match =
file_path_is_placeholder || in_docstring || is_doc_or_comment_line(file, line);
let kind = if placeholder_match {
FindingKind::SecretPlaceholder
} else {
FindingKind::HardcodedSecret
};
let severity = if placeholder_match {
Severity::Info
} else if is_high_risk_env_name(secret_name) {
Severity::High
} else {
Severity::Medium
};
let message = if placeholder_match {
"Known secret-bearing variable in placeholder/comment context (suppressed)"
} else {
"Known secret-bearing variable appears in source"
};
push_secret_finding(
findings,
SecretFindingInput {
kind,
severity,
confidence: confidence_for_value(value, 0.95),
location: FindingLocation {
file,
line: line_number,
column: column_of(line, secret_name),
},
secret_type: "known-secret-name",
name: secret_name,
value,
message,
},
);
}
// --- Connection strings outside env files (env files are covered by the block above) ---
if !is_env_file(file) {
for caps in connection.captures_iter(line) {
if let Some(value) = caps.get(0) {
let placeholder_match = file_path_is_placeholder
|| in_docstring
|| is_doc_or_comment_line(file, line);
let kind = if placeholder_match {
FindingKind::SecretPlaceholder
} else {
FindingKind::HardcodedSecret
};
let severity = if placeholder_match {
Severity::Info
} else {
Severity::High
};
let message = if placeholder_match {
"Connection string in placeholder/comment context (suppressed)"
} else {
"Connection string appears in source"
};
push_secret_finding(
findings,
SecretFindingInput {
kind,
severity,
confidence: confidence_for_value(value.as_str(), 0.75),
location: FindingLocation {
file,
line: line_number,
// Byte offset of the match start, made 1-based.
column: value.start() + 1,
},
secret_type: "connection-string",
name: "CONNECTION_STRING",
value: value.as_str(),
message,
},
);
}
}
}
// --- Generic `key/secret/token/password = <16+ chars>` assignments ---
if !should_skip_generic_secret_line(file, line) {
for caps in generic_secret.captures_iter(line) {
let name = caps.get(1).map(|m| m.as_str()).unwrap_or("SECRET");
let value = caps.get(2).map(|m| m.as_str()).unwrap_or_default();
if value.contains("process.env") || value.contains("secrets.") {
continue;
}
let placeholder_match =
file_path_is_placeholder || in_docstring || is_doc_or_comment_line(file, line);
let kind = if placeholder_match {
FindingKind::SecretPlaceholder
} else {
FindingKind::HardcodedSecret
};
let severity = if placeholder_match {
Severity::Info
} else {
Severity::High
};
let message = if placeholder_match {
"Generic secret-looking value in placeholder/comment context (suppressed)"
} else {
"Generic secret-looking assignment appears in source"
};
push_secret_finding(
findings,
SecretFindingInput {
kind,
severity,
confidence: confidence_for_value(value, 0.75),
location: FindingLocation {
file,
line: line_number,
column: column_of(line, name),
},
secret_type: "generic-secret",
name,
value,
message,
},
);
}
}
// --- AWS credentials: object-syntax assignments and bare AKIA literals ---
if !is_yaml_comment_line(line) {
for caps in aws_object_key_id.captures_iter(line) {
if let Some(value) = caps.get(1) {
push_aws_object_finding(
findings,
AwsObjectFindingArgs {
file,
line,
line_number,
value: value.as_str(),
name: "AWS_ACCESS_KEY_ID",
file_path_is_placeholder,
in_docstring,
},
);
}
}
for caps in aws_object_secret.captures_iter(line) {
if let Some(value) = caps.get(1) {
push_aws_object_finding(
findings,
AwsObjectFindingArgs {
file,
line,
line_number,
value: value.as_str(),
name: "AWS_SECRET_ACCESS_KEY",
file_path_is_placeholder,
in_docstring,
},
);
}
}
// The bare AKIA scan is skipped when the line already matched as an access-key
// assignment above - presumably to avoid reporting the same key twice.
let already_caught = aws_object_key_id.is_match(line)
|| extract_assigned_value(line, "AWS_ACCESS_KEY_ID").is_some();
if !already_caught {
for caps in aws_akia_literal.captures_iter(line) {
if let Some(value) = caps.get(1) {
push_aws_object_finding(
findings,
AwsObjectFindingArgs {
file,
line,
line_number,
value: value.as_str(),
name: "AWS_ACCESS_KEY_ID",
file_path_is_placeholder,
in_docstring,
},
);
}
}
}
}
// --- GitHub Actions `${{ secrets.NAME }}` references ---
// Each reference is recorded both as a CI secret reference and as a finding.
if !is_yaml_comment_line(line) {
for caps in gha_secret.captures_iter(line) {
let name = caps.get(1).map(|m| m.as_str()).unwrap_or_default();
ci_secret_references.push(CiSecretReference {
name: name.to_string(),
provider: "github-actions".to_string(),
file: file.to_string(),
line: line_number,
context: format!("secrets.{name}"),
});
findings.push(base_finding(
FindingKind::CiSecretReference,
Severity::High,
0.95,
FindingLocation {
file,
line: line_number,
column: column_of(line, name),
},
Some(name.to_string()),
format!("GitHub Actions secret {name} referenced"),
));
}
}
// --- package.json `"scripts"` section (only matches a package.json at the scan root) ---
if file == "package.json" && line.contains("\"scripts\"") {
findings.push(base_finding(
FindingKind::RiskyEnvPropagation,
Severity::Low,
0.50,
FindingLocation {
file,
line: line_number,
column: 1,
},
None,
"npm script likely inherits ambient environment".to_string(),
));
}
// --- Whole `process.env` object referenced without a specific key ---
if !is_js_comment_line(line)
&& js_full_env.is_match(line)
&& !js_dot.is_match(line)
&& !js_index.is_match(line)
{
findings.push(base_finding(
FindingKind::RiskyEnvPropagation,
Severity::Medium,
0.70,
FindingLocation {
file,
line: line_number,
column: column_of(line, "process.env"),
},
Some("process.env".to_string()),
"JavaScript references the full process.env ambient environment".to_string(),
));
}
// --- Env-derived JS variables used in destructive filesystem calls ---
if !is_js_comment_line(line) {
// Record aliases declared on this line before checking sinks, so a
// declaration and its use on the same line are both seen.
for caps in js_env_alias.captures_iter(line) {
let Some(variable) = caps.get(1).map(|m| m.as_str()) else {
continue;
};
let Some(env_name) = caps.get(2).or_else(|| caps.get(3)).map(|m| m.as_str()) else {
continue;
};
env_aliases.insert(variable.to_string(), env_name.to_string());
}
for (variable, env_name) in &env_aliases {
if is_destructive_path_sink(line, variable) {
findings.push(base_finding(
FindingKind::RiskyEnvPropagation,
Severity::High,
0.80,
FindingLocation {
file,
line: line_number,
column: column_of(line, variable),
},
Some(env_name.to_string()),
format!(
"Environment-controlled path {env_name} is used in a destructive filesystem operation"
),
));
}
}
}
// --- Dockerfile `ARG NAME` with no default value ---
if is_dockerfile(file) {
if let Some(name) = docker_bare_arg
.captures(line)
.and_then(|caps| caps.get(1))
.map(|m| m.as_str())
{
push_env_authority_reference(
findings,
observed_env_reads,
observed_seen,
EnvAuthorityInput {
file,
source_line: line,
line_number,
name,
language: "dockerfile",
confidence: 0.65,
kind: FindingKind::RiskyEnvPropagation,
severity: Severity::Low,
message: format!(
"Dockerfile build argument {name} can receive ambient build environment"
),
},
);
}
}
// --- `${NAME}` interpolation in Dockerfiles and docker-compose files ---
if !is_yaml_comment_line(line) && (is_dockerfile(file) || is_docker_compose_file(file)) {
let language = if is_dockerfile(file) {
"dockerfile"
} else {
"docker-compose"
};
for caps in env_interpolation.captures_iter(line) {
if let Some(name) = caps.get(1).map(|m| m.as_str()) {
push_env_read(
findings,
observed_env_reads,
observed_seen,
EnvReadInput {
file,
source_line: line,
line_number,
name,
language,
confidence: 0.75,
},
);
}
}
}
// --- docker-compose list items consisting of a bare upper-case name (`- NAME`) ---
if !is_yaml_comment_line(line) && is_docker_compose_file(file) {
if let Some(name) = compose_bare_env
.captures(line)
.and_then(|caps| caps.get(1))
.map(|m| m.as_str())
{
push_env_read(
findings,
observed_env_reads,
observed_seen,
EnvReadInput {
file,
source_line: line,
line_number,
name,
language: "docker-compose",
confidence: 0.65,
},
);
}
}
// --- Explicit env reads in JS, Python and Rust syntax ---
// These patterns run on every file regardless of its extension.
for (regex, language) in [
(&js_dot, "javascript"),
(&js_index, "javascript"),
(&py_environ, "python"),
(&py_getenv, "python"),
(&rust_env, "rust"),
] {
for caps in regex.captures_iter(line) {
if let Some(name) = caps.get(1).map(|m| m.as_str()) {
push_env_read(
findings,
observed_env_reads,
observed_seen,
EnvReadInput {
file,
source_line: line,
line_number,
name,
language,
confidence: 0.90,
},
);
}
}
}
// --- Shell scripts: `export NAME=` and `$NAME` / `${NAME}` expansions ---
if file.ends_with(".sh") {
for caps in shell_export.captures_iter(line) {
if let Some(name) = caps.get(1).map(|m| m.as_str()) {
findings.push(base_finding(
FindingKind::UnsafeExport,
Severity::Medium,
0.65,
FindingLocation {
file,
line: line_number,
column: column_of(line, name),
},
Some(name.to_string()),
format!("Shell script exports {name} into process environment"),
));
}
}
for caps in shell_var.captures_iter(line) {
if let Some(name) = caps.get(1).map(|m| m.as_str()) {
push_env_read(
findings,
observed_env_reads,
observed_seen,
EnvReadInput {
file,
source_line: line,
line_number,
name,
language: "shell",
confidence: 0.50,
},
);
}
}
}
}
Ok(())
}
/// Position of a finding inside a scanned file.
#[derive(Clone, Copy)]
struct FindingLocation<'a> {
// Path string recorded on the finding.
file: &'a str,
// 1-based line number.
line: usize,
// 1-based column; callers derive it from a byte offset within the line.
column: usize,
}
/// Arguments for `push_env_read`: one observed read of an environment variable.
struct EnvReadInput<'a> {
// Path string of the file being scanned.
file: &'a str,
// Full text of the line; used to compute the column of `name`.
source_line: &'a str,
// 1-based line number.
line_number: usize,
// Name of the environment variable that is read.
name: &'a str,
// Label for the syntax that matched (e.g. "javascript", "shell").
language: &'a str,
// Confidence value copied onto the finding and the observed read.
confidence: f32,
}
/// Arguments for `push_env_authority_reference`: an environment reference
/// together with the kind, severity and message of the finding to emit.
struct EnvAuthorityInput<'a> {
// Path string of the file being scanned.
file: &'a str,
// Full text of the line; used to compute the column of `name`.
source_line: &'a str,
// 1-based line number.
line_number: usize,
// Name of the referenced environment variable.
name: &'a str,
// Label for the syntax that matched (e.g. "dockerfile").
language: &'a str,
// Confidence value copied onto the finding and the observed read.
confidence: f32,
// Kind assigned to the emitted finding.
kind: FindingKind,
// Severity assigned to the emitted finding.
severity: Severity,
// Message assigned to the emitted finding.
message: String,
}
/// Arguments for `push_secret_finding`: a finding that carries a secret value.
struct SecretFindingInput<'a> {
// Kind assigned to the emitted finding.
kind: FindingKind,
// Severity assigned to the emitted finding.
severity: Severity,
// Confidence assigned to the emitted finding.
confidence: f32,
// Where the secret was found.
location: FindingLocation<'a>,
// Detector label stored in the finding's `secret_type`.
secret_type: &'a str,
// Variable or key name associated with the value.
name: &'a str,
// Raw matched value; only its redacted form and fingerprint are stored on the finding.
value: &'a str,
// Message assigned to the emitted finding.
message: &'a str,
}
/// Arguments for `push_aws_object_finding`: one AWS credential match on a line.
#[derive(Clone, Copy)]
struct AwsObjectFindingArgs<'a> {
// Path string of the file being scanned.
file: &'a str,
// Full text of the line the credential was found on.
line: &'a str,
// 1-based line number.
line_number: usize,
// The matched credential value.
value: &'a str,
// Name reported on the finding (e.g. "AWS_ACCESS_KEY_ID").
name: &'a str,
// Whether the file path was classified as an example/fixture path.
file_path_is_placeholder: bool,
// Whether the line was flagged by `compute_docstring_lines`.
in_docstring: bool,
}
fn push_env_read(
findings: &mut Vec<ScanFinding>,
observed_env_reads: &mut Vec<ObservedEnvRead>,
observed_seen: &mut HashSet<(String, String, usize)>,
input: EnvReadInput<'_>,
) {
push_env_authority_reference(
findings,
observed_env_reads,
observed_seen,
EnvAuthorityInput {
file: input.file,
source_line: input.source_line,
line_number: input.line_number,
name: input.name,
language: input.language,
confidence: input.confidence,
kind: FindingKind::RuntimeEnvRead,
severity: Severity::Medium,
message: format!("Runtime reads environment variable {}", input.name),
},
);
}
fn push_env_authority_reference(
findings: &mut Vec<ScanFinding>,
observed_env_reads: &mut Vec<ObservedEnvRead>,
observed_seen: &mut HashSet<(String, String, usize)>,
input: EnvAuthorityInput<'_>,
) {
let key = (
input.name.to_string(),
input.file.to_string(),
input.line_number,
);
if observed_seen.insert(key) {
observed_env_reads.push(ObservedEnvRead {
name: input.name.to_string(),
file: input.file.to_string(),
line: input.line_number,
language: input.language.to_string(),
confidence: input.confidence,
});
}
findings.push(base_finding(
input.kind,
input.severity,
input.confidence,
FindingLocation {
file: input.file,
line: input.line_number,
column: column_of(input.source_line, input.name),
},
Some(input.name.to_string()),
input.message,
));
}
fn push_secret_finding(findings: &mut Vec<ScanFinding>, input: SecretFindingInput<'_>) {
let mut finding = base_finding(
input.kind,
input.severity,
input.confidence,
input.location,
Some(input.name.to_string()),
input.message.to_string(),
);
finding.secret_type = Some(input.secret_type.to_string());
finding.redacted_value = Some(redact::redacted(input.value));
finding.hash = Some(redact::fingerprint(input.value));
findings.push(finding);
}
/// Emits a finding for an AWS credential match.
///
/// The finding is downgraded to an informational placeholder when the file
/// path is an example/fixture path, the line was flagged as docstring text,
/// or the line is a comment; otherwise it is a high-severity hard-coded
/// secret.
fn push_aws_object_finding(findings: &mut Vec<ScanFinding>, args: AwsObjectFindingArgs<'_>) {
    let suppressed = args.file_path_is_placeholder
        || args.in_docstring
        || is_doc_or_comment_line(args.file, args.line);

    let (kind, severity, message) = if suppressed {
        (
            FindingKind::SecretPlaceholder,
            Severity::Info,
            "AWS credential in placeholder/comment context (suppressed)",
        )
    } else {
        (
            FindingKind::HardcodedSecret,
            Severity::High,
            "AWS credential appears in object-syntax assignment",
        )
    };

    let location = FindingLocation {
        file: args.file,
        line: args.line_number,
        column: column_of(args.line, args.value),
    };
    push_secret_finding(
        findings,
        SecretFindingInput {
            kind,
            severity,
            confidence: confidence_for_value(args.value, 0.92),
            location,
            secret_type: "aws-object-syntax",
            name: args.name,
            value: args.value,
            message,
        },
    );
}
fn base_finding(
kind: FindingKind,
severity: Severity,
confidence: f32,
location: FindingLocation<'_>,
name: Option<String>,
message: String,
) -> ScanFinding {
ScanFinding {
id: String::new(),
kind,
severity,
confidence,
file: location.file.to_string(),
line: location.line,
column: location.column,
secret_type: None,
name,
redacted_value: None,
hash: None,
message,
}
}
/// Assigns sequential ids (`TSAFE-FINDING-0001`, `TSAFE-FINDING-0002`, …) to
/// the findings in their current order.
fn renumber_findings(findings: &mut [ScanFinding]) {
    for (number, finding) in (1usize..).zip(findings.iter_mut()) {
        finding.id = format!("TSAFE-FINDING-{number:04}");
    }
}
/// Aggregates findings into a `ScanSummary`.
///
/// Counts findings per severity (informational findings count only toward the
/// total) and sums the severity weights into a risk score capped at 100.
fn summarize(findings: &[ScanFinding]) -> ScanSummary {
    let mut summary = ScanSummary {
        total_findings: findings.len(),
        ..ScanSummary::default()
    };

    // Tally per-severity counters.
    findings.iter().for_each(|finding| match finding.severity {
        Severity::Critical => summary.critical += 1,
        Severity::High => summary.high += 1,
        Severity::Medium => summary.medium += 1,
        Severity::Low => summary.low += 1,
        Severity::Info => {}
    });

    // Accumulate the weighted score, then clamp it to the reporting ceiling.
    let mut score = 0;
    for finding in findings {
        score += finding.severity.weight();
    }
    summary.risk_score = score.min(100);

    summary
}
/// Reports whether a walked entry should be pruned from the scan.
///
/// Entries named `.git`, `node_modules`, `target`, `dist`, `build`, `.venv`
/// or `vendor` are ignored (directories are not descended into).
///
/// The walk root itself (depth 0) is never ignored: `filter_entry` also tests
/// the root, so without this guard a repository whose own directory is named
/// e.g. `build` or `vendor` would be skipped entirely and produce an empty
/// report.
fn is_ignored_entry(entry: &DirEntry) -> bool {
    if entry.depth() == 0 {
        return false;
    }
    let name = entry.file_name().to_string_lossy();
    matches!(
        name.as_ref(),
        ".git" | "node_modules" | "target" | "dist" | "build" | ".venv" | "vendor"
    )
}
/// Decides whether a file is worth reading, based only on its path.
///
/// A file qualifies when it is an env file, a YAML workflow under
/// `.github/workflows/`, one of a few well-known file names, or has one of
/// the scanned extensions.
fn is_scannable_file(path: &Path, repo: &Path) -> bool {
    let relative = relative_path(path, repo);
    if is_env_file(&relative) {
        return true;
    }

    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or_default();
    let is_workflow =
        relative.starts_with(".github/workflows/") && matches!(extension, "yml" | "yaml");
    if is_workflow {
        return true;
    }

    let file_name = path
        .file_name()
        .and_then(|base| base.to_str())
        .unwrap_or_default();
    let known_name = matches!(
        file_name,
        "package.json" | "Dockerfile" | "docker-compose.yml" | "docker-compose.yaml"
    );
    let known_extension = matches!(
        extension,
        "env" | "sh" | "js" | "ts" | "py" | "rs" | "pem" | "json" | "yaml" | "yml"
    );
    known_name || known_extension
}
/// Reports whether the path names a dotenv-style file: `.env`, `.env.<suffix>`
/// or `<anything>.env`. Only the final path component is considered; if it
/// cannot be extracted, the whole input string is tested instead.
fn is_env_file(file: &str) -> bool {
    let base = match Path::new(file).file_name().and_then(|base| base.to_str()) {
        Some(base) => base,
        None => file,
    };
    if base == ".env" {
        return true;
    }
    base.starts_with(".env.") || base.ends_with(".env")
}
/// Reports whether a path looks like an example, template, sample, fixture or
/// documentation file, so that secrets found in it can be downgraded.
///
/// Backslashes are treated as path separators and all checks are ASCII
/// case-insensitive. A path matches when:
/// * its file name ends with a placeholder suffix (`.example`, `.template`, …),
/// * its file name contains `.example.`, `.template.` or `.sample.`,
/// * its file name starts with a placeholder prefix (`example.`, `fixture`, …),
/// * or any directory component is a docs/examples/fixtures/samples/templates
///   directory.
///
/// The directory check is anchored with a leading `/` so that it also matches
/// a top-level directory in a repo-relative path (`examples/app.js`), not only
/// nested ones (`pkg/examples/app.js`).
fn is_placeholder_file_path(file: &str) -> bool {
    const NAME_SUFFIXES: [&str; 6] = [
        ".example",
        ".template",
        ".sample",
        ".dist",
        ".fixture",
        ".tmpl",
    ];
    const NAME_INFIXES: [&str; 3] = [".example.", ".template.", ".sample."];
    const NAME_PREFIXES: [&str; 7] = [
        "example.",
        "examples.",
        "template.",
        "sample.",
        "test_fixture",
        "test-fixture",
        "fixture",
    ];
    const DIR_MARKERS: [&str; 11] = [
        "/docs/",
        "/doc/",
        "/examples/",
        "/example/",
        "/test-fixtures/",
        "/test_fixtures/",
        "/fixtures/",
        "/samples/",
        "/sample/",
        "/templates/",
        "/template/",
    ];

    let normalised = file.replace('\\', "/");
    let lower = normalised.to_ascii_lowercase();
    let name = Path::new(&normalised)
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or(&normalised)
        .to_ascii_lowercase();

    let name_matches = NAME_SUFFIXES.iter().any(|&s| name.ends_with(s))
        || NAME_INFIXES.iter().any(|&s| name.contains(s))
        || NAME_PREFIXES.iter().any(|&s| name.starts_with(s));
    if name_matches {
        return true;
    }

    // Relative paths have no leading separator; add one so the first
    // component can match a "/dir/" marker too.
    let anchored = format!("/{lower}");
    DIR_MARKERS.iter().any(|&marker| anchored.contains(marker))
}
/// Reports whether a line is a comment, judged from its first non-blank
/// characters only.
///
/// Recognised openers: `//` (which also covers `///` and `//!`), `#`, and
/// `/*` or `*` when the run of `*` / `/` characters is followed by a space, a
/// tab, or the end of the line. The `file` argument is accepted but does not
/// influence the result.
fn is_doc_or_comment_line(file: &str, line: &str) -> bool {
    let _ = file;
    let body = line.trim_start();

    if body.starts_with("//") || body.starts_with('#') {
        return true;
    }
    if body.starts_with("/*") || body.starts_with('*') {
        // Require whitespace (or nothing) after the opener so that code such
        // as `**kwargs` or `*ptr` is not mistaken for a block-comment line.
        let rest = body.trim_start_matches(['*', '/']);
        return matches!(rest.chars().next(), None | Some(' ' | '\t'));
    }
    false
}
/// Reports whether a value is a whole-value `${…}` interpolation, ignoring
/// surrounding whitespace and quote characters.
fn is_env_interpolation_value(value: &str) -> bool {
    let unquoted = value.trim().trim_matches(['"', '\'']);
    match unquoted.strip_prefix("${") {
        Some(rest) => rest.ends_with('}'),
        None => false,
    }
}
/// Flags, for every line of `content`, whether the line lies inside a Python
/// triple-quoted string that spans more than one line.
///
/// The result has one entry per line of `content.lines()`. For files whose
/// extension is not `py` every entry is `false`.
///
/// A line is flagged when a triple-quoted string was already open at its
/// start or is still open at its end. A string that opens and closes on the
/// same line therefore does not flag that line. The scan is purely textual:
/// it tracks `"""` and `'''` delimiters and nothing else (no escape, comment
/// or ordinary-string handling).
///
/// When both delimiters occur on a line outside a string, whichever comes
/// first opens the string; the other one is then just content until the
/// matching closer is found.
fn compute_docstring_lines(file: &str, content: &str) -> Vec<bool> {
    const DOUBLE: &str = "\"\"\"";
    const SINGLE: &str = "'''";

    let total_lines = content.lines().count();
    let is_python = Path::new(file).extension().and_then(|e| e.to_str()) == Some("py");
    if !is_python {
        return vec![false; total_lines];
    }

    // Delimiter of the triple-quoted string currently open, if any.
    let mut open: Option<&'static str> = None;
    let mut out = Vec::with_capacity(total_lines);

    for line in content.lines() {
        let started_in_string = open.is_some();
        let mut remaining = line;
        loop {
            match open {
                // Inside a string: only the matching delimiter can close it.
                Some(delimiter) => match remaining.find(delimiter) {
                    Some(idx) => {
                        open = None;
                        remaining = &remaining[idx + delimiter.len()..];
                    }
                    None => break,
                },
                // Outside a string: the earliest delimiter on the line opens one.
                None => {
                    let opener = match (remaining.find(DOUBLE), remaining.find(SINGLE)) {
                        (Some(dq), Some(sq)) => Some(if dq < sq {
                            (dq, DOUBLE)
                        } else {
                            (sq, SINGLE)
                        }),
                        (Some(dq), None) => Some((dq, DOUBLE)),
                        (None, Some(sq)) => Some((sq, SINGLE)),
                        (None, None) => None,
                    };
                    let Some((idx, delimiter)) = opener else {
                        break;
                    };
                    open = Some(delimiter);
                    remaining = &remaining[idx + delimiter.len()..];
                }
            }
        }
        out.push(started_in_string || open.is_some());
    }
    out
}
/// Reports whether the final path component is exactly `Dockerfile`.
fn is_dockerfile(file: &str) -> bool {
    let base = Path::new(file).file_name().and_then(|base| base.to_str());
    matches!(base, Some("Dockerfile"))
}
/// Reports whether the final path component is `docker-compose.yml` or
/// `docker-compose.yaml`.
fn is_docker_compose_file(file: &str) -> bool {
    let Some(base) = Path::new(file).file_name().and_then(|base| base.to_str()) else {
        return false;
    };
    base == "docker-compose.yml" || base == "docker-compose.yaml"
}
/// Reports whether the first non-whitespace character of the line is `#`.
fn is_yaml_comment_line(line: &str) -> bool {
    matches!(line.trim_start().chars().next(), Some('#'))
}
/// Reports whether the line, after leading whitespace, starts with a
/// JavaScript comment opener (`//`, `/*`) or a `*` continuation.
fn is_js_comment_line(line: &str) -> bool {
    let body = line.trim_start();
    ["//", "/*", "*"]
        .iter()
        .any(|&opener| body.starts_with(opener))
}
/// Reports whether a line both mentions `variable` and contains a call to one
/// of the known file-removal functions.
///
/// Both checks are plain substring searches on the line; no parsing is done.
fn is_destructive_path_sink(line: &str, variable: &str) -> bool {
    const SINKS: [&str; 8] = [
        "forceRemove(",
        "fs.rm(",
        "fs.rmSync(",
        "fs.unlink(",
        "fs.unlinkSync(",
        "rm(",
        "rmSync(",
        "remove(",
    ];

    if !line.contains(variable) {
        return false;
    }
    SINKS.iter().any(|&sink| line.contains(sink))
}
/// Chooses the severity for a sensitive variable found in an env file.
///
/// * `Critical` - the path contains "production" and the value is not a
///   placeholder according to `redact::is_placeholder`.
/// * `Low` - otherwise, when the path ends with `.example`.
/// * `High` - everything else.
fn env_file_severity(file: &str, value: &str) -> Severity {
    let live_production_value = file.contains("production") && !redact::is_placeholder(value);
    if live_production_value {
        return Severity::Critical;
    }
    match file.ends_with(".example") {
        true => Severity::Low,
        false => Severity::High,
    }
}
/// Adjusts a detector's base confidence for the matched value.
///
/// Values that `redact::is_placeholder` recognises lose 0.25 confidence, but
/// never drop below 0.35; all other values keep `base` unchanged.
fn confidence_for_value(value: &str, base: f32) -> f32 {
    if !redact::is_placeholder(value) {
        return base;
    }
    let reduced = base - 0.25;
    reduced.max(0.35)
}
/// Extracts the value assigned to `name` on a line, if any.
///
/// Looks at the first occurrence of `name` only. After optional whitespace it
/// must be followed by `=` or `:`; the value is then the text up to the next
/// quote, space or tab, with any leading quotes removed. Returns `None` when
/// the name is absent, is not followed by an assignment, or the value is
/// empty.
fn extract_assigned_value<'a>(line: &'a str, name: &str) -> Option<&'a str> {
    let (_, tail) = line.split_once(name)?;
    let tail = tail.trim_start();

    let assigned = match tail.strip_prefix('=') {
        Some(rest) => rest,
        None => tail.strip_prefix(':')?,
    };

    let unquoted = assigned.trim_start().trim_start_matches(['"', '\'']);
    let value = unquoted.split(['"', '\'', ' ', '\t']).next()?;
    if value.is_empty() {
        None
    } else {
        Some(value)
    }
}
/// Decides whether a known-secret-name match should be dropped rather than
/// reported as hard-coded.
///
/// Skipped when the file is an env file, when the line reads the value from
/// the environment or a CI secret store, when the line starts with `export `,
/// or when the extracted value itself refers to such a source.
fn should_skip_hardcoded_secret(file: &str, line: &str, value: &str) -> bool {
    const LINE_MARKERS: [&str; 7] = [
        "${{ secrets.",
        "process.env.",
        "process.env[",
        "os.getenv(",
        "os.environ[",
        "std::env::var(",
        "env::var(",
    ];
    const VALUE_MARKERS: [&str; 7] = [
        "process.env",
        "os.getenv",
        "os.environ",
        "std::env::var",
        "env::var",
        "secrets.",
        "${",
    ];

    if is_env_file(file) {
        return true;
    }
    if LINE_MARKERS.iter().any(|&marker| line.contains(marker)) {
        return true;
    }
    if line.trim_start().starts_with("export ") {
        return true;
    }
    VALUE_MARKERS.iter().any(|&marker| value.contains(marker))
}
/// Decides whether the generic secret detector should ignore a line.
///
/// Skipped when the file is an env file, when the line reads from the
/// environment or a CI secret store, when it starts with `export `, or when
/// it contains a `${` interpolation.
fn should_skip_generic_secret_line(file: &str, line: &str) -> bool {
    const LINE_MARKERS: [&str; 7] = [
        "${{ secrets.",
        "process.env.",
        "process.env[",
        "os.getenv(",
        "os.environ[",
        "std::env::var(",
        "env::var(",
    ];

    if is_env_file(file) {
        return true;
    }
    if LINE_MARKERS.iter().any(|&marker| line.contains(marker)) {
        return true;
    }
    line.trim_start().starts_with("export ") || line.contains("${")
}
/// Creates the parent directory of `path` (and any missing ancestors).
///
/// Does nothing when the path has no parent or the parent is empty, as for a
/// bare file name.
///
/// # Errors
///
/// Fails when the directory cannot be created.
fn ensure_parent_dir(path: &Path) -> Result<()> {
    let parent = match path.parent() {
        Some(dir) if !dir.as_os_str().is_empty() => dir,
        _ => return Ok(()),
    };
    fs::create_dir_all(parent)
        .with_context(|| format!("create output directory: {}", parent.display()))?;
    Ok(())
}
/// Returns the 1-based byte column of the first occurrence of `needle` in
/// `line`, or 1 when `needle` does not occur.
fn column_of(line: &str, needle: &str) -> usize {
    match line.find(needle) {
        Some(offset) => offset + 1,
        None => 1,
    }
}
/// Renders `path` relative to `repo` using forward slashes.
///
/// When `path` is not located under `repo`, the full path is rendered
/// instead. Non-UTF-8 sequences are converted lossily.
fn relative_path(path: &Path, repo: &Path) -> String {
    let shown = match path.strip_prefix(repo) {
        Ok(inner) => inner,
        Err(_) => path,
    };
    shown.to_string_lossy().replace('\\', "/")
}
#[cfg(test)]
mod tests {
use std::fs;
use tempfile::tempdir;
use super::*;
#[test]
fn detects_env_files_process_reads_github_secrets_and_exports() {
let tmp = tempdir().unwrap();
fs::create_dir_all(tmp.path().join(".github/workflows")).unwrap();
fs::create_dir_all(tmp.path().join("src")).unwrap();
fs::create_dir_all(tmp.path().join("scripts")).unwrap();
fs::write(
tmp.path().join(".env"),
"DATABASE_URL=postgres://user:password@localhost:5432/app\n",
)
.unwrap();
fs::write(
tmp.path().join(".github/workflows/ci.yml"),
"env:\n PROD_DEPLOY_KEY: ${{ secrets.PROD_DEPLOY_KEY }}\n",
)
.unwrap();
fs::write(
tmp.path().join("src/config.js"),
"const token = process.env.API_TOKEN;\n",
)
.unwrap();
fs::write(
tmp.path().join("scripts/dev.sh"),
"export STRIPE_SECRET_KEY=sk_live_fake_exported_1234567890\n",
)
.unwrap();
let report = scan_repo(tmp.path()).unwrap();
assert!(report.findings.iter().any(|f| f.file == ".env"));
assert!(report
.observed_env_reads
.iter()
.any(|read| read.name == "API_TOKEN"));
assert!(report
.ci_secret_references
.iter()
.any(|reference| reference.name == "PROD_DEPLOY_KEY"));
assert!(report
.findings
.iter()
.any(|finding| finding.kind == FindingKind::UnsafeExport));
}
#[test]
fn ignores_node_modules() {
let tmp = tempdir().unwrap();
fs::create_dir_all(tmp.path().join("node_modules/pkg")).unwrap();
fs::write(
tmp.path().join("node_modules/pkg/index.js"),
"const token = process.env.API_TOKEN;\n",
)
.unwrap();
let report = scan_repo(tmp.path()).unwrap();
assert!(report.observed_env_reads.is_empty());
}
#[test]
fn reduces_confidence_for_placeholder_values() {
let tmp = tempdir().unwrap();
fs::write(tmp.path().join(".env"), "API_TOKEN=your-key-here\n").unwrap();
let report = scan_repo(tmp.path()).unwrap();
assert!(report
.findings
.iter()
.any(|finding| finding.confidence < 0.85));
}
#[test]
fn treats_credential_names_as_sensitive() {
for name in ["GOOGLE_APPLICATION_CREDENTIALS", "GOOGLE_GHA_CREDS_PATH"] {
assert!(is_sensitive_env_name(name), "{name} was not sensitive");
}
}
#[test]
fn does_not_treat_env_reads_as_hardcoded_secrets() {
let tmp = tempdir().unwrap();
fs::create_dir_all(tmp.path().join("src")).unwrap();
fs::write(
tmp.path().join("src/config.js"),
"module.exports = { apiToken: process.env.API_TOKEN };\n",
)
.unwrap();
let report = scan_repo(tmp.path()).unwrap();
assert!(report
.findings
.iter()
.any(|finding| finding.kind == FindingKind::RuntimeEnvRead));
assert!(!report
.findings
.iter()
.any(|finding| finding.kind == FindingKind::HardcodedSecret));
}
#[test]
fn does_not_treat_output_labels_as_secret_assignments() {
let tmp = tempdir().unwrap();
fs::write(
tmp.path().join("test.js"),
"console.log(\"DATABASE_URL present:\", Boolean(config.databaseUrl));\n",
)
.unwrap();
let report = scan_repo(tmp.path()).unwrap();
assert!(report.findings.is_empty());
}
fn has_secret_finding(report: &ScanReport) -> bool {
report.findings.iter().any(|f| {
matches!(
f.kind,
FindingKind::EnvFile | FindingKind::HardcodedSecret | FindingKind::PrivateKey
)
})
}
fn has_placeholder_finding(report: &ScanReport) -> bool {
report
.findings
.iter()
.any(|f| f.kind == FindingKind::SecretPlaceholder)
}
#[test]
fn placeholder_env_example_with_your_x_here_does_not_emit_secret() {
let tmp = tempdir().unwrap();
fs::write(
tmp.path().join(".env.example"),
"ANTHROPIC_API_KEY=your-anthropic-key-here\n",
)
.unwrap();
let report = scan_repo(tmp.path()).unwrap();
assert!(!has_secret_finding(&report));
assert!(has_placeholder_finding(&report));
}
/// An angle-bracketed value (`<your-github-token>`) in `.env.example` must be
/// reported as a placeholder and not as a secret-kind finding.
#[test]
fn placeholder_env_example_with_angle_brackets_does_not_emit_secret() {
    let tmp = tempdir().unwrap();
    let example_path = tmp.path().join(".env.example");
    fs::write(&example_path, "GITHUB_TOKEN=<your-github-token>\n").unwrap();

    let report = scan_repo(tmp.path()).unwrap();

    assert!(!has_secret_finding(&report));
    assert!(has_placeholder_finding(&report));
}
/// A `REDACTED_BEFORE_COMMIT` marker value in `.env.example` must not yield a
/// secret-kind finding.
#[test]
fn placeholder_env_example_with_redacted_marker_does_not_emit_secret() {
    let tmp = tempdir().unwrap();
    let root = tmp.path();
    let example_body = "DATABASE_URL=REDACTED_BEFORE_COMMIT\n";
    fs::write(root.join(".env.example"), example_body).unwrap();

    let report = scan_repo(root).unwrap();
    let flagged_as_secret = has_secret_finding(&report);
    assert!(!flagged_as_secret);
}
/// The `AKIAIOSFODNN7EXAMPLE` access key id in `.env.example` must not yield a
/// secret-kind finding.
#[test]
fn placeholder_env_example_with_aws_docs_example_does_not_emit_secret() {
    let tmp = tempdir().unwrap();
    let example_path = tmp.path().join(".env.example");
    let example_body = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE\n";
    fs::write(&example_path, example_body).unwrap();

    let report = scan_repo(tmp.path()).unwrap();
    assert!(!has_secret_finding(&report));
}
/// A value that is only a `${VAR}` interpolation must not yield a secret-kind
/// finding.
#[test]
fn env_interpolation_value_does_not_emit_secret() {
    let tmp = tempdir().unwrap();
    let root = tmp.path();
    let interpolated = "GITHUB_TOKEN=${GITHUB_TOKEN}\n";
    fs::write(root.join("config.env"), interpolated).unwrap();

    let report = scan_repo(root).unwrap();
    let flagged_as_secret = has_secret_finding(&report);
    assert!(!flagged_as_secret);
}
/// A token-looking assignment that appears only inside a Rust doc comment must
/// not yield a secret-kind finding.
#[test]
fn doc_comment_example_does_not_emit_secret() {
    let tmp = tempdir().unwrap();
    let root = tmp.path();
    fs::create_dir_all(root.join("src")).unwrap();

    let rust_source =
        "/// # Example\n/// // Set GITHUB_TOKEN=ghp_example_here\npub fn from_env() {}\n";
    fs::write(root.join("src/lib.rs"), rust_source).unwrap();

    let report = scan_repo(root).unwrap();
    assert!(!has_secret_finding(&report));
}
/// A commented-out JavaScript assignment with a `REPLACE-WITH-YOUR-OWN-KEY`
/// style value must not yield a secret-kind finding.
#[test]
fn js_comment_example_does_not_emit_secret() {
    let tmp = tempdir().unwrap();
    let script_path = tmp.path().join("examples.js");
    let commented_line = "// const apiKey = 'sk-ant-api03-REPLACE-WITH-YOUR-OWN-KEY';\n";
    fs::write(&script_path, commented_line).unwrap();

    let report = scan_repo(tmp.path()).unwrap();
    let flagged_as_secret = has_secret_finding(&report);
    assert!(!flagged_as_secret);
}
/// A usage example inside a Python module docstring must not yield a
/// secret-kind finding.
#[test]
fn python_docstring_example_does_not_emit_secret() {
    let tmp = tempdir().unwrap();
    let root = tmp.path();
    let python_source = "\"\"\"Module docstring.\n\nExample usage::\n\n GITHUB_TOKEN=ghp_REPLACE_ME_BEFORE_DEPLOY python deploy.py\n\n\"\"\"\nimport os\n";
    fs::write(root.join("deploy.py"), python_source).unwrap();

    let report = scan_repo(root).unwrap();
    assert!(!has_secret_finding(&report));
}
/// Scanning a repo whose only file is `test_fixture.env` (holding a value that
/// labels itself as unit-test-only) must not yield a secret-kind finding.
#[test]
fn test_fixture_env_path_suppresses_findings() {
    let tmp = tempdir().unwrap();
    let fixture_path = tmp.path().join("test_fixture.env");
    let fixture_body = "TEST_SIGNING_KEY=test_signing_key_for_unit_tests_only_v1\n";
    fs::write(&fixture_path, fixture_body).unwrap();

    let report = scan_repo(tmp.path()).unwrap();
    let flagged_as_secret = has_secret_finding(&report);
    assert!(!flagged_as_secret);
}
/// AWS credentials written as JavaScript object properties must be reported as
/// a `HardcodedSecret` finding tagged `aws-object-syntax`.
#[test]
fn detects_aws_credentials_in_js_object_syntax() {
    let tmp = tempdir().unwrap();
    let root = tmp.path();
    let js_source = "const creds = { accessKeyId: 'AKIA7D1410K9KELBYMXY', secretAccessKey: 'f7JoLTKaxe0cnoV7TLSl+95ovmXkSfaqdDr75A6R' };\n";
    fs::write(root.join("config.js"), js_source).unwrap();

    let report = scan_repo(root).unwrap();

    // Both the kind and the secret-type tag have to match on the same finding.
    let detected = report.findings.iter().any(|finding| {
        matches!(finding.kind, FindingKind::HardcodedSecret)
            && finding.secret_type.as_deref() == Some("aws-object-syntax")
    });
    assert!(detected);
}
/// AWS credentials written as JSON object members must be reported as a
/// `HardcodedSecret` finding tagged `aws-object-syntax`.
#[test]
fn detects_aws_credentials_in_json_object_syntax() {
    let tmp = tempdir().unwrap();
    let json_path = tmp.path().join("config.json");
    let json_body = "{\n \"awsAccessKeyId\": \"AKIA7D1410K9KELBYMXY\",\n \"awsSecretAccessKey\": \"f7JoLTKaxe0cnoV7TLSl+95ovmXkSfaqdDr75A6R\"\n}\n";
    fs::write(&json_path, json_body).unwrap();

    let report = scan_repo(tmp.path()).unwrap();

    // Both the kind and the secret-type tag have to match on the same finding.
    let detected = report.findings.iter().any(|finding| {
        matches!(finding.kind, FindingKind::HardcodedSecret)
            && finding.secret_type.as_deref() == Some("aws-object-syntax")
    });
    assert!(detected);
}
/// A PEM file containing a `BEGIN PRIVATE KEY` block must be reported as a
/// `PrivateKey` finding.
#[test]
fn detects_private_key_in_pem_file() {
    let tmp = tempdir().unwrap();
    let root = tmp.path();
    let pem_body =
        "-----BEGIN PRIVATE KEY-----\nMIIBVgIBADANBgkqhkiG9w0BAQEFAA\n-----END PRIVATE KEY-----\n";
    fs::write(root.join("deploy_key.pem"), pem_body).unwrap();

    let report = scan_repo(root).unwrap();
    let found_private_key = report
        .findings
        .iter()
        .any(|finding| matches!(finding.kind, FindingKind::PrivateKey));
    assert!(found_private_key);
}
/// A service-account JSON file whose `private_key` member embeds a PEM block
/// (with escaped newlines) must be reported as a `PrivateKey` finding.
#[test]
fn detects_private_key_in_service_account_json() {
    let tmp = tempdir().unwrap();
    let json_path = tmp.path().join("service-account.json");
    let json_body = "{\n \"type\": \"service_account\",\n \"private_key\": \"-----BEGIN PRIVATE KEY-----\\nMIIBVgIBADANBgkqhkiG9w0BAQEFAA\\n-----END PRIVATE KEY-----\\n\"\n}\n";
    fs::write(&json_path, json_body).unwrap();

    let report = scan_repo(tmp.path()).unwrap();
    let found_private_key = report
        .findings
        .iter()
        .any(|finding| matches!(finding.kind, FindingKind::PrivateKey));
    assert!(found_private_key);
}
/// Every finding that carries a hash must use the `blake3:` prefix followed by
/// 64 characters, and scanning a `.env` with a database URL must yield at
/// least one such finding.
#[test]
fn finding_hash_uses_blake3_prefix() {
    let tmp = tempdir().unwrap();
    let root = tmp.path();
    let env_body = "DATABASE_URL=postgres://user:password@db.internal:5432/app\n";
    fs::write(root.join(".env"), env_body).unwrap();

    let report = scan_repo(root).unwrap();

    // Pair each present hash with the kind of the finding it belongs to, so
    // the failure message can name the offending finding.
    let hashed: Vec<_> = report
        .findings
        .iter()
        .filter_map(|finding| finding.hash.as_deref().map(|hash| (hash, &finding.kind)))
        .collect();
    assert!(
        !hashed.is_empty(),
        "expected at least one finding with a hash"
    );

    let expected_len = "blake3:".len() + 64;
    for (hash, kind) in hashed {
        assert!(
            hash.starts_with("blake3:"),
            "Phase 3 fingerprint MUST be BLAKE3 (ec ADR-0003), got {hash:?} on {:?}",
            kind
        );
        assert_eq!(hash.len(), expected_len, "blake3 hash length");
    }
}
}