use crate::detectors::ast_fingerprint::parse_root_ext;
use crate::detectors::base::{is_test_file, Detector, DetectorConfig};
use crate::detectors::security::scan_inputs::{ScanAstInputs, ScanInputs};
use crate::graph::GraphQueryExt;
use crate::models::{Finding, Severity};
use crate::parsers::lightweight::Language;
use anyhow::Result;
use regex::Regex;
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use tracing::debug;
/// A single secret-detection rule.
///
/// Two flavours exist:
/// * `SelfShaped` — the value alone is recognisable (fixed prefix, PEM
///   header, URL with credentials, ...), so only the value is tested.
/// * `NameGated` — the value is only suspicious when it is bound to a name
///   that matches `name_gate` (variable, key, keyword argument, ...).
enum SecretPattern {
    SelfShaped {
        /// Human-readable rule name, used in finding titles.
        name: &'static str,
        /// Regex the candidate value must match.
        value_regex: Regex,
        /// Severity reported when nothing downgrades the finding.
        severity: Severity,
    },
    NameGated {
        /// Human-readable rule name, used in finding titles.
        name: &'static str,
        /// Regex the binding name must match before the value is tested.
        name_gate: Regex,
        /// Regex the candidate value must match.
        value_regex: Regex,
        /// Severity reported when nothing downgrades the finding.
        severity: Severity,
    },
}
impl SecretPattern {
fn finding_name(&self) -> &'static str {
match self {
SecretPattern::SelfShaped { name, .. } | SecretPattern::NameGated { name, .. } => name,
}
}
fn severity(&self) -> Severity {
match self {
SecretPattern::SelfShaped { severity, .. }
| SecretPattern::NameGated { severity, .. } => *severity,
}
}
fn to_legacy_line_regex(&self) -> Regex {
match self {
SecretPattern::SelfShaped { value_regex, .. } => value_regex.clone(),
SecretPattern::NameGated {
name_gate: _,
value_regex,
name,
..
} => {
match *name {
"Generic Secret" => {
Regex::new(r#"(?i)(secret|password|passwd|pwd)\s*[=:]\s*["'][^"']{8,}["']"#)
.expect("valid regex")
}
"Generic API Key" => {
Regex::new(r"(?i)api[_-]?key\s*[=:]\s*[a-zA-Z0-9_\-]{20,}")
.expect("valid regex")
}
"AWS Secret Access Key" => {
Regex::new(r"(?i)aws_secret_access_key\s*[=:]\s*[A-Za-z0-9/+=]{40}")
.expect("valid regex")
}
_ => value_regex.clone(),
}
}
}
}
}
static SECRET_PATTERNS: LazyLock<Vec<SecretPattern>> = LazyLock::new(|| {
vec![
SecretPattern::SelfShaped {
name: "AWS Access Key ID",
value_regex: Regex::new(r"AKIA[0-9A-Z]{16}").expect("valid regex"),
severity: Severity::Critical,
},
SecretPattern::SelfShaped {
name: "GitHub Token",
value_regex: Regex::new(r"ghp_[a-zA-Z0-9]{36}").expect("valid regex"),
severity: Severity::Critical,
},
SecretPattern::SelfShaped {
name: "Private Key",
value_regex: Regex::new(r"-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----")
.expect("valid regex"),
severity: Severity::Critical,
},
SecretPattern::SelfShaped {
name: "Slack Token",
value_regex: Regex::new(r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*")
.expect("valid regex"),
severity: Severity::Critical,
},
SecretPattern::SelfShaped {
name: "Stripe API Key",
value_regex: Regex::new(r"sk_live_[a-zA-Z0-9]{24,}").expect("valid regex"),
severity: Severity::Critical,
},
SecretPattern::SelfShaped {
name: "Database URL with Password",
value_regex: Regex::new(r"(?i)(postgres|mysql|mongodb|redis)://[^:]+:[^@]+@")
.expect("valid regex"),
severity: Severity::Critical,
},
SecretPattern::SelfShaped {
name: "SendGrid API Key",
value_regex: Regex::new(r"SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}")
.expect("valid regex"),
severity: Severity::High,
},
SecretPattern::NameGated {
name: "AWS Secret Access Key",
name_gate: Regex::new(r"(?i)aws.?secret.?access.?key").expect("valid regex"),
value_regex: Regex::new(r"^[A-Za-z0-9/+=]{40}$").expect("valid regex"),
severity: Severity::Critical,
},
SecretPattern::NameGated {
name: "Generic API Key",
name_gate: Regex::new(r"(?i)^api[_-]?key$").expect("valid regex"),
value_regex: Regex::new(r"^[a-zA-Z0-9_\-]{20,}$").expect("valid regex"),
severity: Severity::High,
},
SecretPattern::NameGated {
name: "Generic Secret",
name_gate: Regex::new(r"(?i)(^|_)(secret|password|passwd|pwd|token)($|_)")
.expect("valid regex"),
value_regex: Regex::new(r"^[^\s]{8,}$").expect("valid regex"),
severity: Severity::High,
},
]
});
/// Detector that reports hardcoded credentials (API keys, tokens, passwords,
/// private keys, database URLs with embedded passwords).
pub struct SecretDetector {
    // Not read anywhere in this file; the allow keeps the unused-field lint quiet.
    #[allow(dead_code)]
    config: DetectorConfig,
    /// Repository root; findings report paths relative to it.
    repository_path: PathBuf,
    /// Scanning stops visiting new files once this many findings were collected.
    max_findings: usize,
}
impl SecretDetector {
/// Creates a detector rooted at `repository_path` with the default
/// configuration and a cap of 100 findings.
pub fn new(repository_path: impl Into<PathBuf>) -> Self {
    const DEFAULT_MAX_FINDINGS: usize = 100;

    let repository_path = repository_path.into();
    let config = DetectorConfig::default();
    Self {
        config,
        repository_path,
        max_findings: DEFAULT_MAX_FINDINGS,
    }
}
/// Maps `path` to the form used in findings by delegating to
/// `detector_relative_path` with this detector's repository root.
fn relative_path(&self, path: &Path) -> PathBuf {
    let repo_root = &self.repository_path;
    crate::detectors::detector_relative_path(repo_root, path)
}
/// Returns `true` when `line` calls `os.environ.get(` or `os.getenv(`
/// (matched ASCII case-insensitively) with a second, top-level argument —
/// i.e. a default value is supplied.
///
/// Only the first occurrence of each call spelling is inspected. Nested
/// parentheses inside the first argument are skipped by tracking depth.
fn has_python_env_fallback(line: &str) -> bool {
    // ASCII-only lowering keeps byte offsets identical to `line`. A full
    // Unicode `to_lowercase` can change byte lengths (e.g. U+0130), which
    // would make offsets found in the lowered copy invalid for `line`.
    let lowered = line.to_ascii_lowercase();

    ["os.environ.get(", "os.getenv("].iter().any(|call| {
        let Some(start) = lowered.find(call) else {
            return false;
        };
        let arguments = &line[start + call.len()..];

        // We start just after the call's opening parenthesis.
        let mut depth = 1usize;
        for ch in arguments.chars() {
            match ch {
                '(' => depth += 1,
                ')' => {
                    depth -= 1;
                    if depth == 0 {
                        // Call closed without a top-level comma: no default.
                        return false;
                    }
                }
                ',' if depth == 1 => return true,
                _ => {}
            }
        }
        false
    })
}
/// Returns `true` when `line` looks like Go code that supplies a fallback for
/// an environment lookup: either it mentions a helper whose lower-cased name
/// contains `getenvdefault`, `getenvor` or `envdefault`, or it contains both
/// `if ` and a comparison against the empty string (`== ""` / `!= ""`).
fn has_go_env_fallback(line: &str) -> bool {
    let lowered = line.to_lowercase();
    let mentions_helper = ["getenvdefault", "getenvor", "envdefault"]
        .iter()
        .any(|helper| lowered.contains(*helper));
    if mentions_helper {
        return true;
    }

    let compares_to_empty = [r#"== """#, r#"!= """#]
        .iter()
        .any(|cmp| line.contains(*cmp));
    compares_to_empty && line.contains("if ")
}
/// Line-based scan used for files without an AST grammar.
///
/// Returns no findings for test files or for content containing a NUL byte.
/// Otherwise every line is matched via `try_match_line`, except lines that
/// `is_line_suppressed` rejects and `//` comment lines containing `example`.
fn scan_file(&self, path: &Path, content: &str) -> Vec<Finding> {
    if is_test_file(path) || content.contains('\0') {
        return Vec::new();
    }

    let lines: Vec<&str> = content.lines().collect();
    let mut findings = Vec::new();
    for (idx, &current) in lines.iter().enumerate() {
        let previous = idx.checked_sub(1).map(|prev_idx| lines[prev_idx]);
        if crate::detectors::is_line_suppressed(current, previous) {
            continue;
        }

        let stripped = current.trim();
        let is_example_comment = stripped.starts_with("//") && stripped.contains("example");
        if !is_example_comment {
            findings.extend(self.try_match_line(path, current, idx));
        }
    }
    findings
}
/// Matches one source line against every rule in `SECRET_PATTERNS` using the
/// legacy line regexes and returns a finding for each rule that survives the
/// false-positive filters below.
///
/// `line_num` is the zero-based line index; findings report `line_num + 1`.
/// For each rule only the first regex match on the line is considered.
fn try_match_line(&self, path: &Path, line: &str, line_num: usize) -> Vec<Finding> {
let mut findings = Vec::new();
for pattern in SECRET_PATTERNS.iter() {
let line_regex = pattern.to_legacy_line_regex();
if let Some(m) = line_regex.find(line) {
let matched = m.as_str();
// --- Filters on the matched text itself ---
if matched.len() < 10 {
continue;
}
if matched.contains("example") || matched.contains("EXAMPLE") {
continue;
}
if matched.contains("placeholder") || matched.contains("xxxx") {
continue;
}
let matched_lower = matched.to_lowercase();
if matched_lower.contains("your-")
|| matched_lower.contains("-here")
|| matched_lower.contains("changeme")
|| matched_lower.contains("replace")
|| matched_lower.contains("todo")
|| matched_lower.contains("fixme")
|| matched == "sk-your-openai-key"
|| matched_lower.starts_with("xxx")
|| matched_lower.ends_with("xxx")
{
continue;
}
// Skip when the line contains `${` followed by the part of the match
// that precedes its first `=`.
if line.contains(&format!("${{{}", &matched.split('=').next().unwrap_or(""))) {
continue;
}
// --- Filters on the whole line: values read from the environment or
// from request headers ---
// NOTE: this check is subsumed by the plain `process.env` check that follows.
if line.contains("= process.env.") || line.contains("=process.env.") {
continue;
}
if line.contains("process.env") {
continue;
}
if line.contains("env::var(") || line.contains("std::env::var") {
continue;
}
if line.contains("headers.get(")
|| line.contains("req.headers.")
|| line.contains("request.headers.")
|| line.contains("headers[")
{
continue;
}
if line.contains("os.environ[")
|| line.contains("os.environ.get(")
|| line.contains("os.getenv(")
{
continue;
}
if line.contains("os.Getenv(") || line.contains("os.LookupEnv(") {
continue;
}
// --- Extra shape checks for the "Generic Secret" rule ---
// The value is whatever follows the first `=` on the line, or the first
// `:` when there is no `=`. It must start with a quote/backtick or a
// `b"` / `b'` prefix, and must not contain `(` or start with `[` / `{`.
if pattern.finding_name() == "Generic Secret" {
let value_part = if let Some(eq_pos) = line.find('=') {
line[eq_pos + 1..].trim()
} else if let Some(colon_pos) = line.find(':') {
line[colon_pos + 1..].trim()
} else {
""
};
if !value_part.is_empty() {
if value_part.contains('(') {
continue;
}
let first_char = value_part.chars().next().unwrap_or(' ');
if matches!(first_char, '[' | '{') {
continue;
}
if !matches!(first_char, '"' | '\'' | '`' | 'b') {
continue;
}
if first_char == 'b' {
let second_char = value_part.chars().nth(1).unwrap_or(' ');
if !matches!(second_char, '"' | '\'') {
continue;
}
}
}
}
// --- Severity adjustment ---
// NOTE: lines containing the exact-case spellings `process.env`,
// `os.environ.get(`, `os.getenv(`, `os.Getenv(` and `os.LookupEnv(` were
// already skipped above, so the env-related conditions here can only fire
// for other-case spellings seen through `line_lower`; the two Go
// conditions cannot fire at all.
let line_lower = line.to_lowercase();
let mut effective_severity = pattern.severity();
if (line_lower.contains("process.env")
&& (line.contains("||") || line.contains("??")))
|| ((line_lower.contains("os.environ.get(")
|| line_lower.contains("os.getenv("))
&& Self::has_python_env_fallback(line))
|| line.contains("os.LookupEnv(")
|| (line.contains("os.Getenv(") && Self::has_go_env_fallback(line))
|| matched.contains("localhost")
|| matched.contains("127.0.0.1")
{
effective_severity = Severity::Low;
}
// Otherwise downgrade findings in seed/script/fixture/example/demo/sample
// locations, judged by substrings of the lower-cased path.
else if let Some(rel_path) = path.to_str() {
let rel_lower = rel_path.to_lowercase();
if rel_lower.contains("/seed")
|| rel_lower.contains("/script")
|| rel_lower.contains("/fixture")
|| rel_lower.contains("/examples/")
|| rel_lower.contains("/example/")
|| rel_lower.contains("/demo/")
|| rel_lower.contains("/samples/")
|| rel_lower.contains("/sample/")
|| rel_lower.contains(".seed.")
|| rel_lower.contains(".script.")
|| rel_lower.contains(".example.")
|| rel_lower.contains(".sample.")
{
effective_severity = Severity::Low;
}
}
// Findings use one-based line numbers.
let line_start = line_num as u32 + 1;
findings.push(Finding {
id: String::new(),
detector: "SecretDetector".to_string(),
severity: effective_severity,
title: format!("Hardcoded {}", pattern.finding_name()),
description: format!(
"Potential {} found in source code at line {}. \
Secrets should be stored in environment variables or secret management systems.",
pattern.finding_name(), line_start
),
affected_files: vec![self.relative_path(path)],
line_start: Some(line_start),
line_end: Some(line_start),
suggested_fix: Some("Move this secret to an environment variable or secrets manager".to_string()),
estimated_effort: Some("15 minutes".to_string()),
category: Some("security".to_string()),
cwe_id: Some("CWE-798".to_string()),
why_it_matters: Some("Hardcoded secrets can be extracted from source code, leading to credential theft".to_string()),
..Default::default()
});
}
}
findings
}
/// AST-based scan: collects string bindings from the syntax tree and reports
/// the first rule in `SECRET_PATTERNS` that matches each binding.
///
/// Returns no findings for test files, for content containing a NUL byte, or
/// when no cached tree is supplied and parsing fails. Bindings on lines that
/// `is_line_suppressed` rejects are ignored.
fn scan_file_ast(&self, inputs: &ScanAstInputs<'_>) -> Vec<Finding> {
    let path = inputs.path();
    let content = inputs.content();
    let ext = inputs.ext();
    let lang = inputs.lang;
    let cached_tree = inputs.cached_tree;

    if is_test_file(path) || content.contains('\0') {
        return Vec::new();
    }

    // A freshly parsed tree must outlive `root`, hence the deferred binding.
    let parsed;
    let root = if let Some(tree) = cached_tree {
        tree.root_node()
    } else {
        let Some(tree) = parse_root_ext(content, lang, ext) else {
            return Vec::new();
        };
        parsed = tree;
        parsed.root_node()
    };

    let mut bindings: Vec<Binding> = Vec::new();
    collect_bindings(root, content.as_bytes(), &mut bindings);

    let lines: Vec<&str> = content.lines().collect();
    let mut findings = Vec::new();
    for binding in &bindings {
        let Some(&line) = lines.get(binding.line) else {
            continue;
        };
        let previous = binding.line.checked_sub(1).map(|idx| lines[idx]);
        if crate::detectors::is_line_suppressed(line, previous) {
            continue;
        }

        // Only the first matching rule is reported per binding.
        let first_hit = SECRET_PATTERNS
            .iter()
            .find_map(|pat| match_pattern_against_binding(self, path, pat, binding));
        findings.extend(first_hit);
    }
    findings
}
}
/// A string literal collected from the syntax tree, optionally paired with the
/// name it is bound to.
struct Binding<'a> {
// Lower-cased binding name produced by `extract_named_binding`; `None` for
// string literals that were collected without a name.
name: Option<String>,
// String content as returned by `string_node_content`, borrowed from the source.
value: &'a str,
// Start row of the string node as reported by tree-sitter; reporting code
// adds 1 to obtain the line number shown in findings.
line: usize,
}
/// Tests one binding against one rule and builds the finding on success.
///
/// Returns `None` when the value fails the false-positive filters, when a
/// name-gated rule meets an unnamed binding or a non-matching name, or when
/// the value regex does not match. Severity drops to `Low` for paths that look
/// like seeds/scripts/fixtures/examples/demos/samples and for values that
/// mention `localhost` or `127.0.0.1`.
fn match_pattern_against_binding(
    detector: &SecretDetector,
    path: &Path,
    pattern: &SecretPattern,
    binding: &Binding<'_>,
) -> Option<Finding> {
    const LOW_TRUST_PATH_MARKERS: [&str; 12] = [
        "/seed",
        "/script",
        "/fixture",
        "/examples/",
        "/example/",
        "/demo/",
        "/samples/",
        "/sample/",
        ".seed.",
        ".script.",
        ".example.",
        ".sample.",
    ];
    const LOOPBACK_MARKERS: [&str; 2] = ["localhost", "127.0.0.1"];

    if !value_passes_fp_filters(binding.value) {
        return None;
    }

    let value_regex = match pattern {
        SecretPattern::SelfShaped { value_regex, .. } => value_regex,
        SecretPattern::NameGated {
            name_gate,
            value_regex,
            ..
        } => {
            // Name-gated rules need a name, and it has to pass the gate.
            binding
                .name
                .as_deref()
                .filter(|bound_name| name_gate.is_match(bound_name))?;
            value_regex
        }
    };
    if !value_regex.is_match(binding.value) {
        return None;
    }

    let in_low_trust_path = path.to_str().is_some_and(|raw_path| {
        let lowered = raw_path.to_lowercase();
        LOW_TRUST_PATH_MARKERS
            .iter()
            .any(|marker| lowered.contains(*marker))
    });
    let targets_loopback = LOOPBACK_MARKERS
        .iter()
        .any(|marker| binding.value.contains(*marker));
    let severity = if in_low_trust_path || targets_loopback {
        Severity::Low
    } else {
        pattern.severity()
    };

    let rule_name = pattern.finding_name();
    let line_start = binding.line as u32 + 1;
    Some(Finding {
        id: String::new(),
        detector: "SecretDetector".to_string(),
        severity,
        title: format!("Hardcoded {}", rule_name),
        description: format!(
            "Potential {} found in source code at line {}. \
             Secrets should be stored in environment variables or secret management systems.",
            rule_name, line_start
        ),
        affected_files: vec![detector.relative_path(path)],
        line_start: Some(line_start),
        line_end: Some(line_start),
        suggested_fix: Some(
            "Move this secret to an environment variable or secrets manager".to_string(),
        ),
        estimated_effort: Some("15 minutes".to_string()),
        category: Some("security".to_string()),
        cwe_id: Some("CWE-798".to_string()),
        why_it_matters: Some(
            "Hardcoded secrets can be extracted from source code, leading to credential theft"
                .to_string(),
        ),
        ..Default::default()
    })
}
/// Returns `false` for values that are too short (fewer than 8 bytes), that
/// look like placeholders, or that are a whole `${...}` template reference;
/// `true` otherwise.
fn value_passes_fp_filters(value: &str) -> bool {
    const PLACEHOLDER_MARKERS: [&str; 9] = [
        "example",
        "placeholder",
        "xxxx",
        "your-",
        "-here",
        "changeme",
        "replace",
        "todo",
        "fixme",
    ];

    if value.len() < 8 {
        return false;
    }

    let lowered = value.to_lowercase();
    let looks_like_placeholder = PLACEHOLDER_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
        || lowered.starts_with("xxx")
        || lowered.ends_with("xxx");
    let is_template_reference = value.starts_with("${") && value.ends_with('}');

    !(looks_like_placeholder || is_template_reference)
}
/// Walks the syntax tree below `node` and appends every string literal to
/// `out`, either as a named binding (when `extract_named_binding` pairs it
/// with a name) or as an unnamed one.
///
/// Strings that look like Python docstrings are not emitted. String nodes are
/// leaves for this walk: their children are not visited.
fn collect_bindings<'a>(node: tree_sitter::Node, source: &'a [u8], out: &mut Vec<Binding<'a>>) {
    let kind = node.kind();

    if let Some((name, value_node)) = extract_named_binding(node, source) {
        if is_string_node(value_node.kind()) && !is_likely_python_docstring(value_node) {
            if let Some(content) = string_node_content(value_node, source) {
                out.push(Binding {
                    name: Some(name),
                    value: content,
                    line: value_node.start_position().row,
                });
            }
        }
    }

    if is_string_node(kind) && !is_likely_python_docstring(node) {
        // Skip the unnamed emission only when an ancestor really produced a
        // named binding for this node. Sitting in the value slot is not
        // enough: name extraction can fail (e.g. a subscript on the
        // left-hand side), and the literal would then never be scanned.
        let already_named =
            parent_is_handled_binding_value(node) && named_binding_emitted_for(node, source);
        if !already_named {
            if let Some(content) = string_node_content(node, source) {
                out.push(Binding {
                    name: None,
                    value: content,
                    line: node.start_position().row,
                });
            }
        }
        return;
    }

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        collect_bindings(child, source, out);
    }
}

/// Returns `true` when `extract_named_binding` yields `node` as the value of
/// its parent, or of its grandparent when the parent is an `expression_list`.
fn named_binding_emitted_for(node: tree_sitter::Node, source: &[u8]) -> bool {
    let mut ancestor = node.parent();
    for _ in 0..2 {
        let Some(candidate) = ancestor else {
            return false;
        };
        let yields_node = extract_named_binding(candidate, source)
            .is_some_and(|(_, value)| value.id() == node.id());
        if yields_node {
            return true;
        }
        if candidate.kind() != "expression_list" {
            return false;
        }
        ancestor = candidate.parent();
    }
    false
}
/// If `node` is a construct that binds a name to a value (assignment,
/// declaration, key/value pair, keyword argument, default parameter, class
/// field, enum member, ...), returns the lower-cased name and the value node.
///
/// Returns `None` for any other node kind, or when the name or value cannot
/// be located. The value node is returned as-is; callers decide whether it is
/// a string literal.
fn extract_named_binding<'a>(
    node: tree_sitter::Node<'a>,
    source: &'a [u8],
) -> Option<(String, tree_sitter::Node<'a>)> {
    match node.kind() {
        "assignment" | "assignment_expression" | "assignment_statement"
        | "augmented_assignment" => {
            let name = extract_lhs_name(node.child_by_field_name("left")?, source)?;
            let value = node.child_by_field_name("right")?;
            Some((name, value))
        }
        // Kinds that expose plain `name` / `value` fields.
        "keyword_argument" | "variable_declarator" | "const_item" | "static_item"
        | "default_parameter" | "enum_assignment" => {
            let name = node_text(node.child_by_field_name("name")?, source)?.to_lowercase();
            let value = node.child_by_field_name("value")?;
            Some((name, value))
        }
        "pair" => {
            let key_node = node.child_by_field_name("key")?;
            // Quoted keys are compared without their quotes.
            let key_text = match key_node.kind() {
                "string" | "string_literal" => string_node_content(key_node, source)?,
                _ => node_text(key_node, source)?,
            };
            let value = node.child_by_field_name("value")?;
            Some((key_text.to_lowercase(), value))
        }
        "const_spec" | "var_spec" => {
            let name = node_text(node.child_by_field_name("name")?, source)?.to_lowercase();
            let value = unwrap_single_expression(node.child_by_field_name("value")?)?;
            Some((name, value))
        }
        "short_var_declaration" => {
            let left = node.child_by_field_name("left")?;
            let right = node.child_by_field_name("right")?;
            let name = node_text(unwrap_single_expression(left)?, source)?.to_lowercase();
            let value = unwrap_single_expression(right)?;
            Some((name, value))
        }
        "keyed_element" => {
            let key = node.named_child(0)?;
            let value = node.named_child(1)?;
            let key_text = match key.kind() {
                "string" | "string_literal" | "interpreted_string_literal" => {
                    string_node_content(key, source)?
                }
                _ => node_text(key, source)?,
            };
            Some((key_text.to_lowercase(), value))
        }
        "let_declaration" => {
            let name = extract_lhs_name(node.child_by_field_name("pattern")?, source)?;
            let value = node.child_by_field_name("value")?;
            Some((name, value))
        }
        "init_declarator" => {
            let name = extract_lhs_name(node.child_by_field_name("declarator")?, source)?;
            let value = node.child_by_field_name("value")?;
            Some((name, value))
        }
        "field_definition" | "public_field_definition" => {
            // JavaScript's `field_definition` names the identifier `property`;
            // TypeScript's `public_field_definition` names it `name`.
            let name_node = node
                .child_by_field_name("property")
                .or_else(|| node.child_by_field_name("name"))?;
            let name = node_text(name_node, source)?.to_lowercase();
            let value = node.child_by_field_name("value")?;
            Some((name, value))
        }
        _ => None,
    }
}

/// Returns the only named child of an `expression_list` that has exactly one,
/// otherwise `node` itself.
fn unwrap_single_expression(node: tree_sitter::Node<'_>) -> Option<tree_sitter::Node<'_>> {
    if node.kind() == "expression_list" && node.named_child_count() == 1 {
        node.named_child(0)
    } else {
        Some(node)
    }
}
/// Resolves the lower-cased name on the left-hand side of a binding.
///
/// * identifier-like nodes yield their own text;
/// * attribute / member / field expressions yield the name of their last
///   named child;
/// * pointer declarators yield the first named child that resolves;
/// * any other node is looked through only when it has exactly one named child.
fn extract_lhs_name(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
    match node.kind() {
        "identifier" | "property_identifier" | "field_identifier" | "type_identifier" => {
            node_text(node, source).map(str::to_lowercase)
        }
        "attribute" | "member_expression" | "field_expression" => {
            let tail_idx = node.named_child_count().checked_sub(1)?;
            node.named_child(tail_idx)
                .and_then(|tail| extract_lhs_name(tail, source))
        }
        "pointer_declarator" => (0..node.named_child_count())
            .filter_map(|idx| node.named_child(idx))
            .find_map(|child| extract_lhs_name(child, source)),
        _ if node.named_child_count() == 1 => node
            .named_child(0)
            .and_then(|only| extract_lhs_name(only, source)),
        _ => None,
    }
}
/// Returns the source text covered by `node`, or `None` when the node's byte
/// range does not fit `source` or the bytes are not valid UTF-8.
///
/// The end offset is clamped to `source.len()`. The slice is taken with a
/// checked `get`, so a start offset beyond the clamped end yields `None`
/// instead of panicking.
fn node_text<'a>(node: tree_sitter::Node, source: &'a [u8]) -> Option<&'a str> {
    let end = node.end_byte().min(source.len());
    let bytes = source.get(node.start_byte()..end)?;
    std::str::from_utf8(bytes).ok()
}
/// Returns the text inside a string literal node, without prefix letters and
/// delimiters where those can be recognised.
///
/// If the node has a child of kind `string_content`, `string_fragment` or
/// `string_literal_content`, the text of the first such child is returned.
/// Otherwise the node's raw text is parsed by hand: leading prefix letters
/// (`f`, `r`, `b`, `u`, either case), an optional run of `#`, and a matching
/// pair of `"`, `'` or backtick delimiters are stripped. Whenever the
/// delimiters cannot be recognised the raw text is returned unchanged.
fn string_node_content<'a>(node: tree_sitter::Node, source: &'a [u8]) -> Option<&'a str> {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if matches!(
child.kind(),
"string_content" | "string_fragment" | "string_literal_content"
) {
// Only the first content child is used.
return node_text(child, source);
}
}
let raw = node_text(node, source)?;
let bytes = raw.as_bytes();
let mut i = 0;
// Skip prefix letters that are followed by something other than a
// delimiter, looking at no more than the first three bytes.
while i < bytes.len().min(3)
&& matches!(
bytes[i],
b'f' | b'F' | b'r' | b'R' | b'b' | b'B' | b'u' | b'U'
)
&& i + 1 < bytes.len()
&& !matches!(bytes[i + 1], b'"' | b'\'' | b'`' | b'#')
{
i += 1;
}
// Skip a prefix letter that sits directly before the opening delimiter.
if i < bytes.len()
&& matches!(
bytes[i],
b'f' | b'F' | b'r' | b'R' | b'b' | b'B' | b'u' | b'U'
)
&& i + 1 < bytes.len()
&& matches!(bytes[i + 1], b'"' | b'\'' | b'`' | b'#')
{
i += 1;
}
// A run of `#` between the prefix and the quote must be mirrored after
// the closing quote.
let hash_count = bytes[i..].iter().take_while(|&&b| b == b'#').count();
let open_delim_end = i + hash_count;
if open_delim_end >= bytes.len() {
return Some(raw);
}
let quote = bytes[open_delim_end];
if !matches!(quote, b'"' | b'\'' | b'`') {
return Some(raw);
}
// The closing delimiter is the quote followed by `hash_count` hashes.
let close_pat_len = 1 + hash_count;
if bytes.len() < open_delim_end + 1 + close_pat_len {
return Some(raw);
}
let close_start = bytes.len() - close_pat_len;
if bytes[close_start] != quote || bytes[close_start + 1..].iter().any(|&b| b != b'#') {
return Some(raw);
}
let content_start = open_delim_end + 1;
let content_end = close_start;
// Defensive: the length check above already guarantees
// `content_start <= content_end`.
if content_start > content_end {
return Some("");
}
std::str::from_utf8(&bytes[content_start..content_end]).ok()
}
/// Returns `true` when `kind` is one of the node kinds this detector treats
/// as a string literal.
fn is_string_node(kind: &str) -> bool {
    const STRING_NODE_KINDS: [&str; 6] = [
        "string",
        "string_literal",
        "raw_string_literal",
        "interpreted_string_literal",
        "verbatim_string_literal",
        "template_string",
    ];
    STRING_NODE_KINDS.iter().any(|known| *known == kind)
}
/// Returns `true` when `node` occupies the value slot of a construct that
/// `extract_named_binding` understands, i.e. the node is a candidate for
/// having been reported as a named binding already.
///
/// Two shapes are recognised:
/// * the node is directly the `right` / `value` child of its parent;
/// * the node is the only expression in an `expression_list` that is itself
///   the `right` / `value` child of a `short_var_declaration`, `const_spec` or
///   `var_spec` — the only kinds for which `extract_named_binding` unwraps
///   such a list.
fn parent_is_handled_binding_value(node: tree_sitter::Node) -> bool {
    let Some(parent) = node.parent() else {
        return false;
    };

    // The node's direct parent is the list here, so the owning declaration is
    // one level further up.
    if parent.kind() == "expression_list" {
        let is_sole_expression = parent.named_child_count() == 1
            && parent
                .named_child(0)
                .is_some_and(|only| only.id() == node.id());
        if !is_sole_expression {
            return false;
        }
        let Some(declaration) = parent.parent() else {
            return false;
        };
        let wrapped_value = match declaration.kind() {
            "short_var_declaration" => declaration.child_by_field_name("right"),
            "const_spec" | "var_spec" => declaration.child_by_field_name("value"),
            _ => return false,
        };
        return wrapped_value.is_some_and(|list| list.id() == parent.id());
    }

    let value_node = match parent.kind() {
        "assignment"
        | "assignment_expression"
        | "assignment_statement"
        | "augmented_assignment"
        | "short_var_declaration" => parent.child_by_field_name("right"),
        "keyword_argument"
        | "pair"
        | "variable_declarator"
        | "const_spec"
        | "var_spec"
        | "let_declaration"
        | "const_item"
        | "static_item"
        | "init_declarator"
        | "default_parameter"
        | "field_definition"
        | "public_field_definition"
        // Kept in sync with `extract_named_binding`, which also binds enum members.
        | "enum_assignment" => parent.child_by_field_name("value"),
        "keyed_element" => parent.named_child(1),
        _ => return false,
    };
    value_node.is_some_and(|value| value.id() == node.id())
}
/// Returns `true` when `node` is the direct child of an `expression_statement`
/// that is the first `expression_statement` child of a `block` or `module`.
fn is_likely_python_docstring(node: tree_sitter::Node) -> bool {
    let Some(statement) = node.parent() else {
        return false;
    };
    if statement.kind() != "expression_statement" {
        return false;
    }

    let Some(scope) = statement.parent() else {
        return false;
    };
    if !matches!(scope.kind(), "block" | "module") {
        return false;
    }

    let mut cursor = scope.walk();
    let first_statement = scope
        .children(&mut cursor)
        .find(|child| child.kind() == "expression_statement");
    first_statement.is_some_and(|first| first.id() == statement.id())
}
impl Detector for SecretDetector {
fn name(&self) -> &'static str {
"secret-detection"
}
fn description(&self) -> &'static str {
"Detects hardcoded secrets, API keys, and passwords"
}
fn bypass_postprocessor(&self) -> bool {
true
}
fn file_extensions(&self) -> &'static [&'static str] {
&[
"py", "js", "ts", "jsx", "tsx", "rb", "java", "go", "rs", "env", "yml", "yaml", "json",
"toml", "cfg", "ini", "conf",
]
}
fn content_requirements(&self) -> crate::detectors::detector_context::ContentFlags {
crate::detectors::detector_context::ContentFlags::HAS_SECRET_PATTERN
}
fn detect(
&self,
ctx: &crate::detectors::analysis_context::AnalysisContext,
) -> Result<Vec<Finding>> {
let graph = ctx.graph;
let files = &ctx.as_file_provider();
let i = graph.interner();
let mut findings = vec![];
for path in files.files_with_extensions(&[
"py",
"js",
"ts",
"jsx",
"tsx",
"rs",
"go",
"java",
"rb",
"php",
"cs",
"cpp",
"c",
"h",
"hpp",
"yaml",
"yml",
"json",
"toml",
"env",
"conf",
"config",
"sh",
"bash",
"zsh",
"properties",
"xml",
]) {
if findings.len() >= self.max_findings {
break;
}
let path_str = path.to_string_lossy();
if path_str.contains("node_modules")
|| path_str.contains(".git")
|| path_str.contains("vendor")
|| path_str.contains("target")
{
continue;
}
if path_str.contains("/detectors/") && path_str.ends_with(".rs") {
continue;
}
let raw = match files.content(path) {
Some(c) => c,
None => continue,
};
if !raw.contains("AKIA") && !raw.contains("ghp_") && !raw.contains("sk_live_") && !raw.contains("SG.") && !raw.contains("PRIVATE KEY") && !raw.contains("api_key") && !raw.contains("api-key") && !raw.contains("apikey")
&& !raw.contains("API_KEY") && !raw.contains("API-KEY") && !raw.contains("APIKEY")
&& !raw.contains("password") && !raw.contains("PASSWORD")
&& !raw.contains("passwd") && !raw.contains("PASSWD")
&& !raw.contains("secret") && !raw.contains("SECRET")
&& !raw.contains("token") && !raw.contains("TOKEN")
&& !raw.contains("postgres://") && !raw.contains("mysql://")
&& !raw.contains("mongodb://") && !raw.contains("redis://")
&& !raw.contains("xoxb-") && !raw.contains("xoxp-")
&& !raw.contains("xoxa-") && !raw.contains("xoxr-")
{
continue;
}
debug!("Scanning for secrets: {}", path.display());
let raw_content = match files.content(path) {
Some(c) => c,
None => continue,
};
let lang = Language::from_path(path);
let has_ast_grammar = matches!(
lang,
Language::Python
| Language::JavaScript
| Language::TypeScript
| Language::Rust
| Language::Go
| Language::Java
| Language::CSharp
| Language::C
| Language::Cpp
);
if has_ast_grammar {
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
let cached = files.tree(path);
let scan = ScanInputs::new(path, &raw_content, ext);
let ast_inputs = ScanAstInputs::new(scan, lang, cached.as_deref());
findings.extend(self.scan_file_ast(&ast_inputs));
} else {
findings.extend(self.scan_file(path, &raw_content));
}
}
for finding in &mut findings {
if let (Some(file_path), Some(line)) =
(finding.affected_files.first(), finding.line_start)
{
let path_str = file_path.to_string_lossy().to_string();
if let Some(f) = graph.find_function_at(&path_str, line) {
let callers = graph.get_callers(f.qn(i)).len();
let name_lower = f.node_name(i).to_lowercase();
let is_config = name_lower.contains("config")
|| name_lower.contains("init")
|| name_lower.contains("setup")
|| name_lower.contains("settings");
let func_name = f.node_name(i);
let mut notes = Vec::new();
notes.push(format!(
"📦 In function: `{}` ({} callers)",
func_name, callers
));
if is_config {
notes.push("⚙️ In config/setup function".to_string());
if finding.severity == Severity::Critical {
finding.severity = Severity::High;
}
}
if callers > 10 && finding.severity == Severity::High {
finding.severity = Severity::Critical;
}
finding.description = format!(
"{}\n\n**Context:**\n{}",
finding.description,
notes.join("\n")
);
}
}
}
Ok(findings)
}
}
impl crate::detectors::RegisteredDetector for SecretDetector {
    /// Builds a `SecretDetector` for the repository path carried by `init` and
    /// returns it behind an `Arc<dyn Detector>`.
    fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
        let detector = Self::new(init.repo_path);
        std::sync::Arc::new(detector)
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::builder::GraphBuilder;
#[test]
fn test_detects_hardcoded_aws_key() {
let store = GraphBuilder::new().freeze();
let _detector = SecretDetector::new("/mock/repo");
let detector = SecretDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![("config.rb", "\nAWS_ACCESS_KEY = \"AKIAIOSFODNN7ABCDEFG\"\n")],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should detect hardcoded AWS access key"
);
assert!(findings.iter().any(|f| f.title.contains("AWS Access Key")));
}
#[test]
fn test_no_finding_for_env_variable_usage() {
let store = GraphBuilder::new().freeze();
let detector = SecretDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("config.py", "\nimport os\nAWS_KEY = os.environ.get(\"AWS_ACCESS_KEY_ID\")\nSECRET = os.getenv(\"AWS_SECRET_ACCESS_KEY\")\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag secrets read from environment variables, but got: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_password_in_docstring() {
let store = GraphBuilder::new().freeze();
let detector = SecretDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("auth.py", "def authenticate(username, password):\n \"\"\"\n Authenticate user with password.\n password = hashlib.sha256(raw).hexdigest()\n \"\"\"\n return check_password(username, password)\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag 'password' references in docstrings. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_password_type_annotation() {
let store = GraphBuilder::new().freeze();
let detector = SecretDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("models.py", "from pydantic import BaseModel\n\nclass LoginRequest(BaseModel):\n username: str\n password: str\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag password type annotations. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_password_field_definition() {
let store = GraphBuilder::new().freeze();
let _detector = SecretDetector::new("/mock/repo");
let detector = SecretDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("models.rb", "password = CharField(max_length=128)\nsecret = SecretManager.from_config(settings)\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag function/class calls as secrets. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_password_list_assignment() {
let store = GraphBuilder::new().freeze();
let detector = SecretDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("config.rb", "password = [\"django.contrib.auth.hashers.PBKDF2PasswordHasher\"]\nsecret = {\"key\": \"value\", \"other\": \"data\"}\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag list/dict literal assignments as secrets. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_still_detects_real_hardcoded_password() {
let store = GraphBuilder::new().freeze();
let detector = SecretDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![("config.rb", "password = \"super_secret_password_123\"\n")],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should still detect real hardcoded password"
);
}
#[test]
fn test_skips_uppercase_constant_reference() {
let store = GraphBuilder::new().freeze();
let detector = SecretDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![("config.rb", "password = HARDCODED_SECRET_VALUE\n")],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag variable/constant references as secrets. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_password_variable_reference() {
let store = GraphBuilder::new().freeze();
let detector = SecretDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("views.rb", "password=auth_password,\nsecret = settings.SECRET_KEY\nself._password = raw_password\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag variable references as secrets. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_settings_read() {
let store = GraphBuilder::new().freeze();
let detector = SecretDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("config.rb", "self.password = settings.EMAIL_HOST_PASSWORD if password is None else password\npassword=self.settings_dict[\"PASSWORD\"],\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag settings reads as secrets. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
/// Values pulled from request metadata or cleaned form data by subscript
/// (`request.META[...]`, `self.cleaned_data[...]`) must not be reported, even
/// though the assigned names contain `secret` / `password`.
#[test]
fn test_no_finding_for_request_data_read() {
    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![(
            "views.rb",
            "csrf_secret = request.META[\"CSRF_COOKIE\"]\nold_password = self.cleaned_data[\"old_password\"]\n",
        )],
    );

    let reported = scanner.detect(&context).expect("detection should succeed");

    let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
    assert!(
        reported.is_empty(),
        "Should not flag request/form data reads as secrets. Found: {:?}",
        titles
    );
}
/// Regression test: a `.py` file containing a quoted hardcoded password (and
/// a Stripe-shaped API key) must produce at least one finding, and one of
/// them must be titled "Generic Secret".
#[test]
fn test_audit_repro_python_hardcoded_password() {
    // The token is stitched together from fragments at runtime rather than
    // written as one literal (presumably so scanners do not flag this file).
    let stripe_token = ["sk", "_live_", "abcdefghijklmnopqrstuvwx"].concat();
    let source =
        format!("password = \"hardcoded_super_secret_p4ss\"\napi_key = \"{stripe_token}\"\n");

    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![("creds.py", source.as_str())],
    );
    let reported = scanner.detect(&context).expect("detection should succeed");

    let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
    assert!(
        !reported.is_empty(),
        "QA audit regression: SecretDetector returned 0 findings for an \
         obvious hardcoded password in Python (.py). The masking layer \
         was stripping the value. Findings: {:?}",
        titles
    );

    let saw_generic_secret = reported
        .iter()
        .any(|f| f.title.contains("Generic Secret"));
    let titles_with_lines: Vec<_> = reported
        .iter()
        .map(|f| (&f.title, f.line_start))
        .collect();
    assert!(
        saw_generic_secret,
        "Expected a Generic Secret finding on the password line. Got: {:?}",
        titles_with_lines
    );
}
/// A JavaScript file with a `const password = "..."` assignment and a
/// GitHub-token-shaped string literal must yield both a "Generic Secret" and
/// a "GitHub Token" finding.
#[test]
fn test_ast_detects_javascript_hardcoded_password() {
    // Build the token from pieces instead of embedding it as a single literal.
    let filler = "a".repeat(36);
    let github_pat = ["ghp", "_", filler.as_str()].concat();
    let source =
        format!("const password = \"hardcoded_pass_j6k\";\nconst token = \"{github_pat}\";\n");

    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![("app.js", source.as_str())],
    );
    let reported = scanner.detect(&context).expect("detection should succeed");

    // True when any reported finding's title contains `needle`.
    let has_title = |needle: &str| reported.iter().any(|f| f.title.contains(needle));
    let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();

    assert!(
        has_title("Generic Secret"),
        "Expected Generic Secret on JS const password assignment. Got: {:?}",
        titles
    );
    assert!(
        has_title("GitHub Token"),
        "Expected GitHub Token on JS string literal. Got: {:?}",
        titles
    );
}
/// A Rust source file binding a Stripe-shaped token to a local inside `main`
/// must produce a finding whose title contains "Stripe API Key".
#[test]
fn test_ast_detects_rust_hardcoded_stripe_key() {
    // Token assembled from fragments rather than written as one literal.
    let stripe_token = ["sk", "_live_", "zzzzzzzzzzzzzzzzzzzzzzzzz"].concat();
    let source = format!(
        "fn main() {{\n let stripe_key = \"{stripe_token}\";\n let _ = stripe_key;\n}}\n"
    );

    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![("config.rs", source.as_str())],
    );
    let reported = scanner.detect(&context).expect("detection should succeed");

    let saw_stripe_key = reported
        .iter()
        .any(|f| f.title.contains("Stripe API Key"));
    let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
    assert!(
        saw_stripe_key,
        "Expected Stripe API Key on Rust string literal. Got: {:?}",
        titles
    );
}
/// A `password = "..."` fragment that appears only inside a Python function
/// docstring must not be reported: the detector is expected to return no
/// findings for this file.
#[test]
fn test_ast_skips_python_docstring_with_password_word() {
    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![(
            "auth.py",
            "def authenticate(user, raw):\n \"\"\"Authenticate user. password = \\\"never_a_real_secret_xyz\\\"\"\"\"\n return user\n",
        )],
    );

    let reported = scanner.detect(&context).expect("detection should succeed");

    // Title plus starting line, so a failure shows where the match landed.
    let titles_with_lines: Vec<_> = reported
        .iter()
        .map(|f| (&f.title, f.line_start))
        .collect();
    assert!(
        reported.is_empty(),
        "Should not flag credentials inside a Python docstring. Got: {:?}",
        titles_with_lines
    );
}
/// For `os.environ.get("DB_PASSWORD", "dev_default_pw")`, every finding that
/// is reported (if any) must carry `Severity::Low` or `Severity::Info`.
/// The test passes trivially when nothing is reported.
#[test]
fn test_ast_skips_python_env_fallback() {
    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![(
            "settings.py",
            "import os\nDB_PASSWORD = os.environ.get(\"DB_PASSWORD\", \"dev_default_pw\")\n",
        )],
    );

    let reported = scanner.detect(&context).expect("detection should succeed");

    for finding in reported.iter() {
        let is_low_or_info = matches!(finding.severity, Severity::Low | Severity::Info);
        assert!(
            is_low_or_info,
            "env.get() with fallback should be Low severity, got {:?} for {:?}",
            finding.severity,
            finding.title
        );
    }
}
/// Assigning the result of a call (`CharField(...)`,
/// `SecretManager.from_config(...)`) to a credential-named variable must not
/// be reported as a secret.
#[test]
fn test_ast_python_function_call_value_not_flagged() {
    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![(
            "models.py",
            "password = CharField(max_length=128)\nsecret = SecretManager.from_config(settings)\n",
        )],
    );

    let reported = scanner.detect(&context).expect("detection should succeed");

    let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
    assert!(
        reported.is_empty(),
        "Should not flag function/class calls as secrets. Got: {:?}",
        titles
    );
}
/// An AKIA-shaped string used as a dictionary *key* in a Python literal must
/// still be reported with a title containing "AWS Access Key".
#[test]
fn test_ast_detects_aws_key_used_as_dict_key() {
    // Key id is formatted from fragments rather than written as one literal.
    let akia = format!("AKIA{}{}", "IOSFODNN", "7ABCDEFG");
    let py_src = format!("config = {{\"{key}\": \"placeholder\"}}\n", key = akia);

    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![("audit.py", py_src.as_str())],
    );
    let reported = scanner.detect(&context).expect("detection should succeed");

    let saw_aws_key = reported
        .iter()
        .any(|f| f.title.contains("AWS Access Key"));
    let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
    assert!(
        saw_aws_key,
        "AKIA-shaped dict key must be reported. Got: {:?}",
        titles
    );
}
/// A JavaScript class field initialised with a string literal
/// (`password = "hardcoded123!";`) must be reported; the finding title may
/// contain either "Generic Secret" or "Password".
#[test]
fn test_ast_detects_js_class_field_secret() {
    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![(
            "auth.js",
            "class Auth {\n password = \"hardcoded123!\";\n}\n",
        )],
    );

    let reported = scanner.detect(&context).expect("detection should succeed");

    let saw_password_finding = reported.iter().any(|f| {
        let title = &f.title;
        title.contains("Generic Secret") || title.contains("Password")
    });
    let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
    assert!(
        saw_password_finding,
        "JS class field with hardcoded password must be reported. Got: {:?}",
        titles
    );
}
/// A Python function whose `password` parameter defaults to a string literal
/// must produce at least one finding (the title is not constrained here).
#[test]
fn test_ast_detects_python_default_parameter_secret() {
    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![(
            "auth.py",
            "def authenticate(user, password=\"hardcoded123!\"):\n return user\n",
        )],
    );

    let reported = scanner.detect(&context).expect("detection should succeed");

    let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
    assert!(
        !reported.is_empty(),
        "Default-parameter hardcoded password must be reported. Got: {:?}",
        titles
    );
}
/// A variable named `apiKeyHeader` holding a header-name string must not be
/// reported as "Generic API Key". Findings with other titles are tolerated.
#[test]
fn test_ast_strict_api_key_gate_rejects_camelcase_substring() {
    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");
    let context = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![(
            "config.py",
            "apiKeyHeader = \"X-API-Key-Custom-Header-Name\"\n",
        )],
    );

    let reported = scanner.detect(&context).expect("detection should succeed");

    let saw_generic_api_key = reported
        .iter()
        .any(|f| f.title.contains("Generic API Key"));
    let titles: Vec<_> = reported.iter().map(|f| &f.title).collect();
    assert!(
        !saw_generic_api_key,
        "Generic API Key gate must be anchored — `apiKeyHeader` is not an api_key. Got: {:?}",
        titles
    );
}
/// Exercises both sides of the "Generic Secret" name check with the same
/// detector instance:
///
/// 1. the plural name `passwords` must NOT yield a "Generic Secret" finding;
/// 2. the name `auth_token_value` MUST yield a "Generic Secret" finding.
#[test]
fn test_ast_generic_secret_name_gate_boundaries() {
    let graph = GraphBuilder::new().freeze();
    let scanner = SecretDetector::new("/mock/repo");

    // Case 1: plural identifier, expected to be rejected.
    let plural_ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![("a.py", "passwords = \"this-is-a-list-name\"\n")],
    );
    let plural_findings = scanner
        .detect(&plural_ctx)
        .expect("detection should succeed");
    let plural_flagged = plural_findings
        .iter()
        .any(|f| f.title.contains("Generic Secret"));
    let plural_titles: Vec<_> = plural_findings.iter().map(|f| &f.title).collect();
    assert!(
        !plural_flagged,
        "Plural `passwords` must not match Generic Secret. Got: {:?}",
        plural_titles
    );

    // Case 2: `token` surrounded by underscores, expected to be accepted.
    let token_ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
        &graph,
        vec![(
            "b.py",
            "auth_token_value = \"hardcoded-bearer-abcd-1234\"\n",
        )],
    );
    let token_findings = scanner
        .detect(&token_ctx)
        .expect("detection should succeed");
    let token_flagged = token_findings
        .iter()
        .any(|f| f.title.contains("Generic Secret"));
    let token_titles: Vec<_> = token_findings.iter().map(|f| &f.title).collect();
    assert!(
        token_flagged,
        "`auth_token_value` should match via `_token_` boundary. Got: {:?}",
        token_titles
    );
}
}