// repotoire 0.9.0

//! Secret Detection
//!
//! Detects hardcoded secrets, API keys, passwords, and tokens in source code.
//! CWE-798: Use of Hard-coded Credentials
//!
//! # Architecture
//!
//! Two scan paths, picked by file language:
//!
//! 1. **AST path** (Python, JS/TS, Rust, Go, Java, C#, C, C++): walks the
//!    tree-sitter parse tree, extracts `(name, value)` pairs from binding
//!    nodes (`assignment`, `variable_declarator`, `keyword_argument`, `pair`,
//!    `const_item`, `init_declarator`, ...) plus naked string literals, and
//!    matches a typed `SecretPattern` table against the value text directly.
//!    No regex on lines, no `line.contains("=")` heuristics — the AST already
//!    knows what's a name and what's a value.
//!
//! 2. **Line path** (`.env`, `.yml`, `.json`, `.toml`, `.ini`, `.conf`, Ruby,
//!    PHP, Kotlin, Swift, ...): runs the legacy line-based regex scanner on
//!    raw content for languages without tree-sitter grammars. Config formats
//!    have no docstrings, so masking isn't needed.
//!
//! Patterns split into two kinds:
//!
//! - `SelfShaped` — match by the secret's own format alone (AKIA, ghp_,
//!   sk_live_, SG., xox..., postgres://user:pwd@host, BEGIN PRIVATE KEY).
//!   Applied to every string literal regardless of surrounding name.
//! - `NameGated` — require a sensitive LHS name (`password`, `secret`,
//!   `api_key`, etc.) AND a string value of sufficient length/content.
//!   Eliminates the previous regex's value-type guesswork (checking whether
//!   the first character is `"`, `'`, or `b`) since the AST tells us if the
//!   RHS is a literal.
//!
//! This is the proper structural fix for QA audit finding #1: the previous
//! detector ran regex over `masked_content`, which had already replaced every
//! string literal's contents with spaces — the very place secrets live.

use crate::detectors::ast_fingerprint::parse_root_ext;
use crate::detectors::base::{is_test_file, Detector, DetectorConfig};
use crate::detectors::security::scan_inputs::{ScanAstInputs, ScanInputs};
use crate::detectors::security::secret_formats;
use crate::graph::GraphQueryExt;
use crate::models::{Evidence, Finding, Severity, SourceSpan, Tier};
use crate::parsers::lightweight::Language;
use anyhow::Result;
use regex::Regex;
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use tracing::debug;

/// Secret patterns split by matching strategy.
///
/// `SelfShaped` patterns identify a secret by its own format alone (e.g. AWS
/// `AKIA[0-9A-Z]{16}`, GitHub `ghp_[a-zA-Z0-9]{36}`); they apply to every
/// string-literal value regardless of the surrounding name.
///
/// `NameGated` patterns require BOTH a sensitive LHS name (matched by
/// `name_gate`) AND a value matching `value_regex`. The AST scanner already
/// knows what is a name and what is a value, so unlike the old `\s*[=:]\s*`-
/// shaped regex these never need to re-derive structure from text.
///
/// The line-based scanner (used for non-AST languages) obtains a whole-line
/// regex for each pattern from `to_legacy_line_regex()`: self-shaped patterns
/// reuse their value regex, while name-gated patterns map to hard-coded
/// `name = value` forms selected by the pattern's `name`. This lets it keep
/// matching against whole lines for `.env`, `.yml`, `.toml`, etc.
enum SecretPattern {
    /// Matches on the value's own shape; no surrounding name is consulted.
    SelfShaped {
        /// Display name, interpolated into finding titles and descriptions.
        name: &'static str,
        /// Regex applied to the string-literal value.
        value_regex: Regex,
        /// Base severity, before any context-based downgrade.
        severity: Severity,
    },
    /// Matches only when the LHS name passes `name_gate` AND the value passes
    /// `value_regex`.
    NameGated {
        /// Display name, interpolated into finding titles and descriptions.
        /// `to_legacy_line_regex` also switches on this string, so renaming a
        /// pattern changes which legacy line regex it gets.
        name: &'static str,
        /// Name regex applied to the (already lowercased) LHS identifier text.
        /// Consumed by the AST scanner — the line-based scanner currently
        /// hard-codes the legacy combined form in `to_legacy_line_regex`.
        // NOTE(review): `match_pattern_against_binding` reads this field, so
        // the `dead_code` allowance below may no longer be needed — confirm.
        #[allow(dead_code)]
        name_gate: Regex,
        /// Regex applied to the value content (between the quotes).
        value_regex: Regex,
        /// Base severity, before any context-based downgrade.
        severity: Severity,
    },
}

impl SecretPattern {
    fn finding_name(&self) -> &'static str {
        match self {
            SecretPattern::SelfShaped { name, .. } | SecretPattern::NameGated { name, .. } => name,
        }
    }

    fn severity(&self) -> Severity {
        match self {
            SecretPattern::SelfShaped { severity, .. }
            | SecretPattern::NameGated { severity, .. } => *severity,
        }
    }

    /// Reconstruct a legacy `name\s*[=:]\s*"value"` -shaped regex for the
    /// line-based scanner. Self-shaped patterns just return their value regex
    /// (they don't need a name on the line).
    fn to_legacy_line_regex(&self) -> Regex {
        match self {
            SecretPattern::SelfShaped { value_regex, .. } => value_regex.clone(),
            SecretPattern::NameGated {
                name_gate: _,
                value_regex,
                name,
                ..
            } => {
                // Build a permissive line regex for the legacy line path. We
                // can't easily compose two regexes, so for the few NameGated
                // patterns we hard-code the legacy form here (it matched the
                // original `r"(?i)(secret|password|...)\s*[=:]\s*[...]"`).
                match *name {
                    "Generic Secret" => {
                        Regex::new(r#"(?i)(secret|password|passwd|pwd)\s*[=:]\s*["'][^"']{8,}["']"#)
                            .expect("valid regex")
                    }
                    "Generic API Key" => {
                        Regex::new(r"(?i)api[_-]?key\s*[=:]\s*[a-zA-Z0-9_\-]{20,}")
                            .expect("valid regex")
                    }
                    "AWS Secret Access Key" => {
                        Regex::new(r"(?i)aws_secret_access_key\s*[=:]\s*[A-Za-z0-9/+=]{40}")
                            .expect("valid regex")
                    }
                    _ => value_regex.clone(),
                }
            }
        }
    }
}

/// Master pattern table.
///
/// Order matters: when multiple patterns match the same value (e.g. an AWS
/// secret access key value also matches `Generic API Key`'s relaxed pattern),
/// we report only the FIRST one matched per `(line, value)` pair. The most
/// specific patterns are listed first.
static SECRET_PATTERNS: LazyLock<Vec<SecretPattern>> = LazyLock::new(|| {
    vec![
        // ---------- Self-shaped (match by value alone) ----------
        SecretPattern::SelfShaped {
            name: "AWS Access Key ID",
            value_regex: Regex::new(r"AKIA[0-9A-Z]{16}").expect("valid regex"),
            severity: Severity::Critical,
        },
        SecretPattern::SelfShaped {
            name: "GitHub Token",
            value_regex: Regex::new(r"ghp_[a-zA-Z0-9]{36}").expect("valid regex"),
            severity: Severity::Critical,
        },
        SecretPattern::SelfShaped {
            name: "Private Key",
            value_regex: Regex::new(r"-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----")
                .expect("valid regex"),
            severity: Severity::Critical,
        },
        SecretPattern::SelfShaped {
            name: "Slack Token",
            value_regex: Regex::new(r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*")
                .expect("valid regex"),
            severity: Severity::Critical,
        },
        SecretPattern::SelfShaped {
            name: "Stripe API Key",
            value_regex: Regex::new(r"sk_live_[a-zA-Z0-9]{24,}").expect("valid regex"),
            severity: Severity::Critical,
        },
        SecretPattern::SelfShaped {
            name: "Database URL with Password",
            value_regex: Regex::new(r"(?i)(postgres|mysql|mongodb|redis)://[^:]+:[^@]+@")
                .expect("valid regex"),
            severity: Severity::Critical,
        },
        SecretPattern::SelfShaped {
            name: "SendGrid API Key",
            value_regex: Regex::new(r"SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}")
                .expect("valid regex"),
            severity: Severity::High,
        },
        // ---------- Name-gated (require sensitive name + value) ----------
        SecretPattern::NameGated {
            name: "AWS Secret Access Key",
            name_gate: Regex::new(r"(?i)aws.?secret.?access.?key").expect("valid regex"),
            value_regex: Regex::new(r"^[A-Za-z0-9/+=]{40}$").expect("valid regex"),
            severity: Severity::Critical,
        },
        SecretPattern::NameGated {
            name: "Generic API Key",
            name_gate: Regex::new(r"(?i)^api[_-]?key$").expect("valid regex"),
            value_regex: Regex::new(r"^[a-zA-Z0-9_\-]{20,}$").expect("valid regex"),
            severity: Severity::High,
        },
        SecretPattern::NameGated {
            name: "Generic Secret",
            // Match any name that contains password/secret/passwd/pwd/token,
            // but not pure prefix/suffix words like "passwords" or hash names
            // (handled by FP filters on the value side).
            name_gate: Regex::new(r"(?i)(^|_)(secret|password|passwd|pwd|token)($|_)")
                .expect("valid regex"),
            value_regex: Regex::new(r"^[^\s]{8,}$").expect("valid regex"),
            severity: Severity::High,
        },
    ]
});

/// Detector for hardcoded credentials (reported as CWE-798).
///
/// Offers a line-based scan (`scan_file`) and an AST-based scan
/// (`scan_file_ast`); both return `Finding`s whose file paths are made
/// relative to `repository_path`.
pub struct SecretDetector {
    /// Detector configuration; `new` fills it with `DetectorConfig::default()`.
    #[allow(dead_code)] // Part of detector pattern
    config: DetectorConfig,
    /// Repository root that `relative_path` resolves file paths against.
    #[allow(dead_code)] // Part of detector pattern, used for file scanning
    repository_path: PathBuf,
    /// Set to 100 by `new`.
    // NOTE(review): presumably a cap on reported findings, but nothing in the
    // scan methods shown here reads it — confirm where it is enforced.
    max_findings: usize,
}

impl SecretDetector {
    /// Create a detector rooted at `repository_path`, with the default
    /// `DetectorConfig` and `max_findings` set to 100.
    pub fn new(repository_path: impl Into<PathBuf>) -> Self {
        let repository_path = repository_path.into();
        let config = DetectorConfig::default();
        Self {
            config,
            repository_path,
            max_findings: 100,
        }
    }

    /// Express `path` relative to this detector's repository root, delegating
    /// to the shared `detector_relative_path` helper.
    fn relative_path(&self, path: &Path) -> PathBuf {
        let repo_root = &self.repository_path;
        crate::detectors::detector_relative_path(repo_root, path)
    }

    /// Check if a Python `os.environ.get()` or `os.getenv()` call on `line`
    /// has a fallback (second argument).
    ///
    /// Pattern: `os.environ.get("KEY", "fallback")` or
    /// `os.getenv("KEY", "fallback")`. The call name is matched
    /// case-insensitively (ASCII). Returns `true` when a comma is found at the
    /// top level of the call's argument list, i.e. a default value is passed.
    ///
    /// This is a textual heuristic: a comma inside a string-literal argument
    /// is counted as an argument separator too.
    fn has_python_env_fallback(line: &str) -> bool {
        // ASCII lowercasing never changes byte lengths, so offsets found in
        // `line_lower` are valid offsets into `line`. Full Unicode lowercasing
        // can grow or shrink the string (e.g. `İ`), which would make the slice
        // below land on the wrong byte or panic.
        let line_lower = line.to_ascii_lowercase();

        for pattern in ["os.environ.get(", "os.getenv("] {
            let Some(start) = line_lower.find(pattern) else {
                continue;
            };
            let arguments = &line[start + pattern.len()..];

            // We start just inside the call's opening parenthesis.
            let mut depth = 1;
            for ch in arguments.chars() {
                match ch {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            // Call closed without a top-level comma.
                            break;
                        }
                    }
                    // A comma directly inside the call separates the key from
                    // its default value.
                    ',' if depth == 1 => return true,
                    _ => {}
                }
            }
        }

        false
    }

    /// Heuristically decide whether a Go `os.Getenv()` call has fallback
    /// handling on the same line.
    ///
    /// Recognised shapes:
    /// - a fallback helper whose name contains `getenvdefault`, `getenvor` or
    ///   `envdefault` (case-insensitive), e.g. `getEnvOr(os.Getenv("X"), "default")`
    /// - an `if ` together with an empty-string comparison (`== ""` or `!= ""`),
    ///   e.g. `if val := os.Getenv("X"); val == "" { ... }`
    fn has_go_env_fallback(line: &str) -> bool {
        let lowered = line.to_lowercase();
        let uses_fallback_helper = ["getenvdefault", "getenvor", "envdefault"]
            .iter()
            .any(|helper| lowered.contains(*helper));
        if uses_fallback_helper {
            return true;
        }

        let compares_with_empty = [r#"== """#, r#"!= """#]
            .iter()
            .any(|comparison| line.contains(*comparison));
        compares_with_empty && line.contains("if ")
    }

    /// Line-based scan of `content`, returning every finding produced by
    /// `try_match_line` for the lines that survive the filters below.
    ///
    /// Yields nothing for test files (they often contain test certificates
    /// and keys) and for binary content (anything containing a NUL byte).
    fn scan_file(&self, path: &Path, content: &str) -> Vec<Finding> {
        if is_test_file(path) || content.contains('\0') {
            return vec![];
        }

        let lines: Vec<&str> = content.lines().collect();
        let mut findings = vec![];

        for (index, &line) in lines.iter().enumerate() {
            // Suppression markers may sit on the line itself or the one above.
            let previous = index.checked_sub(1).map(|above| lines[above]);
            if crate::detectors::is_line_suppressed(line, previous) {
                continue;
            }

            // `//` comments mentioning "example" look like documentation.
            let trimmed = line.trim();
            let is_example_comment = trimmed.starts_with("//") && trimmed.contains("example");
            if is_example_comment {
                continue;
            }

            findings.extend(self.try_match_line(path, line, index));
        }

        findings
    }

    /// Legacy line-based scanner for non-AST languages.
    ///
    /// Used ONLY by `scan_file()` for file types without a tree-sitter
    /// grammar in our dispatch list (`.env`, `.yml`, `.json`, `.toml`,
    /// `.ini`, `.conf`, Ruby, PHP, Kotlin, Swift, ...). Each pattern's
    /// `to_legacy_line_regex()` is run against the raw line, and a hit is
    /// then passed through contextual filters (placeholder text, runtime
    /// lookups, value-type checks) and severity downgrades.
    ///
    /// **Not used by AST-eligible languages** (Python, JS/TS, Rust, Go,
    /// Java, C#, C, C++) — those go through `scan_file_ast()` →
    /// `match_pattern_against_binding()`, which works on AST-extracted
    /// `(name, value)` pairs. The `line.find('=')` / `contains('(')`
    /// heuristics here are a deliberate concession to the line scanner and
    /// are inappropriate for AST-handled sources.
    ///
    /// `line_num` is 0-indexed; findings report `line_num + 1`. Every pattern
    /// in the table is tried, so one line can yield several findings.
    fn try_match_line(&self, path: &Path, line: &str, line_num: usize) -> Vec<Finding> {
        // Substrings showing the value is fetched at runtime (environment
        // variables, HTTP headers) rather than embedded in the source.
        const RUNTIME_LOOKUP_MARKERS: &[&str] = &[
            // Node/Deno: const secret = process.env.SECRET, process.env["KEY"]
            "= process.env.",
            "=process.env.",
            "process.env",
            // Rust: std::env::var("KEY") or env::var("KEY")
            "env::var(",
            "std::env::var",
            // HTTP headers: req.headers.get(), headers.get(), request.headers
            "headers.get(",
            "req.headers.",
            "request.headers.",
            "headers[",
            // Python: os.environ["KEY"], os.environ.get(), os.getenv()
            "os.environ[",
            "os.environ.get(",
            "os.getenv(",
            // Go: os.Getenv("KEY"), os.LookupEnv("KEY")
            "os.Getenv(",
            "os.LookupEnv(",
        ];
        // Case-sensitive markers of sample/documentation values.
        const SAMPLE_MARKERS: &[&str] = &["example", "EXAMPLE", "placeholder", "xxxx"];
        // Case-insensitive markers of setup templates and documentation.
        const TEMPLATE_MARKERS: &[&str] =
            &["your-", "-here", "changeme", "replace", "todo", "fixme"];
        // Path fragments of seed/script/fixture/example/demo/sample files.
        const LOW_RISK_PATH_MARKERS: &[&str] = &[
            "/seed",
            "/script",
            "/fixture",
            "/examples/",
            "/example/",
            "/demo/",
            "/samples/",
            "/sample/",
            ".seed.",
            ".script.",
            ".example.",
            ".sample.",
        ];

        /// Value-type filter for the Generic Secret pattern: `true` when the
        /// text after the line's first `=` (or, failing that, first `:`) is
        /// clearly not a string literal. An empty right-hand side is not
        /// filtered.
        fn rhs_is_not_a_string_literal(line: &str) -> bool {
            let rhs = match line.find('=').or_else(|| line.find(':')) {
                Some(separator) => line[separator + 1..].trim(),
                None => "",
            };
            let mut chars = rhs.chars();
            let Some(first) = chars.next() else {
                return false;
            };
            // Function/class calls: CharField(...), Signal(), SecretManager.from_config()
            if rhs.contains('(') {
                return true;
            }
            match first {
                // Plain string literals.
                '"' | '\'' | '`' => false,
                // `b"..."` is a bytes literal; `base64...` is a variable.
                'b' => !matches!(chars.next().unwrap_or(' '), '"' | '\''),
                // Collection literals and variable/attribute references — a
                // hardcoded secret MUST be a string literal.
                _ => true,
            }
        }

        let mut findings = Vec::new();

        // These checks depend only on the line, so they are done once up
        // front: such a line can never yield a finding, whichever pattern hits.
        if RUNTIME_LOOKUP_MARKERS
            .iter()
            .any(|&marker| line.contains(marker))
        {
            return findings;
        }

        for pattern in SECRET_PATTERNS.iter() {
            // The line scanner uses each pattern's "legacy" line regex
            // (`name\s*[=:]\s*"value"`); the AST scanner uses `name_gate` and
            // `value_regex` separately.
            let line_regex = pattern.to_legacy_line_regex();
            let Some(hit) = line_regex.find(line) else {
                continue;
            };
            let matched = hit.as_str();

            // Obvious false positives: too short, or sample text.
            if matched.len() < 10 {
                continue;
            }
            if SAMPLE_MARKERS.iter().any(|&marker| matched.contains(marker)) {
                continue;
            }

            // Placeholder patterns (setup templates, documentation).
            let matched_lower = matched.to_lowercase();
            let is_template = TEMPLATE_MARKERS
                .iter()
                .any(|&marker| matched_lower.contains(marker))
                || matched == "sk-your-openai-key"
                || matched_lower.starts_with("xxx")
                || matched_lower.ends_with("xxx");
            if is_template {
                continue;
            }

            // Shell variable substitutions: ${VAR_NAME}. Docker Compose and
            // shell scripts use ${SECRET} as a reference, not a hardcoded value.
            let key_part = matched.split('=').next().unwrap_or("");
            if line.contains(&format!("${{{}", key_part)) {
                continue;
            }

            // Generic Secret only: drop hits whose right-hand side is a call,
            // a collection literal or a variable reference.
            if pattern.finding_name() == "Generic Secret" && rhs_is_not_a_string_literal(line) {
                continue;
            }

            // ---- Effective severity, based on context ----
            let line_lower = line.to_lowercase();
            // Dev fallbacks (`process.env.X || 'fallback'`, a default argument
            // to `os.environ.get`/`os.getenv`, Go fallback handling) are
            // typically local defaults, not production credentials.
            // Lines containing the exact-case spellings were already dropped
            // by the runtime-lookup check above, so the two Go checks cannot
            // fire here and the others only fire for other letter casings.
            let has_env_fallback = (line_lower.contains("process.env")
                && (line.contains("||") || line.contains("??")))
                || ((line_lower.contains("os.environ.get(")
                    || line_lower.contains("os.getenv("))
                    && Self::has_python_env_fallback(line))
                || line.contains("os.LookupEnv(")
                || (line.contains("os.Getenv(") && Self::has_go_env_fallback(line));
            // Localhost URLs are lower risk — typically dev/test environments.
            let targets_localhost = matched.contains("localhost") || matched.contains("127.0.0.1");
            // Evaluated lazily: only consulted when nothing above applied.
            let in_low_risk_path = || {
                path.to_str().map_or(false, |raw| {
                    let rel_lower = raw.to_lowercase();
                    LOW_RISK_PATH_MARKERS
                        .iter()
                        .any(|&marker| rel_lower.contains(marker))
                })
            };
            let effective_severity = if has_env_fallback || targets_localhost || in_low_risk_path()
            {
                Severity::Low
            } else {
                pattern.severity()
            };

            let line_start = line_num as u32 + 1;
            findings.push(Finding {
                id: String::new(),
                detector: "SecretDetector".to_string(),
                severity: effective_severity,
                title: format!("Hardcoded {}", pattern.finding_name()),
                description: format!(
                    "Potential {} found in source code at line {}. \
                    Secrets should be stored in environment variables or secret management systems.",
                    pattern.finding_name(),
                    line_start
                ),
                affected_files: vec![self.relative_path(path)],
                line_start: Some(line_start),
                line_end: Some(line_start),
                suggested_fix: Some(
                    "Move this secret to an environment variable or secrets manager".to_string(),
                ),
                estimated_effort: Some("15 minutes".to_string()),
                category: Some("security".to_string()),
                cwe_id: Some("CWE-798".to_string()),
                why_it_matters: Some(
                    "Hardcoded secrets can be extracted from source code, leading to credential theft"
                        .to_string(),
                ),
                ..Default::default()
            });
        }

        findings
    }

    /// AST-first scanner.
    ///
    /// Collects one `Binding` per "(name?, string-literal value)" pair from
    /// the tree-sitter parse tree via `collect_bindings`, then matches the
    /// typed `SecretPattern` table against each `(name, value)` directly. No
    /// regex over lines, no `line.find('=')` heuristics — the AST is the
    /// source of truth for what is a name and what is a value.
    ///
    /// This is the proper structural fix for QA audit finding #1: previously
    /// `masked_content` had stripped every string literal to spaces before
    /// the regex saw the line, making secrets in Python/JS/TS/Rust/Go/Java/
    /// C#/C/C++ structurally undetectable.
    ///
    /// Returns no findings for test files, for content containing a NUL
    /// byte, or when no parse tree is available.
    fn scan_file_ast(&self, inputs: &ScanAstInputs<'_>) -> Vec<Finding> {
        let path = inputs.path();
        let content = inputs.content();
        let ext = inputs.ext();
        let lang = inputs.lang;
        let cached_tree = inputs.cached_tree;
        let mut findings = vec![];

        if is_test_file(path) || content.contains('\0') {
            return findings;
        }

        // Prefer the tree cached by the main parse phase; parse afresh only
        // on a cache miss (test fixtures, files added after graph build).
        // `fresh_tree` is declared out here so a freshly parsed tree outlives
        // the root node borrowed from it.
        let fresh_tree;
        let root = match cached_tree {
            Some(tree) => tree.root_node(),
            None => match parse_root_ext(content, lang, ext) {
                Some(parsed) => {
                    fresh_tree = parsed;
                    fresh_tree.root_node()
                }
                None => return findings,
            },
        };

        let mut bindings: Vec<Binding> = Vec::new();
        collect_bindings(root, content.as_bytes(), &mut bindings);

        let lines: Vec<&str> = content.lines().collect();

        for binding in bindings {
            // Per-line suppression markers (// repotoire-ignore, etc.) still
            // apply on the AST path, so the line text has to be looked up.
            let Some(&line) = lines.get(binding.line) else {
                continue;
            };
            let previous = binding.line.checked_sub(1).map(|above| lines[above]);
            if crate::detectors::is_line_suppressed(line, previous) {
                continue;
            }

            // First match wins per binding — patterns are ordered
            // most-specific-first, so a less specific pattern is never
            // reported on top of a more specific one.
            let first_hit = SECRET_PATTERNS
                .iter()
                .find_map(|pat| match_pattern_against_binding(self, path, pat, &binding));
            if let Some(finding) = first_hit {
                findings.push(finding);
            }
        }

        findings
    }
}

/// One AST-extracted (optional name, string value) pair.
///
/// Emitted by `collect_bindings` for every binding/literal node we care about
/// and consumed by `scan_file_ast`, which hands each one to
/// `match_pattern_against_binding`.
/// `name` is `Some(lowercased_text)` for keyed bindings (Python `assignment`,
/// JS `variable_declarator`, etc.) and `None` for naked string literals
/// outside any binding (where only self-shaped patterns can apply, because
/// name-gated patterns bail out when there is no name).
struct Binding<'a> {
    /// LHS identifier text, lowercased, with leading `self.` / `this.` /
    /// scope prefixes stripped (so `self._password` becomes `_password`).
    /// `None` if this is a naked literal.
    name: Option<String>,
    /// Value content WITHOUT surrounding quotes, taken from raw source.
    /// This is the text the FP filters and value regexes run against.
    value: &'a str,
    /// 0-indexed line of the value's first byte. Used for finding location
    /// (reported as `line + 1`) and as the surrounding line for
    /// `is_line_suppressed`.
    line: usize,
    /// `true` when the literal is an element of a list/array whose sibling
    /// elements are all short lowercase identifier-like strings (e.g. the
    /// `SENSITIVE_FIELD_PATTERNS = ["password", "token", "secret"]` pattern).
    /// Forwarded to `is_non_credential_context` to avoid blocking on
    /// redaction-lists.
    in_identifier_list: bool,
}

/// Match one pattern against one binding.
///
/// All false-positive filters look at the VALUE the AST extracted (not the
/// surrounding line). Returns `Some(Finding)` for a real hit and `None` when
/// the value is filtered out, the name gate rejects the binding, or the value
/// regex does not match.
fn match_pattern_against_binding(
    detector: &SecretDetector,
    path: &Path,
    pattern: &SecretPattern,
    binding: &Binding<'_>,
) -> Option<Finding> {
    // Path fragments of seed/script/fixture/example/demo/sample files, which
    // are typically fixtures or templates rather than production secrets.
    const LOW_RISK_PATH_MARKERS: &[&str] = &[
        "/seed",
        "/script",
        "/fixture",
        "/examples/",
        "/example/",
        "/demo/",
        "/samples/",
        "/sample/",
        ".seed.",
        ".script.",
        ".example.",
        ".sample.",
    ];

    let value = binding.value;

    // Shared FP filters, applied to the exact string-literal content.
    if !value_passes_fp_filters(value) {
        return None;
    }

    // Pick the value regex, applying the name gate where the pattern has one.
    let value_regex = match pattern {
        SecretPattern::SelfShaped { value_regex, .. } => value_regex,
        SecretPattern::NameGated {
            name_gate,
            value_regex,
            name: pattern_name,
            ..
        } => {
            // Name-gated patterns need an LHS name that passes the gate;
            // naked literals never match.
            let lhs_name = binding.name.as_deref()?;
            if !name_gate.is_match(lhs_name) {
                return None;
            }

            // The Generic Secret gate intentionally casts a wide net (any
            // name containing "token", "secret", ...), so identifier
            // constants need extra guards. Typical false positives:
            //
            //   const E2E_AUTH_TOKEN_HEADER = "x-scout-e2e-auth-token"
            //   const E2E_AUTH_TOKEN_ENV    = "SCOUT_E2E_AUTH_TOKEN"
            //
            // Both names contain "token", yet the values are an HTTP-header
            // name and an env-var name. The hit is dropped when EITHER the
            // variable name signals that it holds a name/identifier, OR the
            // value itself looks like an identifier.
            let is_generic_secret = *pattern_name == "Generic Secret";
            if is_generic_secret
                && (var_name_signals_identifier(lhs_name) || value_looks_like_identifier(value))
            {
                return None;
            }

            value_regex
        }
    };

    if !value_regex.is_match(value) {
        return None;
    }

    // Severity downgrades: low-risk file locations and localhost URLs
    // (dev environment) are reported as Low.
    let in_low_risk_path = path.to_str().is_some_and(|raw| {
        let rel_lower = raw.to_lowercase();
        LOW_RISK_PATH_MARKERS
            .iter()
            .any(|&marker| rel_lower.contains(marker))
    });
    let targets_localhost = value.contains("localhost") || value.contains("127.0.0.1");
    let severity = if in_low_risk_path || targets_localhost {
        Severity::Low
    } else {
        pattern.severity()
    };

    let line_start = binding.line as u32 + 1;
    let file_str = path.to_str().unwrap_or("");
    let surrounding_name = binding.name.as_deref().unwrap_or("");

    // ── Blocking-tier promotion (Task 10) ────────────────────────────────────
    // `determine_blocking_tier` decides whether the hit stays advisory or is
    // promoted to blocking, and supplies the evidence, final severity and
    // confidence that go with that decision. The FP guards above have already
    // run; `in_identifier_list` is the extra context passed along that can
    // keep a format-matched literal advisory.
    let (tier, evidence, effective_severity, blocking_conf) = determine_blocking_tier(
        file_str,
        surrounding_name,
        value,
        binding.in_identifier_list,
        line_start,
        path,
        severity,
    );
    let deterministic = matches!(tier, Tier::Blocking);
    let pattern_label = pattern.finding_name();

    Some(Finding {
        id: String::new(),
        detector: "SecretDetector".to_string(),
        severity: effective_severity,
        title: format!("Hardcoded {}", pattern_label),
        description: format!(
            "Potential {} found in source code at line {}. \
            Secrets should be stored in environment variables or secret management systems.",
            pattern_label, line_start
        ),
        affected_files: vec![detector.relative_path(path)],
        line_start: Some(line_start),
        line_end: Some(line_start),
        suggested_fix: Some(
            "Move this secret to an environment variable or secrets manager".to_string(),
        ),
        estimated_effort: Some("15 minutes".to_string()),
        category: Some("security".to_string()),
        cwe_id: Some("CWE-798".to_string()),
        why_it_matters: Some(
            "Hardcoded secrets can be extracted from source code, leading to credential theft"
                .to_string(),
        ),
        tier,
        evidence,
        deterministic,
        confidence: blocking_conf,
        ..Default::default()
    })
}

/// Classify a string literal that already survived the FP filters.
///
/// Returns `(tier, evidence, severity, confidence)`:
///
/// | outcome                          | tier     | severity            | confidence |
/// |----------------------------------|----------|---------------------|------------|
/// | non-credential context           | Advisory | `advisory_severity` | `None`     |
/// | known format, checksum valid     | Blocking | Critical            | `1.0`      |
/// | known format, otherwise          | Blocking | Critical            | `0.95`     |
/// | generic high-entropy value       | Blocking | High                | `0.90`     |
/// | anything else                    | Advisory | `advisory_severity` | `None`     |
///
/// Evidence is attached only to `Blocking` results and always points at
/// `line_start` in `path` (single-line span, no snippet).
fn determine_blocking_tier(
    file: &str,
    surrounding_name: &str,
    literal: &str,
    in_identifier_list: bool,
    line_start: u32,
    path: &Path,
    advisory_severity: Severity,
) -> (Tier, Option<Evidence>, Severity, Option<f64>) {
    // Step 1: contexts that are known not to hold credentials stay advisory.
    let non_credential = secret_formats::is_non_credential_context(
        file,
        surrounding_name,
        literal,
        in_identifier_list,
    );
    if non_credential {
        return (Tier::Advisory, None, advisory_severity, None);
    }

    let known_format = secret_formats::match_known_format(literal);
    // Entropy is reported in the evidence for both blocking outcomes and is
    // also the gate for the generic one, so compute it once here.
    let entropy_bits = secret_formats::shannon_entropy_bits_per_char(literal);

    let (format, checksum_valid, severity, confidence) = match known_format {
        // Step 2: a recognised credential format always blocks.
        Some(hit) => {
            let confidence = if hit.checksum_valid == Some(true) {
                1.0
            } else {
                0.95
            };
            (
                hit.format.to_string(),
                hit.checksum_valid,
                Severity::Critical,
                confidence,
            )
        }
        // Step 3: otherwise block only on long, high-entropy, opaque values.
        None => {
            let is_generic_secret = literal.len() >= secret_formats::GENERIC_MIN_LEN
                && entropy_bits >= secret_formats::GENERIC_ENTROPY_FLOOR
                && looks_like_a_value_not_a_format_string(literal);
            if !is_generic_secret {
                // Step 4: keyword/regex heuristics only — advisory.
                return (Tier::Advisory, None, advisory_severity, None);
            }
            (
                "generic_high_entropy".to_string(),
                None,
                Severity::High,
                0.90,
            )
        }
    };

    let evidence = Evidence::Secret {
        span: SourceSpan {
            file: path.to_path_buf(),
            line_start,
            line_end: line_start,
            snippet: None,
        },
        format,
        entropy_bits,
        checksum_valid,
    };
    (Tier::Blocking, Some(evidence), severity, Some(confidence))
}

/// Value-only false-positive filter: inspects just the text between the
/// quotes and returns `false` when it is obviously not a real secret.
///
/// Rejected shapes:
///
/// - shorter than 8 bytes
/// - filler / template markers anywhere in the value (`xxxx`, `your-`,
///   `-here`), or a value that starts or ends with `xxx`
///   (`"your-key-here"`, `"xxxxxxxx"`)
/// - a placeholder word (`example`, `placeholder`, `changeme`, `replace`,
///   `todo`, `fixme`) as reported by `secret_formats::contains_as_word`
///   on the lowercased value (`"CHANGEME"`)
/// - a whole-value shell substitution such as `"${SECRET}"`
///
/// NOTE: the placeholder words are matched as words rather than substrings
/// so that a known-format credential which merely ends in such a word
/// (e.g. `AKIAIOSFODNN7EXAMPLE`, where the alphanumeric `7` breaks the word
/// boundary) is not discarded.
fn value_passes_fp_filters(value: &str) -> bool {
    const MIN_VALUE_LEN: usize = 8;
    // Markers that disqualify the value wherever they occur.
    const FILLER_MARKERS: [&str; 3] = ["xxxx", "your-", "-here"];
    // Words that disqualify the value only when they stand alone as a word.
    const PLACEHOLDER_WORDS: [&str; 6] = [
        "example",
        "placeholder",
        "changeme",
        "replace",
        "todo",
        "fixme",
    ];

    if value.len() < MIN_VALUE_LEN {
        return false;
    }

    let folded = value.to_lowercase();

    let is_placeholder = FILLER_MARKERS.iter().any(|marker| folded.contains(*marker))
        || folded.starts_with("xxx")
        || folded.ends_with("xxx")
        || PLACEHOLDER_WORDS
            .iter()
            .any(|word| secret_formats::contains_as_word(&folded, word))
        // Shell variable references (Docker Compose, .env templates).
        || (value.starts_with("${") && value.ends_with('}'));

    !is_placeholder
}

/// Decide whether a string value has the shape of a *name* (header name,
/// env-var name, dotted path) rather than an opaque secret.
///
/// Three shapes count as identifiers:
///
/// - **HTTP-header-like**: non-empty, built only from `[a-z0-9-]`, and every
///   hyphen-separated segment is non-empty and begins with a letter, e.g.
///   `x-scout-e2e-auth-token`, `authorization`. A segment that starts with
///   a digit (`hardcoded-bearer-abcd-1234`) disqualifies the value.
/// - **SCREAMING_SNAKE_CASE**: non-empty, built only from `[A-Z0-9_]`, e.g.
///   `SCOUT_E2E_AUTH_TOKEN`, `DATABASE_URL`.
/// - **Dotted / qualified**: contains at least one `.` and is built only
///   from ASCII letters, digits, `.`, `_` and `-`, e.g. `config.auth.token`.
///
/// Plain `snake_case` is intentionally *not* treated as an identifier,
/// because real passwords such as `"super_secret_password_123"` share that
/// shape; `var_name_signals_identifier` covers that case from the name side.
fn value_looks_like_identifier(value: &str) -> bool {
    if value.is_empty() {
        // None of the three shapes accepts an empty string.
        return false;
    }

    // Every `-`-separated piece must be `[a-z][a-z0-9]*`.
    let header_like = value.split('-').all(|segment| {
        matches!(segment.as_bytes().first(), Some(b'a'..=b'z'))
            && segment
                .bytes()
                .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit())
    });
    if header_like {
        return true;
    }

    // Any lowercase letter (or other character) makes this fall through.
    let env_var_like = value
        .bytes()
        .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
    if env_var_like {
        return true;
    }

    // Qualified names: at least one dot, no whitespace or punctuation
    // beyond `.`, `_` and `-`.
    value.contains('.')
        && value
            .bytes()
            .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b'-'))
}

/// Report whether a (lowercased) variable name ends in a suffix that marks
/// its value as the *name of something* rather than a secret itself.
///
/// Examples of names that match:
///
/// - `e2e_auth_token_header` — holds an HTTP header name
/// - `e2e_auth_token_env`    — holds an env-var name
/// - `token_key_name`        — holds the name of a key
///
/// Every suffix starts with `_`, so bare names such as `token`, `header`
/// or `secret` never match.
fn var_name_signals_identifier(name_lower: &str) -> bool {
    // A variable called `AUTH_TOKEN_HEADER` stores `"x-auth-token"` (the
    // header's name), not the token that travels in that header.
    const NAME_SUFFIXES: &[&str] = &[
        "_header",
        "_env",
        "_name",
        "_field",
        "_param",
        "_key_name",
        "_header_name",
        "_var",
        "_attr",
        "_property",
        "_label",
        "_tag",
    ];

    NAME_SUFFIXES
        .iter()
        .any(|suffix| name_lower.ends_with(suffix))
}

// ---------------------------------------------------------------------------
// Blocking-tier helpers
// ---------------------------------------------------------------------------

/// Heuristic gate for the generic high-entropy check: `true` when `literal`
/// could be an opaque credential blob, `false` when it is visibly a format
/// string, template, URL or path.
///
/// A literal is rejected when it:
/// - contains `{` or `}` (format placeholders, including `{0}` and `${...}`)
/// - contains `%s` or `%d`
/// - contains any whitespace (this also covers spaced hex dumps such as
///   `"DE AD BE EF"`)
/// - contains `://` or starts with `/` (URL / path)
/// - contains a backtick
///
/// Hex separated by dashes (UUIDs, hashes) is deliberately allowed through.
fn looks_like_a_value_not_a_format_string(literal: &str) -> bool {
    // Single characters that never appear in an opaque token.
    const TEMPLATE_CHARS: [char; 3] = ['{', '}', '`'];
    // Multi-character markers for printf-style formats and URLs.
    const TEMPLATE_SEQUENCES: [&str; 3] = ["%s", "%d", "://"];

    let has_template_char = literal
        .chars()
        .any(|c| c.is_whitespace() || TEMPLATE_CHARS.contains(&c));
    let has_template_sequence = TEMPLATE_SEQUENCES
        .iter()
        .any(|marker| literal.contains(*marker));
    let is_absolute_path = literal.starts_with('/');

    !(has_template_char || has_template_sequence || is_absolute_path)
}

/// Returns `true` when `node` (a string literal) is a direct element of a
/// list/array literal in which *every* named element is a string literal
/// that is at most 40 bytes long and contains no ASCII uppercase letter.
///
/// This is the shape of redaction-field lists such as
/// `["password", "token", "secret"]`, where the strings are field names
/// rather than credentials.
///
/// Container node kinds recognised:
///
/// | grammar | kind                |
/// |---------|---------------------|
/// | Python  | `list`              |
/// | JS/TS   | `array`             |
/// | Rust    | `array_expression`  |
/// | Java    | `array_initializer` |
/// | C/C++   | `initializer_list`  |
///
/// Go's `composite_literal` is intentionally left out for now because it is
/// too broad. Any non-string element (number, identifier, nested list)
/// disqualifies the whole list. An element whose content cannot be
/// extracted is skipped rather than treated as a failure.
fn is_string_in_identifier_list(node: tree_sitter::Node, source: &[u8]) -> bool {
    const MAX_ELEMENT_LEN: usize = 40;

    let Some(container) = node.parent() else {
        return false;
    };
    let is_list_literal = matches!(
        container.kind(),
        "list" | "array" | "array_expression" | "array_initializer" | "initializer_list"
    );
    if !is_list_literal {
        return false;
    }

    let mut cursor = container.walk();
    // Bound to a local so the iterator borrowing `cursor` is dropped before
    // `cursor` itself goes out of scope.
    let every_element_is_plain = container.named_children(&mut cursor).all(|element| {
        if !is_string_node(element.kind()) {
            return false;
        }
        match string_node_content(element, source) {
            Some(text) => {
                text.len() <= MAX_ELEMENT_LEN && !text.bytes().any(|b| b.is_ascii_uppercase())
            }
            None => true,
        }
    });
    every_element_is_plain
}

// ---------------------------------------------------------------------------
// AST → Binding extraction
// ---------------------------------------------------------------------------

/// Walk the tree once (pre-order) and emit a `Binding` for every
/// binding/literal node whose value side is a string literal. The
/// grammar-specific extraction is centralized here so `scan_file_ast` is
/// grammar-agnostic.
///
/// Two kinds of bindings are produced:
///
/// 1. **Keyed** (`name: Some(..)`) — `node` is a binding shape recognised by
///    `extract_named_binding` and its value is a string literal.
/// 2. **Naked** (`name: None`) — `node` is itself a string literal that was
///    *not* emitted as the value of a keyed binding. These can only match
///    `SelfShaped` patterns.
///
/// Python docstrings are skipped on both paths. `line` is the zero-based row
/// of the literal's start position.
fn collect_bindings<'a>(node: tree_sitter::Node, source: &'a [u8], out: &mut Vec<Binding<'a>>) {
    // 1) Keyed bindings: extract (name, value) when the value is a string
    //    literal directly.
    if let Some((name, value_node)) = extract_named_binding(node, source) {
        if is_string_node(value_node.kind()) && !is_likely_python_docstring(value_node) {
            if let Some(content) = string_node_content(value_node, source) {
                out.push(Binding {
                    name: Some(name),
                    value: content,
                    line: value_node.start_position().row,
                    in_identifier_list: is_string_in_identifier_list(value_node, source),
                });
            }
        }
        // Don't return — children may contain nested bindings (e.g. a
        // `pair` whose value is another `assignment` in some grammars).
    }

    // 2) Naked string literals: every string-literal node that step 1 did
    //    not already emit as a keyed value.
    if is_string_node(node.kind()) && !is_likely_python_docstring(node) {
        if !literal_is_paired_binding_value(node, source) {
            if let Some(content) = string_node_content(node, source) {
                out.push(Binding {
                    name: None,
                    value: content,
                    line: node.start_position().row,
                    in_identifier_list: is_string_in_identifier_list(node, source),
                });
            }
        }
        // String content children hold no bindings — stop recursion.
        return;
    }

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        collect_bindings(child, source, out);
    }
}

/// True when `node` (a string literal) is exactly the value that
/// `extract_named_binding` pairs with a name for its enclosing binding, i.e.
/// when the keyed path in `collect_bindings` is responsible for it.
///
/// A purely positional test ("is this the `right`/`value` field of a binding
/// node?") is not sufficient: when the binding's name cannot be extracted
/// (subscript targets like `cfg["k"] = "…"`, multi-child declarators, …)
/// no keyed binding is emitted, and suppressing the naked literal as well
/// would drop the string from the scan entirely. Conversely, Go wraps
/// right-hand sides in an `expression_list`, so the binding node can be the
/// literal's *grandparent*; a parent-only test misses that and the literal is
/// emitted twice.
fn literal_is_paired_binding_value(node: tree_sitter::Node, source: &[u8]) -> bool {
    let Some(parent) = node.parent() else {
        return false;
    };

    let binding = if parent.kind() == "expression_list" {
        // Go: the binding node sits one level above the wrapper.
        match parent.parent() {
            Some(grandparent) => grandparent,
            None => return false,
        }
    } else {
        // Cheap positional pre-check; the extraction below confirms that a
        // name was actually produced for this literal.
        if !parent_is_handled_binding_value(node) {
            return false;
        }
        parent
    };

    extract_named_binding(binding, source).is_some_and(|(_, value)| value.id() == node.id())
}

/// Extract `(name_text_lowercased, value_node)` when `node` is a binding
/// shape.
///
/// The returned `value_node` is whatever sits on the value side of the
/// binding; callers decide whether it is a string literal. For Go, whose
/// grammar wraps right-hand sides in an `expression_list`, a list with
/// exactly one named child is unwrapped so the caller sees the literal
/// itself. This now includes plain `assignment_statement`s, which were
/// previously returned still wrapped and therefore never recognised as
/// keyed string bindings.
///
/// Returns `None` for non-binding nodes, for bindings that lack the
/// expected name/value children, and for bindings whose name cannot be
/// reduced to a single identifier (see `extract_lhs_name`).
fn extract_named_binding<'a>(
    node: tree_sitter::Node<'a>,
    source: &'a [u8],
) -> Option<(String, tree_sitter::Node<'a>)> {
    /// Unwrap a Go `expression_list` that holds exactly one named child;
    /// every other node is returned unchanged.
    fn unwrap_single_expression_list(value: tree_sitter::Node<'_>) -> tree_sitter::Node<'_> {
        if value.kind() == "expression_list" && value.named_child_count() == 1 {
            value.named_child(0).unwrap_or(value)
        } else {
            value
        }
    }

    /// Shared shape: the name is the raw text of field `name_field`
    /// (lowercased) and the value is field `value_field`.
    fn field_binding<'t>(
        node: tree_sitter::Node<'t>,
        source: &[u8],
        name_field: &str,
        value_field: &str,
    ) -> Option<(String, tree_sitter::Node<'t>)> {
        let name = node_text(node.child_by_field_name(name_field)?, source)?.to_lowercase();
        let value = node.child_by_field_name(value_field)?;
        Some((name, value))
    }

    match node.kind() {
        // Python: `password = "v"` (assignment), `password += "v"`
        // (augmented_assignment). JS/TS and others: `x = "v"`
        // (assignment_expression).
        "assignment" | "assignment_expression" | "augmented_assignment" => {
            let name = extract_lhs_name(node.child_by_field_name("left")?, source)?;
            let value = node.child_by_field_name("right")?;
            Some((name, value))
        }
        // Go: `password = "v"`. Both sides are `expression_list`s; the left
        // one is unwrapped by `extract_lhs_name`, the right one here.
        "assignment_statement" => {
            let name = extract_lhs_name(node.child_by_field_name("left")?, source)?;
            let value = unwrap_single_expression_list(node.child_by_field_name("right")?);
            Some((name, value))
        }
        // Python: `func(api_key="v")` (keyword_argument) and
        // `def f(password="default"):` (default_parameter) share the same
        // .name/.value field shape.
        "keyword_argument" | "default_parameter" => field_binding(node, source, "name", "value"),
        // `{"password": "v"}` (Python dict / JS object). String keys use
        // their content (without quotes); other keys use their raw text.
        "pair" => {
            let key_node = node.child_by_field_name("key")?;
            let key_text = match key_node.kind() {
                "string" | "string_literal" => string_node_content(key_node, source)?,
                _ => node_text(key_node, source)?,
            };
            let value = node.child_by_field_name("value")?;
            Some((key_text.to_lowercase(), value))
        }
        // JS/TS: `const password = "v"`, `let api_key = "v"`, `var x = "v"`;
        // C# / Java field declarations (variable_declarator).
        // Rust: `const NAME: TYPE = "v";` / `static NAME: TYPE = "v";`.
        // TypeScript: `enum E { Password = "v" }` (enum_assignment).
        "variable_declarator" | "const_item" | "static_item" | "enum_assignment" => {
            field_binding(node, source, "name", "value")
        }
        // Go: `const Password = "v"`, `var ApiKey string = "v"`. The value
        // field is an `expression_list`.
        "const_spec" | "var_spec" => {
            let (name, value) = field_binding(node, source, "name", "value")?;
            Some((name, unwrap_single_expression_list(value)))
        }
        // Go: `x := "v"`. With several targets the whole left-hand text is
        // used as the name, as before.
        "short_var_declaration" => {
            let left = unwrap_single_expression_list(node.child_by_field_name("left")?);
            let right = unwrap_single_expression_list(node.child_by_field_name("right")?);
            let name = node_text(left, source)?.to_lowercase();
            Some((name, right))
        }
        // Go: `{key: "v"}` (composite literal element); key and value are
        // positional named children.
        "keyed_element" => {
            let key = node.named_child(0)?;
            let value = node.named_child(1)?;
            let key_text = match key.kind() {
                "string" | "string_literal" | "interpreted_string_literal" => {
                    string_node_content(key, source)?
                }
                _ => node_text(key, source)?,
            };
            Some((key_text.to_lowercase(), value))
        }
        // Rust: `let x = "v";` (let_declaration with .pattern + .value).
        "let_declaration" => {
            let name = extract_lhs_name(node.child_by_field_name("pattern")?, source)?;
            let value = node.child_by_field_name("value")?;
            Some((name, value))
        }
        // C / C++: `char *p = "v";` -> `init_declarator { declarator, =, value }`.
        // The declarator may be a `pointer_declarator` wrapping the identifier.
        "init_declarator" => {
            let name = extract_lhs_name(node.child_by_field_name("declarator")?, source)?;
            let value = node.child_by_field_name("value")?;
            Some((name, value))
        }
        // JS/TS class fields: `class C { password = "v" }` (field_definition)
        // and `class C { public password = "v" }` (public_field_definition).
        "field_definition" | "public_field_definition" => {
            field_binding(node, source, "property", "value")
        }
        _ => None,
    }
}

/// Reduce a left-hand-side node to a single lowercased name, looking
/// through wrapper nodes where that is unambiguous.
///
/// - identifier-like leaves (`identifier`, `property_identifier`,
///   `field_identifier`, `type_identifier`) yield their own text:
///   `password` -> `"password"`
/// - member accesses (`attribute`, `member_expression`, `field_expression`)
///   yield the name of their last named child:
///   `self._password` -> `"_password"`
/// - `pointer_declarator` yields the first named child that resolves to a
///   name: `*p` -> `"p"`
/// - any other node is looked through only when it has exactly one named
///   child (e.g. Rust `mut x`); nodes with several named children, such as
///   the tuple pattern `(a, b)`, yield `None` because no single name can be
///   picked.
fn extract_lhs_name(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
    match node.kind() {
        "identifier" | "property_identifier" | "field_identifier" | "type_identifier" => {
            node_text(node, source).map(str::to_lowercase)
        }
        // Python `self._password` / JS `this.password` / Rust `self.password`:
        // the rightmost named child is the member being assigned.
        "attribute" | "member_expression" | "field_expression" => {
            let rightmost = node.named_child_count().checked_sub(1)?;
            node.named_child(rightmost)
                .and_then(|member| extract_lhs_name(member, source))
        }
        // C pointer declarator: `*p`.
        "pointer_declarator" => (0..node.named_child_count())
            .filter_map(|index| node.named_child(index))
            .find_map(|inner| extract_lhs_name(inner, source)),
        // Single-child wrappers are transparent.
        _ if node.named_child_count() == 1 => extract_lhs_name(node.named_child(0)?, source),
        _ => None,
    }
}

/// Extract the raw text of a node as `&str`.
///
/// The node's end offset is clamped to `source.len()`. Returns `None` when
/// the resulting byte range is not a valid range of `source` (start offset
/// beyond the clamped end — e.g. a node from a tree parsed over a longer
/// buffer) or when the bytes aren't valid UTF-8 (shouldn't happen for
/// tree-sitter on a UTF-8 input).
fn node_text<'a>(node: tree_sitter::Node, source: &'a [u8]) -> Option<&'a str> {
    let end = node.end_byte().min(source.len());
    // `get` instead of indexing: clamping `end` alone does not stop the range
    // from being inverted, and `source[start..end]` would panic in that case.
    let bytes = source.get(node.start_byte()..end)?;
    std::str::from_utf8(bytes).ok()
}

/// Extract the content of a string-literal node (between the quotes).
///
/// Tree-sitter typically gives us a `string` node containing
/// `string_start` + `string_content` + `string_end` children, and the
/// fast path returns the text of the first `string_content` /
/// `string_fragment` / `string_literal_content` child directly. The
/// fallback is reachable only when no such child exists (e.g. some older
/// grammar versions, or empty string literals). It works by:
///   1. skipping a single optional language prefix (`f`, `b`, `r`, `u`,
///      or any combination thereof — Python `rb"..."`, Rust `b"..."`,
///      etc.),
///   2. measuring the run of leading delimiter chars (a quote optionally
///      preceded by `#`s for Rust raw strings),
///   3. returning the slice between the matching opening and closing
///      delimiter runs.
///
/// If the fallback cannot identify a matching pair of delimiters (no quote
/// after the prefix, text too short, or the tail doesn't mirror the opening
/// run), the node's raw text is returned unchanged, delimiters included.
///
/// Returns `None` only when the relevant bytes cannot be read as UTF-8 text
/// (see `node_text`).
///
/// We deliberately do NOT use `trim_start_matches([...])` for the prefix
/// because it would also strip multiple `r`s from a delimiter run like
/// `r##"…"##` and break the closing-delimiter computation.
///
/// NOTE(review): the fast path returns only the *first* content child. If a
/// grammar splits a literal's content into several children (for example
/// around escape sequences), the text after the first piece would be
/// omitted — confirm against the grammars in use.
fn string_node_content<'a>(node: tree_sitter::Node, source: &'a [u8]) -> Option<&'a str> {
    // Fast path: look for a child holding the raw content.
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if matches!(
            child.kind(),
            "string_content" | "string_fragment" | "string_literal_content"
        ) {
            return node_text(child, source);
        }
    }

    // Fallback (rare). Strip a single language prefix, then a single
    // delimiter run, from each side.
    let raw = node_text(node, source)?;
    let bytes = raw.as_bytes();

    // 1) Skip language prefix bytes. Stop as soon as the next byte is a
    //    quote or a `#` (which belongs to the delimiter run, not the
    //    prefix). Bound at 3 bytes — no real language uses a 4-letter
    //    string prefix.
    let mut i = 0;
    while i < bytes.len().min(3)
        && matches!(
            bytes[i],
            b'f' | b'F' | b'r' | b'R' | b'b' | b'B' | b'u' | b'U'
        )
        && i + 1 < bytes.len()
        && !matches!(bytes[i + 1], b'"' | b'\'' | b'`' | b'#')
    {
        i += 1;
    }
    // Consume the LAST prefix byte that immediately precedes the
    // delimiter run (the loop above stops one short to avoid eating
    // a `#`).
    if i < bytes.len()
        && matches!(
            bytes[i],
            b'f' | b'F' | b'r' | b'R' | b'b' | b'B' | b'u' | b'U'
        )
        && i + 1 < bytes.len()
        && matches!(bytes[i + 1], b'"' | b'\'' | b'`' | b'#')
    {
        i += 1;
    }

    // 2) Measure the opening delimiter run: optional `#`s followed by
    //    a single quote char.
    let hash_count = bytes[i..].iter().take_while(|&&b| b == b'#').count();
    let open_delim_end = i + hash_count;
    // Nothing left after the prefix/hashes: not a delimited literal.
    if open_delim_end >= bytes.len() {
        return Some(raw);
    }
    let quote = bytes[open_delim_end];
    // The byte after the prefix/hashes must be `"`, `'` or a backtick.
    if !matches!(quote, b'"' | b'\'' | b'`') {
        return Some(raw);
    }

    // Closing delimiter is `quote` followed by the same number of `#`s.
    let close_pat_len = 1 + hash_count;
    // Too short to hold both an opening quote and a full closing run.
    if bytes.len() < open_delim_end + 1 + close_pat_len {
        return Some(raw);
    }
    let close_start = bytes.len() - close_pat_len;
    // The tail must be exactly the same quote char followed only by `#`s.
    if bytes[close_start] != quote || bytes[close_start + 1..].iter().any(|&b| b != b'#') {
        return Some(raw);
    }

    // 3) Everything between the opening quote and the closing run.
    let content_start = open_delim_end + 1;
    let content_end = close_start;
    // Defensive: the length check above already keeps start <= end.
    if content_start > content_end {
        return Some("");
    }
    std::str::from_utf8(&bytes[content_start..content_end]).ok()
}

/// Whether `kind` is one of the node kinds our grammars use for a
/// string-literal node (plain, raw, interpreted, verbatim, or template).
fn is_string_node(kind: &str) -> bool {
    const STRING_KINDS: [&str; 6] = [
        "string",
        "string_literal",
        "raw_string_literal",
        "interpreted_string_literal",
        "verbatim_string_literal",
        "template_string",
    ];
    STRING_KINDS.iter().any(|candidate| *candidate == kind)
}

/// True if `node` sits exactly in the value position of a binding shape
/// that `extract_named_binding` pairs with a name. This prevents emitting
/// the same string literal as both a keyed binding and a naked literal.
///
/// We deliberately check the EXACT value-field position (not just "parent
/// is a binding kind") so that string KEYS in `pair`/`keyed_element` —
/// e.g. an `AKIA...` AWS key used as a Python dict key — still surface
/// through the naked-literal path and can match `SelfShaped` patterns.
/// (Audit finding #1: the old "any child of a binding parent" rule
/// silently dropped such keys.)
///
/// Go wraps the value of `const`/`var` specs and `:=` declarations in an
/// `expression_list`, so there the literal's *parent* is the wrapper and
/// the binding node is its grandparent. That case is handled by looking one
/// level further up. (The earlier version tested for the wrapper only after
/// already requiring the parent to be a binding kind, which can never hold
/// for a wrapped literal, so such literals were never recognised.) The
/// wrapper is only honoured for the binding kinds where
/// `extract_named_binding` unwraps it; anything else stays a naked literal
/// so that it is never dropped from the scan.
fn parent_is_handled_binding_value(node: tree_sitter::Node) -> bool {
    let Some(parent) = node.parent() else {
        return false;
    };

    // Go: `const X = "v"`, `var X = "v"`, `x := "v"` — literal is the sole
    // named child of an `expression_list` that is the binding's value field.
    if parent.kind() == "expression_list" {
        let is_sole_child = parent.named_child_count() == 1
            && parent
                .named_child(0)
                .is_some_and(|only| only.id() == node.id());
        if !is_sole_child {
            return false;
        }
        let Some(binding) = parent.parent() else {
            return false;
        };
        let wrapped_value = match binding.kind() {
            "short_var_declaration" => binding.child_by_field_name("right"),
            "const_spec" | "var_spec" => binding.child_by_field_name("value"),
            _ => None,
        };
        return wrapped_value.is_some_and(|value| value.id() == parent.id());
    }

    let value_node = match parent.kind() {
        // Python / JS / Go assignments use field name "right".
        "assignment"
        | "assignment_expression"
        | "assignment_statement"
        | "augmented_assignment"
        | "short_var_declaration" => parent.child_by_field_name("right"),

        // Field name "value" is used by most binding shapes.
        "keyword_argument"
        | "pair"
        | "variable_declarator"
        | "const_spec"
        | "var_spec"
        | "let_declaration"
        | "const_item"
        | "static_item"
        | "init_declarator"
        | "default_parameter"
        | "field_definition"
        | "public_field_definition"
        | "enum_assignment" => parent.child_by_field_name("value"),

        // Go composite literal: `{key: "v"}` — value is positional named_child(1).
        "keyed_element" => parent.named_child(1),

        _ => return false,
    };

    value_node.is_some_and(|value| value.id() == node.id())
}

/// Mirror of `cache::masking::is_python_docstring` for the AST scanner.
///
/// A string node counts as a docstring when all of the following hold:
///
/// 1. its parent is an `expression_statement`,
/// 2. that statement's parent is a `block` or a `module`, and
/// 3. the statement is the *first* `expression_statement` child of that
///    block/module (other kinds of children before it are ignored).
fn is_likely_python_docstring(node: tree_sitter::Node) -> bool {
    let Some(statement) = node
        .parent()
        .filter(|candidate| candidate.kind() == "expression_statement")
    else {
        return false;
    };
    let Some(scope) = statement
        .parent()
        .filter(|candidate| matches!(candidate.kind(), "block" | "module"))
    else {
        return false;
    };

    let mut cursor = scope.walk();
    let first_expression_statement = scope
        .children(&mut cursor)
        .find(|child| child.kind() == "expression_statement");
    first_expression_statement.is_some_and(|first| first.id() == statement.id())
}

impl Detector for SecretDetector {
    /// Identifier of this detector.
    fn name(&self) -> &'static str {
        "secret-detection"
    }

    /// Short human-readable summary of what the detector looks for.
    fn description(&self) -> &'static str {
        "Detects hardcoded secrets, API keys, and passwords"
    }

    fn bypass_postprocessor(&self) -> bool {
        // Bypasses the GBDT/heuristic false-positive *classifier* (the `filter_false_positives`
        // step) only — secret findings have their own keyword/context FP guards and shouldn't be
        // second-guessed by the generic ML filter. This does NOT bypass `enforce_blocking_invariant`,
        // which still runs unconditionally over every finding (including these) in the postprocess
        // stage: a Blocking secret finding that didn't set evidence / deterministic / confidence ≥
        // 0.90 / severity ≥ High would still be downgraded. The blocking-tier choke point is intact.
        true
    }

    /// Extensions advertised for this detector.
    ///
    /// NOTE(review): this list is not the one `detect` iterates over — `cfg`
    /// and `ini` appear only here, while `php`, `cs`, `c`/`cpp`/`h`/`hpp`,
    /// `config`, the shell extensions, `properties` and `xml` appear only in
    /// `detect`. Confirm which list is authoritative before relying on it.
    fn file_extensions(&self) -> &'static [&'static str] {
        &[
            "py", "js", "ts", "jsx", "tsx", "rb", "java", "go", "rs", "env", "yml", "yaml", "json",
            "toml", "cfg", "ini", "conf",
        ]
    }

    /// Content flag this detector declares as a requirement
    /// (`HAS_SECRET_PATTERN`).
    fn content_requirements(&self) -> crate::detectors::detector_context::ContentFlags {
        crate::detectors::detector_context::ContentFlags::HAS_SECRET_PATTERN
    }

    /// Scans candidate files for hardcoded secrets and returns the findings,
    /// annotated with call-graph context where a containing function is known.
    ///
    /// Per file:
    /// 1. stop once `self.max_findings` has been reached (checked between
    ///    files, so the last file scanned may push the total above it);
    /// 2. skip files that live under a `node_modules`, `.git`, `vendor` or
    ///    `target` directory, and Rust files under `/detectors/`;
    /// 3. skip files whose raw text contains none of the cheap pre-filter
    ///    needles;
    /// 4. scan via the AST path when the language has a tree-sitter grammar
    ///    handled here, otherwise via the line-based scanner.
    ///
    /// This body always returns `Ok`.
    fn detect(
        &self,
        ctx: &crate::detectors::analysis_context::AnalysisContext,
    ) -> Result<Vec<Finding>> {
        // Directory names that exclude a file from scanning. These are compared
        // against whole path components: a substring test on the full path
        // would also drop `.github/…`, `targets.py`, `vendor_client.rb`, etc.
        const SKIPPED_DIRS: [&str; 4] = ["node_modules", ".git", "vendor", "target"];

        // Cheap pre-filter: a file containing none of these substrings cannot
        // produce a finding, so it is skipped before any parsing.
        const PREFILTER_NEEDLES: &[&str] = &[
            "AKIA",        // AWS
            "ghp_",        // GitHub
            "sk_live_",    // Stripe
            "SG.",         // SendGrid
            "PRIVATE KEY", // Private keys
            "api_key",
            "api-key",
            "apikey",
            "API_KEY",
            "API-KEY",
            "APIKEY",
            "password",
            "PASSWORD",
            "passwd",
            "PASSWD",
            "secret",
            "SECRET",
            "token",
            "TOKEN",
            "postgres://",
            "mysql://",
            "mongodb://",
            "redis://",
            "xoxb-",
            "xoxp-",
            "xoxa-",
            "xoxr-",
        ];

        let graph = ctx.graph;
        let files = &ctx.as_file_provider();
        let i = graph.interner();
        let mut findings = vec![];

        for path in files.files_with_extensions(&[
            "py",
            "js",
            "ts",
            "jsx",
            "tsx",
            "rs",
            "go",
            "java",
            "rb",
            "php",
            "cs",
            "cpp",
            "c",
            "h",
            "hpp",
            "yaml",
            "yml",
            "json",
            "toml",
            "env",
            "conf",
            "config",
            "sh",
            "bash",
            "zsh",
            "properties",
            "xml",
        ]) {
            if findings.len() >= self.max_findings {
                break;
            }

            // Skip dependency / VCS / build-output directories.
            let in_skipped_dir = path.components().any(|component| {
                SKIPPED_DIRS
                    .iter()
                    .any(|dir| component.as_os_str() == *dir)
            });
            if in_skipped_dir {
                continue;
            }

            // Skip detector files (contain regex patterns that look like secrets)
            let path_str = path.to_string_lossy();
            if path_str.contains("/detectors/") && path_str.ends_with(".rs") {
                continue;
            }

            // Read the file once; the same buffer feeds both the pre-filter
            // and the scanner.
            let raw = match files.content(path) {
                Some(c) => c,
                None => continue,
            };
            if !PREFILTER_NEEDLES.iter().any(|needle| raw.contains(*needle)) {
                continue;
            }

            debug!("Scanning for secrets: {}", path.display());
            // Use RAW content, not masked: the masking layer (`cache::masking`)
            // strips string-literal contents to spaces, which is exactly where
            // secrets live. For tree-sitter-supported languages we walk the AST
            // and only look at lines that contain real bindings/literals
            // (preventing comment/docstring FPs); for other formats (.env,
            // .yml, .json, .toml, .conf, Ruby, PHP, ...) we run the line-based
            // scanner directly on raw content. Config formats have no
            // docstrings, and the in-detector `is_line_suppressed` /
            // example-comment skip already covers their FPs.
            let lang = Language::from_path(path);
            let has_ast_grammar = matches!(
                lang,
                Language::Python
                    | Language::JavaScript
                    | Language::TypeScript
                    | Language::Rust
                    | Language::Go
                    | Language::Java
                    | Language::CSharp
                    | Language::C
                    | Language::Cpp
            );

            if has_ast_grammar {
                let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
                let cached = files.tree(path);
                let scan = ScanInputs::new(path, &raw, ext);
                let ast_inputs = ScanAstInputs::new(scan, lang, cached.as_deref());
                findings.extend(self.scan_file_ast(&ast_inputs));
            } else {
                findings.extend(self.scan_file(path, &raw));
            }
        }

        // Enrich findings with graph context
        for finding in &mut findings {
            if let (Some(file_path), Some(line)) =
                (finding.affected_files.first(), finding.line_start)
            {
                let path_str = file_path.to_string_lossy().to_string();

                if let Some(f) = graph.find_function_at(&path_str, line) {
                    let callers = graph.get_callers(f.qn(i)).len();
                    let func_name = f.node_name(i);
                    let name_lower = func_name.to_lowercase();
                    let is_config = name_lower.contains("config")
                        || name_lower.contains("init")
                        || name_lower.contains("setup")
                        || name_lower.contains("settings");

                    let mut notes = Vec::new();
                    notes.push(format!(
                        "📦 In function: `{}` ({} callers)",
                        func_name, callers
                    ));

                    if is_config {
                        notes.push("⚙️ In config/setup function".to_string());
                        // Config functions with secrets are more expected but still bad
                        if finding.severity == Severity::Critical {
                            finding.severity = Severity::High;
                        }
                    }

                    // Boost severity if function has many callers (widely used).
                    // Runs after the config downgrade, so a widely-called config
                    // function ends up Critical again.
                    if callers > 10 && finding.severity == Severity::High {
                        finding.severity = Severity::Critical;
                    }

                    finding.description = format!(
                        "{}\n\n**Context:**\n{}",
                        finding.description,
                        notes.join("\n")
                    );
                }
            }
        }

        Ok(findings)
    }
}

impl crate::detectors::RegisteredDetector for SecretDetector {
    fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
        std::sync::Arc::new(Self::new(init.repo_path))
    }

    fn max_tier() -> crate::models::Tier {
        crate::models::Tier::Blocking
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::builder::GraphBuilder;

    #[test]
    fn test_detects_hardcoded_aws_key() {
        // Use the .rb extension: Ruby is not among the languages `detect`
        // routes to the AST path, so this exercises the line-based scanner.
        let store = GraphBuilder::new().freeze();
        let detector = SecretDetector::new("/mock/repo");
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &store,
            vec![("config.rb", "\nAWS_ACCESS_KEY = \"AKIAIOSFODNN7ABCDEFG\"\n")],
        );
        let findings = detector.detect(&ctx).expect("detection should succeed");
        assert!(
            !findings.is_empty(),
            "Should detect hardcoded AWS access key"
        );
        assert!(findings.iter().any(|f| f.title.contains("AWS Access Key")));
    }

    #[test]
    fn test_no_finding_for_env_variable_usage() {
        // Values fetched from the process environment are not hardcoded.
        use crate::detectors::analysis_context::AnalysisContext;

        let python_src = "\nimport os\nAWS_KEY = os.environ.get(\"AWS_ACCESS_KEY_ID\")\nSECRET = os.getenv(\"AWS_SECRET_ACCESS_KEY\")\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("config.py", python_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            findings.is_empty(),
            "Should not flag secrets read from environment variables, but got: {:?}",
            titles
        );
    }

    #[test]
    fn test_no_finding_for_password_in_docstring() {
        // The word "password" only appears in the signature, the docstring
        // text and a call — no string literal is bound to it.
        use crate::detectors::analysis_context::AnalysisContext;

        let python_src = "def authenticate(username, password):\n    \"\"\"\n    Authenticate user with password.\n    password = hashlib.sha256(raw).hexdigest()\n    \"\"\"\n    return check_password(username, password)\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("auth.py", python_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            findings.is_empty(),
            "Should not flag 'password' references in docstrings. Found: {:?}",
            titles
        );
    }

    #[test]
    fn test_no_finding_for_password_type_annotation() {
        // `password: str` declares a field type; there is no value at all.
        use crate::detectors::analysis_context::AnalysisContext;

        let python_src = "from pydantic import BaseModel\n\nclass LoginRequest(BaseModel):\n    username: str\n    password: str\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("models.py", python_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            findings.is_empty(),
            "Should not flag password type annotations. Found: {:?}",
            titles
        );
    }

    #[test]
    fn test_no_finding_for_password_field_definition() {
        // Use .rb so the file takes the line-based scan path: Ruby is not
        // among the languages `detect` routes to the AST scanner.
        let store = GraphBuilder::new().freeze();
        let detector = SecretDetector::new("/mock/repo");
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
            ("models.rb", "password = CharField(max_length=128)\nsecret = SecretManager.from_config(settings)\n"),
        ]);
        let findings = detector.detect(&ctx).expect("detection should succeed");
        assert!(
            findings.is_empty(),
            "Should not flag function/class calls as secrets. Found: {:?}",
            findings.iter().map(|f| &f.title).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_no_finding_for_password_list_assignment() {
        // A list or dict on the right-hand side is a collection, not a
        // credential value.
        use crate::detectors::analysis_context::AnalysisContext;

        let ruby_src = "password = [\"django.contrib.auth.hashers.PBKDF2PasswordHasher\"]\nsecret = {\"key\": \"value\", \"other\": \"data\"}\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("config.rb", ruby_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            findings.is_empty(),
            "Should not flag list/dict literal assignments as secrets. Found: {:?}",
            titles
        );
    }

    #[test]
    fn test_still_detects_real_hardcoded_password() {
        // Positive control for the false-positive guards: a plain string
        // literal bound to `password` must still be reported.
        use crate::detectors::analysis_context::AnalysisContext;

        let ruby_src = "password = \"super_secret_password_123\"\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("config.rb", ruby_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let found_any = !findings.is_empty();
        assert!(found_any, "Should still detect real hardcoded password");
    }

    #[test]
    fn test_skips_uppercase_constant_reference() {
        // The right-hand side is an identifier, not a literal.
        use crate::detectors::analysis_context::AnalysisContext;

        let ruby_src = "password = HARDCODED_SECRET_VALUE\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("config.rb", ruby_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            findings.is_empty(),
            "Should not flag variable/constant references as secrets. Found: {:?}",
            titles
        );
    }

    #[test]
    fn test_no_finding_for_password_variable_reference() {
        // Each sensitive name is assigned from another variable or attribute.
        use crate::detectors::analysis_context::AnalysisContext;

        let ruby_src = "password=auth_password,\nsecret = settings.SECRET_KEY\nself._password = raw_password\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("views.rb", ruby_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            findings.is_empty(),
            "Should not flag variable references as secrets. Found: {:?}",
            titles
        );
    }

    #[test]
    fn test_no_finding_for_settings_read() {
        // Values are read out of a settings object / dict, not hardcoded.
        use crate::detectors::analysis_context::AnalysisContext;

        let ruby_src = "self.password = settings.EMAIL_HOST_PASSWORD if password is None else password\npassword=self.settings_dict[\"PASSWORD\"],\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("config.rb", ruby_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            findings.is_empty(),
            "Should not flag settings reads as secrets. Found: {:?}",
            titles
        );
    }

    #[test]
    fn test_no_finding_for_request_data_read() {
        // Values are looked up in request / form data by key.
        use crate::detectors::analysis_context::AnalysisContext;

        let ruby_src = "csrf_secret = request.META[\"CSRF_COOKIE\"]\nold_password = self.cleaned_data[\"old_password\"]\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("views.rb", ruby_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            findings.is_empty(),
            "Should not flag request/form data reads as secrets. Found: {:?}",
            titles
        );
    }

    // ------------------------------------------------------------------
    // AST-path tests (QA audit finding #1)
    //
    // Before the AST migration, all positive tests used `.rb` extension
    // because Ruby has no tree-sitter grammar, so `mask_non_code()`
    // returned the original source unchanged. For supported languages
    // (`.py`, `.js`, `.ts`, `.rs`, ...) the masking layer replaced every
    // string literal's contents with spaces BEFORE this detector ran,
    // making it structurally impossible to detect a hardcoded secret in
    // those languages — the values were already gone.
    //
    // The tests below exercise the AST path against the languages that
    // were previously broken.
    // ------------------------------------------------------------------

    #[test]
    fn test_audit_repro_python_hardcoded_password() {
        // Reproducer for QA audit finding #1: a Python file holding an
        // obvious hardcoded credential must yield at least one finding.
        //
        // The Stripe-shaped token is assembled at runtime so that this source
        // file never contains a contiguous `sk_live_...` literal, which
        // GitHub's push protection would (correctly) reject.
        use crate::detectors::analysis_context::AnalysisContext;

        let stripe_token = ["sk", "_live_", "abcdefghijklmnopqrstuvwx"].concat();
        let source = format!(
            "password = \"hardcoded_super_secret_p4ss\"\napi_key = \"{}\"\n",
            stripe_token
        );
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx =
            AnalysisContext::test_with_mock_files(&graph, vec![("creds.py", source.as_str())]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");

        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            !findings.is_empty(),
            "QA audit regression: SecretDetector returned 0 findings for an \
             obvious hardcoded password in Python (.py). The masking layer \
             was stripping the value. Findings: {:?}",
            titles
        );

        // At minimum the password assignment must produce a Generic Secret hit.
        let has_generic_secret = findings.iter().any(|f| f.title.contains("Generic Secret"));
        let titles_with_lines: Vec<_> = findings
            .iter()
            .map(|f| (&f.title, f.line_start))
            .collect();
        assert!(
            has_generic_secret,
            "Expected a Generic Secret finding on the password line. Got: {:?}",
            titles_with_lines
        );
    }

    #[test]
    fn test_ast_detects_javascript_hardcoded_password() {
        // The GitHub PAT shape is assembled at runtime so no contiguous
        // `ghp_...` token appears in this source (GitHub Push Protection).
        use crate::detectors::analysis_context::AnalysisContext;

        let mut github_pat = String::from("ghp");
        github_pat.push('_');
        github_pat.push_str(&"a".repeat(36));
        let source = format!(
            "const password = \"hardcoded_pass_j6k\";\nconst token = \"{}\";\n",
            github_pat
        );
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("app.js", source.as_str())]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");

        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        let has_title = |needle: &str| findings.iter().any(|f| f.title.contains(needle));
        assert!(
            has_title("Generic Secret"),
            "Expected Generic Secret on JS const password assignment. Got: {:?}",
            titles
        );
        assert!(
            has_title("GitHub Token"),
            "Expected GitHub Token on JS string literal. Got: {:?}",
            titles
        );
    }

    #[test]
    fn test_ast_detects_rust_hardcoded_stripe_key() {
        // `sk_live_` is a self-shaped pattern: the value is recognised by its
        // own format, so the finding does not depend on the name it is bound
        // to. The fixture here binds it with a plain `let` inside `fn main`.
        //
        // As in `test_audit_repro_python_hardcoded_password`, the token is
        // assembled at runtime to keep a contiguous `sk_live_...` literal out
        // of this file (GitHub Push Protection).
        use crate::detectors::analysis_context::AnalysisContext;

        let stripe_token = ["sk", "_live_", "zzzzzzzzzzzzzzzzzzzzzzzzz"].concat();
        let source = format!(
            "fn main() {{\n    let stripe_key = \"{}\";\n    let _ = stripe_key;\n}}\n",
            stripe_token
        );
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx =
            AnalysisContext::test_with_mock_files(&graph, vec![("config.rs", source.as_str())]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");

        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        let has_stripe_key = findings.iter().any(|f| f.title.contains("Stripe API Key"));
        assert!(
            has_stripe_key,
            "Expected Stripe API Key on Rust string literal. Got: {:?}",
            titles
        );
    }

    #[test]
    fn test_ast_skips_python_docstring_with_password_word() {
        // Docstring handling on the AST path: the credential-looking text
        // lives inside the function's docstring and must not be reported.
        use crate::detectors::analysis_context::AnalysisContext;

        let python_src = "def authenticate(user, raw):\n    \"\"\"Authenticate user. password = \\\"never_a_real_secret_xyz\\\"\"\"\"\n    return user\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("auth.py", python_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles_with_lines: Vec<_> = findings
            .iter()
            .map(|f| (&f.title, f.line_start))
            .collect();
        assert!(
            findings.is_empty(),
            "Should not flag credentials inside a Python docstring. Got: {:?}",
            titles_with_lines
        );
    }

    #[test]
    fn test_ast_skips_python_env_fallback() {
        // `os.environ.get(NAME, "fallback")`: the literal is only a default.
        // Zero findings is acceptable; any finding that is produced must be
        // Low or Info severity.
        use crate::detectors::analysis_context::AnalysisContext;

        let python_src =
            "import os\nDB_PASSWORD = os.environ.get(\"DB_PASSWORD\", \"dev_default_pw\")\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("settings.py", python_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        findings.iter().for_each(|finding| {
            assert!(
                matches!(finding.severity, Severity::Low | Severity::Info),
                "env.get() with fallback should be Low severity, got {:?} for {:?}",
                finding.severity,
                finding.title
            );
        });
    }

    #[test]
    fn test_ast_python_function_call_value_not_flagged() {
        // `password = SomeClass(...)` binds a call result, not a literal; the
        // AST path must leave it alone.
        use crate::detectors::analysis_context::AnalysisContext;

        let python_src =
            "password = CharField(max_length=128)\nsecret = SecretManager.from_config(settings)\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("models.py", python_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            findings.is_empty(),
            "Should not flag function/class calls as secrets. Got: {:?}",
            titles
        );
    }

    // -----------------------------------------------------------------
    // Audit-fix regression tests (audit report on commit 4381002a)
    // -----------------------------------------------------------------

    /// Bug 1 (medium): an AKIA-shaped string sitting in the KEY position of
    /// a Python dict must be reported. It used to be suppressed by
    /// `parent_is_handled_binding_value` because its parent node is a `pair`.
    #[test]
    fn test_ast_detects_aws_key_used_as_dict_key() {
        use crate::detectors::analysis_context::AnalysisContext;

        // Assemble the AKIA token at runtime so that this file holds no
        // literal AKIA-shaped string (GitHub Push Protection).
        let akia = ["AKIA", "IOSFODNN", "7ABCDEFG"].concat();
        let py_src = format!("config = {{\"{}\": \"placeholder\"}}\n", akia);
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx =
            AnalysisContext::test_with_mock_files(&graph, vec![("audit.py", py_src.as_str())]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        let has_aws_key = findings.iter().any(|f| f.title.contains("AWS Access Key"));
        assert!(
            has_aws_key,
            "AKIA-shaped dict key must be reported. Got: {:?}",
            titles
        );
    }

    /// Bug 2 (low): a JS class field definition such as
    /// `class C { password = "v" }` must be reported. A title containing
    /// either "Generic Secret" or "Password" is accepted.
    #[test]
    fn test_ast_detects_js_class_field_secret() {
        use crate::detectors::analysis_context::AnalysisContext;

        let js_src = "class Auth {\n  password = \"hardcoded123!\";\n}\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("auth.js", js_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        let reported = findings
            .iter()
            .any(|f| f.title.contains("Generic Secret") || f.title.contains("Password"));
        assert!(
            reported,
            "JS class field with hardcoded password must be reported. Got: {:?}",
            titles
        );
    }

    /// Coverage gap: a string literal used as the default value of a Python
    /// parameter (`def f(password="hardcoded"):`) must produce a finding.
    #[test]
    fn test_ast_detects_python_default_parameter_secret() {
        use crate::detectors::analysis_context::AnalysisContext;

        let python_src = "def authenticate(user, password=\"hardcoded123!\"):\n    return user\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("auth.py", python_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        assert!(
            !findings.is_empty(),
            "Default-parameter hardcoded password must be reported. Got: {:?}",
            titles
        );
    }

    /// `apiKeyHeader = "..."` must NOT produce a "Generic API Key" finding:
    /// the name merely contains `apiKey`, it is not itself an api-key name.
    /// Findings with other titles are not constrained by this test.
    #[test]
    fn test_ast_strict_api_key_gate_rejects_camelcase_substring() {
        use crate::detectors::analysis_context::AnalysisContext;

        let python_src = "apiKeyHeader = \"X-API-Key-Custom-Header-Name\"\n";
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("config.py", python_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let titles: Vec<_> = findings.iter().map(|f| &f.title).collect();
        let no_generic_api_key = findings
            .iter()
            .all(|f| !f.title.contains("Generic API Key"));
        assert!(
            no_generic_api_key,
            "Generic API Key gate must be anchored — `apiKeyHeader` is not an api_key. Got: {:?}",
            titles
        );
    }

    /// `name_gate` boundary cases for Generic Secret. Two are exercised here:
    /// the plural `passwords` must NOT match, and `auth_token_value` MUST
    /// match via the `_token_` boundary.
    #[test]
    fn test_ast_generic_secret_name_gate_boundaries() {
        use crate::detectors::analysis_context::AnalysisContext;

        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");

        // Case 1: `passwords` (plural) is not a credential variable.
        let plural_ctx = AnalysisContext::test_with_mock_files(
            &graph,
            vec![("a.py", "passwords = \"this-is-a-list-name\"\n")],
        );
        let plural_findings = secret_detector
            .detect(&plural_ctx)
            .expect("detection should succeed");
        let plural_titles: Vec<_> = plural_findings.iter().map(|f| &f.title).collect();
        let plural_is_clean = plural_findings
            .iter()
            .all(|f| !f.title.contains("Generic Secret"));
        assert!(
            plural_is_clean,
            "Plural `passwords` must not match Generic Secret. Got: {:?}",
            plural_titles
        );

        // Case 2: `auth_token_value` has `token` delimited by underscores.
        let token_ctx = AnalysisContext::test_with_mock_files(
            &graph,
            vec![("b.py", "auth_token_value = \"hardcoded-bearer-abcd-1234\"\n")],
        );
        let token_findings = secret_detector
            .detect(&token_ctx)
            .expect("detection should succeed");
        let token_titles: Vec<_> = token_findings.iter().map(|f| &f.title).collect();
        let token_is_flagged = token_findings
            .iter()
            .any(|f| f.title.contains("Generic Secret"));
        assert!(
            token_is_flagged,
            "`auth_token_value` should match via `_token_` boundary. Got: {:?}",
            token_titles
        );
    }

    // ------------------------------------------------------------------
    // False-positive regression tests: header/env-var name constants
    //
    // These were the original bug: the Generic Secret heuristic was
    // flagging constants whose VALUE is an HTTP-header name or env-var
    // name (an *identifier*, not a secret value) because it only checked
    // that the VARIABLE NAME contained "token" / "secret" etc. and that
    // the value was ≥8 non-whitespace chars.
    //
    // After the fix:
    //   - Variable name suffix `_HEADER` / `_ENV` / `_NAME` / `_FIELD` /
    //     `_PARAM` … → suppressed via `var_name_signals_identifier`.
    //   - Value that is an HTTP-header name (`[a-z0-9-]` only, each
    //     segment starting with a letter) or SCREAMING_SNAKE_CASE env-var
    //     name (`[A-Z0-9_]` only) → suppressed via `value_looks_like_identifier`.
    //   - High-entropy values (real API keys) still fire regardless.
    // ------------------------------------------------------------------

    #[test]
    fn test_no_fp_for_http_header_name_constant() {
        // Constants such as `E2E_AUTH_TOKEN_HEADER = 'x-scout-e2e-auth-token'`
        // hold an HTTP header *name*, not a secret. Only "Generic Secret"
        // findings are checked; other titles are not constrained here.
        use crate::detectors::analysis_context::AnalysisContext;

        let ts_src = concat!(
            "const E2E_AUTH_TOKEN_HEADER = 'x-scout-e2e-auth-token';\n",
            "const DEV_AUTH_TOKEN_HEADER = 'x-dev-auth-token';\n",
            "const AUTH_HEADER = 'authorization';\n",
        );
        let graph = GraphBuilder::new().freeze();
        let secret_detector = SecretDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("constants.ts", ts_src)]);
        let findings = secret_detector
            .detect(&ctx)
            .expect("detection should succeed");
        let generic_hits: Vec<_> = findings
            .iter()
            .filter(|f| f.title.contains("Generic Secret"))
            .map(|f| (&f.title, f.line_start))
            .collect();
        assert!(
            generic_hits.is_empty(),
            "Should NOT flag HTTP-header-name constants as Generic Secret. Got: {:?}",
            generic_hits
        );
    }

    #[test]
    fn test_no_fp_for_env_var_name_constant() {
        // Regression guard: `E2E_AUTH_TOKEN_ENV = 'SCOUT_E2E_AUTH_TOKEN'`
        // stores the *name* of an environment variable (SCREAMING_SNAKE_CASE),
        // not the secret itself, so no "Generic Secret" finding may be
        // produced for it.
        let graph = GraphBuilder::new().freeze();
        let scanner = SecretDetector::new("/mock/repo");
        let mock_files = vec![(
            "constants.ts",
            "const E2E_AUTH_TOKEN_ENV = 'SCOUT_E2E_AUTH_TOKEN';\n",
        )];
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph, mock_files,
        );
        let findings = scanner.detect(&ctx).expect("detection should succeed");

        // (title, line) pairs of anything wrongly reported; must stay empty.
        let flagged: Vec<_> = findings
            .iter()
            .filter(|finding| finding.title.contains("Generic Secret"))
            .map(|finding| (&finding.title, finding.line_start))
            .collect();
        assert!(
            flagged.is_empty(),
            "Should NOT flag env-var-name constants as Generic Secret. Got: {:?}",
            flagged
        );
    }

    #[test]
    fn test_no_fp_for_field_name_constant() {
        // Regression guard: `TOKEN_FIELD = 'auth_token'` and
        // `SECRET_PARAM = 'secret_key'` hold field / parameter *names*, not
        // credentials. Neither declaration may be reported as a
        // "Generic Secret".
        let field_constants =
            "const TOKEN_FIELD = 'auth_token';\nconst SECRET_PARAM = 'secret_key';\n";

        let graph = GraphBuilder::new().freeze();
        let scanner = SecretDetector::new("/mock/repo");
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph,
            vec![("constants.ts", field_constants)],
        );
        let findings = scanner.detect(&ctx).expect("detection should succeed");

        // (title, line) pairs of anything wrongly reported; must stay empty.
        let flagged: Vec<_> = findings
            .iter()
            .filter(|finding| finding.title.contains("Generic Secret"))
            .map(|finding| (&finding.title, finding.line_start))
            .collect();
        assert!(
            flagged.is_empty(),
            "Should NOT flag field/param name constants as Generic Secret. Got: {:?}",
            flagged
        );
    }

    #[test]
    fn test_still_detects_high_entropy_api_key() {
        // Counterpart to the false-positive guards: a random-looking value
        // bound to an `API_KEY` constant must still yield at least one finding.
        //
        // The key is assembled from two literals at runtime so the full token
        // never appears verbatim in this file (avoids tripping GitHub Push
        // Protection).
        let api_key = ["sk-proj-", "1aB2c3D4e5F6g7H8i9J0kLmNoPqRsTuVwXyZ"].concat();
        let source = format!("const API_KEY = '{api_key}';\n");

        let graph = GraphBuilder::new().freeze();
        let scanner = SecretDetector::new("/mock/repo");
        let mock_files = vec![("keys.ts", source.as_str())];
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph, mock_files,
        );
        let findings = scanner.detect(&ctx).expect("detection should succeed");

        let produced_finding = !findings.is_empty();
        assert!(
            produced_finding,
            "High-entropy API key constant MUST still produce a finding. Got none."
        );
    }

    // ------------------------------------------------------------------
    // Task 10: Blocking-tier tests
    // ------------------------------------------------------------------

    #[test]
    fn known_format_in_app_code_is_blocking() {
        // An AKIA-shaped AWS access key id hardcoded in `src/aws.rs` (ordinary
        // application code) must be reported with `aws_access_key_id` evidence
        // and carry the full Blocking contract: Blocking tier, deterministic,
        // confidence >= 0.90, Critical severity.
        //
        // The key is joined from two literals so it never appears verbatim here.
        let akia = ["AKIA", "IOSFODNN7EXAMPLE"].concat();
        let source = format!("const AWS_KEY: &str = \"{akia}\";\n");

        let graph = GraphBuilder::new().freeze();
        let scanner = SecretDetector::new("/mock/repo");
        let mock_files = vec![("src/aws.rs", source.as_str())];
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph, mock_files,
        );
        let findings = scanner.detect(&ctx).expect("detection should succeed");

        // First finding whose evidence names the AWS access-key format.
        let hit = findings
            .iter()
            .find_map(|candidate| match &candidate.evidence {
                Some(crate::models::Evidence::Secret { format, .. })
                    if format == "aws_access_key_id" =>
                {
                    Some(candidate)
                }
                _ => None,
            })
            .expect("should have a Blocking aws_access_key_id finding");

        assert_eq!(
            hit.tier,
            crate::models::Tier::Blocking,
            "tier must be Blocking"
        );
        assert!(
            hit.deterministic,
            "deterministic must be true for Blocking findings"
        );
        // A missing confidence is treated as 0.0 and therefore fails the check.
        let confidence = hit.confidence.unwrap_or(0.0);
        assert!(
            confidence >= 0.90,
            "confidence must be >= 0.90 for Blocking findings"
        );
        assert_eq!(
            hit.severity,
            crate::models::Severity::Critical,
            "known-format secrets must be Critical"
        );
    }

    #[test]
    fn high_entropy_generic_in_app_code_is_blocking() {
        // A 35-character random-looking literal assigned to `APP_TOKEN` in
        // `src/x.rs` must be reported with `generic_high_entropy` evidence as
        // a Blocking, deterministic finding with confidence >= 0.90.
        let high_entropy_val = "xR7kP2mZ9qW4vL8tN1sH3dF6jY0eC5bA2uQ";
        let source = format!("const APP_TOKEN: &str = \"{high_entropy_val}\";\n");

        let graph = GraphBuilder::new().freeze();
        let scanner = SecretDetector::new("/mock/repo");
        let mock_files = vec![("src/x.rs", source.as_str())];
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph, mock_files,
        );
        let findings = scanner.detect(&ctx).expect("detection should succeed");

        // Sanity-check the fixture itself: if the literal does not clear the
        // entropy floor, a missing finding would say nothing about the detector.
        let floor = crate::detectors::security::secret_formats::GENERIC_ENTROPY_FLOOR;
        let entropy = crate::detectors::security::secret_formats::shannon_entropy_bits_per_char(
            high_entropy_val,
        );
        assert!(
            entropy >= floor,
            "test value must have sufficient entropy; got {entropy}"
        );

        // First finding whose evidence names the generic high-entropy format.
        let hit = findings
            .iter()
            .find_map(|candidate| match &candidate.evidence {
                Some(crate::models::Evidence::Secret { format, .. })
                    if format == "generic_high_entropy" =>
                {
                    Some(candidate)
                }
                _ => None,
            })
            .expect("should have a generic_high_entropy Blocking finding");

        assert_eq!(
            hit.tier,
            crate::models::Tier::Blocking,
            "tier must be Blocking"
        );
        assert!(hit.deterministic, "deterministic must be true");
        // A missing confidence is treated as 0.0 and therefore fails the check.
        let confidence = hit.confidence.unwrap_or(0.0);
        assert!(confidence >= 0.90, "confidence >= 0.90");
    }

    #[test]
    fn sensitive_field_list_is_advisory() {
        // A list of sensitive *field names* such as
        // `SENSITIVE_FIELD_PATTERNS = ["password", "token", "secret", ...]`
        // (here placed in app/core/logging/filters.py) contains no credentials.
        // The detector may stay silent; anything it does report must be
        // non-Blocking and must not carry evidence.
        let source =
            "SENSITIVE_FIELD_PATTERNS = [\"password\", \"token\", \"secret\", \"api_key\"]\n";

        let graph = GraphBuilder::new().freeze();
        let scanner = SecretDetector::new("/mock/repo");
        let mock_files = vec![("app/core/logging/filters.py", source)];
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph, mock_files,
        );
        let findings = scanner.detect(&ctx).expect("detection should succeed");

        // Passes vacuously when nothing is reported.
        findings.iter().for_each(|finding| {
            assert_ne!(
                finding.tier,
                crate::models::Tier::Blocking,
                "list element {:?} must not be Blocking",
                finding.title
            );
            assert!(
                finding.evidence.is_none(),
                "Advisory findings must have no evidence; got {:?}",
                finding.evidence
            );
        });
    }

    #[test]
    fn fixture_and_doc_are_advisory() {
        // The same AKIA-shaped key placed in a test fixture
        // (`tests/fixtures/auth.py`) and in documentation (`README.md`) must
        // never surface as Blocking, and any finding reported for these files
        // must carry no evidence. The loop passes vacuously if the detector
        // skips both files outright.
        //
        // The key is joined from two literals so it never appears verbatim here.
        let akia = ["AKIA", "IOSFODNN7EXAMPLE"].concat();
        let fixture_src = format!("AWS_KEY = \"{akia}\"\n");
        let readme_src =
            format!("# Example\n\nUse your AWS key here:\n\n    export AWS_KEY={akia}\n");

        let graph = GraphBuilder::new().freeze();
        let scanner = SecretDetector::new("/mock/repo");

        // Per the original test notes: the fixture path is expected to be
        // treated as a test file and the README as a non-credential context.
        let mock_files = vec![
            ("tests/fixtures/auth.py", fixture_src.as_str()),
            ("README.md", readme_src.as_str()),
        ];
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph, mock_files,
        );
        let findings = scanner.detect(&ctx).expect("detection should succeed");

        for finding in findings.iter() {
            assert_ne!(
                finding.tier,
                crate::models::Tier::Blocking,
                "fixture/doc finding {:?} must not be Blocking",
                finding.title
            );
            assert!(
                finding.evidence.is_none(),
                "fixture/doc findings must have no evidence"
            );
        }
    }

    #[test]
    fn placeholder_is_advisory() {
        // `"your-api-key-here"` is an obvious placeholder, not a credential.
        // The detector may ignore it; if it reports anything, the finding must
        // be non-Blocking and evidence-free.
        let source = "API_KEY = \"your-api-key-here\"\n";

        let graph = GraphBuilder::new().freeze();
        let scanner = SecretDetector::new("/mock/repo");
        let mock_files = vec![("config.py", source)];
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph, mock_files,
        );
        let findings = scanner.detect(&ctx).expect("detection should succeed");

        // Passes vacuously when nothing is reported.
        findings.iter().for_each(|finding| {
            assert_ne!(
                finding.tier,
                crate::models::Tier::Blocking,
                "placeholder must not be Blocking, got {:?}",
                finding.title
            );
            assert!(finding.evidence.is_none(), "placeholder must have no evidence");
        });
    }

    #[test]
    fn test_still_detects_high_entropy_stripe_secret_name_variable() {
        // `STRIPE_SECRET = 'rk_live_...'`: a random-looking value bound to a
        // name containing "secret", with no identifier-style suffix on the
        // variable name. At least one finding must be produced.
        //
        // The key is joined from two literals so it never appears verbatim here.
        let stripe_key = ["rk", "_live_51HxYzAbCdEfGhIjKlMnOpQrStUv"].concat();
        let source = format!("const STRIPE_SECRET = '{stripe_key}';\n");

        let graph = GraphBuilder::new().freeze();
        let scanner = SecretDetector::new("/mock/repo");
        let mock_files = vec![("payment.ts", source.as_str())];
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            &graph, mock_files,
        );
        let findings = scanner.detect(&ctx).expect("detection should succeed");

        // NOTE(review): the original note says `rk_live_` is not a recognised
        // self-shaped pattern, so the hit is expected via the Generic Secret
        // path. This test only checks that *some* finding exists, not which
        // pattern fired.
        let produced_finding = !findings.is_empty();
        assert!(
            produced_finding,
            "High-entropy Stripe-style secret constant MUST still produce a finding."
        );
    }
}