Skip to main content

ta_changeset/
secret_scan.rs

// secret_scan.rs — Regex-based secret scanner for draft artifacts (v0.15.14.4).
//
// Runs at `ta draft apply` time over all text-content artifact diffs.
// Scans for credentials, API keys, private key PEM headers, and other
// high-confidence secret patterns.
//
// Behavior is controlled by `SecretScanMode` from `SecurityProfile`:
//   - `Off`   → skip entirely (explicit opt-out)
//   - `Warn`  → print findings, allow apply to continue (default for low/mid)
//   - `Block` → print findings, abort apply with remediation CTA (default for high)
//
// False-positive management: add the offending path to `.ta-secret-ignore`.
// Format: one path pattern per line, same glob syntax as `.taignore`.
14
15use std::path::Path;
16
// Patterns are compiled lazily, exactly once, on first use via `std::sync::OnceLock`.
18use std::sync::OnceLock;
19
/// A single secret finding produced by the scanner.
///
/// Findings compare by value (`PartialEq`/`Eq`), so they can be asserted on
/// directly in tests or de-duplicated by callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SecretFinding {
    /// Human-readable name of the matched pattern.
    pub pattern_name: String,
    /// File path where the match was found (relative to workspace root).
    pub file_path: String,
    /// Redacted context line (the matched value is replaced with `[REDACTED]`).
    pub context: String,
}
30
31impl std::fmt::Display for SecretFinding {
32    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33        write!(
34            f,
35            "[secret] {} in {}: {}",
36            self.pattern_name, self.file_path, self.context
37        )
38    }
39}
40
41// ── Pattern definitions ────────────────────────────────────────────────────────
42
/// Static description of one secret pattern, before regex compilation.
struct PatternDef {
    /// Human-readable label reported with each finding.
    name: &'static str,
    /// Regex source text. Capture group 1 wraps the part of the match that is
    /// the secret value itself.
    pattern: &'static str,
}
48
49static PATTERNS: &[PatternDef] = &[
50    PatternDef {
51        name: "AWS Access Key ID",
52        pattern: r"(AKIA[0-9A-Z]{16})",
53    },
54    PatternDef {
55        name: "GitHub Personal Access Token",
56        pattern: r"(ghp_[A-Za-z0-9]{36})",
57    },
58    PatternDef {
59        name: "Generic API Key",
60        pattern: r"(?i)[Aa][Pp][Ii][_\-]?[Kk][Ee][Yy]\s*[=:]\s*([A-Za-z0-9_\-]{20,})",
61    },
62    PatternDef {
63        name: "Private Key PEM Header",
64        pattern: r"(-----BEGIN [A-Z ]*PRIVATE KEY-----)",
65    },
66    PatternDef {
67        name: "Generic Secret Assignment",
68        pattern: r#"(?i)(?:secret|password|passwd|token|credential|auth_token|access_token|refresh_token)\s*[=:]\s*["']([A-Za-z0-9+/=_\-!@#$%^&*]{12,})["']"#,
69    },
70];
71
72struct CompiledPattern {
73    name: &'static str,
74    re: regex::Regex,
75}
76
77static COMPILED: OnceLock<Vec<CompiledPattern>> = OnceLock::new();
78
79fn get_patterns() -> &'static Vec<CompiledPattern> {
80    COMPILED.get_or_init(|| {
81        PATTERNS
82            .iter()
83            .map(|p| CompiledPattern {
84                name: p.name,
85                re: regex::Regex::new(p.pattern)
86                    .unwrap_or_else(|e| panic!("bad secret pattern {}: {}", p.name, e)),
87            })
88            .collect()
89    })
90}
91
92// ── Secret-ignore file ─────────────────────────────────────────────────────────
93
94const SECRET_IGNORE_FILE: &str = ".ta-secret-ignore";
95
96/// Returns true if the path should be excluded from scanning based on
97/// `.ta-secret-ignore` patterns in `workspace_root`.
98fn is_ignored(file_path: &str, workspace_root: &Path) -> bool {
99    let ignore_path = workspace_root.join(SECRET_IGNORE_FILE);
100    if !ignore_path.exists() {
101        return false;
102    }
103    let Ok(content) = std::fs::read_to_string(&ignore_path) else {
104        return false;
105    };
106    for line in content.lines() {
107        let pattern = line.trim();
108        if pattern.is_empty() || pattern.starts_with('#') {
109            continue;
110        }
111        if glob_matches(pattern, file_path) {
112            return true;
113        }
114    }
115    false
116}
117
/// Minimal glob matcher for ignore patterns.
///
/// Supported syntax (everything else is matched literally, including `?`):
/// - `*` matches any run of characters within one path component (never `/`).
/// - `**` matches any run of characters, including `/`.
/// - `**/` at the start of a component matches zero or more whole leading
///   directories, so `**/*.pem` matches both `a.pem` and `keys/a.pem`, but
///   `**/secrets.txt` does not match `mysecrets.txt`.
///
/// The whole `path` must match the whole `pattern`. Matching is done with a
/// small dynamic-programming table, so the cost is proportional to
/// `pattern.len() * path.len()` with no backtracking blow-up.
fn glob_matches(pattern: &str, path: &str) -> bool {
    if pattern == path {
        return true;
    }

    enum Token {
        /// One literal byte.
        Literal(u8),
        /// `*`: any run of non-`/` bytes.
        Star,
        /// `**`: any run of bytes.
        Globstar,
        /// `**/` at a component start: zero or more whole directories.
        GlobstarDir,
    }

    // Tokenize the pattern.
    let pat = pattern.as_bytes();
    let mut tokens = Vec::with_capacity(pat.len());
    let mut i = 0;
    while i < pat.len() {
        if pat[i] != b'*' {
            tokens.push(Token::Literal(pat[i]));
            i += 1;
            continue;
        }
        let run_start = i;
        while i < pat.len() && pat[i] == b'*' {
            i += 1;
        }
        let at_component_start = run_start == 0 || pat[run_start - 1] == b'/';
        if i - run_start == 1 {
            tokens.push(Token::Star);
        } else if at_component_start && pat.get(i) == Some(&b'/') {
            tokens.push(Token::GlobstarDir);
            i += 1; // the `/` belongs to this token
        } else {
            tokens.push(Token::Globstar);
        }
    }

    // `rest[j]` is true when the tokens after the current one match `text[j..]`.
    // Tokens are processed last-to-first, so the table starts as "no tokens left".
    let text = path.as_bytes();
    let n = text.len();
    let mut rest = vec![false; n + 1];
    rest[n] = true;

    for token in tokens.iter().rev() {
        let mut cur = vec![false; n + 1];
        // For `GlobstarDir`: true when some `/` at or after the position being
        // examined is immediately followed by a match of the remaining tokens.
        let mut via_dir = false;
        for j in (0..=n).rev() {
            cur[j] = match token {
                Token::Literal(c) => j < n && text[j] == *c && rest[j + 1],
                Token::Star => rest[j] || (j < n && text[j] != b'/' && cur[j + 1]),
                Token::Globstar => rest[j] || (j < n && cur[j + 1]),
                Token::GlobstarDir => {
                    via_dir = j < n && ((text[j] == b'/' && rest[j + 1]) || via_dir);
                    // Either zero directories, or skip ahead to just past a `/`.
                    rest[j] || via_dir
                }
            };
        }
        rest = cur;
    }
    rest[0]
}
141
142// ── Scanner ────────────────────────────────────────────────────────────────────
143
144/// Scan `text` (the text content of `file_path`) for secret patterns.
145/// Returns a list of findings with redacted context lines.
146///
147/// `workspace_root` is used to check `.ta-secret-ignore`.
148pub fn scan_for_secrets(text: &str, file_path: &str, workspace_root: &Path) -> Vec<SecretFinding> {
149    if is_ignored(file_path, workspace_root) {
150        return vec![];
151    }
152
153    let patterns = get_patterns();
154    let mut findings = Vec::new();
155
156    for line in text.lines() {
157        // Scan the raw line. Regex patterns match anywhere in the line,
158        // so no prefix stripping is needed. Diff format prefixes (+/-/ )
159        // are harmless since the secret patterns look for specific high-confidence strings.
160        let stripped = line;
161
162        for compiled in patterns {
163            if let Some(m) = compiled.re.find(stripped) {
164                let secret_val = m.as_str();
165                let redacted = stripped.replacen(secret_val, "[REDACTED]", 1);
166                findings.push(SecretFinding {
167                    pattern_name: compiled.name.to_string(),
168                    file_path: file_path.to_string(),
169                    context: redacted.trim().to_string(),
170                });
171                // One finding per pattern per line is enough.
172                break;
173            }
174        }
175    }
176
177    findings
178}
179
180/// Print findings to stderr and return whether any were found.
181pub fn print_findings(findings: &[SecretFinding]) -> bool {
182    if findings.is_empty() {
183        return false;
184    }
185    eprintln!();
186    eprintln!("┌─ Secret Scan Findings ─────────────────────────────────────");
187    for f in findings {
188        eprintln!(
189            "│  [{pattern}] {file}",
190            pattern = f.pattern_name,
191            file = f.file_path
192        );
193        eprintln!("│    {}", f.context);
194    }
195    eprintln!("└────────────────────────────────────────────────────────────");
196    true
197}
198
199/// Print the block CTA when `SecretScanMode::Block` aborts apply.
200pub fn print_block_cta(findings: &[SecretFinding]) {
201    print_findings(findings);
202    eprintln!();
203    eprintln!(
204        "Apply blocked: {} secret finding(s) detected in draft artifacts.",
205        findings.len()
206    );
207    eprintln!("To resolve:");
208    eprintln!("  1. Remove secrets from the staged files.");
209    eprintln!("  2. Or add the path to .ta-secret-ignore to exclude it from scanning.");
210    eprintln!("  3. Or set [security.secrets] scan = \"warn\" to downgrade to a warning.");
211    eprintln!();
212}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `text` as `file_path` inside a fresh, empty temporary workspace.
    fn scan_in_empty_workspace(text: &str, file_path: &str) -> Vec<SecretFinding> {
        let workspace = tempfile::tempdir().unwrap();
        scan_for_secrets(text, file_path, workspace.path())
    }

    /// True when some finding names a pattern containing `label` and shows a
    /// redaction marker in its context.
    fn has_redacted_finding(findings: &[SecretFinding], label: &str) -> bool {
        findings
            .iter()
            .any(|f| f.pattern_name.contains(label) && f.context.contains("[REDACTED]"))
    }

    // Item 9f: secret scanner finds AWS key.
    #[test]
    fn finds_aws_key() {
        let findings = scan_in_empty_workspace(
            "export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE\n",
            "config/env.sh",
        );
        assert!(
            has_redacted_finding(&findings, "AWS"),
            "expected AWS key finding, got: {findings:?}"
        );
    }

    // Item 9f: secret scanner finds GitHub PAT.
    #[test]
    fn finds_github_pat() {
        let findings = scan_in_empty_workspace(
            "token: ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890\n",
            "src/auth.rs",
        );
        assert!(
            has_redacted_finding(&findings, "GitHub"),
            "expected GitHub PAT finding, got: {findings:?}"
        );
    }

    // A PEM private-key header is reported; the redaction marker is not checked.
    #[test]
    fn finds_private_key_pem() {
        let findings = scan_in_empty_workspace(
            "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA...\n",
            "keys/server.pem",
        );
        let found_private_key = findings
            .iter()
            .any(|f| f.pattern_name.contains("Private Key"));
        assert!(
            found_private_key,
            "expected private key finding, got: {findings:?}"
        );
    }

    // Ordinary source text yields nothing.
    #[test]
    fn clean_text_produces_no_findings() {
        let findings =
            scan_in_empty_workspace("fn main() { println!(\"hello\"); }\n", "src/main.rs");
        assert!(findings.is_empty());
    }

    // A path covered by `.ta-secret-ignore` is not scanned at all.
    #[test]
    fn ignored_path_is_skipped() {
        let workspace = tempfile::tempdir().unwrap();
        let ignore_file = workspace.path().join(".ta-secret-ignore");
        std::fs::write(ignore_file, "fixtures/**\n").unwrap();

        let findings = scan_for_secrets(
            "token: ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890\n",
            "fixtures/test.sh",
            workspace.path(),
        );
        assert!(
            findings.is_empty(),
            "ignored path should produce no findings"
        );
    }
}