Skip to main content

resq_cli/commands/
secrets.rs

1/*
2 * Copyright 2026 ResQ
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//! Secret scanning command for detecting hardcoded credentials.
18//!
19//! Scans source files for potential secrets like API keys, passwords, tokens,
20//! and other sensitive information using pattern matching and entropy analysis.
21
22use aho_corasick::AhoCorasick;
23use anyhow::Result;
24use rayon::prelude::*;
25use regex::Regex;
26use std::fs;
27use std::path::{Path, PathBuf};
28use std::process::Command;
29use std::sync::Mutex;
30use walkdir::WalkDir;
31
32// ── CLI Args ─────────────────────────────────────────────────────────────────
33
34/// CLI arguments for the secrets scanning command.
35#[derive(clap::Args, Debug)]
36pub struct SecretsArgs {
37    /// Root directory to scan (defaults to project root)
38    #[arg(long, default_value = ".")]
39    pub root: PathBuf,
40
41    /// Only scan git-tracked files
42    #[arg(long, default_value_t = true)]
43    pub git_only: bool,
44
45    /// Show verbose output (print matched content)
46    #[arg(long, short)]
47    pub verbose: bool,
48
49    /// Path to allowlist file (one pattern per line)
50    #[arg(long)]
51    pub allowlist: Option<PathBuf>,
52
53    /// Scan staged changes only (for pre-commit hook integration)
54    #[arg(long)]
55    pub staged: bool,
56
57    /// Also scan git history (all commits reachable from HEAD)
58    #[arg(long)]
59    pub history: bool,
60
61    /// Limit history scan to commits after this rev/date (e.g. "30 days ago", "v1.0.0")
62    #[arg(long)]
63    pub since: Option<String>,
64}
65
66// ── Rules ─────────────────────────────────────────────────────────────────────
67
/// Character classes used by the entropy gates, each with its own tuning.
///
/// Theoretical maximum Shannon entropy per character:
/// - hex: 4.0 bits (16 symbols)
/// - base64: 6.0 bits (64 symbols)
/// - alphanumeric: ~5.95 bits (62 symbols)
#[derive(Clone, Copy)]
enum EntropyCharset {
    Hex,
    Base64,
    Alphanumeric,
}

impl EntropyCharset {
    /// Minimum entropy, in bits per character, a match must reach to be kept.
    fn threshold(self) -> f64 {
        match self {
            Self::Hex => 3.5,
            Self::Alphanumeric => 4.0,
            Self::Base64 => 4.5,
        }
    }

    /// Minimum byte length a match must have before entropy is even considered.
    fn min_len(self) -> usize {
        match self {
            Self::Hex => 40,
            Self::Base64 | Self::Alphanumeric => 20,
        }
    }
}
96
/// A secret-detection rule: a named regex plus an optional entropy gate.
struct Rule {
    /// Human-readable rule name. It is copied into each finding, and
    /// `scan_line` also inspects it (substring checks such as "Hex" or
    /// "Generic") to decide which false-positive filters apply.
    name: &'static str,
    /// Regex run against each scanned line.
    pattern: Regex,
    /// Optional entropy gate applied to the full regex match.
    /// A match that does not meet the entropy threshold is suppressed.
    entropy_gate: Option<EntropyCharset>,
}
105
/// A single reported match of one rule against one scanned line.
struct Finding {
    /// Path of the file relative to the scan root, or `commit:<short-hash>`
    /// when the line came from a history scan.
    file: String,
    /// 1-based line number. For history scans this is the position within the
    /// commit's added lines, not a line number in any file.
    line: usize,
    /// Name of the rule that matched.
    rule: String,
    /// Redacted copy of the offending line (see `redact_line`).
    content: String,
}
113
114// ── Entropy ───────────────────────────────────────────────────────────────────
115
116/// Shannon entropy of a string, counting only bytes present in `charset`.
117/// Bytes outside the charset are ignored so the score reflects the density
118/// of the charset-relevant portion, not padding or delimiters.
119fn charset_entropy(s: &str, charset: EntropyCharset) -> f64 {
120    let is_member: fn(u8) -> bool = match charset {
121        EntropyCharset::Hex => |b| b.is_ascii_hexdigit(),
122        EntropyCharset::Base64 => {
123            |b| matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'=')
124        }
125        EntropyCharset::Alphanumeric => |b| matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9'),
126    };
127
128    let mut freq = [0u32; 256];
129    let mut count = 0usize;
130    for byte in s.bytes() {
131        if is_member(byte) {
132            freq[byte as usize] += 1;
133            count += 1;
134        }
135    }
136
137    if count == 0 {
138        return 0.0;
139    }
140
141    let len = count as f64;
142    freq.iter()
143        .filter(|&&c| c > 0)
144        .map(|&c| {
145            let p = f64::from(c) / len;
146            -p * p.log2()
147        })
148        .sum()
149}
150
151fn passes_entropy_gate(matched: &str, gate: EntropyCharset) -> bool {
152    matched.len() >= gate.min_len() && charset_entropy(matched, gate) >= gate.threshold()
153}
154
155// ── Pattern Registry ──────────────────────────────────────────────────────────
156
157fn build_rules() -> Vec<Rule> {
158    // (name, pattern, entropy_gate)
159    let specs: &[(&str, &str, Option<EntropyCharset>)] = &[
160        // ── Cloud: AWS ──────────────────────────────────────────────────────
161        ("AWS Access Key ID", r"AKIA[0-9A-Z]{16}", None),
162        (
163            "AWS Secret Access Key",
164            r"(?i)aws_secret_access_key\s*[=:]\s*[A-Za-z0-9/+=]{40}",
165            Some(EntropyCharset::Base64),
166        ),
167        (
168            "AWS MWS Key",
169            r"amzn\.mws\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
170            None,
171        ),
172        // ── Cloud: GCP ──────────────────────────────────────────────────────
173        ("GCP API Key", r"AIza[0-9A-Za-z\-_]{35}", None),
174        (
175            "GCP Service Account",
176            r#""type":\s*"service_account""#,
177            None,
178        ),
179        // ── Cloud: Azure ────────────────────────────────────────────────────
180        (
181            "Azure Storage Account Key",
182            r"(?i)(?:AccountKey|storageaccountkey|DefaultEndpointsProtocol)[=:\s]+[A-Za-z0-9+/]{86}==",
183            Some(EntropyCharset::Base64),
184        ),
185        (
186            "Azure SAS Token",
187            r"(?i)sig=[A-Za-z0-9%+/]{43,}={0,2}",
188            Some(EntropyCharset::Base64),
189        ),
190        (
191            "Azure APIM Subscription Key",
192            r"(?i)(?:ocp-apim-subscription-key|subscription.?key)\s*[=:]\s*[a-f0-9]{32}",
193            Some(EntropyCharset::Hex),
194        ),
195        // ── GitHub ──────────────────────────────────────────────────────────
196        ("GitHub PAT (classic)", r"ghp_[a-zA-Z0-9]{36}", None),
197        ("GitHub OAuth Token", r"gho_[a-zA-Z0-9]{36}", None),
198        (
199            "GitHub Fine-Grained PAT",
200            r"github_pat_[a-zA-Z0-9_]{82}",
201            None,
202        ),
203        ("GitHub App Token", r"(?:ghu|ghs)_[a-zA-Z0-9]{36}", None),
204        ("GitHub Refresh Token", r"ghr_[a-zA-Z0-9]{36}", None),
205        // ── API Keys: AI ────────────────────────────────────────────────────
206        (
207            "OpenAI API Key",
208            r"sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20}",
209            None,
210        ),
211        (
212            "OpenAI Project Key",
213            r"sk-proj-[a-zA-Z0-9\-_]{80,}",
214            Some(EntropyCharset::Alphanumeric),
215        ),
216        (
217            "Anthropic API Key",
218            r"sk-ant-[a-zA-Z0-9\-_]{80,}",
219            Some(EntropyCharset::Alphanumeric),
220        ),
221        // ── API Keys: Payments ───────────────────────────────────────────────
222        ("Stripe Live Secret Key", r"sk_live_[a-zA-Z0-9]{24,}", None),
223        ("Stripe Test Secret Key", r"sk_test_[a-zA-Z0-9]{24,}", None),
224        (
225            "Stripe Restricted Key",
226            r"rk_(?:live|test)_[a-zA-Z0-9]{24,}",
227            None,
228        ),
229        ("Stripe Publishable Key", r"pk_live_[a-zA-Z0-9]{24,}", None),
230        ("Square Access Token", r"sq0atp-[A-Za-z0-9_-]{22}", None),
231        ("Square OAuth Token", r"sq0csp-[A-Za-z0-9_-]{43}", None),
232        (
233            "Braintree Access Token",
234            r"access_token\$production\$[a-z0-9]{16}\$[a-f0-9]{32}",
235            None,
236        ),
237        // ── API Keys: Communication ──────────────────────────────────────────
238        (
239            "Slack Token",
240            r"xox[bpors]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*",
241            None,
242        ),
243        (
244            "Slack Webhook",
245            r"https://hooks\.slack\.com/services/T[0-9A-Z]{8,}/B[0-9A-Z]{8,}/[a-zA-Z0-9]{24}",
246            None,
247        ),
248        (
249            "Twilio API Key",
250            r"SK[a-f0-9]{32}",
251            Some(EntropyCharset::Hex),
252        ),
253        (
254            "SendGrid API Key",
255            r"SG\.[a-zA-Z0-9_\-]{22}\.[a-zA-Z0-9_\-]{43}",
256            None,
257        ),
258        (
259            "Mailgun API Key",
260            r"key-[a-zA-Z0-9]{32}",
261            Some(EntropyCharset::Alphanumeric),
262        ),
263        // Mailchimp keys always end in -us## (unique format, no entropy gate needed)
264        ("Mailchimp API Key", r"[a-f0-9]{32}-us\d{1,2}", None),
265        // ── API Keys: Observability ──────────────────────────────────────────
266        (
267            "Datadog API Key",
268            r"(?i)(?:datadog|dd)[_-]?(?:api[_-]?key|token)\s*[=:]\s*[a-f0-9]{32}",
269            Some(EntropyCharset::Hex),
270        ),
271        (
272            "Datadog App Key",
273            r"(?i)(?:datadog|dd)[_-]?(?:app[_-]?key|application[_-]?key)\s*[=:]\s*[a-f0-9]{40}",
274            Some(EntropyCharset::Hex),
275        ),
276        // ── API Keys: E-commerce ─────────────────────────────────────────────
277        ("Shopify Private App Token", r"shppa_[a-fA-F0-9]{32}", None),
278        ("Shopify Shared Secret", r"shpss_[a-fA-F0-9]{32}", None),
279        ("Shopify Access Token", r"shpat_[a-fA-F0-9]{32}", None),
280        ("Shopify Custom App Token", r"shpca_[a-fA-F0-9]{32}", None),
281        // ── API Keys: CRM ────────────────────────────────────────────────────
282        (
283            "HubSpot Private App Token",
284            r"pat-(?:na1|eu1)-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}",
285            None,
286        ),
287        // ── Infrastructure: HashiCorp ────────────────────────────────────────
288        (
289            "HashiCorp Vault Service Token",
290            r"hvs\.[A-Za-z0-9_-]{90,}",
291            Some(EntropyCharset::Base64),
292        ),
293        (
294            "HashiCorp Vault Batch Token",
295            r"hvb\.[A-Za-z0-9_-]{90,}",
296            Some(EntropyCharset::Base64),
297        ),
298        (
299            "Terraform Cloud Token",
300            r"[A-Za-z0-9]{14}\.atlasv1\.[A-Za-z0-9_-]{60,}",
301            Some(EntropyCharset::Alphanumeric),
302        ),
303        // ── Infrastructure: CI/CD & Hosting ──────────────────────────────────
304        (
305            "Heroku API Key",
306            r"(?i)heroku[_-]?(?:api[_-]?key|token)\s*[=:]\s*[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}",
307            None,
308        ),
309        (
310            "Render API Key",
311            r"rnd_[A-Za-z0-9]{32,}",
312            Some(EntropyCharset::Alphanumeric),
313        ),
314        // ── Package Registries ───────────────────────────────────────────────
315        (
316            "NPM Registry Auth Token (legacy)",
317            r"(?i)//registry\.npmjs\.org/:_authToken=[a-zA-Z0-9\-_]+",
318            None,
319        ),
320        ("NPM Access Token", r"npm_[A-Za-z0-9]{36}", None),
321        (
322            "PyPI API Token",
323            r"pypi-AgEIcHlwaS5vcmc[A-Za-z0-9_-]{50,}",
324            None,
325        ),
326        // ── Database / Infrastructure ────────────────────────────────────────
327        (
328            "Generic Connection String",
329            r#"(?i)(?:mongodb|postgres|mysql|redis)://[^\s"']+:[^\s"']+@"#,
330            None,
331        ),
332        (
333            "Database URL",
334            r#"(?i)database_url\s*[=:]\s*["']?(?:postgres|mysql|mongodb)://[^\s"']+"#,
335            None,
336        ),
337        // ── Private Keys ─────────────────────────────────────────────────────
338        ("RSA Private Key", r"-----BEGIN RSA PRIVATE KEY-----", None),
339        ("DSA Private Key", r"-----BEGIN DSA PRIVATE KEY-----", None),
340        ("EC Private Key", r"-----BEGIN EC PRIVATE KEY-----", None),
341        (
342            "OpenSSH Private Key",
343            r"-----BEGIN OPENSSH PRIVATE KEY-----",
344            None,
345        ),
346        (
347            "PGP Private Key",
348            r"-----BEGIN PGP PRIVATE KEY BLOCK-----",
349            None,
350        ),
351        ("Generic Private Key", r"-----BEGIN PRIVATE KEY-----", None),
352        (
353            "Encrypted Private Key",
354            r"-----BEGIN ENCRYPTED PRIVATE KEY-----",
355            None,
356        ),
357        // ── Blockchain / Crypto ──────────────────────────────────────────────
358        // WIF format private keys (Bitcoin / Neo N3 WIF)
359        (
360            "WIF Private Key",
361            r"\b[5KLc][1-9A-HJ-NP-Za-km-z]{50,51}\b",
362            None,
363        ),
364        (
365            "Ethereum/EVM Private Key",
366            r"(?i)(?:private[_-]?key|eth[_-]?key)\s*[=:]\s*(?:0x)?[a-f0-9]{64}",
367            Some(EntropyCharset::Hex),
368        ),
369        // ── Generic Patterns ─────────────────────────────────────────────────
370        (
371            "JWT Token",
372            r"eyJ[a-zA-Z0-9_-]{10,}\.eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}",
373            None,
374        ),
375        (
376            "Bearer Token",
377            r#"(?i)(?:bearer|authorization)\s*[=:]\s*["']?[a-zA-Z0-9\-_.~+/]{20,}["']?"#,
378            Some(EntropyCharset::Alphanumeric),
379        ),
380        (
381            "Generic API Key Assignment",
382            r#"(?i)(?:api[_-]?key|apikey|api[_-]?secret)\s*[=:]\s*["'][a-zA-Z0-9\-_.]{16,}["']"#,
383            Some(EntropyCharset::Alphanumeric),
384        ),
385        (
386            "Generic Secret Assignment",
387            r#"(?i)(?:secret|password|passwd|token)\s*[=:]\s*["'][^\s"']{8,}["']"#,
388            Some(EntropyCharset::Alphanumeric),
389        ),
390        // ── High-Entropy Fallback ─────────────────────────────────────────────
391        // Catches secrets that don't match any specific rule above.
392        // Entropy is always verified; threshold is 3.5 (hex max is 4.0).
393        (
394            "High-Entropy Hex (≥40 chars)",
395            r"\b[a-f0-9]{40,}\b",
396            Some(EntropyCharset::Hex),
397        ),
398    ];
399
400    specs
401        .iter()
402        .filter_map(|&(name, pat, entropy_gate)| {
403            Regex::new(pat).ok().map(|pattern| Rule {
404                name,
405                pattern,
406                entropy_gate,
407            })
408        })
409        .collect()
410}
411
412// ── AhoCorasick Prefilter ─────────────────────────────────────────────────────
413
/// Literal substrings that appear in matches of the prefix-based rules.
///
/// `scan_line` feeds this list to an Aho-Corasick prefilter: a line that
/// contains none of these literals and also has no `=` or `:` skips the
/// regex battery. Every rule that does not require an assignment on the line
/// should therefore have a literal listed here, otherwise it can only fire on
/// lines that happen to contain `=` or `:`.
const KNOWN_PREFIXES: &[&str] = &[
    "AKIA",
    "AIza",
    "ghp_",
    "gho_",
    "github_pat_",
    "ghu_",
    "ghs_",
    "ghr_",
    "sk-",
    "sk_live_",
    "sk_test_",
    "pk_live_",
    "rk_live_",
    "rk_test_",
    "xox",
    "https://hooks.slack.com",
    "SG.",
    // Twilio API key SIDs (`SK` + 32 hex chars).
    "SK",
    "key-",
    "hvs.",
    "hvb.",
    "npm_",
    "pypi-",
    "sq0atp-",
    "sq0csp-",
    "shppa_",
    "shpss_",
    "shpat_",
    "shpca_",
    "pat-na1-",
    "pat-eu1-",
    "rnd_",
    "-----BEGIN",
    "access_token$production$",
    "amzn.mws.",
    "eyJ",
    "AccountKey",
    "DefaultEndpointsProtocol",
    "sig=",
    "atlasv1.",
    // Mailchimp keys end in `-us<digits>`; "-us" covers every datacenter
    // suffix, including us4-us9 which the three entries below miss.
    "-us",
    "us1",
    "us2",
    "us3",
];
461
462fn build_prefilter() -> AhoCorasick {
463    AhoCorasick::new(KNOWN_PREFIXES).expect("prefilter patterns are valid")
464}
465
466// ── False-Positive Reduction ──────────────────────────────────────────────────
467
/// Returns true if the value-part of an assignment is a variable reference or
/// obvious placeholder, meaning the line should not be reported as a finding.
///
/// The value is everything after the first `=` or `:` on the line (the whole
/// line when neither is present), with surrounding whitespace, a trailing
/// `;`/`,` terminator and surrounding quotes removed.
fn is_variable_reference(line: &str) -> bool {
    // Extract value after first = or :
    let value = line.split_once(['=', ':']).map_or(line, |(_, rhs)| rhs);
    let v = value
        .trim()
        // Statement / list terminators would otherwise stay glued to the
        // closing quote and hide values such as `"MY_ENV_VAR";` or `"null",`.
        .trim_end_matches(|c: char| c == ';' || c == ',')
        .trim_end()
        .trim_matches(|c: char| c == '"' || c == '\'' || c == '`');

    v.starts_with("${")
        || v.starts_with("$(")
        || v.starts_with("#{")  // Ruby interpolation
        || v.starts_with('%')   // Python/Ruby template
        || v.starts_with('<')   // <placeholder>
        || v.starts_with("process.env.")
        || v.starts_with("os.environ")
        || v.starts_with("env(")
        || v.starts_with("vault(")
        || v.starts_with("secret(")
        || v == "null"
        || v == "undefined"
        || v == "None"
        || v == "false"
        || v == "true"
        || v.is_empty()
        // ALL_CAPS with underscores = environment variable name, not a value
        || (v.chars().all(|c| c.is_uppercase() || c == '_' || c.is_ascii_digit())
            && v.len() > 2)
}
497
/// Lowercase substrings that mark a line as test, example or placeholder data.
const TEST_KEYWORDS: &[&str] = &[
    "example",
    "placeholder",
    "changeme",
    "replace_me",
    "insert_",
    "dummy",
    "fake",
    "mock",
    "stub",
    "fixture",
    "demo",
    "invalid",
    "xxx",
    "000000",
    "aaaaaa",
    "test-key",
    "sample",
];

/// Reports whether `line`, compared case-insensitively, contains any of the
/// `TEST_KEYWORDS` anywhere (key name, value or surrounding text), which is
/// taken as a sign that the value is not a real credential.
fn has_test_marker(line: &str) -> bool {
    let haystack = line.to_lowercase();
    for keyword in TEST_KEYWORDS {
        if haystack.contains(*keyword) {
            return true;
        }
    }
    false
}
524
/// Reports whether `line` gives a long hex string an innocuous context.
///
/// After trimming and lowercasing, the line qualifies when it starts with
/// `commit ` (git log output) or mentions a hash/checksum marker anywhere.
fn is_known_non_secret_hex(line: &str) -> bool {
    const CONTEXT_MARKERS: [&str; 5] = ["sha256", "integrity", "checksum", "srchash", "filehash"];

    let normalized = line.trim().to_lowercase();
    if normalized.starts_with("commit ") {
        return true;
    }
    CONTEXT_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
536
537// ── Git File Collection ───────────────────────────────────────────────────────
538
/// Lists candidate files by asking git, resolved against `root`.
///
/// * `staged == true`: files added, copied or modified in the index
///   (`git diff --cached`), limited to and expressed relative to `root`.
/// * `staged == false`: files in the index plus untracked files that are not
///   ignored (`git ls-files --cached --others --exclude-standard`).
///
/// Entries that are not regular files on disk are dropped. Returns an empty
/// list when git cannot be run or exits unsuccessfully (for example when
/// `root` is not inside a repository).
fn get_git_files(root: &Path, staged: bool) -> Vec<PathBuf> {
    // `-z` makes git emit NUL-terminated, unquoted paths. Without it, paths
    // with unusual or non-ASCII characters are C-quoted and would never
    // resolve to a file. `--relative` makes the staged listing relative to
    // `root` (like `ls-files`) instead of the repository top level.
    let args: &[&str] = if staged {
        &[
            "diff",
            "--cached",
            "--name-only",
            "--relative",
            "--diff-filter=ACM",
            "-z",
        ]
    } else {
        &["ls-files", "--cached", "--others", "--exclude-standard", "-z"]
    };

    let Ok(output) = Command::new("git").args(args).current_dir(root).output() else {
        return Vec::new();
    };
    if !output.status.success() {
        return Vec::new();
    }

    output
        .stdout
        .split(|&byte| byte == 0)
        .filter(|raw| !raw.is_empty())
        .map(|raw| root.join(&*String::from_utf8_lossy(raw)))
        .filter(|path| path.is_file())
        .collect()
}
557
/// Runs `git log --all -p` in `root` and returns, per commit, the lines that
/// the commit added as `(commit_hash, added_lines)` pairs.
///
/// `since`, when given, is forwarded as `--since=<value>`. Commits without
/// added lines are omitted. Returns an empty list when git cannot be run or
/// exits unsuccessfully.
fn get_history_diffs(root: &Path, since: Option<&str>) -> Vec<(String, Vec<String>)> {
    let mut git = Command::new("git");
    git.args(["log", "--all", "--format=%H", "-p", "--diff-filter=ACM"]);
    if let Some(s) = since {
        git.arg(format!("--since={s}"));
    }
    git.current_dir(root);

    let Ok(output) = git.output() else {
        return Vec::new();
    };
    if !output.status.success() {
        return Vec::new();
    }

    let log = String::from_utf8_lossy(&output.stdout);
    let mut commits: Vec<(String, Vec<String>)> = Vec::new();
    let mut hash = String::new();
    let mut added: Vec<String> = Vec::new();

    for line in log.lines() {
        // `--format=%H` prints each commit as a bare 40-character hex line.
        let is_commit_header = line.len() == 40 && line.chars().all(|c| c.is_ascii_hexdigit());
        if is_commit_header {
            // Flush the previous commit before starting the next one.
            if !hash.is_empty() && !added.is_empty() {
                commits.push((hash, std::mem::take(&mut added)));
            }
            hash = line.to_string();
            continue;
        }

        match line.strip_prefix('+') {
            // `+++ b/path` file headers start with three pluses; skip them.
            Some(rest) if !rest.starts_with("++") => added.push(rest.to_string()),
            _ => {}
        }
    }

    if !hash.is_empty() && !added.is_empty() {
        commits.push((hash, added));
    }
    commits
}
595
596// ── Skip Logic ────────────────────────────────────────────────────────────────
597
598fn should_skip(path: &Path, gitignore_excludes: &[String]) -> bool {
599    const SKIP_EXT: &[&str] = &[
600        "png", "jpg", "jpeg", "gif", "ico", "svg", "webp", "woff", "woff2", "ttf", "eot", "mp3",
601        "mp4", "wav", "avi", "mov", "pdf", "zip", "gz", "tar", "bz2", "7z", "rar", "exe", "dll",
602        "so", "dylib", "o", "a", "wasm", "lock",
603    ];
604
605    if crate::gitignore::should_skip_path(path, gitignore_excludes) {
606        return true;
607    }
608
609    if let Some(ext) = path.extension() {
610        let ext = ext.to_string_lossy().to_lowercase();
611        if SKIP_EXT.iter().any(|e| ext == *e) {
612            return true;
613        }
614    }
615
616    let path_str = path.to_string_lossy();
617    if path_str.contains(".env.example") || path_str.contains(".env.sample") {
618        return true;
619    }
620
621    false
622}
623
/// Heuristic binary check: true when a NUL byte occurs within the first
/// 512 bytes of `content`.
fn is_binary(content: &[u8]) -> bool {
    content.iter().take(512).any(|&byte| byte == 0)
}
628
629// ── Allowlist ─────────────────────────────────────────────────────────────────
630
/// Reads allowlist patterns from `path`, one per line.
///
/// Lines are trimmed; blank lines and lines starting with `#` are ignored.
/// A missing or unreadable file yields an empty list.
fn load_allowlist(path: &Path) -> Vec<String> {
    let Ok(raw) = fs::read_to_string(path) else {
        return Vec::new();
    };

    let mut entries = Vec::new();
    for line in raw.lines() {
        let entry = line.trim();
        if entry.is_empty() || entry.starts_with('#') {
            continue;
        }
        entries.push(entry.to_string());
    }
    entries
}
642
643fn is_allowlisted(finding: &Finding, allowlist: &[String]) -> bool {
644    allowlist
645        .iter()
646        .any(|pattern| finding.content.contains(pattern) || finding.file.contains(pattern))
647}
648
649// ── Line Scanning ─────────────────────────────────────────────────────────────
650
651/// Scan a single line against all rules. Appends any findings to `out`.
652fn scan_line(
653    line: &str,
654    line_num: usize,
655    rel_path: &str,
656    rules: &[Rule],
657    prefilter: &AhoCorasick,
658    allowlist: &[String],
659    out: &mut Vec<Finding>,
660) {
661    let trimmed = line.trim();
662
663    // Skip comment lines that contain obvious example markers
664    let is_comment = trimmed.starts_with("//")
665        || trimmed.starts_with('#')
666        || trimmed.starts_with("<!--")
667        || trimmed.starts_with('*')
668        || trimmed.starts_with("/*");
669    if is_comment
670        && (trimmed.contains("example")
671            || trimmed.contains("EXAMPLE")
672            || trimmed.contains("xxx")
673            || trimmed.contains("your-")
674            || trimmed.contains("placeholder"))
675    {
676        return;
677    }
678
679    // Fast path: skip lines with no known literal prefix AND no assignment context
680    let has_known_prefix = prefilter.is_match(line);
681    let has_assignment = line.contains('=') || line.contains(':');
682    if !has_known_prefix && !has_assignment {
683        return;
684    }
685
686    for rule in rules {
687        let Some(mat) = rule.pattern.find(line) else {
688            continue;
689        };
690        let matched = mat.as_str();
691
692        // Entropy gate
693        if let Some(charset) = rule.entropy_gate {
694            if !passes_entropy_gate(matched, charset) {
695                continue;
696            }
697        }
698
699        // Hex-specific non-secret exclusions
700        if rule.name.contains("Hex") && is_known_non_secret_hex(line) {
701            continue;
702        }
703
704        // Variable reference / placeholder exclusions for assignment rules
705        if (rule.name.contains("Assignment") || rule.name.contains("Generic"))
706            && (is_variable_reference(line) || has_test_marker(line))
707        {
708            continue;
709        }
710
711        let finding = Finding {
712            file: rel_path.to_string(),
713            line: line_num + 1,
714            rule: rule.name.to_string(),
715            content: redact_line(line),
716        };
717
718        if !is_allowlisted(&finding, allowlist) {
719            out.push(finding);
720        }
721    }
722}
723
724// ── Entry Point ───────────────────────────────────────────────────────────────
725
726/// Run the secrets scan.
727pub async fn run(args: SecretsArgs) -> Result<()> {
728    let root = if args.root == std::path::Path::new(".") {
729        crate::utils::find_project_root()
730    } else {
731        args.root
732    };
733
734    let rules = build_rules();
735    let prefilter = build_prefilter();
736    let allowlist_path = args
737        .allowlist
738        .unwrap_or_else(|| root.join(".secretsignore"));
739    let allowlist = load_allowlist(&allowlist_path);
740    let gitignore_excludes = crate::gitignore::parse_gitignore(&root);
741
742    if args.verbose {
743        println!("🔍 Scanning for secrets in: {}", root.display());
744        if !allowlist.is_empty() {
745            println!("📋 Loaded {} allowlist entries", allowlist.len());
746        }
747    }
748
749    // ── History scanning ──────────────────────────────────────────────────────
750    if args.history {
751        println!("📜 Scanning git history{}...", {
752            args.since
753                .as_deref()
754                .map(|s| format!(" since {s}"))
755                .unwrap_or_default()
756        });
757
758        let diffs = get_history_diffs(&root, args.since.as_deref());
759        println!("   {} commits to check", diffs.len());
760
761        let all_findings: Mutex<Vec<(String, Finding)>> = Mutex::new(Vec::new());
762
763        diffs.par_iter().for_each(|(hash, lines)| {
764            let mut local: Vec<(String, Finding)> = Vec::new();
765            for (i, line) in lines.iter().enumerate() {
766                let mut findings: Vec<Finding> = Vec::new();
767                scan_line(
768                    line,
769                    i,
770                    &format!("commit:{}", &hash[..8]),
771                    &rules,
772                    &prefilter,
773                    &allowlist,
774                    &mut findings,
775                );
776                for f in findings {
777                    local.push((hash.clone(), f));
778                }
779            }
780            if !local.is_empty() {
781                all_findings.lock().unwrap().extend(local);
782            }
783        });
784
785        let history_findings = all_findings.into_inner().unwrap();
786        if history_findings.is_empty() {
787            println!("✅ No secrets found in git history.");
788        } else {
789            println!("\n🚨 History findings:");
790            for (hash, f) in &history_findings {
791                println!("   {} L{}: [{}]", &hash[..8], f.line, f.rule);
792                if args.verbose {
793                    println!("      {}", f.content);
794                }
795            }
796            println!(
797                "\n⚠️  {} secret(s) found in git history. Rotate exposed credentials and consider a history rewrite.",
798                history_findings.len()
799            );
800        }
801        println!();
802    }
803
804    // ── Current tree scanning ─────────────────────────────────────────────────
805    let files: Vec<PathBuf> = if args.staged {
806        get_git_files(&root, true)
807    } else if args.git_only {
808        get_git_files(&root, false)
809    } else {
810        WalkDir::new(&root)
811            .into_iter()
812            .filter_map(std::result::Result::ok)
813            .filter(|e| e.file_type().is_file())
814            .map(walkdir::DirEntry::into_path)
815            .collect()
816    };
817
818    let files: Vec<PathBuf> = files
819        .into_iter()
820        .filter(|p| !should_skip(p, &gitignore_excludes))
821        .collect();
822
823    if args.verbose {
824        println!("📂 Scanning {} files...", files.len());
825    }
826
827    let all_findings: Mutex<Vec<Finding>> = Mutex::new(Vec::new());
828
829    files.par_iter().for_each(|file_path| {
830        let content_bytes = match fs::read(file_path) {
831            Ok(c) => c,
832            Err(_) => return,
833        };
834
835        if is_binary(&content_bytes) {
836            return;
837        }
838
839        let content = match std::str::from_utf8(&content_bytes) {
840            Ok(s) => s,
841            Err(_) => return,
842        };
843
844        let rel_path = file_path
845            .strip_prefix(&root)
846            .unwrap_or(file_path)
847            .to_string_lossy()
848            .to_string();
849
850        let mut local: Vec<Finding> = Vec::new();
851        for (line_num, line) in content.lines().enumerate() {
852            scan_line(
853                line, line_num, &rel_path, &rules, &prefilter, &allowlist, &mut local,
854            );
855        }
856
857        if !local.is_empty() {
858            all_findings.lock().unwrap().extend(local);
859        }
860    });
861
862    let mut all_findings = all_findings.into_inner().unwrap();
863
864    if all_findings.is_empty() {
865        println!("✅ No secrets detected");
866        return Ok(());
867    }
868
869    // Group by file, sorted
870    all_findings.sort_by(|a, b| a.file.cmp(&b.file).then(a.line.cmp(&b.line)));
871
872    let mut current_file = String::new();
873    for finding in &all_findings {
874        if finding.file != current_file {
875            current_file = finding.file.clone();
876            println!("❌ {current_file}");
877        }
878        print!("   L{}: [{}]", finding.line, finding.rule);
879        if args.verbose {
880            print!("  {}", finding.content);
881        }
882        println!();
883    }
884
885    let file_count = {
886        let mut seen: Vec<&str> = all_findings.iter().map(|f| f.file.as_str()).collect();
887        seen.dedup();
888        seen.len()
889    };
890
891    println!(
892        "\n🚨 Found {} potential secret(s) across {} file(s)",
893        all_findings.len(),
894        file_count
895    );
896    println!("   Rotate any exposed credentials immediately.");
897    println!("   Add false positives to .secretsignore");
898
899    std::process::exit(1);
900}
901
902// ── Helpers ───────────────────────────────────────────────────────────────────
903
/// Produces a display-safe version of `line` for reports.
///
/// The line is trimmed. Lines of at most 20 bytes are returned unchanged;
/// longer lines keep a short prefix (up to 10 bytes, at most a quarter of the
/// line) and suffix (up to 6 bytes, at most a sixth of the line) around a
/// `...REDACTED...` marker.
///
/// Cut points are moved onto UTF-8 character boundaries, always revealing
/// less rather than more, so lines with multi-byte characters cannot cause a
/// slicing panic.
fn redact_line(line: &str) -> String {
    let trimmed = line.trim();
    if trimmed.len() <= 20 {
        return trimmed.to_string();
    }

    // Index 0 and `len()` are always boundaries, so both loops terminate.
    let mut prefix_end = 10.min(trimmed.len() / 4);
    while !trimmed.is_char_boundary(prefix_end) {
        prefix_end -= 1;
    }
    let mut suffix_start = trimmed.len() - 6.min(trimmed.len() / 6);
    while !trimmed.is_char_boundary(suffix_start) {
        suffix_start += 1;
    }

    format!(
        "{}...REDACTED...{}",
        &trimmed[..prefix_end],
        &trimmed[suffix_start..]
    )
}
917
918// ── Tests ─────────────────────────────────────────────────────────────────────
919
// Unit tests for the secret-scanning helpers: entropy scoring, false-positive
// filters (variable references, test markers, known hex digests), binary
// detection, allowlist loading/matching, and line redaction.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    // ── charset_entropy ───────────────────────────────────────────────────────

    #[test]
    fn entropy_single_char_is_zero() {
        // One symbol repeated carries no information at all.
        let entropy = charset_entropy("aaaaaaaaaa", EntropyCharset::Alphanumeric);
        assert_eq!(entropy, 0.0);
    }

    #[test]
    fn entropy_empty_string_is_zero() {
        let entropy = charset_entropy("", EntropyCharset::Hex);
        assert_eq!(entropy, 0.0);
    }

    #[test]
    fn entropy_no_matching_chars_is_zero() {
        // Punctuation only: nothing here belongs to the hex charset.
        let entropy = charset_entropy("!@#$%^&*()", EntropyCharset::Hex);
        assert_eq!(entropy, 0.0);
    }

    #[test]
    fn entropy_uniform_hex_is_high() {
        // Each of the 16 hex symbols appears exactly once → entropy near 4.0.
        let e = charset_entropy("0123456789abcdef", EntropyCharset::Hex);
        assert!(e > 3.9, "Expected entropy > 3.9, got {e}");
    }

    #[test]
    fn entropy_uniform_base64_is_high() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let e = charset_entropy(alphabet, EntropyCharset::Base64);
        assert!(e > 5.0, "Expected entropy > 5.0, got {e}");
    }

    #[test]
    fn entropy_hex_ignores_non_hex() {
        // The trailing 'xyz' is outside the hex charset, so only 'aabb' counts.
        let mixed = charset_entropy("aabbxyz", EntropyCharset::Hex);
        let pure = charset_entropy("aabb", EntropyCharset::Hex);
        let delta = (mixed - pure).abs();
        assert!(delta < f64::EPSILON);
    }

    // ── passes_entropy_gate ───────────────────────────────────────────────────

    #[test]
    fn entropy_gate_too_short_fails() {
        // Only 10 chars, well under the hex minimum length of 40.
        let passed = passes_entropy_gate("0123456789", EntropyCharset::Hex);
        assert!(!passed);
    }

    #[test]
    fn entropy_gate_low_entropy_fails() {
        // Long enough, but a single repeated char has zero entropy.
        let flat = "a".repeat(50);
        let passed = passes_entropy_gate(&flat, EntropyCharset::Hex);
        assert!(!passed);
    }

    #[test]
    fn entropy_gate_high_entropy_passes() {
        // Three copies of the full hex alphabet: 48 chars, high entropy.
        let candidate = "0123456789abcdef".repeat(3);
        let passed = passes_entropy_gate(&candidate, EntropyCharset::Hex);
        assert!(passed);
    }

    // ── is_variable_reference ────────────────────────────────────────────────

    #[test]
    fn variable_ref_shell_expansion() {
        let line = "API_KEY=${SECRET_VALUE}";
        assert!(is_variable_reference(line));
    }

    #[test]
    fn variable_ref_command_substitution() {
        let line = "TOKEN=$(vault read secret/key)";
        assert!(is_variable_reference(line));
    }

    #[test]
    fn variable_ref_process_env() {
        let line = "key = process.env.API_KEY";
        assert!(is_variable_reference(line));
    }

    #[test]
    fn variable_ref_null_values() {
        for line in ["secret = null", "secret = undefined", "secret = None"] {
            assert!(is_variable_reference(line));
        }
    }

    #[test]
    fn variable_ref_boolean_values() {
        for line in ["debug = true", "debug = false"] {
            assert!(is_variable_reference(line));
        }
    }

    #[test]
    fn variable_ref_empty_value() {
        for line in ["key = ", "key = \"\""] {
            assert!(is_variable_reference(line));
        }
    }

    #[test]
    fn variable_ref_env_var_name() {
        // An ALL_CAPS identifier names an environment variable; it is not a value.
        let line = "key = MY_SECRET_KEY";
        assert!(is_variable_reference(line));
    }

    #[test]
    fn variable_ref_ruby_interpolation() {
        let line = "secret = #{ENV['KEY']}";
        assert!(is_variable_reference(line));
    }

    #[test]
    fn variable_ref_vault_function() {
        let line = "token: vault(secret/data/key)";
        assert!(is_variable_reference(line));
    }

    #[test]
    fn variable_ref_real_secret_is_not_ref() {
        let line = "api_key = sk_live_<YOUR-STRIPE-KEY>";
        assert!(!is_variable_reference(line));
    }

    // ── has_test_marker ──────────────────────────────────────────────────────

    #[test]
    fn test_marker_example() {
        let line = "api_key = 'example_key_12345'";
        assert!(has_test_marker(line));
    }

    #[test]
    fn test_marker_placeholder() {
        let line = "token: placeholder-token";
        assert!(has_test_marker(line));
    }

    #[test]
    fn test_marker_changeme() {
        let line = "password = changeme";
        assert!(has_test_marker(line));
    }

    #[test]
    fn test_marker_dummy() {
        let line = "SECRET=dummy_secret_value";
        assert!(has_test_marker(line));
    }

    #[test]
    fn test_marker_real_secret_no_match() {
        let line = "sk_live_<YOUR-STRIPE-KEY>";
        assert!(!has_test_marker(line));
    }

    #[test]
    fn test_marker_case_insensitive() {
        let line = "API_KEY=EXAMPLE_VALUE";
        assert!(has_test_marker(line));
    }

    // ── is_known_non_secret_hex ──────────────────────────────────────────────

    #[test]
    fn non_secret_git_commit() {
        let line = "commit abc123def456789012345678901234567890abcd";
        assert!(is_known_non_secret_hex(line));
    }

    #[test]
    fn non_secret_sha256() {
        let line = "sha256-abc123def456";
        assert!(is_known_non_secret_hex(line));
    }

    #[test]
    fn non_secret_integrity() {
        let line = "integrity: sha384-abc123";
        assert!(is_known_non_secret_hex(line));
    }

    #[test]
    fn non_secret_checksum() {
        let line = "checksum = abc123def456";
        assert!(is_known_non_secret_hex(line));
    }

    #[test]
    fn non_secret_plain_hex_is_secret() {
        // Bare hex with no surrounding context must not be excused.
        let line = "abc123def456789012345678901234567890abcd";
        assert!(!is_known_non_secret_hex(line));
    }

    // ── is_binary ────────────────────────────────────────────────────────────

    #[test]
    fn binary_null_byte() {
        let looks_binary = is_binary(b"\x00ELF binary content");
        assert!(looks_binary);
    }

    #[test]
    fn binary_text_is_not_binary() {
        let looks_binary = is_binary(b"fn main() { println!(\"hello\"); }");
        assert!(!looks_binary);
    }

    // ── charset_entropy (tolerance-based checks) ─────────────────────────────

    #[test]
    fn test_charset_entropy() {
        // Compare floats within a small tolerance to satisfy clippy.
        let epsilon = 1e-10;
        let close = |got: f64, want: f64| (got - want).abs() < epsilon;

        assert!(close(
            charset_entropy("aaaaaaaaaa", EntropyCharset::Alphanumeric),
            0.0
        ));
        assert!(close(
            charset_entropy("abcde", EntropyCharset::Hex),
            2.321928094887362
        ));
        assert!(close(charset_entropy("", EntropyCharset::Hex), 0.0));

        // Input made up solely of characters outside the hex charset.
        assert!(close(charset_entropy("!@#$%^&*()", EntropyCharset::Hex), 0.0));
    }

    // ── load_allowlist ───────────────────────────────────────────────────────

    #[test]
    fn test_load_allowlist() {
        let dir = tempfile::tempdir().expect("failed to create temp dir");
        let path = dir.path().join(".secretsignore");
        let mut f = std::fs::File::create(&path).expect("failed to create temp file");

        // Comments, a blank line, and a whitespace-padded entry exercise the parser.
        let file_lines = [
            "# This is a comment",
            "",
            "some-pattern",
            "  another-pattern  ",
            "# another comment",
        ];
        for entry in file_lines {
            writeln!(f, "{entry}").expect("failed to write");
        }

        let allowlist = load_allowlist(&path);
        let expected = ["some-pattern", "another-pattern"];
        assert_eq!(allowlist.len(), expected.len());
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(allowlist[idx], *want);
        }
    }

    // ── is_allowlisted ───────────────────────────────────────────────────────

    #[test]
    fn allowlisted_content_match() {
        let allowlist = vec![String::from("AKIA1234567890ABCDEF")];
        let finding = Finding {
            file: String::from("src/config.rs"),
            line: 10,
            rule: String::from("test"),
            content: String::from("api_key = AKIA1234567890ABCDEF"),
        };
        assert!(is_allowlisted(&finding, &allowlist));
    }

    #[test]
    fn allowlisted_file_match() {
        let allowlist = vec![String::from("tests/fixtures")];
        let finding = Finding {
            file: String::from("tests/fixtures/secrets.txt"),
            line: 1,
            rule: String::from("test"),
            content: String::from("secret here"),
        };
        assert!(is_allowlisted(&finding, &allowlist));
    }

    #[test]
    fn allowlisted_no_match() {
        let allowlist = vec![String::from("unrelated-pattern")];
        let finding = Finding {
            file: String::from("src/main.rs"),
            line: 5,
            rule: String::from("test"),
            content: String::from("ghp_abcdef1234567890abcdef1234567890abcd"),
        };
        assert!(!is_allowlisted(&finding, &allowlist));
    }

    // ── redact_line ──────────────────────────────────────────────────────────

    #[test]
    fn redact_short_line_unchanged() {
        let redacted = redact_line("short");
        assert_eq!(redacted, "short");
    }

    #[test]
    fn redact_long_line_is_redacted() {
        let original = "api_key = sk_live_<YOUR-STRIPE-KEY>_very_long_secret";
        let redacted = redact_line(original);
        assert!(redacted.contains("REDACTED"));
        assert!(redacted.len() < original.len());
    }
}