Skip to main content

keyhog_scanner/confidence/
signals.rs

/// Confidence signals for a potential match.
///
/// Each field records one independent piece of evidence about a candidate
/// credential. The struct is plain data: it derives `Debug`, `Clone` and
/// `PartialEq` so it can be logged, duplicated and compared in tests.
/// (`Eq` is not derived because `entropy` is an `f64`.)
#[derive(Debug, Clone, PartialEq)]
pub struct ConfidenceSignals {
    /// Pattern has a distinctive literal prefix (e.g., `sk-proj-`, `ghp_`).
    pub has_literal_prefix: bool,
    /// Pattern uses a capture group with context anchoring.
    pub has_context_anchor: bool,
    /// Shannon entropy of the matched credential in **bits per byte** (range
    /// `0.0..=8.0`) - NOT normalized to `0..1`. Use
    /// `crate::entropy::normalized_entropy` for the rescaled value.
    pub entropy: f64,
    /// A secret-related keyword appears nearby.
    pub keyword_nearby: bool,
    /// File extension suggests config/env/secret file.
    pub sensitive_file: bool,
    /// Matched credential length.
    pub match_length: usize,
    /// Companion credential was found.
    pub has_companion: bool,
}
20
21/// Check if a file path suggests a sensitive file.
22/// Check if a file path suggests a sensitive file using Aho-Corasick.
23///
24/// Single AC automaton replaces O(n*m) nested loop with O(n) scan.
25pub fn is_sensitive_path(path: &str) -> bool {
26    use std::sync::OnceLock;
27
28    static AC: OnceLock<Option<aho_corasick::AhoCorasick>> = OnceLock::new();
29
30    let ac = AC.get_or_init(|| {
31        aho_corasick::AhoCorasickBuilder::new()
32            .ascii_case_insensitive(true)
33            .build([
34                // Sensitive filenames
35                ".env",
36                ".env.local",
37                ".env.production",
38                ".env.staging",
39                "credentials",
40                "secrets",
41                "apikeys",
42                "api_keys",
43                ".npmrc",
44                ".pypirc",
45                ".netrc",
46                ".pgpass",
47                "terraform.tfvars",
48                "variables.tf",
49                "docker-compose",
50                "application.yml",
51                "application.properties",
52                "config.json",
53                "config.yaml",
54                "config.toml",
55                // Sensitive extensions (matched as substrings - works because
56                // extensions are at end of path and names are distinctive)
57                ".pem",
58                ".key",
59                ".p12",
60                ".pfx",
61                ".jks",
62                ".keystore",
63                ".cer",
64                ".crt",
65                // CI/CD secret files
66                ".github/workflows",
67                "gitlab-ci.yml",
68                "Jenkinsfile",
69                "buildspec.yml",
70                // Cloud config
71                "serverless.yml",
72                "sam-template",
73                "helm/values",
74                "chart/values",
75            ])
76            .ok()
77    });
78
79    ac.as_ref().is_some_and(|ac| ac.is_match(path))
80}