Skip to main content

keyhog_scanner/confidence/
signals.rs

/// Confidence signals collected for a potential credential match.
///
/// Every field is an independent observation about the match or its
/// surroundings. The type derives `Debug`, `Clone` and `PartialEq` so values
/// can be logged, duplicated and compared in tests. `Eq` is intentionally not
/// derived because `entropy` is an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub struct ConfidenceSignals {
    /// The pattern starts with a distinctive literal prefix
    /// (e.g., `sk-proj-`, `ghp_`).
    pub has_literal_prefix: bool,
    /// The pattern uses a capture group with context anchoring.
    pub has_context_anchor: bool,
    /// Shannon entropy of the matched credential.
    pub entropy: f64,
    /// A secret-related keyword appears near the match.
    pub keyword_nearby: bool,
    /// The file name or extension suggests a config/env/secret file.
    pub sensitive_file: bool,
    /// Length of the matched credential.
    pub match_length: usize,
    /// A companion credential was found alongside this match.
    pub has_companion: bool,
}
18
19/// Check if a file path suggests a sensitive file.
20/// Check if a file path suggests a sensitive file using Aho-Corasick.
21///
22/// Single AC automaton replaces O(n*m) nested loop with O(n) scan.
23pub fn is_sensitive_path(path: &str) -> bool {
24    use std::sync::OnceLock;
25
26    static AC: OnceLock<aho_corasick::AhoCorasick> = OnceLock::new();
27
28    let ac = AC.get_or_init(|| {
29        aho_corasick::AhoCorasickBuilder::new()
30            .ascii_case_insensitive(true)
31            .build([
32                // Sensitive filenames
33                ".env", ".env.local", ".env.production", ".env.staging",
34                "credentials", "secrets", "apikeys", "api_keys",
35                ".npmrc", ".pypirc", ".netrc", ".pgpass",
36                "terraform.tfvars", "variables.tf",
37                "docker-compose",
38                "application.yml", "application.properties",
39                "config.json", "config.yaml", "config.toml",
40                // Sensitive extensions (matched as substrings — works because
41                // extensions are at end of path and names are distinctive)
42                ".pem", ".key", ".p12", ".pfx", ".jks",
43                ".keystore", ".cer", ".crt",
44                // CI/CD secret files
45                ".github/workflows", "gitlab-ci.yml",
46                "Jenkinsfile", "buildspec.yml",
47                // Cloud config
48                "serverless.yml", "sam-template",
49                "helm/values", "chart/values",
50            ])
51            .unwrap()
52    });
53
54    ac.is_match(path)
55}
56
#[cfg(test)]
mod tests {
    use super::is_sensitive_path;

    /// Asserts, in order, that every path in `paths` is classified as sensitive.
    #[track_caller]
    fn expect_sensitive(paths: &[&str]) {
        for candidate in paths {
            assert!(
                is_sensitive_path(candidate),
                "expected path to be sensitive: {candidate:?}"
            );
        }
    }

    /// Asserts, in order, that no path in `paths` is classified as sensitive.
    #[track_caller]
    fn expect_not_sensitive(paths: &[&str]) {
        for candidate in paths {
            assert!(
                !is_sensitive_path(candidate),
                "expected path to be non-sensitive: {candidate:?}"
            );
        }
    }

    // Baseline: well-known secret files are flagged, ordinary sources are not.
    #[test]
    fn sensitive_paths() {
        expect_sensitive(&[".env.production", "config/credentials.json", "server.key"]);
        expect_not_sensitive(&["src/main.rs", "README.md"]);
    }

    // Upper-case spellings must be flagged exactly like lower-case ones.
    #[test]
    fn sensitive_path_matches_case_insensitively() {
        expect_sensitive(&[
            "CONFIG/.ENV.PRODUCTION",
            "Secrets/CREDENTIALS.JSON",
            "keys/CLIENT.P12",
        ]);
    }

    // Empty input and near-miss words ("environment", "secretary") stay clean.
    #[test]
    fn sensitive_path_rejects_empty_and_non_sensitive_values() {
        expect_not_sensitive(&["", "notes/environment.txt", "docs/secretary.txt"]);
    }

    // Sensitive names embedded in longer file names are still detected.
    #[test]
    fn sensitive_path_detects_embedded_sensitive_names_with_special_characters() {
        expect_sensitive(&[
            "deploy/docker-compose.override.yml",
            "dir/my api_keys-backup.txt",
            "nested/application.properties.template",
        ]);
    }

    // A very deep directory prefix must not change the verdict either way.
    #[test]
    fn sensitive_path_handles_huge_input() {
        let deep_dir = "a/".repeat(4096);

        let flagged = format!("{deep_dir}terraform.tfvars");
        let clean = format!("{deep_dir}plain-text-file.txt");

        assert!(is_sensitive_path(&flagged));
        assert!(!is_sensitive_path(&clean));
    }
}