keyhog_scanner/confidence/signals.rs
/// Confidence signals for a potential match.
///
/// Each field records one independent piece of evidence about a candidate
/// credential. The struct only carries the observations; it does not combine
/// them into a score itself.
///
/// `Eq` is intentionally not derived because `entropy` is an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub struct ConfidenceSignals {
    /// Pattern has a distinctive literal prefix (e.g., `sk-proj-`, `ghp_`).
    pub has_literal_prefix: bool,
    /// Pattern uses a capture group with context anchoring.
    pub has_context_anchor: bool,
    /// Shannon entropy of the matched credential in **bits per byte** (range
    /// `0.0..=8.0`) - NOT normalized to `0..1`. Use
    /// `crate::entropy::normalized_entropy` for the rescaled value.
    pub entropy: f64,
    /// A secret-related keyword appears nearby.
    pub keyword_nearby: bool,
    /// File extension suggests config/env/secret file.
    pub sensitive_file: bool,
    /// Matched credential length.
    pub match_length: usize,
    /// Companion credential was found.
    pub has_companion: bool,
}
20
21/// Check if a file path suggests a sensitive file.
22/// Check if a file path suggests a sensitive file using Aho-Corasick.
23///
24/// Single AC automaton replaces O(n*m) nested loop with O(n) scan.
25pub fn is_sensitive_path(path: &str) -> bool {
26 use std::sync::OnceLock;
27
28 static AC: OnceLock<Option<aho_corasick::AhoCorasick>> = OnceLock::new();
29
30 let ac = AC.get_or_init(|| {
31 aho_corasick::AhoCorasickBuilder::new()
32 .ascii_case_insensitive(true)
33 .build([
34 // Sensitive filenames
35 ".env",
36 ".env.local",
37 ".env.production",
38 ".env.staging",
39 "credentials",
40 "secrets",
41 "apikeys",
42 "api_keys",
43 ".npmrc",
44 ".pypirc",
45 ".netrc",
46 ".pgpass",
47 "terraform.tfvars",
48 "variables.tf",
49 "docker-compose",
50 "application.yml",
51 "application.properties",
52 "config.json",
53 "config.yaml",
54 "config.toml",
55 // Sensitive extensions (matched as substrings - works because
56 // extensions are at end of path and names are distinctive)
57 ".pem",
58 ".key",
59 ".p12",
60 ".pfx",
61 ".jks",
62 ".keystore",
63 ".cer",
64 ".crt",
65 // CI/CD secret files
66 ".github/workflows",
67 "gitlab-ci.yml",
68 "Jenkinsfile",
69 "buildspec.yml",
70 // Cloud config
71 "serverless.yml",
72 "sam-template",
73 "helm/values",
74 "chart/values",
75 ])
76 .ok()
77 });
78
79 ac.as_ref().is_some_and(|ac| ac.is_match(path))
80}