Skip to main content

keyhog_core/
config.rs

//! Configuration for KeyHog scanning and verification.
//!
//! Provides the [`ScanConfig`] struct used to control decoding depth,
//! entropy thresholds, deduplication strategy, and performance tuning.
5
6use serde::{Deserialize, Serialize};
7use thiserror::Error;
8
9use crate::DedupScope;
10
11/// Configuration for a scan run.
12#[derive(Clone, Debug, Serialize, Deserialize)]
13pub struct ScanConfig {
14    /// Minimum confidence (0.0 to 1.0) required to report a finding.
15    pub min_confidence: f64,
16    /// Maximum recursive decoding depth (e.g. Base64(Hex(URL(secret)))).
17    pub max_decode_depth: usize,
18    /// Whether to enable Shannon entropy analysis for unknown high-entropy strings.
19    pub entropy_enabled: bool,
20    /// Whether to enable entropy analysis even in standard source code files.
21    pub entropy_in_source_files: bool,
22    /// Shannon entropy threshold (typical secrets are 4.5+).
23    pub entropy_threshold: f64,
24    /// Minimum length for entropy-based secret detection.
25    pub min_secret_len: usize,
26    /// Maximum file size to scan (bytes). Large files are skipped or sampled.
27    pub max_file_size: u64,
28    /// Deduplication strategy.
29    pub dedup: DedupScope,
30
31    /// Whether to enable ML-based probabilistic gating.
32    pub ml_enabled: bool,
33    /// Weight given to the ML score (0.0 to 1.0).
34    pub ml_weight: f64,
35    /// Whether to normalize Unicode characters before scanning.
36    pub unicode_normalization: bool,
37    /// Maximum bytes allowed from recursive decoding.
38    pub decode_size_limit: usize,
39    /// Maximum matches allowed per chunk to prevent OOM.
40    pub max_matches_per_chunk: usize,
41
42    /// List of common secret prefixes to prioritize.
43    pub known_prefixes: Vec<String>,
44    /// List of keywords that strongly indicate a secret.
45    pub secret_keywords: Vec<String>,
46    /// Keywords used in test environments.
47    pub test_keywords: Vec<String>,
48    /// Keywords for placeholders and documentation.
49    pub placeholder_keywords: Vec<String>,
50}
51
/// Hard upper bound on the recursive decoding depth a configuration may request.
///
/// Caps decoding depth to prevent infinite recursion or memory exhaustion.
pub const MAX_DECODE_DEPTH_LIMIT: usize = 16;
54
55/// Errors returned while validating a scan configuration.
56#[derive(Debug, Error)]
57pub enum ConfigError {
58    #[error("min_confidence must be between 0.0 and 1.0, found {0}")]
59    InvalidConfidence(f64),
60    #[error("max_decode_depth exceeds limit of {MAX_DECODE_DEPTH_LIMIT}, found {0}")]
61    DepthTooHigh(usize),
62}
63
64impl Default for ScanConfig {
65    fn default() -> Self {
66        Self {
67            min_confidence: 0.3,
68            max_decode_depth: 3,
69            entropy_enabled: true,
70            entropy_in_source_files: false,
71            entropy_threshold: 4.5,
72            min_secret_len: 20,
73            max_file_size: 10 * 1024 * 1024, // 10 MB
74            dedup: DedupScope::Credential,
75            ml_enabled: true,
76            ml_weight: 0.5,
77            unicode_normalization: true,
78            decode_size_limit: 64 * 1024 * 1024,
79            max_matches_per_chunk: 1000,
80            known_prefixes: vec!["AKIA".into(), "ASIA".into(), "ghp_".into(), "sk_".into()],
81            secret_keywords: vec!["password".into(), "secret".into(), "key".into()],
82            test_keywords: vec!["test".into(), "dummy".into(), "mock".into()],
83            placeholder_keywords: vec!["example".into(), "your_".into(), "placeholder".into()],
84        }
85    }
86}
87
88impl ScanConfig {
89    /// Fast configuration optimized for speed over exhaustive recall.
90    pub fn fast() -> Self {
91        Self {
92            max_decode_depth: 2,
93            entropy_enabled: false,
94            ml_enabled: false,
95            ..Default::default()
96        }
97    }
98
99    /// Thorough configuration for deep penetration into encoded layers.
100    pub fn thorough() -> Self {
101        Self {
102            max_decode_depth: 8,
103            entropy_in_source_files: true,
104            ml_enabled: true,
105            ..Default::default()
106        }
107    }
108
109    /// Maximum paranoia: deep decoding and aggressive entropy analysis.
110    pub fn paranoid() -> Self {
111        Self {
112            max_decode_depth: MAX_DECODE_DEPTH_LIMIT,
113            entropy_enabled: true,
114            entropy_in_source_files: true,
115            min_secret_len: 16,
116            ml_enabled: true,
117            ..Default::default()
118        }
119    }
120
121    /// Validate the configuration parameters.
122    pub fn validate(&self) -> Result<(), ConfigError> {
123        if !(0.0..=1.0).contains(&self.min_confidence) {
124            return Err(ConfigError::InvalidConfidence(self.min_confidence));
125        }
126        if self.max_decode_depth > MAX_DECODE_DEPTH_LIMIT {
127            return Err(ConfigError::DepthTooHigh(self.max_decode_depth));
128        }
129        Ok(())
130    }
131}
132
/// Returns the filenames that typically contain secrets (e.g. `.env`, `config.json`).
///
/// Besides environment and configuration files, the list also names
/// dependency manifests, lockfiles and deployment descriptors. Every call
/// returns a freshly allocated vector in a fixed order.
///
/// NOTE(review): entries use mixed casing (`web.Debug.config`, `App.config`
/// next to `dockerfile`, `cargo.toml`, `pipfile`). How callers compare names
/// is not visible from here, so confirm that both forms can actually match.
pub fn secret_filenames() -> Vec<String> {
    // Kept as a constant slice so no temporary `Vec<&str>` is allocated
    // just to be iterated and thrown away.
    const NAMES: &[&str] = &[
        ".env",
        ".env.local",
        ".env.production",
        ".env.development",
        ".env.test",
        "config.json",
        "config.yaml",
        "config.yml",
        "credentials.json",
        "secrets.json",
        "settings.json",
        "production.json",
        "development.json",
        "local.json",
        "appsettings.json",
        "web.config",
        "web.Debug.config",
        "web.Release.config",
        "Application.xml",
        "Settings.xml",
        "App.config",
        "pom.xml",
        "build.gradle",
        "build.gradle.kts",
        "package.json",
        "package-lock.json",
        "yarn.lock",
        "composer.json",
        "composer.lock",
        "pipfile",
        "pipfile.lock",
        "requirements.txt",
        "gemfile",
        "gemfile.lock",
        "cargo.toml",
        "cargo.lock",
        "go.mod",
        "go.sum",
        "docker-compose.yml",
        "docker-compose.yaml",
        "dockerfile",
        "kubernetes.yml",
        "kubernetes.yaml",
        "k8s.yml",
        "k8s.yaml",
        "deploy.yml",
        "deploy.yaml",
        "service.yml",
        "service.yaml",
        "configmap.yml",
        "configmap.yaml",
        "secret.yml",
        "secret.yaml",
    ];

    NAMES.iter().map(|name| (*name).to_owned()).collect()
}