Skip to main content

dotenv_space/utils/
patterns.rs

//! Secret pattern detection for scanning `.env` files.
//!
//! This module contains regex patterns and an entropy calculation for detecting
//! accidentally committed secrets. Patterns are based on real-world secret formats
//! from AWS, Stripe, GitHub, OpenAI, and other major services.
6use lazy_static::lazy_static;
7use regex::Regex;
8
9/// A detected secret pattern
10#[derive(Debug, Clone)]
11pub struct SecretPattern {
12    pub name: String,
13    pub pattern: String,
14    pub confidence: Confidence,
15    pub action_url: Option<String>,
16}
17
/// How strongly a finding is believed to be a real secret.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Confidence {
    /// Strongest level.
    High,
    /// Intermediate level.
    Medium,
    /// Weakest level.
    Low,
}

/// Renders the level as its lowercase name: `high`, `medium` or `low`.
impl std::fmt::Display for Confidence {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match self {
            Self::High => "high",
            Self::Medium => "medium",
            Self::Low => "low",
        };
        f.write_str(label)
    }
}
35
36lazy_static! {
37    /// AWS Access Key ID pattern
38    pub static ref AWS_ACCESS_KEY: Regex = Regex::new(r"AKIA[0-9A-Z]{16}").unwrap();
39
40    /// AWS Secret Access Key pattern (40 chars base64-like)
41    pub static ref AWS_SECRET_KEY: Regex = Regex::new(r"[0-9a-zA-Z/+=]{40}").unwrap();
42
43    /// Stripe Secret Key (live)
44    pub static ref STRIPE_SECRET_LIVE: Regex = Regex::new(r"sk_live_[0-9a-zA-Z]{24,}").unwrap();
45
46    /// Stripe Secret Key (test)
47    pub static ref STRIPE_SECRET_TEST: Regex = Regex::new(r"sk_test_[0-9a-zA-Z]{24,}").unwrap();
48
49    /// GitHub Personal Access Token
50    pub static ref GITHUB_PAT: Regex = Regex::new(r"\bghp_[A-Za-z0-9]{36,40}\b").unwrap();
51
52    /// GitHub OAuth Token
53    pub static ref GITHUB_OAUTH: Regex = Regex::new(r"\bgho_[A-Za-z0-9]{36,40}\b").unwrap();
54
55    /// GitHub App Token
56    pub static ref GITHUB_APP: Regex = Regex::new(r"\b(ghu|ghs)_[A-Za-z0-9]{36,40}\b").unwrap();
57
58    /// OpenAI API Key
59    pub static ref OPENAI_API_KEY: Regex = Regex::new(r"sk-[0-9a-zA-Z]{48}").unwrap();
60
61    /// Anthropic API Key
62    pub static ref ANTHROPIC_API_KEY: Regex = Regex::new(r"sk-ant-api[0-9]{2}-[0-9a-zA-Z\-_]{95}").unwrap();
63
64    /// Generic API key pattern (high entropy)
65    pub static ref GENERIC_API_KEY: Regex = Regex::new(r#"api[_-]?key['\"]?\s*[:=]\s*['\"]?([0-9a-zA-Z_\-]{32,})['\"]?"#).unwrap();
66
67    /// Private Key Header
68    pub static ref PRIVATE_KEY: Regex = Regex::new(r"-----BEGIN [A-Z ]+ PRIVATE KEY-----").unwrap();
69}
70
71/// Get all secret patterns
72pub fn get_patterns() -> Vec<SecretPattern> {
73    vec![
74        SecretPattern {
75            name: "AWS Access Key".to_string(),
76            pattern: r"AKIA[0-9A-Z]{16}".to_string(),
77            confidence: Confidence::High,
78            action_url: Some("https://console.aws.amazon.com/iam".to_string()),
79        },
80        SecretPattern {
81            name: "Stripe Secret Key (Live)".to_string(),
82            pattern: r"sk_live_[0-9a-zA-Z]{24,}".to_string(),
83            confidence: Confidence::High,
84            action_url: Some("https://dashboard.stripe.com/apikeys".to_string()),
85        },
86        SecretPattern {
87            name: "Stripe Secret Key (Test)".to_string(),
88            pattern: r"sk_test_[0-9a-zA-Z]{24,}".to_string(),
89            confidence: Confidence::Medium,
90            action_url: Some("https://dashboard.stripe.com/apikeys".to_string()),
91        },
92        SecretPattern {
93            name: "GitHub Personal Access Token".to_string(),
94            pattern: r"ghp_[0-9a-zA-Z]{36}".to_string(),
95            confidence: Confidence::High,
96            action_url: Some("https://github.com/settings/tokens".to_string()),
97        },
98        SecretPattern {
99            name: "GitHub OAuth Token".to_string(),
100            pattern: r"gho_[0-9a-zA-Z]{36}".to_string(),
101            confidence: Confidence::High,
102            action_url: Some("https://github.com/settings/tokens".to_string()),
103        },
104        SecretPattern {
105            name: "OpenAI API Key".to_string(),
106            pattern: r"sk-[0-9a-zA-Z]{48}".to_string(),
107            confidence: Confidence::High,
108            action_url: Some("https://platform.openai.com/api-keys".to_string()),
109        },
110        SecretPattern {
111            name: "Anthropic API Key".to_string(),
112            pattern: r"sk-ant-api[0-9]{2}-[0-9a-zA-Z\-_]{95}".to_string(),
113            confidence: Confidence::High,
114            action_url: Some("https://console.anthropic.com/settings/keys".to_string()),
115        },
116        SecretPattern {
117            name: "Private Key".to_string(),
118            pattern: r"-----BEGIN [A-Z ]+ PRIVATE KEY-----".to_string(),
119            confidence: Confidence::High,
120            action_url: None,
121        },
122    ]
123}
124
/// Calculate the Shannon entropy of a string, in bits per character
/// (for detecting high-entropy secrets).
///
/// Frequencies are computed over Unicode scalar values (`char`s), and the
/// total used for the probabilities is the number of characters, not the
/// number of UTF-8 bytes, so multi-byte text is measured correctly.
///
/// The result is `0.0` for an empty string or a single repeated character and
/// rises to `log2(k)` for `k` distinct, equally frequent characters (e.g. `6.0`
/// for an even mix of 64 symbols). Rule of thumb:
/// - < 3.0: Low entropy (probably not a secret)
/// - 3.0-4.5: Medium entropy (could be a secret)
/// - > 4.5: High entropy (likely a secret)
pub fn calculate_entropy(s: &str) -> f64 {
    let mut char_counts: std::collections::HashMap<char, usize> =
        std::collections::HashMap::new();
    let mut total_chars: usize = 0;
    for c in s.chars() {
        *char_counts.entry(c).or_insert(0) += 1;
        total_chars += 1;
    }

    if total_chars == 0 {
        return 0.0;
    }

    // Counts are bounded by the string length, so the conversion to f64 is
    // exact for any realistic input.
    let len = total_chars as f64;

    // Accumulate by subtraction from +0.0 so a zero result is +0.0, never -0.0.
    char_counts.values().fold(0.0_f64, |entropy, &count| {
        let probability = count as f64 / len;
        entropy - probability * probability.log2()
    })
}
151
/// Report whether `value` is a stand-in rather than a real secret.
///
/// The value is trimmed and lower-cased before comparison. It counts as a
/// placeholder when it either equals one of the known example/fake values or
/// contains one of the structured placeholder markers anywhere inside it.
pub fn is_placeholder(value: &str) -> bool {
    // Values that are placeholders only when they make up the whole string.
    const WHOLE_VALUE_MARKERS: [&str; 10] = [
        "akiaiosfodnn7example",
        "wjalrxutnfemi/k7mdeng/bpxrficyexamplekey",
        "your_key_here",
        "your_secret_here",
        "your_token_here",
        "change_me",
        "changeme",
        "replace_me",
        "xxx",
        "todo",
    ];

    // Markers distinctive enough to be trusted anywhere within the value.
    const EMBEDDED_MARKERS: [&str; 7] = [
        "change_me",
        "changeme",
        "your_key_here",
        "your_secret_here",
        "your_token_here",
        "replace_me",
        "generate-with",
    ];

    let normalized = value.trim().to_lowercase();

    WHOLE_VALUE_MARKERS
        .iter()
        .any(|&marker| marker == normalized.as_str())
        || EMBEDDED_MARKERS
            .iter()
            .any(|&marker| normalized.contains(marker))
}
192
193/// Detect if a value matches any secret pattern
194///
195/// Returns (pattern_name, confidence) if a match is found
196pub fn detect_secret(value: &str, key: &str) -> Option<(String, Confidence, Option<String>)> {
197    // Skip obvious placeholders
198    if is_placeholder(value) {
199        return None;
200    }
201
202    // Check specific patterns
203    if AWS_ACCESS_KEY.is_match(value) {
204        return Some((
205            "AWS Access Key".to_string(),
206            Confidence::High,
207            Some("https://console.aws.amazon.com/iam".to_string()),
208        ));
209    }
210
211    // AWS Secret Key is tricky - high false positive rate
212    // Only flag if key name suggests it's AWS-related
213    if AWS_SECRET_KEY.is_match(value)
214        && (key.to_uppercase().contains("AWS") || key.to_uppercase().contains("SECRET"))
215    {
216        let entropy = calculate_entropy(value);
217        if entropy > 4.5 {
218            return Some((
219                "AWS Secret Access Key".to_string(),
220                Confidence::Medium,
221                Some("https://console.aws.amazon.com/iam".to_string()),
222            ));
223        }
224    }
225
226    if STRIPE_SECRET_LIVE.is_match(value) {
227        return Some((
228            "Stripe Secret Key (LIVE)".to_string(),
229            Confidence::High,
230            Some("https://dashboard.stripe.com/apikeys".to_string()),
231        ));
232    }
233
234    if STRIPE_SECRET_TEST.is_match(value) {
235        return Some((
236            "Stripe Secret Key (test)".to_string(),
237            Confidence::Medium,
238            Some("https://dashboard.stripe.com/apikeys".to_string()),
239        ));
240    }
241
242    if GITHUB_PAT.is_match(value) || GITHUB_OAUTH.is_match(value) || GITHUB_APP.is_match(value) {
243        return Some((
244            "GitHub Token".to_string(),
245            Confidence::High,
246            Some("https://github.com/settings/tokens".to_string()),
247        ));
248    }
249
250    if OPENAI_API_KEY.is_match(value) {
251        return Some((
252            "OpenAI API Key".to_string(),
253            Confidence::High,
254            Some("https://platform.openai.com/api-keys".to_string()),
255        ));
256    }
257
258    if ANTHROPIC_API_KEY.is_match(value) {
259        return Some((
260            "Anthropic API Key".to_string(),
261            Confidence::High,
262            Some("https://console.anthropic.com/settings/keys".to_string()),
263        ));
264    }
265
266    if PRIVATE_KEY.is_match(value) {
267        return Some(("Private Key".to_string(), Confidence::High, None));
268    }
269
270    // Generic high-entropy check as fallback
271    if value.len() >= 32 {
272        let entropy = calculate_entropy(value);
273        if entropy > 4.8 {
274            return Some((
275                "High-entropy string (possible secret)".to_string(),
276                Confidence::Low,
277                None,
278            ));
279        }
280    }
281
282    None
283}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// The access-key pattern accepts an `AKIA…` id and rejects unrelated text.
    #[test]
    fn test_aws_access_key() {
        let cases = [("AKIAIOSFODNN7EXAMPLE", true), ("not-an-aws-key", false)];
        for &(input, expected) in cases.iter() {
            assert_eq!(AWS_ACCESS_KEY.is_match(input), expected, "input: {}", input);
        }
    }

    /// Live and test Stripe keys match their own pattern; a short test key
    /// does not match the live pattern.
    #[test]
    fn test_stripe_keys() {
        let live_key = "sk_live_51Habcdefghijklmnopqrstuvwxyz123456";
        let test_key = "sk_test_51Habcdefghijklmnopqrstuvwxyz123456";

        assert!(STRIPE_SECRET_LIVE.is_match(live_key));
        assert!(STRIPE_SECRET_TEST.is_match(test_key));
        assert_eq!(STRIPE_SECRET_LIVE.is_match("sk_test_something"), false);
    }

    /// Each GitHub token prefix is recognised by its pattern; short or
    /// unrelated strings are rejected.
    #[test]
    fn test_github_tokens() {
        let pat = "ghp_1234567890abcdefghijklmnopqrstuvwxyzABCD";
        let oauth = "gho_1234567890abcdefghijklmnopqrstuvwxyzABCD";
        let app = "ghu_1234567890abcdefghijklmnopqrstuvwxyzABCD";

        assert!(GITHUB_PAT.is_match(pat));
        assert!(GITHUB_OAUTH.is_match(oauth));
        assert!(GITHUB_APP.is_match(app));

        for rejected in ["ghp_short", "not_a_token"].iter() {
            assert!(!GITHUB_PAT.is_match(rejected), "input: {}", rejected);
        }
    }

    /// Entropy is low for repeated characters and higher for varied ones.
    #[test]
    fn test_entropy() {
        // Low entropy
        let repeated = calculate_entropy("aaaaaaa");
        assert!(repeated < 1.0);

        // High entropy
        let mixed = calculate_entropy("aB3$xY9!zQ2#mK7");
        assert!(mixed > 3.5);

        // Random base64-like
        let base64_like = calculate_entropy("a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0");
        assert!(base64_like > 3.0);
    }

    /// Known placeholder values are recognised; realistic values are not.
    #[test]
    fn test_is_placeholder() {
        let placeholders = ["YOUR_KEY_HERE", "sk_test_CHANGE_ME", "AKIAIOSFODNN7EXAMPLE"];
        for value in placeholders.iter() {
            assert!(is_placeholder(value), "expected placeholder: {}", value);
        }

        let real_values = [
            "sk_live_51HrealkeystuffABC123",
            "postgresql://localhost:5432/db",
        ];
        for value in real_values.iter() {
            assert!(!is_placeholder(value), "expected non-placeholder: {}", value);
        }
    }

    /// End-to-end detection: placeholders and ordinary values yield nothing,
    /// real-looking keys are labelled with the expected name and confidence.
    #[test]
    fn test_detect_secret() {
        // AWS Access Key: the documented example value is a placeholder.
        assert!(detect_secret("AKIAIOSFODNN7EXAMPLE", "AWS_ACCESS_KEY_ID").is_none());

        let (name, conf, _) = detect_secret("AKIA4OZRMFJ3VEXAMPLE", "AWS_ACCESS_KEY_ID")
            .expect("AWS access key should be detected");
        assert_eq!(name, "AWS Access Key");
        assert_eq!(conf, Confidence::High);

        // Stripe Live Key
        let (name, conf, _) = detect_secret("sk_live_51H1234567890abcdefghijk", "STRIPE_SECRET_KEY")
            .expect("Stripe live key should be detected");
        assert_eq!(name, "Stripe Secret Key (LIVE)");
        assert_eq!(conf, Confidence::High);

        // Not a secret
        assert!(detect_secret("localhost", "DATABASE_HOST").is_none());
    }
}