Skip to main content

lean_ctx/core/
secret_detection.rs

1use std::sync::OnceLock;
2
3use regex::Regex;
4
5use crate::core::config::{Config, SecretDetectionConfig};
6
/// Expands to a `&'static Regex` compiled from `$pattern`.
///
/// Each expansion declares its own `OnceLock`, so the pattern is compiled on first use
/// and the same compiled instance is handed out afterwards.
///
/// `$pattern` must be a string literal because it is also spliced into the panic message
/// through `concat!`. An invalid pattern panics on first use.
macro_rules! static_regex {
    ($pattern:expr) => {{
        static COMPILED: OnceLock<Regex> = OnceLock::new();
        COMPILED.get_or_init(|| {
            let outcome = Regex::new($pattern);
            outcome.expect(concat!("invalid regex: ", $pattern))
        })
    }};
}
13
/// One detected secret, described without exposing the full matched text.
#[derive(Clone, Debug)]
pub struct SecretMatch {
    /// Name of the pattern that produced this match (e.g. `"aws_key"` or `"custom_pattern"`).
    pub pattern_name: &'static str,
    /// 1-based line on which the match starts.
    pub line_number: usize,
    /// Shortened form of the matched text that is safe to display.
    pub redacted_preview: String,
}
20
21fn aws_key_re() -> &'static Regex {
22    static_regex!(r"AKIA[0-9A-Z]{16}")
23}
24
25fn private_key_re() -> &'static Regex {
26    static_regex!(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----")
27}
28
29fn github_token_re() -> &'static Regex {
30    static_regex!(r"gh[ps]_[A-Za-z0-9_]{36,}")
31}
32
33fn anthropic_key_re() -> &'static Regex {
34    static_regex!(r"sk-ant-[A-Za-z0-9_\-]{20,}")
35}
36
37fn openai_key_re() -> &'static Regex {
38    static_regex!(r"sk-[A-Za-z0-9]{20,}")
39}
40
41fn generic_api_key_re() -> &'static Regex {
42    static_regex!(
43        r#"(?i)(?:api[_-]?key|secret[_-]?key|token|password|passwd|access[_-]?token|client[_-]?secret)\s*[=:]\s*['"]?[a-zA-Z0-9_\-]{20,}"#
44    )
45}
46
47fn high_entropy_b64_re() -> &'static Regex {
48    static_regex!(
49        r#"(?i)(?:key|token|secret|password|credential|auth)\s*[=:]\s*['"]?[A-Za-z0-9+/=\-_]{40,}"#
50    )
51}
52
53fn gitlab_pat_re() -> &'static Regex {
54    static_regex!(r"glpat-[A-Za-z0-9_\-]{20,}")
55}
56
57fn jwt_re() -> &'static Regex {
58    static_regex!(r"eyJ[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}")
59}
60
61fn slack_token_re() -> &'static Regex {
62    static_regex!(r"xox[bpas]-[0-9a-zA-Z\-]{10,}")
63}
64
65fn stripe_key_re() -> &'static Regex {
66    static_regex!(r"[sr]k_live_[0-9a-zA-Z]{10,}")
67}
68
69fn db_url_re() -> &'static Regex {
70    static_regex!(r"(?:postgres|mysql|mongodb|redis)://[^\s]+:[^\s]+@")
71}
72
73fn npm_token_re() -> &'static Regex {
74    static_regex!(r"npm_[A-Za-z0-9]{10,}")
75}
76
77fn github_fine_grained_re() -> &'static Regex {
78    static_regex!(r"github_pat_[A-Za-z0-9_]{20,}")
79}
80
81const BUILTIN_PATTERNS: &[(&str, fn() -> &'static Regex)] = &[
82    ("aws_key", aws_key_re),
83    ("private_key", private_key_re),
84    ("github_token", github_token_re),
85    ("github_fine_grained", github_fine_grained_re),
86    ("anthropic_key", anthropic_key_re),
87    ("openai_key", openai_key_re),
88    ("gitlab_pat", gitlab_pat_re),
89    ("jwt", jwt_re),
90    ("slack_token", slack_token_re),
91    ("stripe_key", stripe_key_re),
92    ("db_url", db_url_re),
93    ("npm_token", npm_token_re),
94    ("generic_api_key", generic_api_key_re),
95    ("high_entropy_secret", high_entropy_b64_re),
96];
97
/// Builds a display-safe preview of a matched secret.
///
/// Inputs of six characters or fewer collapse to `"***"`. Longer inputs keep their first
/// four and last two characters with `***` in between. Lengths are counted in `char`s,
/// so multi-byte text is never split inside a code point.
fn make_redacted_preview(matched: &str) -> String {
    let total = matched.chars().count();
    if total <= 6 {
        return String::from("***");
    }

    let head = matched.chars().take(4);
    let tail = matched.chars().skip(total - 2);

    let mut preview = String::new();
    preview.extend(head);
    preview.push_str("***");
    preview.extend(tail);
    preview
}
107
108pub fn detect_secrets(content: &str) -> Vec<SecretMatch> {
109    let mut matches = Vec::new();
110
111    let line_offsets: Vec<usize> = std::iter::once(0)
112        .chain(content.match_indices('\n').map(|(i, _)| i + 1))
113        .collect();
114
115    let offset_to_line = |byte_offset: usize| -> usize {
116        match line_offsets.binary_search(&byte_offset) {
117            Ok(i) => i + 1,
118            Err(i) => i,
119        }
120    };
121
122    for &(name, regex_fn) in BUILTIN_PATTERNS {
123        let re = regex_fn();
124        for m in re.find_iter(content) {
125            matches.push(SecretMatch {
126                pattern_name: name,
127                line_number: offset_to_line(m.start()),
128                redacted_preview: make_redacted_preview(m.as_str()),
129            });
130        }
131    }
132
133    matches
134}
135
136pub fn detect_secrets_with_custom(content: &str, custom_patterns: &[String]) -> Vec<SecretMatch> {
137    let mut matches = detect_secrets(content);
138
139    for pattern_str in custom_patterns {
140        if let Ok(re) = Regex::new(pattern_str) {
141            let line_offsets: Vec<usize> = std::iter::once(0)
142                .chain(content.match_indices('\n').map(|(i, _)| i + 1))
143                .collect();
144
145            for m in re.find_iter(content) {
146                let line = match line_offsets.binary_search(&m.start()) {
147                    Ok(i) => i + 1,
148                    Err(i) => i,
149                };
150                matches.push(SecretMatch {
151                    pattern_name: "custom_pattern",
152                    line_number: line,
153                    redacted_preview: make_redacted_preview(m.as_str()),
154                });
155            }
156        }
157    }
158
159    matches
160}
161
162pub fn scan_and_redact(
163    content: &str,
164    config: &SecretDetectionConfig,
165) -> (String, Vec<SecretMatch>) {
166    if !config.enabled {
167        return (content.to_string(), Vec::new());
168    }
169
170    let matches = detect_secrets_with_custom(content, &config.custom_patterns);
171
172    if matches.is_empty() || !config.redact {
173        return (content.to_string(), matches);
174    }
175
176    let mut redacted = content.to_string();
177    for &(name, regex_fn) in BUILTIN_PATTERNS {
178        let re = regex_fn();
179        redacted = re
180            .replace_all(&redacted, |_: &regex::Captures| {
181                format!("[REDACTED:{name}]")
182            })
183            .to_string();
184    }
185
186    for pattern_str in &config.custom_patterns {
187        if let Ok(re) = Regex::new(pattern_str) {
188            redacted = re
189                .replace_all(&redacted, "[REDACTED:custom_pattern]")
190                .to_string();
191        }
192    }
193
194    (redacted, matches)
195}
196
197pub fn scan_and_redact_from_config(content: &str) -> (String, Vec<SecretMatch>) {
198    let cfg = Config::load();
199    scan_and_redact(content, &cfg.secret_detection)
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// True when scanning `input` reports at least one hit named `pattern`.
    fn has_match(input: &str, pattern: &str) -> bool {
        detect_secrets(input)
            .iter()
            .any(|m| m.pattern_name == pattern)
    }

    /// Builds a detection config with the given switches and custom patterns.
    fn config(enabled: bool, redact: bool, custom_patterns: Vec<String>) -> SecretDetectionConfig {
        SecretDetectionConfig {
            enabled,
            redact,
            custom_patterns,
        }
    }

    // --- Built-in pattern detection -------------------------------------------------

    #[test]
    fn detects_aws_key() {
        assert!(has_match("aws_key = AKIAIOSFODNN7EXAMPLE", "aws_key"));
    }

    #[test]
    fn detects_private_key_header() {
        assert!(has_match(
            "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIB...",
            "private_key"
        ));
    }

    #[test]
    fn detects_github_token() {
        assert!(has_match(
            "export GITHUB_TOKEN=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl",
            "github_token"
        ));
    }

    #[test]
    fn detects_anthropic_key() {
        assert!(has_match(
            "ANTHROPIC_API_KEY=sk-ant-api03-abcdef1234567890ABCD",
            "anthropic_key"
        ));
    }

    #[test]
    fn detects_openai_key() {
        assert!(has_match(
            "OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwx",
            "openai_key"
        ));
    }

    #[test]
    fn detects_gitlab_pat() {
        assert!(has_match("token = glpat-xxxxxxxxxxxxxxxxxxxx", "gitlab_pat"));
    }

    #[test]
    fn detects_generic_api_key() {
        // Either of the two broad assignment patterns is acceptable here.
        let input = "api_key = abcdefghijklmnopqrstuvwxyz1234567890";
        assert!(has_match(input, "generic_api_key") || has_match(input, "high_entropy_secret"));
    }

    #[test]
    fn clean_content_returns_empty() {
        let found = detect_secrets("fn main() { println!(\"hello world\"); }");
        assert!(found.is_empty());
    }

    #[test]
    fn detects_jwt() {
        assert!(has_match(
            "token = eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkw",
            "jwt"
        ));
    }

    #[test]
    fn detects_slack_token() {
        assert!(has_match(
            "SLACK_TOKEN=xoxb-1234567890-abcdefghij",
            "slack_token"
        ));
    }

    #[test]
    fn detects_stripe_key() {
        assert!(has_match(
            "stripe_key = sk_live_abcdefghij1234567890",
            "stripe_key"
        ));
    }

    #[test]
    fn detects_db_url() {
        assert!(has_match(
            "DATABASE_URL=postgres://user:password@localhost:5432/db",
            "db_url"
        ));
    }

    #[test]
    fn detects_npm_token() {
        assert!(has_match("NPM_TOKEN=npm_abcdefghij1234567890", "npm_token"));
    }

    #[test]
    fn detects_github_fine_grained() {
        assert!(has_match(
            "token = github_pat_ABCDEFGHIJKLMNOPQRSTuvwx",
            "github_fine_grained"
        ));
    }

    #[test]
    fn ec_private_key_detected() {
        assert!(has_match("-----BEGIN EC PRIVATE KEY-----", "private_key"));
    }

    #[test]
    fn openssh_private_key_detected() {
        assert!(has_match(
            "-----BEGIN OPENSSH PRIVATE KEY-----",
            "private_key"
        ));
    }

    // --- Preview formatting ----------------------------------------------------------

    #[test]
    fn redacted_preview_format() {
        let preview = make_redacted_preview("AKIAIOSFODNN7EXAMPLE");
        assert!(preview.starts_with("AKIA"));
        assert!(preview.ends_with("LE"));
        assert!(preview.contains("***"));
    }

    #[test]
    fn redacted_preview_short_string() {
        assert_eq!(make_redacted_preview("short"), "***");
    }

    // --- scan_and_redact -------------------------------------------------------------

    #[test]
    fn scan_and_redact_replaces_secrets() {
        let cfg = config(true, true, Vec::new());
        let (redacted, found) = scan_and_redact("key = AKIAIOSFODNN7EXAMPLE", &cfg);
        assert!(!found.is_empty());
        assert!(redacted.contains("[REDACTED:aws_key]"));
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
    }

    #[test]
    fn scan_without_redact_preserves_content() {
        let cfg = config(true, false, Vec::new());
        let input = "key = AKIAIOSFODNN7EXAMPLE";
        let (output, found) = scan_and_redact(input, &cfg);
        assert!(!found.is_empty());
        assert_eq!(output, input);
    }

    #[test]
    fn disabled_detection_returns_unchanged() {
        let cfg = config(false, true, Vec::new());
        let input = "key = AKIAIOSFODNN7EXAMPLE";
        let (output, found) = scan_and_redact(input, &cfg);
        assert!(found.is_empty());
        assert_eq!(output, input);
    }

    #[test]
    fn custom_pattern_detected() {
        let cfg = config(true, true, vec![r"MYCORP_[A-Z]{10,}".to_string()]);
        let (redacted, found) = scan_and_redact("value is MYCORP_ABCDEFGHIJKLMNO here", &cfg);
        assert!(found.iter().any(|m| m.pattern_name == "custom_pattern"));
        assert!(redacted.contains("[REDACTED:custom_pattern]"));
    }

    // --- Line numbering --------------------------------------------------------------

    #[test]
    fn line_numbers_are_correct() {
        let found = detect_secrets("line1\nline2\nAKIAIOSFODNN7EXAMPLE\nline4");
        assert!(!found.is_empty());
        assert_eq!(found[0].line_number, 3);
    }

    #[test]
    fn multiple_secrets_on_different_lines() {
        let found = detect_secrets("AKIAIOSFODNN7EXAMPLE\nclean\nsk-abcdefghijklmnopqrstuvwx");
        assert!(found.len() >= 2);

        let line_for = |pattern: &str| {
            found
                .iter()
                .find(|m| m.pattern_name == pattern)
                .unwrap()
                .line_number
        };
        assert_eq!(line_for("aws_key"), 1);
        assert_eq!(line_for("openai_key"), 3);
    }
}