Skip to main content

lean_ctx/core/
secret_detection.rs

1use std::sync::OnceLock;
2
3use regex::Regex;
4
5use crate::core::config::{Config, SecretDetectionConfig};
6
/// Expands to a `&'static Regex` compiled from `$pat` the first time it is used.
///
/// Each expansion site gets its own `OnceLock`, so a pattern is compiled at most
/// once per call site and shared afterwards. `$pat` must be a string literal
/// because it is also spliced into the panic message with `concat!`. An invalid
/// pattern panics on first use with `invalid regex: <pattern>`.
macro_rules! static_regex {
    ($pat:expr) => {{
        static COMPILED: OnceLock<Regex> = OnceLock::new();
        COMPILED.get_or_init(|| {
            let built = Regex::new($pat);
            built.expect(concat!("invalid regex: ", $pat))
        })
    }};
}
13
/// One occurrence of a secret-like string found in scanned content.
///
/// The raw matched text is never stored; only a masked preview is kept.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SecretMatch {
    /// Name of the pattern that produced the match (a built-in pattern name, or
    /// `"custom_pattern"` for user-supplied patterns).
    pub pattern_name: &'static str,
    /// 1-based number of the line on which the match starts.
    pub line_number: usize,
    /// Masked rendering of the matched text, as produced by
    /// `make_redacted_preview`.
    pub redacted_preview: String,
}
20
21fn aws_key_re() -> &'static Regex {
22    static_regex!(r"AKIA[0-9A-Z]{16}")
23}
24
25fn private_key_re() -> &'static Regex {
26    static_regex!(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----")
27}
28
29fn github_token_re() -> &'static Regex {
30    static_regex!(r"gh[ps]_[A-Za-z0-9_]{36,}")
31}
32
33fn anthropic_key_re() -> &'static Regex {
34    static_regex!(r"sk-ant-[A-Za-z0-9_\-]{20,}")
35}
36
37fn openai_key_re() -> &'static Regex {
38    static_regex!(r"sk-[A-Za-z0-9]{20,}")
39}
40
41fn generic_api_key_re() -> &'static Regex {
42    static_regex!(
43        r#"(?i)(?:api[_-]?key|secret[_-]?key|token|password|passwd|access[_-]?token|client[_-]?secret)\s*[=:]\s*['"]?[a-zA-Z0-9_\-]{20,}"#
44    )
45}
46
47fn high_entropy_b64_re() -> &'static Regex {
48    static_regex!(
49        r#"(?i)(?:key|token|secret|password|credential|auth)\s*[=:]\s*['"]?[A-Za-z0-9+/=\-_]{40,}"#
50    )
51}
52
53fn gitlab_pat_re() -> &'static Regex {
54    static_regex!(r"glpat-[A-Za-z0-9_\-]{20,}")
55}
56
57const BUILTIN_PATTERNS: &[(&str, fn() -> &'static Regex)] = &[
58    ("aws_key", aws_key_re),
59    ("private_key", private_key_re),
60    ("github_token", github_token_re),
61    ("anthropic_key", anthropic_key_re),
62    ("openai_key", openai_key_re),
63    ("gitlab_pat", gitlab_pat_re),
64    ("generic_api_key", generic_api_key_re),
65    ("high_entropy_secret", high_entropy_b64_re),
66];
67
/// Builds a masked preview of `matched` that is safe to include in reports.
///
/// Matches of at least twelve characters keep their first four and last two
/// characters around a literal `***`. Anything shorter collapses to `***`
/// alone, so that a preview never reveals more than half of the matched text
/// (a 7-character match would otherwise expose 6 of its 7 characters).
///
/// Lengths are counted in `char`s, not bytes, so multi-byte text is never split
/// inside a code point.
fn make_redacted_preview(matched: &str) -> String {
    const VISIBLE_PREFIX: usize = 4;
    const VISIBLE_SUFFIX: usize = 2;
    // Show a preview only when at least as many characters stay hidden as are
    // revealed.
    const MIN_PREVIEW_LEN: usize = 2 * (VISIBLE_PREFIX + VISIBLE_SUFFIX);

    let chars: Vec<char> = matched.chars().collect();
    if chars.len() < MIN_PREVIEW_LEN {
        return "***".to_string();
    }

    let prefix: String = chars[..VISIBLE_PREFIX].iter().collect();
    let suffix: String = chars[chars.len() - VISIBLE_SUFFIX..].iter().collect();
    format!("{prefix}***{suffix}")
}
77
78pub fn detect_secrets(content: &str) -> Vec<SecretMatch> {
79    let mut matches = Vec::new();
80
81    let line_offsets: Vec<usize> = std::iter::once(0)
82        .chain(content.match_indices('\n').map(|(i, _)| i + 1))
83        .collect();
84
85    let offset_to_line = |byte_offset: usize| -> usize {
86        match line_offsets.binary_search(&byte_offset) {
87            Ok(i) => i + 1,
88            Err(i) => i,
89        }
90    };
91
92    for &(name, regex_fn) in BUILTIN_PATTERNS {
93        let re = regex_fn();
94        for m in re.find_iter(content) {
95            matches.push(SecretMatch {
96                pattern_name: name,
97                line_number: offset_to_line(m.start()),
98                redacted_preview: make_redacted_preview(m.as_str()),
99            });
100        }
101    }
102
103    matches
104}
105
106pub fn detect_secrets_with_custom(content: &str, custom_patterns: &[String]) -> Vec<SecretMatch> {
107    let mut matches = detect_secrets(content);
108
109    for pattern_str in custom_patterns {
110        if let Ok(re) = Regex::new(pattern_str) {
111            let line_offsets: Vec<usize> = std::iter::once(0)
112                .chain(content.match_indices('\n').map(|(i, _)| i + 1))
113                .collect();
114
115            for m in re.find_iter(content) {
116                let line = match line_offsets.binary_search(&m.start()) {
117                    Ok(i) => i + 1,
118                    Err(i) => i,
119                };
120                matches.push(SecretMatch {
121                    pattern_name: "custom_pattern",
122                    line_number: line,
123                    redacted_preview: make_redacted_preview(m.as_str()),
124                });
125            }
126        }
127    }
128
129    matches
130}
131
132pub fn scan_and_redact(
133    content: &str,
134    config: &SecretDetectionConfig,
135) -> (String, Vec<SecretMatch>) {
136    if !config.enabled {
137        return (content.to_string(), Vec::new());
138    }
139
140    let matches = detect_secrets_with_custom(content, &config.custom_patterns);
141
142    if matches.is_empty() || !config.redact {
143        return (content.to_string(), matches);
144    }
145
146    let mut redacted = content.to_string();
147    for &(name, regex_fn) in BUILTIN_PATTERNS {
148        let re = regex_fn();
149        redacted = re
150            .replace_all(&redacted, |_: &regex::Captures| {
151                format!("[REDACTED:{name}]")
152            })
153            .to_string();
154    }
155
156    for pattern_str in &config.custom_patterns {
157        if let Ok(re) = Regex::new(pattern_str) {
158            redacted = re
159                .replace_all(&redacted, "[REDACTED:custom_pattern]")
160                .to_string();
161        }
162    }
163
164    (redacted, matches)
165}
166
167pub fn scan_and_redact_from_config(content: &str) -> (String, Vec<SecretMatch>) {
168    let cfg = Config::load();
169    scan_and_redact(content, &cfg.secret_detection)
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// True when at least one match in `found` came from the pattern `name`.
    fn has_pattern(found: &[SecretMatch], name: &str) -> bool {
        found.iter().any(|m| m.pattern_name == name)
    }

    /// Builds a detection config from its three settings.
    fn config_with(
        enabled: bool,
        redact: bool,
        custom_patterns: Vec<String>,
    ) -> SecretDetectionConfig {
        SecretDetectionConfig {
            enabled,
            redact,
            custom_patterns,
        }
    }

    // --- built-in pattern detection -------------------------------------

    #[test]
    fn detects_aws_key() {
        let found = detect_secrets("aws_key = AKIAIOSFODNN7EXAMPLE");
        assert!(has_pattern(&found, "aws_key"));
    }

    #[test]
    fn detects_private_key_header() {
        let found = detect_secrets("-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIB...");
        assert!(has_pattern(&found, "private_key"));
    }

    #[test]
    fn detects_github_token() {
        let found =
            detect_secrets("export GITHUB_TOKEN=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl");
        assert!(has_pattern(&found, "github_token"));
    }

    #[test]
    fn detects_anthropic_key() {
        let found = detect_secrets("ANTHROPIC_API_KEY=sk-ant-api03-abcdef1234567890ABCD");
        assert!(has_pattern(&found, "anthropic_key"));
    }

    #[test]
    fn detects_openai_key() {
        let found = detect_secrets("OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwx");
        assert!(has_pattern(&found, "openai_key"));
    }

    #[test]
    fn detects_gitlab_pat() {
        let found = detect_secrets("token = glpat-xxxxxxxxxxxxxxxxxxxx");
        assert!(has_pattern(&found, "gitlab_pat"));
    }

    #[test]
    fn detects_generic_api_key() {
        let found = detect_secrets("api_key = abcdefghijklmnopqrstuvwxyz1234567890");
        // Either catch-all is acceptable for this input.
        assert!(
            has_pattern(&found, "generic_api_key") || has_pattern(&found, "high_entropy_secret")
        );
    }

    #[test]
    fn clean_content_returns_empty() {
        let found = detect_secrets("fn main() { println!(\"hello world\"); }");
        assert!(found.is_empty());
    }

    // --- preview masking -------------------------------------------------

    #[test]
    fn redacted_preview_format() {
        let masked = make_redacted_preview("AKIAIOSFODNN7EXAMPLE");
        assert!(masked.starts_with("AKIA"));
        assert!(masked.ends_with("LE"));
        assert!(masked.contains("***"));
    }

    #[test]
    fn redacted_preview_short_string() {
        assert_eq!(make_redacted_preview("short"), "***");
    }

    // --- scan_and_redact -------------------------------------------------

    #[test]
    fn scan_and_redact_replaces_secrets() {
        let cfg = config_with(true, true, Vec::new());
        let (redacted, found) = scan_and_redact("key = AKIAIOSFODNN7EXAMPLE", &cfg);
        assert!(!found.is_empty());
        assert!(redacted.contains("[REDACTED:aws_key]"));
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
    }

    #[test]
    fn scan_without_redact_preserves_content() {
        let cfg = config_with(true, false, Vec::new());
        let input = "key = AKIAIOSFODNN7EXAMPLE";
        let (output, found) = scan_and_redact(input, &cfg);
        assert!(!found.is_empty());
        assert_eq!(output, input);
    }

    #[test]
    fn disabled_detection_returns_unchanged() {
        let cfg = config_with(false, true, Vec::new());
        let input = "key = AKIAIOSFODNN7EXAMPLE";
        let (output, found) = scan_and_redact(input, &cfg);
        assert!(found.is_empty());
        assert_eq!(output, input);
    }

    #[test]
    fn custom_pattern_detected() {
        let cfg = config_with(true, true, vec![r"MYCORP_[A-Z]{10,}".to_string()]);
        let (redacted, found) = scan_and_redact("value is MYCORP_ABCDEFGHIJKLMNO here", &cfg);
        assert!(has_pattern(&found, "custom_pattern"));
        assert!(redacted.contains("[REDACTED:custom_pattern]"));
    }

    // --- line numbers ----------------------------------------------------

    #[test]
    fn line_numbers_are_correct() {
        let found = detect_secrets("line1\nline2\nAKIAIOSFODNN7EXAMPLE\nline4");
        assert!(!found.is_empty());
        assert_eq!(found[0].line_number, 3);
    }

    #[test]
    fn multiple_secrets_on_different_lines() {
        let found = detect_secrets("AKIAIOSFODNN7EXAMPLE\nclean\nsk-abcdefghijklmnopqrstuvwx");
        assert!(found.len() >= 2);

        let aws = found.iter().find(|m| m.pattern_name == "aws_key").unwrap();
        assert_eq!(aws.line_number, 1);

        let oai = found
            .iter()
            .find(|m| m.pattern_name == "openai_key")
            .unwrap();
        assert_eq!(oai.line_number, 3);
    }

    // --- private-key header variants -------------------------------------

    #[test]
    fn ec_private_key_detected() {
        let found = detect_secrets("-----BEGIN EC PRIVATE KEY-----");
        assert!(has_pattern(&found, "private_key"));
    }

    #[test]
    fn openssh_private_key_detected() {
        let found = detect_secrets("-----BEGIN OPENSSH PRIVATE KEY-----");
        assert!(has_pattern(&found, "private_key"));
    }
}