// lean_ctx/core/secret_detection.rs
use std::sync::OnceLock;

use regex::Regex;

use crate::core::config::{Config, SecretDetectionConfig};

/// Expands to a `&'static Regex` compiled from `$pattern` the first time the
/// expansion site is evaluated and reused on every later evaluation.
///
/// Each expansion declares its own `static` `OnceLock`, so every call site
/// caches exactly one regex.
///
/// `$pattern` must be a string literal: it is also passed to `concat!` to
/// build the panic message, and `concat!` only accepts literals.
///
/// # Panics
///
/// Panics on first use if `$pattern` is not a valid regular expression.
macro_rules! static_regex {
    ($pattern:expr) => {{
        static RE: OnceLock<Regex> = OnceLock::new();
        RE.get_or_init(|| Regex::new($pattern).expect(concat!("invalid regex: ", $pattern)))
    }};
}

/// One occurrence of a suspected secret found in scanned content.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SecretMatch {
    /// Name of the pattern that produced this match: a built-in pattern name
    /// or `"custom_pattern"` for user-supplied patterns.
    pub pattern_name: &'static str,
    /// 1-based number of the line on which the match starts.
    pub line_number: usize,
    /// Masked rendering of the matched text, safe to show in reports.
    pub redacted_preview: String,
}

21fn aws_key_re() -> &'static Regex {
22 static_regex!(r"AKIA[0-9A-Z]{16}")
23}
24
25fn private_key_re() -> &'static Regex {
26 static_regex!(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----")
27}
28
29fn github_token_re() -> &'static Regex {
30 static_regex!(r"gh[ps]_[A-Za-z0-9_]{36,}")
31}
32
33fn anthropic_key_re() -> &'static Regex {
34 static_regex!(r"sk-ant-[A-Za-z0-9_\-]{20,}")
35}
36
37fn openai_key_re() -> &'static Regex {
38 static_regex!(r"sk-[A-Za-z0-9]{20,}")
39}
40
41fn generic_api_key_re() -> &'static Regex {
42 static_regex!(
43 r#"(?i)(?:api[_-]?key|secret[_-]?key|token|password|passwd|access[_-]?token|client[_-]?secret)\s*[=:]\s*['"]?[a-zA-Z0-9_\-]{20,}"#
44 )
45}
46
47fn high_entropy_b64_re() -> &'static Regex {
48 static_regex!(
49 r#"(?i)(?:key|token|secret|password|credential|auth)\s*[=:]\s*['"]?[A-Za-z0-9+/=\-_]{40,}"#
50 )
51}
52
53fn gitlab_pat_re() -> &'static Regex {
54 static_regex!(r"glpat-[A-Za-z0-9_\-]{20,}")
55}
56
57const BUILTIN_PATTERNS: &[(&str, fn() -> &'static Regex)] = &[
58 ("aws_key", aws_key_re),
59 ("private_key", private_key_re),
60 ("github_token", github_token_re),
61 ("anthropic_key", anthropic_key_re),
62 ("openai_key", openai_key_re),
63 ("gitlab_pat", gitlab_pat_re),
64 ("generic_api_key", generic_api_key_re),
65 ("high_entropy_secret", high_entropy_b64_re),
66];
67
/// Builds a masked preview of a matched secret.
///
/// Returns the first four and last two characters of `matched` joined by
/// `***`. Inputs of six characters or fewer are masked entirely as `***`, so
/// the preview never reproduces the whole input. Lengths are counted in
/// `char`s, not bytes, so multi-byte text is never split mid-character.
fn make_redacted_preview(matched: &str) -> String {
    const PREFIX_CHARS: usize = 4;
    const SUFFIX_CHARS: usize = 2;

    let char_count = matched.chars().count();
    if char_count <= PREFIX_CHARS + SUFFIX_CHARS {
        return "***".to_string();
    }

    let prefix: String = matched.chars().take(PREFIX_CHARS).collect();
    let suffix: String = matched.chars().skip(char_count - SUFFIX_CHARS).collect();
    format!("{prefix}***{suffix}")
}

78pub fn detect_secrets(content: &str) -> Vec<SecretMatch> {
79 let mut matches = Vec::new();
80
81 let line_offsets: Vec<usize> = std::iter::once(0)
82 .chain(content.match_indices('\n').map(|(i, _)| i + 1))
83 .collect();
84
85 let offset_to_line = |byte_offset: usize| -> usize {
86 match line_offsets.binary_search(&byte_offset) {
87 Ok(i) => i + 1,
88 Err(i) => i,
89 }
90 };
91
92 for &(name, regex_fn) in BUILTIN_PATTERNS {
93 let re = regex_fn();
94 for m in re.find_iter(content) {
95 matches.push(SecretMatch {
96 pattern_name: name,
97 line_number: offset_to_line(m.start()),
98 redacted_preview: make_redacted_preview(m.as_str()),
99 });
100 }
101 }
102
103 matches
104}
105
106pub fn detect_secrets_with_custom(content: &str, custom_patterns: &[String]) -> Vec<SecretMatch> {
107 let mut matches = detect_secrets(content);
108
109 for pattern_str in custom_patterns {
110 if let Ok(re) = Regex::new(pattern_str) {
111 let line_offsets: Vec<usize> = std::iter::once(0)
112 .chain(content.match_indices('\n').map(|(i, _)| i + 1))
113 .collect();
114
115 for m in re.find_iter(content) {
116 let line = match line_offsets.binary_search(&m.start()) {
117 Ok(i) => i + 1,
118 Err(i) => i,
119 };
120 matches.push(SecretMatch {
121 pattern_name: "custom_pattern",
122 line_number: line,
123 redacted_preview: make_redacted_preview(m.as_str()),
124 });
125 }
126 }
127 }
128
129 matches
130}
131
132pub fn scan_and_redact(
133 content: &str,
134 config: &SecretDetectionConfig,
135) -> (String, Vec<SecretMatch>) {
136 if !config.enabled {
137 return (content.to_string(), Vec::new());
138 }
139
140 let matches = detect_secrets_with_custom(content, &config.custom_patterns);
141
142 if matches.is_empty() || !config.redact {
143 return (content.to_string(), matches);
144 }
145
146 let mut redacted = content.to_string();
147 for &(name, regex_fn) in BUILTIN_PATTERNS {
148 let re = regex_fn();
149 redacted = re
150 .replace_all(&redacted, |_: ®ex::Captures| {
151 format!("[REDACTED:{name}]")
152 })
153 .to_string();
154 }
155
156 for pattern_str in &config.custom_patterns {
157 if let Ok(re) = Regex::new(pattern_str) {
158 redacted = re
159 .replace_all(&redacted, "[REDACTED:custom_pattern]")
160 .to_string();
161 }
162 }
163
164 (redacted, matches)
165}
166
167pub fn scan_and_redact_from_config(content: &str) -> (String, Vec<SecretMatch>) {
168 let cfg = Config::load();
169 scan_and_redact(content, &cfg.secret_detection)
170}
171
/// Unit tests for pattern detection, preview masking, redaction and
/// line-number reporting.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detects_aws_key() {
        let input = "aws_key = AKIAIOSFODNN7EXAMPLE";
        let matches = detect_secrets(input);
        assert!(matches.iter().any(|m| m.pattern_name == "aws_key"));
    }

    #[test]
    fn detects_private_key_header() {
        let input = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIB...";
        let matches = detect_secrets(input);
        assert!(matches.iter().any(|m| m.pattern_name == "private_key"));
    }

    #[test]
    fn detects_github_token() {
        let input = "export GITHUB_TOKEN=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl";
        let matches = detect_secrets(input);
        assert!(matches.iter().any(|m| m.pattern_name == "github_token"));
    }

    #[test]
    fn detects_anthropic_key() {
        let input = "ANTHROPIC_API_KEY=sk-ant-api03-abcdef1234567890ABCD";
        let matches = detect_secrets(input);
        assert!(matches.iter().any(|m| m.pattern_name == "anthropic_key"));
    }

    #[test]
    fn detects_openai_key() {
        let input = "OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwx";
        let matches = detect_secrets(input);
        assert!(matches.iter().any(|m| m.pattern_name == "openai_key"));
    }

    #[test]
    fn detects_gitlab_pat() {
        let input = "token = glpat-xxxxxxxxxxxxxxxxxxxx";
        let matches = detect_secrets(input);
        assert!(matches.iter().any(|m| m.pattern_name == "gitlab_pat"));
    }

    #[test]
    fn detects_generic_api_key() {
        let input = "api_key = abcdefghijklmnopqrstuvwxyz1234567890";
        let matches = detect_secrets(input);
        assert!(matches.iter().any(
            |m| m.pattern_name == "generic_api_key" || m.pattern_name == "high_entropy_secret"
        ));
    }

    #[test]
    fn clean_content_returns_empty() {
        let input = "fn main() { println!(\"hello world\"); }";
        let matches = detect_secrets(input);
        assert!(matches.is_empty());
    }

    #[test]
    fn redacted_preview_format() {
        let preview = make_redacted_preview("AKIAIOSFODNN7EXAMPLE");
        assert!(preview.starts_with("AKIA"));
        assert!(preview.ends_with("LE"));
        assert!(preview.contains("***"));
    }

    #[test]
    fn redacted_preview_short_string() {
        let preview = make_redacted_preview("short");
        assert_eq!(preview, "***");
    }

    #[test]
    fn scan_and_redact_replaces_secrets() {
        let cfg = SecretDetectionConfig {
            enabled: true,
            redact: true,
            custom_patterns: Vec::new(),
        };
        let input = "key = AKIAIOSFODNN7EXAMPLE";
        let (redacted, matches) = scan_and_redact(input, &cfg);
        assert!(!matches.is_empty());
        assert!(redacted.contains("[REDACTED:aws_key]"));
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
    }

    #[test]
    fn scan_without_redact_preserves_content() {
        let cfg = SecretDetectionConfig {
            enabled: true,
            redact: false,
            custom_patterns: Vec::new(),
        };
        let input = "key = AKIAIOSFODNN7EXAMPLE";
        let (output, matches) = scan_and_redact(input, &cfg);
        assert!(!matches.is_empty());
        assert_eq!(output, input);
    }

    #[test]
    fn disabled_detection_returns_unchanged() {
        let cfg = SecretDetectionConfig {
            enabled: false,
            redact: true,
            custom_patterns: Vec::new(),
        };
        let input = "key = AKIAIOSFODNN7EXAMPLE";
        let (output, matches) = scan_and_redact(input, &cfg);
        assert!(matches.is_empty());
        assert_eq!(output, input);
    }

    #[test]
    fn custom_pattern_detected() {
        let cfg = SecretDetectionConfig {
            enabled: true,
            redact: true,
            custom_patterns: vec![r"MYCORP_[A-Z]{10,}".to_string()],
        };
        let input = "value is MYCORP_ABCDEFGHIJKLMNO here";
        let (redacted, matches) = scan_and_redact(input, &cfg);
        assert!(matches.iter().any(|m| m.pattern_name == "custom_pattern"));
        assert!(redacted.contains("[REDACTED:custom_pattern]"));
    }

    #[test]
    fn line_numbers_are_correct() {
        let input = "line1\nline2\nAKIAIOSFODNN7EXAMPLE\nline4";
        let matches = detect_secrets(input);
        assert!(!matches.is_empty());
        assert_eq!(matches[0].line_number, 3);
    }

    #[test]
    fn multiple_secrets_on_different_lines() {
        let input = "AKIAIOSFODNN7EXAMPLE\nclean\nsk-abcdefghijklmnopqrstuvwx";
        let matches = detect_secrets(input);
        assert!(matches.len() >= 2);
        let aws = matches
            .iter()
            .find(|m| m.pattern_name == "aws_key")
            .unwrap();
        assert_eq!(aws.line_number, 1);
        let oai = matches
            .iter()
            .find(|m| m.pattern_name == "openai_key")
            .unwrap();
        assert_eq!(oai.line_number, 3);
    }

    #[test]
    fn ec_private_key_detected() {
        let input = "-----BEGIN EC PRIVATE KEY-----";
        let matches = detect_secrets(input);
        assert!(matches.iter().any(|m| m.pattern_name == "private_key"));
    }

    #[test]
    fn openssh_private_key_detected() {
        let input = "-----BEGIN OPENSSH PRIVATE KEY-----";
        let matches = detect_secrets(input);
        assert!(matches.iter().any(|m| m.pattern_name == "private_key"));
    }
}