1use std::sync::OnceLock;
2
3use regex::Regex;
4
5use crate::core::config::{Config, SecretDetectionConfig};
6
/// Expands to a `&'static Regex` compiled from `$pattern` the first time the
/// expansion site is evaluated.
///
/// Every expansion declares its own `OnceLock`, so later evaluations of the
/// same site return the stored value instead of compiling again. Panics if
/// `$pattern` is not a valid regex. `$pattern` must be a literal because the
/// panic message is assembled with `concat!`.
macro_rules! static_regex {
    ($pattern:expr) => {{
        static COMPILED: OnceLock<Regex> = OnceLock::new();
        COMPILED.get_or_init(|| {
            let parsed = Regex::new($pattern);
            parsed.expect(concat!("invalid regex: ", $pattern))
        })
    }};
}
13
/// One occurrence of a suspected secret found while scanning text.
///
/// `PartialEq`/`Eq` are derived so matches can be compared directly, for
/// example with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SecretMatch {
    /// Name of the pattern that produced the match: a built-in pattern name,
    /// or `"custom_pattern"` for user-supplied patterns.
    pub pattern_name: &'static str,
    /// 1-based number of the line on which the match starts.
    pub line_number: usize,
    /// Masked rendering of the matched text, suitable for display.
    pub redacted_preview: String,
}
20
21fn aws_key_re() -> &'static Regex {
22 static_regex!(r"AKIA[0-9A-Z]{16}")
23}
24
25fn private_key_re() -> &'static Regex {
26 static_regex!(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----")
27}
28
29fn github_token_re() -> &'static Regex {
30 static_regex!(r"gh[ps]_[A-Za-z0-9_]{36,}")
31}
32
33fn anthropic_key_re() -> &'static Regex {
34 static_regex!(r"sk-ant-[A-Za-z0-9_\-]{20,}")
35}
36
37fn openai_key_re() -> &'static Regex {
38 static_regex!(r"sk-[A-Za-z0-9]{20,}")
39}
40
41fn generic_api_key_re() -> &'static Regex {
42 static_regex!(
43 r#"(?i)(?:api[_-]?key|secret[_-]?key|token|password|passwd|access[_-]?token|client[_-]?secret)\s*[=:]\s*['"]?[a-zA-Z0-9_\-]{20,}"#
44 )
45}
46
47fn high_entropy_b64_re() -> &'static Regex {
48 static_regex!(
49 r#"(?i)(?:key|token|secret|password|credential|auth)\s*[=:]\s*['"]?[A-Za-z0-9+/=\-_]{40,}"#
50 )
51}
52
53fn gitlab_pat_re() -> &'static Regex {
54 static_regex!(r"glpat-[A-Za-z0-9_\-]{20,}")
55}
56
57fn jwt_re() -> &'static Regex {
58 static_regex!(r"eyJ[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}")
59}
60
61fn slack_token_re() -> &'static Regex {
62 static_regex!(r"xox[bpas]-[0-9a-zA-Z\-]{10,}")
63}
64
65fn stripe_key_re() -> &'static Regex {
66 static_regex!(r"[sr]k_live_[0-9a-zA-Z]{10,}")
67}
68
69fn db_url_re() -> &'static Regex {
70 static_regex!(r"(?:postgres|mysql|mongodb|redis)://[^\s]+:[^\s]+@")
71}
72
73fn npm_token_re() -> &'static Regex {
74 static_regex!(r"npm_[A-Za-z0-9]{10,}")
75}
76
77fn github_fine_grained_re() -> &'static Regex {
78 static_regex!(r"github_pat_[A-Za-z0-9_]{20,}")
79}
80
81const BUILTIN_PATTERNS: &[(&str, fn() -> &'static Regex)] = &[
82 ("aws_key", aws_key_re),
83 ("private_key", private_key_re),
84 ("github_token", github_token_re),
85 ("github_fine_grained", github_fine_grained_re),
86 ("anthropic_key", anthropic_key_re),
87 ("openai_key", openai_key_re),
88 ("gitlab_pat", gitlab_pat_re),
89 ("jwt", jwt_re),
90 ("slack_token", slack_token_re),
91 ("stripe_key", stripe_key_re),
92 ("db_url", db_url_re),
93 ("npm_token", npm_token_re),
94 ("generic_api_key", generic_api_key_re),
95 ("high_entropy_secret", high_entropy_b64_re),
96];
97
/// Builds a display-safe preview of a matched secret.
///
/// Inputs of six characters or fewer collapse to `"***"`. Longer inputs keep
/// their first four and last two characters around a `***` mask. Lengths are
/// counted in `char`s, so multi-byte text is never split mid-character.
fn make_redacted_preview(matched: &str) -> String {
    const HEAD: usize = 4;
    const TAIL: usize = 2;

    if matched.chars().count() <= HEAD + TAIL {
        return String::from("***");
    }

    // Byte index just past the first HEAD characters.
    let head_end = matched
        .char_indices()
        .nth(HEAD)
        .map_or(matched.len(), |(idx, _)| idx);
    // Byte index where the last TAIL characters begin.
    let tail_start = matched
        .char_indices()
        .rev()
        .nth(TAIL - 1)
        .map_or(0, |(idx, _)| idx);

    format!("{}***{}", &matched[..head_end], &matched[tail_start..])
}
107
108pub fn detect_secrets(content: &str) -> Vec<SecretMatch> {
109 let mut matches = Vec::new();
110
111 let line_offsets: Vec<usize> = std::iter::once(0)
112 .chain(content.match_indices('\n').map(|(i, _)| i + 1))
113 .collect();
114
115 let offset_to_line = |byte_offset: usize| -> usize {
116 match line_offsets.binary_search(&byte_offset) {
117 Ok(i) => i + 1,
118 Err(i) => i,
119 }
120 };
121
122 for &(name, regex_fn) in BUILTIN_PATTERNS {
123 let re = regex_fn();
124 for m in re.find_iter(content) {
125 matches.push(SecretMatch {
126 pattern_name: name,
127 line_number: offset_to_line(m.start()),
128 redacted_preview: make_redacted_preview(m.as_str()),
129 });
130 }
131 }
132
133 matches
134}
135
136pub fn detect_secrets_with_custom(content: &str, custom_patterns: &[String]) -> Vec<SecretMatch> {
137 let mut matches = detect_secrets(content);
138
139 for pattern_str in custom_patterns {
140 if let Ok(re) = Regex::new(pattern_str) {
141 let line_offsets: Vec<usize> = std::iter::once(0)
142 .chain(content.match_indices('\n').map(|(i, _)| i + 1))
143 .collect();
144
145 for m in re.find_iter(content) {
146 let line = match line_offsets.binary_search(&m.start()) {
147 Ok(i) => i + 1,
148 Err(i) => i,
149 };
150 matches.push(SecretMatch {
151 pattern_name: "custom_pattern",
152 line_number: line,
153 redacted_preview: make_redacted_preview(m.as_str()),
154 });
155 }
156 }
157 }
158
159 matches
160}
161
162pub fn scan_and_redact(
163 content: &str,
164 config: &SecretDetectionConfig,
165) -> (String, Vec<SecretMatch>) {
166 if !config.enabled {
167 return (content.to_string(), Vec::new());
168 }
169
170 let matches = detect_secrets_with_custom(content, &config.custom_patterns);
171
172 if matches.is_empty() || !config.redact {
173 return (content.to_string(), matches);
174 }
175
176 let mut redacted = content.to_string();
177 for &(name, regex_fn) in BUILTIN_PATTERNS {
178 let re = regex_fn();
179 redacted = re
180 .replace_all(&redacted, |_: ®ex::Captures| {
181 format!("[REDACTED:{name}]")
182 })
183 .to_string();
184 }
185
186 for pattern_str in &config.custom_patterns {
187 if let Ok(re) = Regex::new(pattern_str) {
188 redacted = re
189 .replace_all(&redacted, "[REDACTED:custom_pattern]")
190 .to_string();
191 }
192 }
193
194 (redacted, matches)
195}
196
197pub fn scan_and_redact_from_config(content: &str) -> (String, Vec<SecretMatch>) {
198 let cfg = Config::load();
199 scan_and_redact(content, &cfg.secret_detection)
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// True when the built-in scan of `input` reports a match named `pattern`.
    fn reports(input: &str, pattern: &str) -> bool {
        detect_secrets(input)
            .iter()
            .any(|m| m.pattern_name == pattern)
    }

    /// Builds a detection config from its three fields.
    fn config(enabled: bool, redact: bool, custom_patterns: Vec<String>) -> SecretDetectionConfig {
        SecretDetectionConfig {
            enabled,
            redact,
            custom_patterns,
        }
    }

    // --- individual built-in patterns -------------------------------------

    #[test]
    fn detects_aws_key() {
        assert!(reports("aws_key = AKIAIOSFODNN7EXAMPLE", "aws_key"));
    }

    #[test]
    fn detects_private_key_header() {
        assert!(reports(
            "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIB...",
            "private_key"
        ));
    }

    #[test]
    fn detects_github_token() {
        assert!(reports(
            "export GITHUB_TOKEN=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl",
            "github_token"
        ));
    }

    #[test]
    fn detects_anthropic_key() {
        assert!(reports(
            "ANTHROPIC_API_KEY=sk-ant-api03-abcdef1234567890ABCD",
            "anthropic_key"
        ));
    }

    #[test]
    fn detects_openai_key() {
        assert!(reports(
            "OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwx",
            "openai_key"
        ));
    }

    #[test]
    fn detects_gitlab_pat() {
        assert!(reports("token = glpat-xxxxxxxxxxxxxxxxxxxx", "gitlab_pat"));
    }

    #[test]
    fn detects_generic_api_key() {
        let input = "api_key = abcdefghijklmnopqrstuvwxyz1234567890";
        assert!(reports(input, "generic_api_key") || reports(input, "high_entropy_secret"));
    }

    #[test]
    fn clean_content_returns_empty() {
        let found = detect_secrets("fn main() { println!(\"hello world\"); }");
        assert!(found.is_empty());
    }

    #[test]
    fn detects_jwt() {
        assert!(reports(
            "token = eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkw",
            "jwt"
        ));
    }

    #[test]
    fn detects_slack_token() {
        assert!(reports(
            "SLACK_TOKEN=xoxb-1234567890-abcdefghij",
            "slack_token"
        ));
    }

    #[test]
    fn detects_stripe_key() {
        assert!(reports(
            "stripe_key = sk_live_abcdefghij1234567890",
            "stripe_key"
        ));
    }

    #[test]
    fn detects_db_url() {
        assert!(reports(
            "DATABASE_URL=postgres://user:password@localhost:5432/db",
            "db_url"
        ));
    }

    #[test]
    fn detects_npm_token() {
        assert!(reports("NPM_TOKEN=npm_abcdefghij1234567890", "npm_token"));
    }

    #[test]
    fn detects_github_fine_grained() {
        assert!(reports(
            "token = github_pat_ABCDEFGHIJKLMNOPQRSTuvwx",
            "github_fine_grained"
        ));
    }

    // --- preview rendering -------------------------------------------------

    #[test]
    fn redacted_preview_format() {
        let preview = make_redacted_preview("AKIAIOSFODNN7EXAMPLE");
        assert!(preview.starts_with("AKIA"));
        assert!(preview.ends_with("LE"));
        assert!(preview.contains("***"));
    }

    #[test]
    fn redacted_preview_short_string() {
        assert_eq!(make_redacted_preview("short"), "***");
    }

    // --- scan_and_redact ---------------------------------------------------

    #[test]
    fn scan_and_redact_replaces_secrets() {
        let cfg = config(true, true, Vec::new());
        let (redacted, matches) = scan_and_redact("key = AKIAIOSFODNN7EXAMPLE", &cfg);
        assert!(!matches.is_empty());
        assert!(redacted.contains("[REDACTED:aws_key]"));
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
    }

    #[test]
    fn scan_without_redact_preserves_content() {
        let cfg = config(true, false, Vec::new());
        let input = "key = AKIAIOSFODNN7EXAMPLE";
        let (output, matches) = scan_and_redact(input, &cfg);
        assert!(!matches.is_empty());
        assert_eq!(output, input);
    }

    #[test]
    fn disabled_detection_returns_unchanged() {
        let cfg = config(false, true, Vec::new());
        let input = "key = AKIAIOSFODNN7EXAMPLE";
        let (output, matches) = scan_and_redact(input, &cfg);
        assert!(matches.is_empty());
        assert_eq!(output, input);
    }

    #[test]
    fn custom_pattern_detected() {
        let cfg = config(true, true, vec![r"MYCORP_[A-Z]{10,}".to_string()]);
        let (redacted, matches) = scan_and_redact("value is MYCORP_ABCDEFGHIJKLMNO here", &cfg);
        assert!(matches.iter().any(|m| m.pattern_name == "custom_pattern"));
        assert!(redacted.contains("[REDACTED:custom_pattern]"));
    }

    // --- line numbers ------------------------------------------------------

    #[test]
    fn line_numbers_are_correct() {
        let found = detect_secrets("line1\nline2\nAKIAIOSFODNN7EXAMPLE\nline4");
        assert!(!found.is_empty());
        assert_eq!(found[0].line_number, 3);
    }

    #[test]
    fn multiple_secrets_on_different_lines() {
        let found = detect_secrets("AKIAIOSFODNN7EXAMPLE\nclean\nsk-abcdefghijklmnopqrstuvwx");
        assert!(found.len() >= 2);

        let line_for = |pattern: &str| {
            found
                .iter()
                .find(|m| m.pattern_name == pattern)
                .unwrap()
                .line_number
        };
        assert_eq!(line_for("aws_key"), 1);
        assert_eq!(line_for("openai_key"), 3);
    }

    // --- private-key header variants ---------------------------------------

    #[test]
    fn ec_private_key_detected() {
        assert!(reports("-----BEGIN EC PRIVATE KEY-----", "private_key"));
    }

    #[test]
    fn openssh_private_key_detected() {
        assert!(reports("-----BEGIN OPENSSH PRIVATE KEY-----", "private_key"));
    }
}