1use once_cell::sync::Lazy;
8use regex::Regex;
9
10use crate::pages::redact::CustomPattern;
11
/// Grouping applied to each built-in redaction pattern.
///
/// The variant order is part of the type's observable shape (discriminants),
/// so new variants should be appended rather than inserted.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum PatternCategory {
    /// Labelled "API Keys & Tokens".
    ApiKeys,
    /// Labelled "Private Keys".
    PrivateKeys,
    /// Labelled "Connection Strings".
    ConnectionStrings,
    /// Labelled "Personal Information".
    PersonalInfo,
    /// Labelled "Internal URLs".
    InternalUrls,
}

impl PatternCategory {
    /// Returns the fixed, human-readable label for this category.
    ///
    /// The match is exhaustive on purpose: adding a variant without a label
    /// is a compile error.
    pub fn label(self) -> &'static str {
        match self {
            Self::ApiKeys => "API Keys & Tokens",
            Self::PrivateKeys => "Private Keys",
            Self::ConnectionStrings => "Connection Strings",
            Self::PersonalInfo => "Personal Information",
            Self::InternalUrls => "Internal URLs",
        }
    }
}
38
39#[derive(Debug, Clone)]
41pub struct PatternDef {
42 pub id: &'static str,
43 pub name: &'static str,
44 pub category: PatternCategory,
45 pub description: &'static str,
46 pub pattern: &'static str,
47 pub replacement: &'static str,
48}
49
50pub static AWS_ACCESS_KEY: PatternDef = PatternDef {
55 id: "aws_access_key",
56 name: "AWS Access Key ID",
57 category: PatternCategory::ApiKeys,
58 description: "AWS access key identifiers (AKIA...)",
59 pattern: r"\bAKIA[0-9A-Z]{16}\b",
60 replacement: "[AWS_KEY_REDACTED]",
61};
62
63pub static AWS_SECRET_KEY: PatternDef = PatternDef {
64 id: "aws_secret_key",
65 name: "AWS Secret Key",
66 category: PatternCategory::ApiKeys,
67 description: "AWS secret access keys in configuration contexts",
68 pattern: r#"(?i)aws(.{0,20})?(secret|access)?[_-]?key\s*[:=]\s*['"]?[A-Za-z0-9/+=]{40}['"]?"#,
69 replacement: "[AWS_SECRET_REDACTED]",
70};
71
72pub static OPENAI_KEY: PatternDef = PatternDef {
73 id: "openai_key",
74 name: "OpenAI API Key",
75 category: PatternCategory::ApiKeys,
76 description: "OpenAI API keys (sk-...)",
77 pattern: r"\bsk-[A-Za-z0-9]{20,}\b",
78 replacement: "[OPENAI_KEY_REDACTED]",
79};
80
81pub static ANTHROPIC_KEY: PatternDef = PatternDef {
82 id: "anthropic_key",
83 name: "Anthropic API Key",
84 category: PatternCategory::ApiKeys,
85 description: "Anthropic API keys (sk-ant-...)",
86 pattern: r"\bsk-ant-[A-Za-z0-9\-]{20,}\b",
87 replacement: "[ANTHROPIC_KEY_REDACTED]",
88};
89
90pub static GITHUB_TOKEN: PatternDef = PatternDef {
91 id: "github_token",
92 name: "GitHub Token",
93 category: PatternCategory::ApiKeys,
94 description: "GitHub personal access tokens and app tokens",
95 pattern: r"\bgh[pousr]_[A-Za-z0-9]{36}\b",
96 replacement: "[GITHUB_TOKEN_REDACTED]",
97};
98
99pub static GENERIC_API_KEY: PatternDef = PatternDef {
100 id: "generic_api_key",
101 name: "Generic API Key",
102 category: PatternCategory::ApiKeys,
103 description: "Generic API keys, tokens, and secrets in assignment contexts",
104 pattern: r#"(?i)(api[_-]?key|api[_-]?token|auth[_-]?token|access[_-]?token|secret[_-]?key)\s*[:=]\s*['"]?[A-Za-z0-9_\-]{16,}['"]?"#,
105 replacement: "[API_KEY_REDACTED]",
106};
107
108pub static BEARER_TOKEN: PatternDef = PatternDef {
109 id: "bearer_token",
110 name: "Bearer Token",
111 category: PatternCategory::ApiKeys,
112 description: "Bearer authorization tokens in headers",
113 pattern: r"(?i)Bearer\s+[A-Za-z0-9\-_.~+/]+=*",
114 replacement: "Bearer [TOKEN_REDACTED]",
115};
116
117pub static SSH_PRIVATE_KEY: PatternDef = PatternDef {
122 id: "ssh_private_key",
123 name: "SSH Private Key",
124 category: PatternCategory::PrivateKeys,
125 description: "SSH and OpenSSH private key headers",
126 pattern: r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----",
127 replacement: "[PRIVATE_KEY_REDACTED]",
128};
129
130pub static PEM_PRIVATE_KEY: PatternDef = PatternDef {
131 id: "pem_private_key",
132 name: "PEM Private Key",
133 category: PatternCategory::PrivateKeys,
134 description: "PEM-encoded private keys",
135 pattern: r"-----BEGIN (?:ENCRYPTED )?PRIVATE KEY-----",
136 replacement: "[PRIVATE_KEY_REDACTED]",
137};
138
139pub static PGP_PRIVATE_KEY: PatternDef = PatternDef {
140 id: "pgp_private_key",
141 name: "PGP Private Key",
142 category: PatternCategory::PrivateKeys,
143 description: "PGP/GPG private key blocks",
144 pattern: r"-----BEGIN PGP PRIVATE KEY BLOCK-----",
145 replacement: "[PGP_KEY_REDACTED]",
146};
147
148pub static DATABASE_URL: PatternDef = PatternDef {
153 id: "database_url",
154 name: "Database URL",
155 category: PatternCategory::ConnectionStrings,
156 description: "PostgreSQL, MySQL, MongoDB, and Redis connection strings",
157 pattern: r#"(?i)\b(postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^\s'""]+"#,
158 replacement: "[DATABASE_URL_REDACTED]",
159};
160
161pub static DATABASE_PASSWORD: PatternDef = PatternDef {
162 id: "database_password",
163 name: "Database Password",
164 category: PatternCategory::ConnectionStrings,
165 description: "Database passwords in configuration",
166 pattern: r#"(?i)(db[_-]?pass(?:word)?|database[_-]?pass(?:word)?)\s*[:=]\s*['"]?[^\s'"]{4,}['"]?"#,
167 replacement: "[DB_PASSWORD_REDACTED]",
168};
169
170pub static CONNECTION_STRING: PatternDef = PatternDef {
171 id: "connection_string",
172 name: "Connection String",
173 category: PatternCategory::ConnectionStrings,
174 description: "Generic connection strings with credentials",
175 pattern: r#"(?i)(connection[_-]?string|conn[_-]?str)\s*[:=]\s*['"][^'"]+['"]"#,
176 replacement: "[CONNECTION_STRING_REDACTED]",
177};
178
179pub static EMAIL_ADDRESS: PatternDef = PatternDef {
184 id: "email_address",
185 name: "Email Address",
186 category: PatternCategory::PersonalInfo,
187 description: "Email addresses",
188 pattern: r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
189 replacement: "[EMAIL_REDACTED]",
190};
191
192pub static PHONE_NUMBER: PatternDef = PatternDef {
193 id: "phone_number",
194 name: "Phone Number",
195 category: PatternCategory::PersonalInfo,
196 description: "Phone numbers in various formats",
197 pattern: r"\b(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b",
198 replacement: "[PHONE_REDACTED]",
199};
200
201pub static IP_ADDRESS: PatternDef = PatternDef {
202 id: "ip_address",
203 name: "IP Address",
204 category: PatternCategory::PersonalInfo,
205 description: "IPv4 addresses (all addresses matched; private ranges handled separately)",
206 pattern: r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
207 replacement: "[IP_REDACTED]",
208};
209
210pub static SOCIAL_SECURITY: PatternDef = PatternDef {
211 id: "social_security",
212 name: "Social Security Number",
213 category: PatternCategory::PersonalInfo,
214 description: "US Social Security Numbers",
215 pattern: r"\b[0-9]{3}-[0-9]{2}-[0-9]{4}\b",
216 replacement: "[SSN_REDACTED]",
217};
218
219pub static CREDIT_CARD: PatternDef = PatternDef {
220 id: "credit_card",
221 name: "Credit Card Number",
222 category: PatternCategory::PersonalInfo,
223 description: "Credit card numbers (basic pattern)",
224 pattern: r"\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|6(?:011|5[0-9]{2})[0-9]{12})\b",
225 replacement: "[CARD_REDACTED]",
226};
227
228pub static INTERNAL_URL: PatternDef = PatternDef {
233 id: "internal_url",
234 name: "Internal URL",
235 category: PatternCategory::InternalUrls,
236 description: "URLs with internal/corporate domains",
237 pattern: r"https?://[a-zA-Z0-9.-]+\.(internal|local|corp|intra|private|lan)\b[^\s]*",
238 replacement: "[INTERNAL_URL_REDACTED]",
239};
240
241pub static LOCALHOST_URL: PatternDef = PatternDef {
242 id: "localhost_url",
243 name: "Localhost URL",
244 category: PatternCategory::InternalUrls,
245 description: "Localhost and 127.0.0.1 URLs",
246 pattern: r"https?://(?:localhost|127\.0\.0\.1)(?::[0-9]+)?[^\s]*",
247 replacement: "[LOCALHOST_URL_REDACTED]",
248};
249
250pub static PRIVATE_IP_URL: PatternDef = PatternDef {
251 id: "private_ip_url",
252 name: "Private IP URL",
253 category: PatternCategory::InternalUrls,
254 description: "URLs with private IP addresses",
255 pattern: r"https?://(?:10\.|192\.168\.|172\.(?:1[6-9]|2[0-9]|3[01])\.)[0-9.]+(?::[0-9]+)?[^\s]*",
256 replacement: "[PRIVATE_IP_URL_REDACTED]",
257};
258
259pub static ALL_PATTERNS: Lazy<Vec<&'static PatternDef>> = Lazy::new(|| {
265 vec![
266 &AWS_ACCESS_KEY,
268 &AWS_SECRET_KEY,
269 &OPENAI_KEY,
270 &ANTHROPIC_KEY,
271 &GITHUB_TOKEN,
272 &GENERIC_API_KEY,
273 &BEARER_TOKEN,
274 &SSH_PRIVATE_KEY,
276 &PEM_PRIVATE_KEY,
277 &PGP_PRIVATE_KEY,
278 &DATABASE_URL,
280 &DATABASE_PASSWORD,
281 &CONNECTION_STRING,
282 &EMAIL_ADDRESS,
284 &PHONE_NUMBER,
285 &IP_ADDRESS,
286 &SOCIAL_SECURITY,
287 &CREDIT_CARD,
288 &INTERNAL_URL,
290 &LOCALHOST_URL,
291 &PRIVATE_IP_URL,
292 ]
293});
294
295impl PatternDef {
296 pub fn to_custom_pattern(&self) -> Option<CustomPattern> {
298 let regex = Regex::new(self.pattern).ok()?;
299 Some(CustomPattern {
300 name: self.name.to_string(),
301 pattern: regex,
302 replacement: self.replacement.to_string(),
303 enabled: true,
304 })
305 }
306}
307
308pub fn patterns_for_public() -> Vec<CustomPattern> {
312 let patterns = [
313 &AWS_ACCESS_KEY,
315 &AWS_SECRET_KEY,
316 &OPENAI_KEY,
317 &ANTHROPIC_KEY,
318 &GITHUB_TOKEN,
319 &GENERIC_API_KEY,
320 &BEARER_TOKEN,
321 &SSH_PRIVATE_KEY,
323 &PEM_PRIVATE_KEY,
324 &PGP_PRIVATE_KEY,
325 &DATABASE_URL,
327 &DATABASE_PASSWORD,
328 &CONNECTION_STRING,
329 &EMAIL_ADDRESS,
331 &PHONE_NUMBER,
332 &IP_ADDRESS,
333 &SOCIAL_SECURITY,
334 &CREDIT_CARD,
335 &INTERNAL_URL,
337 &LOCALHOST_URL,
338 &PRIVATE_IP_URL,
339 ];
340
341 patterns
342 .iter()
343 .filter_map(|p| p.to_custom_pattern())
344 .collect()
345}
346
347pub fn patterns_for_team() -> Vec<CustomPattern> {
351 let patterns = [
352 &AWS_ACCESS_KEY,
354 &AWS_SECRET_KEY,
355 &OPENAI_KEY,
356 &ANTHROPIC_KEY,
357 &GITHUB_TOKEN,
358 &SSH_PRIVATE_KEY,
360 &PEM_PRIVATE_KEY,
361 &PGP_PRIVATE_KEY,
362 &DATABASE_URL,
364 &DATABASE_PASSWORD,
365 &EMAIL_ADDRESS,
367 &SOCIAL_SECURITY,
368 &CREDIT_CARD,
369 ];
370
371 patterns
372 .iter()
373 .filter_map(|p| p.to_custom_pattern())
374 .collect()
375}
376
377pub fn patterns_for_personal() -> Vec<CustomPattern> {
381 let patterns = [
382 &SSH_PRIVATE_KEY,
384 &PEM_PRIVATE_KEY,
385 &PGP_PRIVATE_KEY,
386 &AWS_ACCESS_KEY,
388 &AWS_SECRET_KEY,
389 &DATABASE_PASSWORD,
391 ];
392
393 patterns
394 .iter()
395 .filter_map(|p| p.to_custom_pattern())
396 .collect()
397}
398
399pub fn patterns_by_category(category: PatternCategory) -> Vec<&'static PatternDef> {
401 ALL_PATTERNS
402 .iter()
403 .filter(|p| p.category == category)
404 .copied()
405 .collect()
406}
407
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles the regex source of `def`, panicking if it is invalid.
    fn compiled(def: &PatternDef) -> Regex {
        Regex::new(def.pattern).unwrap()
    }

    /// Every built-in definition must carry a regex that compiles.
    #[test]
    fn test_all_patterns_compile() {
        for def in ALL_PATTERNS.iter() {
            if let Err(error) = Regex::new(def.pattern) {
                panic!(
                    "Pattern {} failed to compile: {:?}",
                    def.id,
                    Some(error)
                );
            }
        }
    }

    /// Conversion keeps the display name and enables the pattern.
    #[test]
    fn test_to_custom_pattern() {
        let converted = AWS_ACCESS_KEY.to_custom_pattern();
        assert!(converted.is_some());
        if let Some(custom) = converted {
            assert_eq!(custom.name, "AWS Access Key ID");
            assert!(custom.enabled);
        }
    }

    /// Preset sizes are ordered: public >= team >= personal.
    #[test]
    fn test_public_has_most_patterns() {
        let public_count = patterns_for_public().len();
        let team_count = patterns_for_team().len();
        let personal_count = patterns_for_personal().len();

        assert!(public_count >= team_count);
        assert!(team_count >= personal_count);
    }

    /// The personal preset still covers private keys and AWS credentials.
    #[test]
    fn test_personal_has_critical_patterns() {
        let personal = patterns_for_personal();
        let has_name_containing =
            |needle: &str| personal.iter().any(|p| p.name.contains(needle));

        assert!(has_name_containing("Private Key"));
        assert!(has_name_containing("AWS"));
    }

    /// Category filtering returns a non-empty, homogeneous list.
    #[test]
    fn test_patterns_by_category() {
        let api_patterns = patterns_by_category(PatternCategory::ApiKeys);
        assert!(!api_patterns.is_empty());
        for def in &api_patterns {
            assert!(def.category == PatternCategory::ApiKeys);
        }
    }

    #[test]
    fn test_pattern_matches_aws_key() {
        let aws = compiled(&AWS_ACCESS_KEY);
        assert!(aws.is_match("Found key AKIAIOSFODNN7EXAMPLE in config"));
        assert!(!aws.is_match("Not a key"));
    }

    #[test]
    fn test_pattern_matches_openai_key() {
        let openai = compiled(&OPENAI_KEY);
        assert!(openai.is_match("Using sk-abc123def456ghi789jkl012mno345pqr678"));
        // Fewer than 20 characters after the prefix must not match.
        assert!(!openai.is_match("sk-short"));
    }

    #[test]
    fn test_pattern_matches_email() {
        let email = compiled(&EMAIL_ADDRESS);
        for sample in [
            "Contact user@example.com for help",
            "test.user+tag@sub.domain.org",
        ] {
            assert!(email.is_match(sample));
        }
    }

    /// The email regex relies on explicit ASCII classes, not Unicode ones.
    #[test]
    fn test_email_pattern_uses_ascii_letter_classes() {
        let email = compiled(&EMAIL_ADDRESS);
        let source = EMAIL_ADDRESS.pattern;

        assert!(email.is_match("Contact USER_123@example.COM"));
        assert!(!email.is_match("Contact user@example.δοκιμή"));
        assert!(source.contains("[A-Za-z]"));
        assert!(!source.contains("\\p"));
    }

    #[test]
    fn test_pattern_matches_database_url() {
        let db_url = compiled(&DATABASE_URL);
        for sample in [
            "postgres://user:pass@host:5432/db",
            "mongodb+srv://user:pass@cluster.mongodb.net/db",
            "redis://localhost:6379",
        ] {
            assert!(db_url.is_match(sample));
        }
    }

    #[test]
    fn test_pattern_matches_private_key() {
        let ssh = compiled(&SSH_PRIVATE_KEY);
        for header in [
            "-----BEGIN RSA PRIVATE KEY-----",
            "-----BEGIN OPENSSH PRIVATE KEY-----",
            "-----BEGIN PRIVATE KEY-----",
        ] {
            assert!(ssh.is_match(header));
        }
    }
}