Skip to main content

coding_agent_search/pages/
patterns.rs

1//! Pattern library for privacy profiles.
2//!
3//! This module provides pre-defined regex patterns for redacting sensitive data.
4//! Patterns are categorized by type and can be composed into profiles with different
5//! privacy levels.
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
10use crate::pages::redact::CustomPattern;
11
/// Grouping used to organize the sensitive-data patterns in this module.
///
/// Every [`PatternDef`] carries exactly one category.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum PatternCategory {
    /// Service credentials such as API keys and tokens
    /// (AWS, OpenAI, Anthropic, GitHub, ...).
    ApiKeys,
    /// Private key material (SSH, PEM, PGP).
    PrivateKeys,
    /// Connection strings for databases and other services.
    ConnectionStrings,
    /// Personally identifiable information.
    PersonalInfo,
    /// References to internal infrastructure.
    InternalUrls,
}
26
27impl PatternCategory {
28    pub fn label(self) -> &'static str {
29        match self {
30            PatternCategory::ApiKeys => "API Keys & Tokens",
31            PatternCategory::PrivateKeys => "Private Keys",
32            PatternCategory::ConnectionStrings => "Connection Strings",
33            PatternCategory::PersonalInfo => "Personal Information",
34            PatternCategory::InternalUrls => "Internal URLs",
35        }
36    }
37}
38
39/// A pattern definition with metadata for display and categorization.
40#[derive(Debug, Clone)]
41pub struct PatternDef {
42    pub id: &'static str,
43    pub name: &'static str,
44    pub category: PatternCategory,
45    pub description: &'static str,
46    pub pattern: &'static str,
47    pub replacement: &'static str,
48}
49
50// ============================================================================
51// API Keys & Tokens
52// ============================================================================
53
54pub static AWS_ACCESS_KEY: PatternDef = PatternDef {
55    id: "aws_access_key",
56    name: "AWS Access Key ID",
57    category: PatternCategory::ApiKeys,
58    description: "AWS access key identifiers (AKIA...)",
59    pattern: r"\bAKIA[0-9A-Z]{16}\b",
60    replacement: "[AWS_KEY_REDACTED]",
61};
62
63pub static AWS_SECRET_KEY: PatternDef = PatternDef {
64    id: "aws_secret_key",
65    name: "AWS Secret Key",
66    category: PatternCategory::ApiKeys,
67    description: "AWS secret access keys in configuration contexts",
68    pattern: r#"(?i)aws(.{0,20})?(secret|access)?[_-]?key\s*[:=]\s*['"]?[A-Za-z0-9/+=]{40}['"]?"#,
69    replacement: "[AWS_SECRET_REDACTED]",
70};
71
72pub static OPENAI_KEY: PatternDef = PatternDef {
73    id: "openai_key",
74    name: "OpenAI API Key",
75    category: PatternCategory::ApiKeys,
76    description: "OpenAI API keys (sk-...)",
77    pattern: r"\bsk-[A-Za-z0-9]{20,}\b",
78    replacement: "[OPENAI_KEY_REDACTED]",
79};
80
81pub static ANTHROPIC_KEY: PatternDef = PatternDef {
82    id: "anthropic_key",
83    name: "Anthropic API Key",
84    category: PatternCategory::ApiKeys,
85    description: "Anthropic API keys (sk-ant-...)",
86    pattern: r"\bsk-ant-[A-Za-z0-9\-]{20,}\b",
87    replacement: "[ANTHROPIC_KEY_REDACTED]",
88};
89
90pub static GITHUB_TOKEN: PatternDef = PatternDef {
91    id: "github_token",
92    name: "GitHub Token",
93    category: PatternCategory::ApiKeys,
94    description: "GitHub personal access tokens and app tokens",
95    pattern: r"\bgh[pousr]_[A-Za-z0-9]{36}\b",
96    replacement: "[GITHUB_TOKEN_REDACTED]",
97};
98
99pub static GENERIC_API_KEY: PatternDef = PatternDef {
100    id: "generic_api_key",
101    name: "Generic API Key",
102    category: PatternCategory::ApiKeys,
103    description: "Generic API keys, tokens, and secrets in assignment contexts",
104    pattern: r#"(?i)(api[_-]?key|api[_-]?token|auth[_-]?token|access[_-]?token|secret[_-]?key)\s*[:=]\s*['"]?[A-Za-z0-9_\-]{16,}['"]?"#,
105    replacement: "[API_KEY_REDACTED]",
106};
107
108pub static BEARER_TOKEN: PatternDef = PatternDef {
109    id: "bearer_token",
110    name: "Bearer Token",
111    category: PatternCategory::ApiKeys,
112    description: "Bearer authorization tokens in headers",
113    pattern: r"(?i)Bearer\s+[A-Za-z0-9\-_.~+/]+=*",
114    replacement: "Bearer [TOKEN_REDACTED]",
115};
116
117// ============================================================================
118// Private Keys
119// ============================================================================
120
121pub static SSH_PRIVATE_KEY: PatternDef = PatternDef {
122    id: "ssh_private_key",
123    name: "SSH Private Key",
124    category: PatternCategory::PrivateKeys,
125    description: "SSH and OpenSSH private key headers",
126    pattern: r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----",
127    replacement: "[PRIVATE_KEY_REDACTED]",
128};
129
130pub static PEM_PRIVATE_KEY: PatternDef = PatternDef {
131    id: "pem_private_key",
132    name: "PEM Private Key",
133    category: PatternCategory::PrivateKeys,
134    description: "PEM-encoded private keys",
135    pattern: r"-----BEGIN (?:ENCRYPTED )?PRIVATE KEY-----",
136    replacement: "[PRIVATE_KEY_REDACTED]",
137};
138
139pub static PGP_PRIVATE_KEY: PatternDef = PatternDef {
140    id: "pgp_private_key",
141    name: "PGP Private Key",
142    category: PatternCategory::PrivateKeys,
143    description: "PGP/GPG private key blocks",
144    pattern: r"-----BEGIN PGP PRIVATE KEY BLOCK-----",
145    replacement: "[PGP_KEY_REDACTED]",
146};
147
148// ============================================================================
149// Connection Strings
150// ============================================================================
151
152pub static DATABASE_URL: PatternDef = PatternDef {
153    id: "database_url",
154    name: "Database URL",
155    category: PatternCategory::ConnectionStrings,
156    description: "PostgreSQL, MySQL, MongoDB, and Redis connection strings",
157    pattern: r#"(?i)\b(postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^\s'""]+"#,
158    replacement: "[DATABASE_URL_REDACTED]",
159};
160
161pub static DATABASE_PASSWORD: PatternDef = PatternDef {
162    id: "database_password",
163    name: "Database Password",
164    category: PatternCategory::ConnectionStrings,
165    description: "Database passwords in configuration",
166    pattern: r#"(?i)(db[_-]?pass(?:word)?|database[_-]?pass(?:word)?)\s*[:=]\s*['"]?[^\s'"]{4,}['"]?"#,
167    replacement: "[DB_PASSWORD_REDACTED]",
168};
169
170pub static CONNECTION_STRING: PatternDef = PatternDef {
171    id: "connection_string",
172    name: "Connection String",
173    category: PatternCategory::ConnectionStrings,
174    description: "Generic connection strings with credentials",
175    pattern: r#"(?i)(connection[_-]?string|conn[_-]?str)\s*[:=]\s*['"][^'"]+['"]"#,
176    replacement: "[CONNECTION_STRING_REDACTED]",
177};
178
179// ============================================================================
180// Personal Information
181// ============================================================================
182
183pub static EMAIL_ADDRESS: PatternDef = PatternDef {
184    id: "email_address",
185    name: "Email Address",
186    category: PatternCategory::PersonalInfo,
187    description: "Email addresses",
188    pattern: r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
189    replacement: "[EMAIL_REDACTED]",
190};
191
192pub static PHONE_NUMBER: PatternDef = PatternDef {
193    id: "phone_number",
194    name: "Phone Number",
195    category: PatternCategory::PersonalInfo,
196    description: "Phone numbers in various formats",
197    pattern: r"\b(?:\+?1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b",
198    replacement: "[PHONE_REDACTED]",
199};
200
201pub static IP_ADDRESS: PatternDef = PatternDef {
202    id: "ip_address",
203    name: "IP Address",
204    category: PatternCategory::PersonalInfo,
205    description: "IPv4 addresses (all addresses matched; private ranges handled separately)",
206    pattern: r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
207    replacement: "[IP_REDACTED]",
208};
209
210pub static SOCIAL_SECURITY: PatternDef = PatternDef {
211    id: "social_security",
212    name: "Social Security Number",
213    category: PatternCategory::PersonalInfo,
214    description: "US Social Security Numbers",
215    pattern: r"\b[0-9]{3}-[0-9]{2}-[0-9]{4}\b",
216    replacement: "[SSN_REDACTED]",
217};
218
219pub static CREDIT_CARD: PatternDef = PatternDef {
220    id: "credit_card",
221    name: "Credit Card Number",
222    category: PatternCategory::PersonalInfo,
223    description: "Credit card numbers (basic pattern)",
224    pattern: r"\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|6(?:011|5[0-9]{2})[0-9]{12})\b",
225    replacement: "[CARD_REDACTED]",
226};
227
228// ============================================================================
229// Internal URLs
230// ============================================================================
231
232pub static INTERNAL_URL: PatternDef = PatternDef {
233    id: "internal_url",
234    name: "Internal URL",
235    category: PatternCategory::InternalUrls,
236    description: "URLs with internal/corporate domains",
237    pattern: r"https?://[a-zA-Z0-9.-]+\.(internal|local|corp|intra|private|lan)\b[^\s]*",
238    replacement: "[INTERNAL_URL_REDACTED]",
239};
240
241pub static LOCALHOST_URL: PatternDef = PatternDef {
242    id: "localhost_url",
243    name: "Localhost URL",
244    category: PatternCategory::InternalUrls,
245    description: "Localhost and 127.0.0.1 URLs",
246    pattern: r"https?://(?:localhost|127\.0\.0\.1)(?::[0-9]+)?[^\s]*",
247    replacement: "[LOCALHOST_URL_REDACTED]",
248};
249
250pub static PRIVATE_IP_URL: PatternDef = PatternDef {
251    id: "private_ip_url",
252    name: "Private IP URL",
253    category: PatternCategory::InternalUrls,
254    description: "URLs with private IP addresses",
255    pattern: r"https?://(?:10\.|192\.168\.|172\.(?:1[6-9]|2[0-9]|3[01])\.)[0-9.]+(?::[0-9]+)?[^\s]*",
256    replacement: "[PRIVATE_IP_URL_REDACTED]",
257};
258
259// ============================================================================
260// Pattern Collections
261// ============================================================================
262
263/// All defined patterns for iteration.
264pub static ALL_PATTERNS: Lazy<Vec<&'static PatternDef>> = Lazy::new(|| {
265    vec![
266        // API Keys
267        &AWS_ACCESS_KEY,
268        &AWS_SECRET_KEY,
269        &OPENAI_KEY,
270        &ANTHROPIC_KEY,
271        &GITHUB_TOKEN,
272        &GENERIC_API_KEY,
273        &BEARER_TOKEN,
274        // Private Keys
275        &SSH_PRIVATE_KEY,
276        &PEM_PRIVATE_KEY,
277        &PGP_PRIVATE_KEY,
278        // Connection Strings
279        &DATABASE_URL,
280        &DATABASE_PASSWORD,
281        &CONNECTION_STRING,
282        // Personal Info
283        &EMAIL_ADDRESS,
284        &PHONE_NUMBER,
285        &IP_ADDRESS,
286        &SOCIAL_SECURITY,
287        &CREDIT_CARD,
288        // Internal URLs
289        &INTERNAL_URL,
290        &LOCALHOST_URL,
291        &PRIVATE_IP_URL,
292    ]
293});
294
295impl PatternDef {
296    /// Convert this pattern definition to a CustomPattern for the redaction engine.
297    pub fn to_custom_pattern(&self) -> Option<CustomPattern> {
298        let regex = Regex::new(self.pattern).ok()?;
299        Some(CustomPattern {
300            name: self.name.to_string(),
301            pattern: regex,
302            replacement: self.replacement.to_string(),
303            enabled: true,
304        })
305    }
306}
307
308/// Get patterns for public sharing (maximum redaction).
309///
310/// Includes all pattern categories for thorough data sanitization.
311pub fn patterns_for_public() -> Vec<CustomPattern> {
312    let patterns = [
313        // All API keys and tokens
314        &AWS_ACCESS_KEY,
315        &AWS_SECRET_KEY,
316        &OPENAI_KEY,
317        &ANTHROPIC_KEY,
318        &GITHUB_TOKEN,
319        &GENERIC_API_KEY,
320        &BEARER_TOKEN,
321        // All private keys
322        &SSH_PRIVATE_KEY,
323        &PEM_PRIVATE_KEY,
324        &PGP_PRIVATE_KEY,
325        // All connection strings
326        &DATABASE_URL,
327        &DATABASE_PASSWORD,
328        &CONNECTION_STRING,
329        // All personal info
330        &EMAIL_ADDRESS,
331        &PHONE_NUMBER,
332        &IP_ADDRESS,
333        &SOCIAL_SECURITY,
334        &CREDIT_CARD,
335        // All internal URLs
336        &INTERNAL_URL,
337        &LOCALHOST_URL,
338        &PRIVATE_IP_URL,
339    ];
340
341    patterns
342        .iter()
343        .filter_map(|p| p.to_custom_pattern())
344        .collect()
345}
346
347/// Get patterns for team sharing (moderate redaction).
348///
349/// Includes external credentials but allows internal references.
350pub fn patterns_for_team() -> Vec<CustomPattern> {
351    let patterns = [
352        // External API keys only
353        &AWS_ACCESS_KEY,
354        &AWS_SECRET_KEY,
355        &OPENAI_KEY,
356        &ANTHROPIC_KEY,
357        &GITHUB_TOKEN,
358        // Private keys (always sensitive)
359        &SSH_PRIVATE_KEY,
360        &PEM_PRIVATE_KEY,
361        &PGP_PRIVATE_KEY,
362        // External service credentials
363        &DATABASE_URL,
364        &DATABASE_PASSWORD,
365        // External personal info
366        &EMAIL_ADDRESS,
367        &SOCIAL_SECURITY,
368        &CREDIT_CARD,
369    ];
370
371    patterns
372        .iter()
373        .filter_map(|p| p.to_custom_pattern())
374        .collect()
375}
376
377/// Get patterns for personal backup (minimal redaction).
378///
379/// Only removes critical secrets like private keys and cloud credentials.
380pub fn patterns_for_personal() -> Vec<CustomPattern> {
381    let patterns = [
382        // Critical private keys only
383        &SSH_PRIVATE_KEY,
384        &PEM_PRIVATE_KEY,
385        &PGP_PRIVATE_KEY,
386        // Cloud provider keys
387        &AWS_ACCESS_KEY,
388        &AWS_SECRET_KEY,
389        // Database credentials with passwords
390        &DATABASE_PASSWORD,
391    ];
392
393    patterns
394        .iter()
395        .filter_map(|p| p.to_custom_pattern())
396        .collect()
397}
398
399/// Get patterns by category.
400pub fn patterns_by_category(category: PatternCategory) -> Vec<&'static PatternDef> {
401    ALL_PATTERNS
402        .iter()
403        .filter(|p| p.category == category)
404        .copied()
405        .collect()
406}
407
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles a definition's regex source, panicking if it is invalid.
    fn compile(def: &PatternDef) -> Regex {
        Regex::new(def.pattern).unwrap()
    }

    #[test]
    fn test_all_patterns_compile() {
        for def in ALL_PATTERNS.iter() {
            if let Err(err) = Regex::new(def.pattern) {
                panic!("Pattern {} failed to compile: {:?}", def.id, Some(err));
            }
        }
    }

    #[test]
    fn test_to_custom_pattern() {
        let converted = AWS_ACCESS_KEY.to_custom_pattern();
        assert!(converted.is_some());

        let converted = converted.unwrap();
        assert_eq!(converted.name, "AWS Access Key ID");
        assert!(converted.enabled);
    }

    #[test]
    fn test_public_has_most_patterns() {
        let public_count = patterns_for_public().len();
        let team_count = patterns_for_team().len();
        let personal_count = patterns_for_personal().len();

        // Profiles must not grow as the sharing audience narrows.
        assert!(public_count >= team_count);
        assert!(team_count >= personal_count);
    }

    #[test]
    fn test_personal_has_critical_patterns() {
        let personal = patterns_for_personal();
        let has_name_containing =
            |needle: &str| personal.iter().any(|p| p.name.contains(needle));

        // Private key patterns must be present.
        assert!(has_name_containing("Private Key"));

        // AWS patterns must be present.
        assert!(has_name_containing("AWS"));
    }

    #[test]
    fn test_patterns_by_category() {
        let api_patterns = patterns_by_category(PatternCategory::ApiKeys);
        assert!(!api_patterns.is_empty());

        for def in api_patterns.iter() {
            assert!(def.category == PatternCategory::ApiKeys);
        }
    }

    #[test]
    fn test_pattern_matches_aws_key() {
        let re = compile(&AWS_ACCESS_KEY);
        assert!(re.is_match("Found key AKIAIOSFODNN7EXAMPLE in config"));
        assert!(!re.is_match("Not a key"));
    }

    #[test]
    fn test_pattern_matches_openai_key() {
        let re = compile(&OPENAI_KEY);
        assert!(re.is_match("Using sk-abc123def456ghi789jkl012mno345pqr678"));
        // Too short to be a key.
        assert!(!re.is_match("sk-short"));
    }

    #[test]
    fn test_pattern_matches_email() {
        let re = compile(&EMAIL_ADDRESS);
        for sample in [
            "Contact user@example.com for help",
            "test.user+tag@sub.domain.org",
        ]
        .iter()
        {
            assert!(re.is_match(sample));
        }
    }

    #[test]
    fn test_email_pattern_uses_ascii_letter_classes() {
        let re = compile(&EMAIL_ADDRESS);

        assert!(re.is_match("Contact USER_123@example.COM"));
        assert!(!re.is_match("Contact user@example.δοκιμή"));

        // The source text itself must use explicit ASCII ranges rather than
        // Unicode property classes.
        let source = EMAIL_ADDRESS.pattern;
        assert!(source.contains("[A-Za-z]"));
        assert!(!source.contains("\\p"));
    }

    #[test]
    fn test_pattern_matches_database_url() {
        let re = compile(&DATABASE_URL);
        for sample in [
            "postgres://user:pass@host:5432/db",
            "mongodb+srv://user:pass@cluster.mongodb.net/db",
            "redis://localhost:6379",
        ]
        .iter()
        {
            assert!(re.is_match(sample));
        }
    }

    #[test]
    fn test_pattern_matches_private_key() {
        let re = compile(&SSH_PRIVATE_KEY);
        for header in [
            "-----BEGIN RSA PRIVATE KEY-----",
            "-----BEGIN OPENSSH PRIVATE KEY-----",
            "-----BEGIN PRIVATE KEY-----",
        ]
        .iter()
        {
            assert!(re.is_match(header));
        }
    }
}