Skip to main content

tryaudex_core/
leakdetect.rs

1use regex::Regex;
2use std::sync::LazyLock;
3
/// A single credential-like finding produced while scanning text.
///
/// Derives `PartialEq`/`Eq` so detections can be compared directly
/// (for example in assertions or when de-duplicating findings).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LeakDetection {
    /// Human-readable name of the pattern that fired (e.g. `"AWS Access Key ID"`).
    pub pattern_name: String,
    /// Text associated with the finding. The scanner in this module stores a
    /// redacted preview here rather than the raw secret.
    pub matched_text: String,
}
10
11static PATTERNS: LazyLock<Vec<(&str, Regex)>> = LazyLock::new(|| {
12    vec![
13        // AWS Access Key ID (starts with AKIA, ASIA, AIDA, AROA)
14        (
15            "AWS Access Key ID",
16            Regex::new(r"(?:^|[^A-Z0-9])(A[KS]IA[0-9A-Z]{16})(?:[^A-Z0-9]|$)").unwrap(),
17        ),
18        // AWS Secret Access Key (40 chars, base64-ish)
19        (
20            "AWS Secret Access Key",
21            Regex::new(r"(?:^|[^A-Za-z0-9/+=])([A-Za-z0-9/+=]{40})(?:[^A-Za-z0-9/+=]|$)").unwrap(),
22        ),
23        // AWS Session Token (starts with FwoGZX or IQoJb3)
24        (
25            "AWS Session Token",
26            Regex::new(r"(?:FwoGZX|IQoJb3)[A-Za-z0-9/+=]{50,}").unwrap(),
27        ),
28        // GCP OAuth2 access token (ya29.)
29        (
30            "GCP Access Token",
31            Regex::new(r"ya29\.[A-Za-z0-9_-]{50,}").unwrap(),
32        ),
33        // Azure Bearer Token (eyJ prefix, JWT format)
34        (
35            "Azure Bearer Token",
36            Regex::new(r"eyJ[A-Za-z0-9_-]{20,}\.eyJ[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}")
37                .unwrap(),
38        ),
39        // Generic long base64 secrets (likely tokens, >100 chars)
40        (
41            "Long Base64 Token",
42            Regex::new(r"(?:^|[=:]\s*)[A-Za-z0-9+/]{100,}={0,2}(?:\s|$)").unwrap(),
43        ),
44    ]
45});
46
47/// Scan a single line for credential leaks.
48/// Returns a list of detected leaks, or empty if clean.
49pub fn scan_line(line: &str) -> Vec<LeakDetection> {
50    // Skip lines that are clearly not leaks (common false positive patterns)
51    if line.trim().is_empty() || line.starts_with('#') || line.starts_with("//") {
52        return Vec::new();
53    }
54
55    let mut detections = Vec::new();
56
57    for (name, pattern) in PATTERNS.iter() {
58        if let Some(m) = pattern.find(line) {
59            let matched = m.as_str();
60            // Skip very short matches that are likely false positives
61            if matched.len() < 16 {
62                continue;
63            }
64            detections.push(LeakDetection {
65                pattern_name: name.to_string(),
66                matched_text: redact(matched),
67            });
68        }
69    }
70
71    detections
72}
73
74/// Scan multiple lines of output for credential leaks.
75pub fn scan_output(output: &str) -> Vec<LeakDetection> {
76    output.lines().flat_map(scan_line).collect()
77}
78
79/// Redact all detected credential patterns in a string, replacing them
80/// with safe placeholders. Use this before writing to audit logs.
81pub fn redact_secrets(input: &str) -> String {
82    let mut result = input.to_string();
83    for (name, pattern) in PATTERNS.iter() {
84        // Replace all matches with redacted form
85        let mut offset: i64 = 0;
86        let matches: Vec<_> = pattern.find_iter(input).collect();
87        for m in matches {
88            let matched = m.as_str();
89            if matched.len() < 16 {
90                continue;
91            }
92            let replacement = format!("[REDACTED:{}:{}]", name, redact(matched));
93            let start = (m.start() as i64 + offset) as usize;
94            let end = (m.end() as i64 + offset) as usize;
95            result.replace_range(start..end, &replacement);
96            offset += replacement.len() as i64 - matched.len() as i64;
97        }
98    }
99    result
100}
101
/// Build a safe preview of a matched credential.
///
/// Inputs longer than 16 characters keep their first 8 and last 4 characters
/// around a `...`; inputs of 16 characters or fewer keep at most their first
/// 4 characters followed by `...`.
///
/// Lengths are counted in characters and all slicing happens on character
/// boundaries, so the function never panics, including on inputs shorter
/// than 4 characters or containing multi-byte characters.
fn redact(s: &str) -> String {
    const SHORT_LIMIT: usize = 16;
    const SHORT_PREFIX: usize = 4;
    const LONG_PREFIX: usize = 8;
    const LONG_SUFFIX: usize = 4;

    let char_count = s.chars().count();
    // Byte offset at which the `n`-th character starts, or the end of the
    // string when it has fewer than `n` characters.
    let byte_offset = |n: usize| s.char_indices().nth(n).map_or(s.len(), |(idx, _)| idx);

    if char_count <= SHORT_LIMIT {
        format!("{}...", &s[..byte_offset(SHORT_PREFIX)])
    } else {
        let head = &s[..byte_offset(LONG_PREFIX)];
        let tail = &s[byte_offset(char_count - LONG_SUFFIX)..];
        format!("{head}...{tail}")
    }
}
110
#[cfg(test)]
mod tests {
    use super::*;

    /// True when any detection's pattern name contains `needle`.
    fn reports(leaks: &[LeakDetection], needle: &str) -> bool {
        leaks.iter().any(|leak| leak.pattern_name.contains(needle))
    }

    // --- scan_line / scan_output -------------------------------------------

    #[test]
    fn test_detect_aws_access_key() {
        let leaks = scan_line("export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE");
        assert!(!leaks.is_empty(), "Should detect AWS access key");
        assert!(reports(&leaks, "AWS Access Key ID"));
    }

    #[test]
    fn test_detect_aws_session_token() {
        let leaks = scan_line(
            "AWS_SESSION_TOKEN=FwoGZXIvYXdzEBYaDHxkJ3lNJAHvLj4mZiLOAd3FuKnNv0lRZx5example",
        );
        assert!(!leaks.is_empty(), "Should detect AWS session token");
    }

    #[test]
    fn test_detect_gcp_token() {
        let leaks = scan_line(
            "Authorization: Bearer ya29.a0AfH6SMBx1234567890abcdefghijklmnopqrstuvwxyz1234567890",
        );
        assert!(!leaks.is_empty(), "Should detect GCP access token");
        assert!(reports(&leaks, "GCP"));
    }

    #[test]
    fn test_detect_azure_jwt() {
        let leaks = scan_line("eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIn0.Signature1234567890abcdef");
        assert!(!leaks.is_empty(), "Should detect Azure/JWT token");
    }

    #[test]
    fn test_clean_output() {
        let leaks = scan_line("Successfully listed 42 objects in s3://my-bucket");
        assert!(
            leaks.is_empty(),
            "Normal output should not trigger detection"
        );
    }

    #[test]
    fn test_skip_comments() {
        let leaks = scan_line("# AKIAIOSFODNN7EXAMPLE");
        assert!(leaks.is_empty(), "Comments should be skipped");
    }

    #[test]
    fn test_scan_multiline() {
        let leaks = scan_output("line1 ok\nAKIAIOSFODNN7EXAMPLE leaked\nline3 ok");
        assert!(!leaks.is_empty());
    }

    // --- redact ------------------------------------------------------------

    #[test]
    fn test_redact() {
        let cases = [
            ("AKIAIOSFODNN7EXAMPLE", "AKIAIOSF...MPLE"),
            ("short", "shor..."),
        ];
        for (raw, expected) in cases {
            assert_eq!(redact(raw), expected);
        }
    }

    // --- redact_secrets ----------------------------------------------------

    #[test]
    fn test_redact_secrets_aws_key() {
        let redacted = redact_secrets("key=AKIAIOSFODNN7EXAMPLE in output");
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
        assert!(redacted.contains("[REDACTED:AWS Access Key ID:"));
    }

    #[test]
    fn test_redact_secrets_gcp_token() {
        let redacted =
            redact_secrets("token=ya29.a0AfH6SMBx1234567890abcdefghijklmnopqrstuvwxyz1234567890");
        assert!(!redacted.contains("ya29.a0AfH6SMBx1234567890abcdefghijklmnopqrstuvwxyz1234567890"));
        assert!(redacted.contains("[REDACTED:GCP Access Token:"));
    }

    #[test]
    fn test_redact_secrets_clean_input() {
        let input = "normal log entry with no secrets";
        assert_eq!(redact_secrets(input), input);
    }

    #[test]
    fn test_redact_secrets_preserves_structure() {
        // Input contains no secrets, so it must come back unchanged.
        let input = r#"{"command":["aws","s3","ls"],"key":"safe-value"}"#;
        assert_eq!(redact_secrets(input), input);
    }
}
207}