// tryaudex_core/leakdetect.rs

1use regex::Regex;
2use std::sync::LazyLock;
3
/// Result of scanning a line for credential leaks.
///
/// Implements equality and hashing so detections can be compared in tests
/// or de-duplicated in a set.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LeakDetection {
    /// Display name of the pattern that fired (e.g. `"GitHub Token"`).
    pub pattern_name: String,
    /// Redacted preview of the matched text, as produced by `scan_line`.
    pub matched_text: String,
}
10
11static PATTERNS: LazyLock<Vec<(&str, Regex)>> = LazyLock::new(|| {
12    vec![
13        // AWS Access Key ID (starts with AKIA, ASIA, AIDA, AROA)
14        (
15            "AWS Access Key ID",
16            Regex::new(r"(?:^|[^A-Z0-9])(A[KS]IA[0-9A-Z]{16})(?:[^A-Z0-9]|$)").unwrap(),
17        ),
18        // AWS Secret Access Key (40 chars, base64-ish, preceded by a key-like context)
19        (
20            "AWS Secret Access Key",
21            Regex::new(r"(?i)(?:secret.?access.?key|aws_secret|SECRET_KEY)\s*[=:]\s*([A-Za-z0-9/+=]{40})(?:[^A-Za-z0-9/+=]|$)").unwrap(),
22        ),
23        // AWS Session Token (starts with FwoGZX or IQoJb3)
24        (
25            "AWS Session Token",
26            Regex::new(r"(?:FwoGZX|IQoJb3)[A-Za-z0-9/+=]{50,}").unwrap(),
27        ),
28        // GCP OAuth2 access token (ya29.)
29        (
30            "GCP Access Token",
31            Regex::new(r"ya29\.[A-Za-z0-9_-]{50,}").unwrap(),
32        ),
33        // Azure Bearer Token (eyJ prefix, JWT format)
34        (
35            "Azure Bearer Token",
36            Regex::new(r"eyJ[A-Za-z0-9_-]{20,}\.eyJ[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}")
37                .unwrap(),
38        ),
39        // GitHub personal access tokens (ghp_, gho_, ghs_, ghr_, github_pat_)
40        (
41            "GitHub Token",
42            Regex::new(r"(?:ghp_|gho_|ghs_|ghr_|github_pat_)[A-Za-z0-9_]{36,}").unwrap(),
43        ),
44        // Vault tokens (hvs. prefix)
45        (
46            "Vault Token",
47            Regex::new(r"hvs\.[A-Za-z0-9_-]{24,}").unwrap(),
48        ),
49        // Private key headers
50        (
51            "Private Key",
52            Regex::new(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----").unwrap(),
53        ),
54        // Generic long base64 secrets (likely tokens, >100 chars)
55        (
56            "Long Base64 Token",
57            Regex::new(r"(?:^|[=:]\s*)[A-Za-z0-9+/]{100,}={0,2}(?:\s|$)").unwrap(),
58        ),
59    ]
60});
61
62/// Scan a single line for credential leaks.
63/// Returns a list of detected leaks, or empty if clean.
64pub fn scan_line(line: &str) -> Vec<LeakDetection> {
65    if line.trim().is_empty() {
66        return Vec::new();
67    }
68    // NOTE: We intentionally do NOT skip comment lines (# or //) because
69    // real credentials in comments are still a leak risk.
70
71    let mut detections = Vec::new();
72
73    for (name, pattern) in PATTERNS.iter() {
74        if let Some(m) = pattern.find(line) {
75            let matched = m.as_str();
76            // Skip very short matches that are likely false positives
77            if matched.len() < 16 {
78                continue;
79            }
80            detections.push(LeakDetection {
81                pattern_name: name.to_string(),
82                matched_text: redact(matched),
83            });
84        }
85    }
86
87    detections
88}
89
90/// Scan multiple lines of output for credential leaks.
91pub fn scan_output(output: &str) -> Vec<LeakDetection> {
92    output.lines().flat_map(scan_line).collect()
93}
94
95/// Redact all detected credential patterns in a string, replacing them
96/// with safe placeholders. Use this before writing to audit logs.
97pub fn redact_secrets(input: &str) -> String {
98    let mut result = input.to_string();
99    for (name, pattern) in PATTERNS.iter() {
100        // Apply replacements on the current result string (not the original
101        // input) to avoid offset corruption when multiple patterns overlap.
102        let current = result.clone();
103        let mut new_result = String::with_capacity(current.len());
104        let mut last_end = 0;
105
106        for m in pattern.find_iter(&current) {
107            let matched = m.as_str();
108            if matched.len() < 16 {
109                continue;
110            }
111            new_result.push_str(&current[last_end..m.start()]);
112            new_result.push_str(&format!("[REDACTED:{}:{}]", name, redact(matched)));
113            last_end = m.end();
114        }
115        new_result.push_str(&current[last_end..]);
116        result = new_result;
117    }
118    result
119}
120
/// Produce a short, safe preview of a matched credential.
///
/// Inputs longer than 16 characters keep their first 8 and last 4
/// characters around a `...` marker; shorter inputs keep only their first 4
/// characters followed by `...`.
///
/// R6-M35: all counting is done in `char`s, never in bytes. The credential
/// regexes in this file match ASCII only, but this helper is general, and
/// byte-slicing a non-ASCII string could split a multi-byte UTF-8 character
/// and panic.
fn redact(s: &str) -> String {
    let total = s.chars().count();
    // (characters kept at the front, characters kept at the back)
    let (keep_head, keep_tail) = if total > 16 { (8, 4) } else { (4, 0) };

    let mut preview = String::with_capacity(keep_head + 3 + keep_tail);
    preview.extend(s.chars().take(keep_head));
    preview.push_str("...");
    // `keep_tail` is non-zero only when `total > 16`, so this cannot
    // underflow; with `keep_tail == 0` the iterator is simply empty.
    preview.extend(s.chars().skip(total - keep_tail));
    preview
}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// True when any detection was produced by the pattern called `name`.
    fn has_pattern(leaks: &[LeakDetection], name: &str) -> bool {
        leaks.iter().any(|l| l.pattern_name == name)
    }

    #[test]
    fn test_detect_aws_access_key() {
        let line = "export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE";
        let leaks = scan_line(line);
        assert!(!leaks.is_empty(), "Should detect AWS access key");
        assert!(leaks
            .iter()
            .any(|l| l.pattern_name.contains("AWS Access Key ID")));
    }

    #[test]
    fn test_detect_aws_secret_key() {
        let line = "aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY";
        let leaks = scan_line(line);
        assert!(
            has_pattern(&leaks, "AWS Secret Access Key"),
            "Should detect AWS secret access key"
        );
    }

    #[test]
    fn test_detect_aws_session_token() {
        let line = "AWS_SESSION_TOKEN=FwoGZXIvYXdzEBYaDHxkJ3lNJAHvLj4mZiLOAd3FuKnNv0lRZx5example";
        let leaks = scan_line(line);
        assert!(!leaks.is_empty(), "Should detect AWS session token");
        assert!(has_pattern(&leaks, "AWS Session Token"));
    }

    #[test]
    fn test_detect_gcp_token() {
        let line =
            "Authorization: Bearer ya29.a0AfH6SMBx1234567890abcdefghijklmnopqrstuvwxyz1234567890";
        let leaks = scan_line(line);
        assert!(!leaks.is_empty(), "Should detect GCP access token");
        assert!(leaks.iter().any(|l| l.pattern_name.contains("GCP")));
    }

    #[test]
    fn test_detect_azure_jwt() {
        let line = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIn0.Signature1234567890abcdef";
        let leaks = scan_line(line);
        assert!(!leaks.is_empty(), "Should detect Azure/JWT token");
    }

    #[test]
    fn test_detect_github_token() {
        let line = "GITHUB_TOKEN=ghp_abcdefghijklmnopqrstuvwxyz0123456789";
        let leaks = scan_line(line);
        assert!(has_pattern(&leaks, "GitHub Token"), "Should detect GitHub token");
    }

    #[test]
    fn test_detect_private_key_header() {
        let line = "-----BEGIN RSA PRIVATE KEY-----";
        let leaks = scan_line(line);
        assert!(has_pattern(&leaks, "Private Key"), "Should detect private key header");
    }

    #[test]
    fn test_clean_output() {
        let line = "Successfully listed 42 objects in s3://my-bucket";
        let leaks = scan_line(line);
        assert!(
            leaks.is_empty(),
            "Normal output should not trigger detection"
        );
    }

    #[test]
    fn test_blank_line_is_clean() {
        assert!(scan_line("").is_empty());
        assert!(scan_line("   \t").is_empty());
    }

    #[test]
    fn test_detect_in_comments() {
        let line = "# AKIAIOSFODNN7EXAMPLE";
        let leaks = scan_line(line);
        assert!(
            !leaks.is_empty(),
            "Credentials in comments should still be detected"
        );
    }

    #[test]
    fn test_redact() {
        assert_eq!(redact("AKIAIOSFODNN7EXAMPLE"), "AKIAIOSF...MPLE");
        assert_eq!(redact("short"), "shor...");
    }

    // Regression test for R6-M35: `redact` must count characters, not bytes,
    // so multi-byte UTF-8 input never panics.
    #[test]
    fn test_redact_non_ascii() {
        assert_eq!(redact("ééééé"), "éééé...");
        assert_eq!(redact(&"é".repeat(17)), "éééééééé...éééé");
    }

    #[test]
    fn test_scan_multiline() {
        let output = "line1 ok\nAKIAIOSFODNN7EXAMPLE leaked\nline3 ok";
        let leaks = scan_output(output);
        assert!(!leaks.is_empty());
    }

    #[test]
    fn test_redact_secrets_aws_key() {
        let input = "key=AKIAIOSFODNN7EXAMPLE in output";
        let redacted = redact_secrets(input);
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
        assert!(redacted.contains("[REDACTED:AWS Access Key ID:"));
    }

    #[test]
    fn test_redact_secrets_aws_secret_key() {
        let input = "aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY";
        let redacted = redact_secrets(input);
        assert!(!redacted.contains("wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"));
        assert!(redacted.contains("[REDACTED:AWS Secret Access Key:"));
    }

    #[test]
    fn test_redact_secrets_gcp_token() {
        let input = "token=ya29.a0AfH6SMBx1234567890abcdefghijklmnopqrstuvwxyz1234567890";
        let redacted = redact_secrets(input);
        assert!(!redacted.contains("ya29.a0AfH6SMBx1234567890abcdefghijklmnopqrstuvwxyz1234567890"));
        assert!(redacted.contains("[REDACTED:GCP Access Token:"));
    }

    #[test]
    fn test_redact_secrets_clean_input() {
        let input = "normal log entry with no secrets";
        let redacted = redact_secrets(input);
        assert_eq!(redacted, input);
    }

    #[test]
    fn test_redact_secrets_preserves_structure() {
        let input = r#"{"command":["aws","s3","ls"],"key":"safe-value"}"#;
        let redacted = redact_secrets(input);
        assert_eq!(redacted, input); // No secrets, unchanged
    }
}