Skip to main content

zeph_core/
redact.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::borrow::Cow;
5use std::sync::LazyLock;
6
7/// Apply both secret redaction and path sanitization in a single pass.
8///
9/// Returns `Cow::Borrowed` when no changes are needed (zero-allocation fast path).
10#[must_use]
11pub fn scrub_content(text: &str) -> Cow<'_, str> {
12    let after_secrets = match redact_secrets(text) {
13        Cow::Borrowed(_) => {
14            // No secrets found: only run path scan on original text
15            return match sanitize_paths(text) {
16                Cow::Owned(s) => Cow::Owned(s),
17                Cow::Borrowed(_) => Cow::Borrowed(text),
18            };
19        }
20        Cow::Owned(s) => s,
21    };
22
23    // Second pass: path sanitization on already-modified string
24    match sanitize_paths(&after_secrets) {
25        Cow::Owned(s) => Cow::Owned(s),
26        Cow::Borrowed(_) => Cow::Owned(after_secrets),
27    }
28}
29
30use regex::Regex;
31
/// Regex fragments for the leading marker of each recognised secret format.
///
/// The entries are joined with `|` into the alternation used by `SECRET_REGEX`, so
/// every entry must be valid regex syntax; note that the `.` in `ya29\\.` is escaped
/// so it matches a literal dot. `redact_secrets` keeps a separate list of the
/// unescaped literals for its substring pre-check, and the two lists need to stay in
/// sync when a prefix is added or removed.
const SECRET_PREFIXES: &[&str] = &[
    // OpenAI-style API key.
    "sk-",
    // Stripe live / test keys.
    "sk_live_",
    "sk_test_",
    // AWS access key id.
    "AKIA",
    // GitHub personal access token / OAuth token.
    "ghp_",
    "gho_",
    // Armored private key header (e.g. `-----BEGIN RSA PRIVATE KEY-----`).
    "-----BEGIN",
    // Slack bot / user tokens.
    "xoxb-",
    "xoxp-",
    // Google API key.
    "AIza",
    // Google OAuth access token (dot escaped for the regex).
    "ya29\\.",
    // GitLab personal access token.
    "glpat-",
    // HuggingFace token.
    "hf_",
    // NPM token.
    "npm_",
    // Docker personal access token.
    "dckr_pat_",
];
49
50// Matches any secret prefix followed by non-whitespace characters.
51// Using alternation so a single pass covers all prefixes.
52static SECRET_REGEX: LazyLock<Regex> = LazyLock::new(|| {
53    let pattern = SECRET_PREFIXES.join("|");
54    let full = format!("(?:{pattern})[^\\s\"'`,;{{}}\\[\\]]*");
55    Regex::new(&full).expect("secret redaction regex is valid")
56});
57
58static PATH_REGEX: LazyLock<Regex> = LazyLock::new(|| {
59    Regex::new(r#"(?:/home/|/Users/|/root/|/tmp/|/var/)[^\s"'`,;{}\[\]]*"#)
60        .expect("path redaction regex is valid")
61});
62
63/// Replace tokens containing known secret patterns with `[REDACTED]`.
64///
65/// Detects secrets embedded in URLs, JSON values, and quoted strings.
66/// Returns `Cow::Borrowed` when no secrets found (zero-allocation fast path).
67#[must_use]
68pub fn redact_secrets(text: &str) -> Cow<'_, str> {
69    // Fast path: check for any prefix substring before running regex.
70    let raw_prefixes = &[
71        "sk-",
72        "sk_live_",
73        "sk_test_",
74        "AKIA",
75        "ghp_",
76        "gho_",
77        "-----BEGIN",
78        "xoxb-",
79        "xoxp-",
80        "AIza",
81        "ya29.",
82        "glpat-",
83        "hf_",
84        "npm_",
85        "dckr_pat_",
86    ];
87    if !raw_prefixes.iter().any(|p| text.contains(p)) {
88        return Cow::Borrowed(text);
89    }
90
91    let result = SECRET_REGEX.replace_all(text, "[REDACTED]");
92    match result {
93        Cow::Borrowed(_) => Cow::Borrowed(text),
94        Cow::Owned(s) => Cow::Owned(s),
95    }
96}
97
98/// Replace absolute filesystem paths with `[PATH]` to prevent information disclosure.
99#[must_use]
100pub fn sanitize_paths(text: &str) -> Cow<'_, str> {
101    const PATH_PREFIXES: &[&str] = &["/home/", "/Users/", "/root/", "/tmp/", "/var/"];
102
103    if !PATH_PREFIXES.iter().any(|p| text.contains(p)) {
104        return Cow::Borrowed(text);
105    }
106
107    let result = PATH_REGEX.replace_all(text, "[PATH]");
108    match result {
109        Cow::Borrowed(_) => Cow::Borrowed(text),
110        Cow::Owned(s) => Cow::Owned(s),
111    }
112}
113
// Unit and property tests for `redact_secrets`, `sanitize_paths` and `scrub_content`.
#[cfg(test)]
mod tests {
    use super::*;

    // --- redact_secrets: one test per secret format, plus formatting edge cases ---

    #[test]
    fn redacts_openai_key() {
        let text = "Use key sk-abc123def456 for API calls";
        let result = redact_secrets(text);
        assert_eq!(result, "Use key [REDACTED] for API calls");
    }

    #[test]
    fn redacts_stripe_live_key() {
        let text = "Stripe key: sk_live_abcdef123456";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("sk_live_"));
    }

    #[test]
    fn redacts_stripe_test_key() {
        let text = "Test key sk_test_abc123";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
    }

    #[test]
    fn redacts_aws_key() {
        let text = "AWS access key: AKIAIOSFODNN7EXAMPLE";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("AKIA"));
    }

    #[test]
    fn redacts_github_pat() {
        let text = "Token: ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("ghp_"));
    }

    #[test]
    fn redacts_github_oauth() {
        let text = "OAuth: gho_xxxxxxxxxxxx";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
    }

    // Only checks that the `-----BEGIN` marker itself is gone; the rest of the
    // header line is not asserted on.
    #[test]
    fn redacts_private_key_header() {
        let text = "Found -----BEGIN RSA PRIVATE KEY----- in file";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("-----BEGIN"));
    }

    #[test]
    fn redacts_slack_tokens() {
        let text = "Bot token xoxb-123-456 and user xoxp-789";
        let result = redact_secrets(text);
        assert_eq!(result, "Bot token [REDACTED] and user [REDACTED]");
    }

    // Clean input must come back unchanged and borrowed (no allocation).
    #[test]
    fn preserves_normal_text() {
        let text = "This is a normal response with no secrets";
        let result = redact_secrets(text);
        assert_eq!(result, text);
        assert!(matches!(result, Cow::Borrowed(_)));
    }

    #[test]
    fn handles_empty_string() {
        assert_eq!(redact_secrets(""), "");
    }

    #[test]
    fn multiple_secrets_redacted() {
        let text = "Keys: sk-abc123 AKIAIOSFODNN7 ghp_xxxxx";
        let result = redact_secrets(text);
        assert_eq!(result, "Keys: [REDACTED] [REDACTED] [REDACTED]");
    }

    // Redaction must leave newlines, indentation and tabs around the secret intact.
    #[test]
    fn preserves_multiline_whitespace() {
        let text = "Line one\n  indented line\n\ttabbed line\nsk-secret here";
        let result = redact_secrets(text);
        assert_eq!(
            result,
            "Line one\n  indented line\n\ttabbed line\n[REDACTED] here"
        );
    }

    // A secret inside a quoted string in a fenced code block: the code layout
    // survives and only the key is replaced.
    #[test]
    fn preserves_code_block_formatting() {
        let text = "```rust\nfn main() {\n    let key = \"sk-abc123\";\n    println!(\"{}\", key);\n}\n```";
        let result = redact_secrets(text);
        assert!(result.contains("```rust\nfn"));
        assert!(result.contains("    let"));
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("sk-abc123"));
    }

    #[test]
    fn preserves_multiple_spaces() {
        let text = "word1   word2     word3";
        let result = redact_secrets(text);
        assert_eq!(result, text);
    }

    #[test]
    fn no_allocation_without_secrets() {
        let text = "safe text without any secrets";
        let result = redact_secrets(text);
        assert!(matches!(result, Cow::Borrowed(_)));
    }

    // Sweeps every literal prefix: each one followed by `abc123` must be replaced
    // and must not survive in the output.
    #[test]
    fn all_secret_prefixes_tested() {
        for prefix in &[
            "sk-",
            "sk_live_",
            "sk_test_",
            "AKIA",
            "ghp_",
            "gho_",
            "-----BEGIN",
            "xoxb-",
            "xoxp-",
            "AIza",
            "ya29.",
            "glpat-",
            "hf_",
            "npm_",
            "dckr_pat_",
        ] {
            let text = format!("token: {prefix}abc123");
            let result = redact_secrets(&text);
            assert!(result.contains("[REDACTED]"), "Failed for prefix: {prefix}");
            assert!(!result.contains(*prefix), "Prefix not redacted: {prefix}");
        }
    }

    #[test]
    fn redacts_google_api_key() {
        let text = "Google key: AIzaSyA1234567890abcdefghijklmnop";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("AIza"));
    }

    #[test]
    fn redacts_google_oauth_token() {
        let text = "OAuth token ya29.a0AfH6SMBx1234567890";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("ya29."));
    }

    #[test]
    fn redacts_gitlab_pat() {
        let text = "GitLab token: glpat-xxxxxxxxxxxxxxxxxxxx";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("glpat-"));
    }

    #[test]
    fn only_whitespace() {
        assert_eq!(redact_secrets("   \n\t  "), "   \n\t  ");
    }

    #[test]
    fn secret_at_end_of_line() {
        let text = "token: sk-abc123";
        let result = redact_secrets(text);
        assert_eq!(result, "token: [REDACTED]");
    }

    // The secret is not preceded by whitespace here (`key=sk-...`), so this checks
    // that matching does not depend on a token boundary before the prefix.
    #[test]
    fn redacts_secret_in_url() {
        let text = "https://api.example.com?key=sk-abc123xyz";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("sk-abc123xyz"));
    }

    #[test]
    fn redacts_secret_in_json() {
        let text = r#"{"api_key":"sk-abc123def456"}"#;
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("sk-abc123def456"));
    }

    // --- sanitize_paths ---

    // The trailing `:42` is part of the replaced span, since `:` is not a delimiter.
    #[test]
    fn sanitize_home_path() {
        let text = "error at /home/user/project/src/main.rs:42";
        let result = sanitize_paths(text);
        assert_eq!(result, "error at [PATH]");
    }

    #[test]
    fn sanitize_users_path() {
        let text = "failed: /Users/dev/code/lib.rs not found";
        let result = sanitize_paths(text);
        assert!(result.contains("[PATH]"));
        assert!(!result.contains("/Users/"));
    }

    #[test]
    fn sanitize_no_paths() {
        let text = "normal error message";
        let result = sanitize_paths(text);
        assert!(matches!(result, Cow::Borrowed(_)));
    }

    #[test]
    fn redacts_huggingface_token() {
        let text = "HuggingFace token: hf_abcdefghijklmnopqrstuvwxyz";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("hf_"));
    }

    #[test]
    fn redacts_npm_token() {
        let text = "NPM token npm_abc123XYZ";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("npm_abc"));
    }

    #[test]
    fn redacts_docker_pat() {
        let text = "Docker token: dckr_pat_xxxxxxxxxxxx";
        let result = redact_secrets(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("dckr_pat_"));
    }

    // Needed by the `proptest!` block at the end of this module.
    use proptest::prelude::*;

    // --- scrub_content: combination of both stages ---

    // Neither stage fires, so the original string must be returned borrowed.
    #[test]
    fn scrub_no_match_passthrough() {
        let text = "hello world, nothing sensitive here";
        let result = scrub_content(text);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), text);
    }

    #[test]
    fn scrub_only_secrets() {
        let text = "key: sk-abc123def";
        let result = scrub_content(text);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("sk-abc123"));
        assert!(!result.contains("/home/"));
    }

    #[test]
    fn scrub_only_paths() {
        let text = "error at /Users/dev/project/src/main.rs:42";
        let result = scrub_content(text);
        assert!(result.contains("[PATH]"));
        assert!(!result.contains("/Users/dev/"));
    }

    #[test]
    fn scrub_secrets_and_paths_combined() {
        let text = "token sk-abc123 found at /home/user/config.toml";
        let result = scrub_content(text);
        assert!(result.contains("[REDACTED]"));
        assert!(result.contains("[PATH]"));
        assert!(!result.contains("sk-abc123"));
        assert!(!result.contains("/home/user/"));
    }

    #[test]
    fn scrub_secrets_no_paths() {
        // Secret found but no path → function returns Cow::Owned (modified string)
        let text = "use sk-abc123 for auth";
        let result = scrub_content(text);
        assert!(
            matches!(result, Cow::Owned(_)),
            "must return Cow::Owned when secret was found"
        );
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("[PATH]"));
    }

    // Covers the roots not exercised by the dedicated `/home/` and `/Users/` tests.
    #[test]
    fn sanitize_paths_all_prefixes() {
        let cases = [
            ("/root/secrets.toml", "/root/"),
            ("/tmp/tmpfile.lock", "/tmp/"),
            ("/var/log/app.log", "/var/"),
        ];
        for (text, prefix) in cases {
            let result = sanitize_paths(text);
            assert!(result.contains("[PATH]"), "{prefix} must be sanitized");
            assert!(
                !result.contains(prefix),
                "{prefix} must be removed from output"
            );
        }
    }

    // --- property tests over generated strings ---

    proptest! {
        #[test]
        fn redact_secrets_never_panics(s in ".*") {
            let _ = redact_secrets(&s);
        }

        #[test]
        fn sanitize_paths_never_panics(s in ".*") {
            let _ = sanitize_paths(&s);
        }

        // The generator's alphabet has no `-` or `_`, so of the prefixes below only
        // `AKIA`, `AIza` and `ya29.` can actually occur in a generated string; the
        // guard skips those cases.
        #[test]
        fn redact_preserves_non_secret_text(s in "[a-zA-Z0-9 .,!?]{1,200}") {
            // Only test strings that genuinely contain no secret prefixes.
            let secret_prefixes = [
                "sk-", "sk_live_", "sk_test_", "AKIA", "ghp_", "gho_",
                "-----BEGIN", "xoxb-", "xoxp-", "AIza", "ya29.", "glpat-",
                "hf_", "npm_", "dckr_pat_",
            ];
            if !secret_prefixes.iter().any(|p| s.contains(p)) {
                let result = redact_secrets(&s);
                assert_eq!(result.as_ref(), s.as_str());
            }
        }

        #[test]
        fn scrub_content_never_panics(s in ".*") {
            let _ = scrub_content(&s);
        }

        // NOTE(review): this list omits `-----BEGIN`, `ya29.`, `hf_` and `npm_`,
        // which the other prefix lists in this module include — confirm whether
        // that is intentional.
        #[test]
        fn scrub_content_result_never_contains_raw_secret(s in ".*") {
            let result = scrub_content(&s);
            let secret_prefixes = [
                "sk-", "sk_live_", "sk_test_", "AKIA", "ghp_", "gho_",
                "xoxb-", "xoxp-", "AIza", "glpat-", "dckr_pat_",
            ];
            for prefix in secret_prefixes {
                assert!(
                    !result.contains(prefix),
                    "scrub_content must redact prefix: {prefix}"
                );
            }
        }
    }
}
468}