Skip to main content

anodizer_core/
redact.rs

1//! Secret redaction for command output.
2//!
3//! Mirrors GoReleaser's `internal/redact/redact.go`: scans environment
4//! variables for secret-looking entries and replaces their values in
5//! output strings with `$KEY_NAME`.
6
/// Key suffixes that indicate a secret value.
///
/// Compared against the upper-cased key, so `signing_key` matches `_KEY`.
const SECRET_KEY_SUFFIXES: &[&str] = &["_KEY", "_SECRET", "_PASSWORD", "_TOKEN"];

/// Value prefixes that indicate a secret regardless of key name.
///
/// Matching is case-sensitive (`str::starts_with`).
const SECRET_VALUE_PREFIXES: &[&str] = &[
    "sk-",
    "ghp_",
    "ghs_",
    "gho_",
    "ghu_",
    "dckr_pat_",
    "glpat-",
    "AIZA",
    // Google API keys are issued with the mixed-case prefix `AIza`; the
    // all-caps entry above never matches them because the comparison is
    // case-sensitive. The all-caps spelling is kept for upstream parity.
    "AIza",
    "xox",
];

/// Returns true if this env entry looks like it contains a secret.
///
/// An entry is a secret when its value is non-empty and either
/// - the key, upper-cased, ends with one of [`SECRET_KEY_SUFFIXES`], or
/// - the value starts with one of [`SECRET_VALUE_PREFIXES`].
///
/// The empty string is the only excluded value — every non-empty value
/// matching the heuristics is redacted, mirroring upstream
/// GoReleaser's `internal/redact/redact.go::isSecret` after the
/// length-floor was removed (commit `d1cdbb2`).
fn is_secret(key: &str, value: &str) -> bool {
    if value.is_empty() {
        return false;
    }
    let key_upper = key.to_uppercase();
    SECRET_KEY_SUFFIXES.iter().any(|s| key_upper.ends_with(s))
        || SECRET_VALUE_PREFIXES.iter().any(|p| value.starts_with(p))
}
39
40/// Redact secret values in a string, replacing them with `$KEY_NAME`.
41///
42/// Longer values are replaced first to prevent partial matches.
43///
44/// Mirrors GoReleaser's `redact.String(s, env)` API.
45pub fn string(input: &str, env: &[(String, String)]) -> String {
46    let mut secrets: Vec<(&str, &str)> = env
47        .iter()
48        .filter(|(k, v)| is_secret(k, v))
49        .map(|(k, v)| (k.as_str(), v.as_str()))
50        .collect();
51    secrets.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then_with(|| a.0.cmp(b.0)));
52
53    let mut result = input.to_string();
54    for (key, value) in secrets {
55        result = result.replace(value, &format!("${}", key));
56    }
57    result
58}
59
60/// Convenience wrapper: redact secrets in `input` using the current
61/// process env (`std::env::vars()`) PLUS strip inline URL credentials.
62///
63/// Used by modules that don't have a `Context` in scope (e.g. the `git/`
64/// shell-out helpers) and still want the same redaction surface as the
65/// `StageLogger`. Equivalent to `redact_url_credentials(input)` followed
66/// by `string(..., &process_env_vec)`.
67pub fn redact_process_env(input: &str) -> String {
68    let env: Vec<(String, String)> = std::env::vars().collect();
69    let stripped = redact_url_credentials(input);
70    string(&stripped, &env)
71}
72
/// Remove embedded userinfo (credentials) from every URL in `input`.
///
/// For each `<scheme>://` occurrence, the authority is taken to be the
/// text up to the next `/`, `?`, `#`, or whitespace character (or the end
/// of the input). If that authority contains an `@`, everything before
/// its LAST `@` is replaced with `<redacted>`; the host part and the
/// remainder of the URL are kept verbatim. URLs without userinfo and all
/// non-URL text pass through unchanged. Any `<scheme>://` form is
/// handled, not just `http` / `https`.
///
/// Use this as a defense-in-depth complement to [`string`] when the secret
/// is inlined in a URL but the bare token value is not necessarily exported
/// as an env var (e.g. a `git_url` config string the user templated with a
/// literal `https://user:pass@host`).
pub fn redact_url_credentials(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut remaining = input;

    while let Some((head, tail)) = remaining.split_once("://") {
        // Everything up to and including the scheme separator is kept.
        out.push_str(head);
        out.push_str("://");

        // The authority ends at the first path / query / fragment /
        // whitespace character, or at the end of the input.
        let authority_len = tail
            .char_indices()
            .find(|&(_, ch)| ch == '/' || ch == '?' || ch == '#' || ch.is_whitespace())
            .map_or(tail.len(), |(idx, _)| idx);
        let (authority, after_authority) = tail.split_at(authority_len);

        // RFC 3986 §3.2.1 wants a literal `@` inside the password to be
        // percent-encoded, but real-world tokens carry raw `@` often
        // enough that the LAST `@` is treated as the host separator.
        match authority.rsplit_once('@') {
            Some((_userinfo, host)) => {
                out.push_str("<redacted>@");
                out.push_str(host);
            }
            None => out.push_str(authority),
        }

        remaining = after_authority;
    }

    out.push_str(remaining);
    out
}
115
/// Strip bearer / authorization tokens that may have been echoed by a
/// remote endpoint into a response body before that body lands in an
/// error message. Defense in depth — if a misbehaving registry mirrors
/// the request's `Authorization` header back in an error response, this
/// helper prevents the token from showing up in user-visible logs.
///
/// Replaces:
///   - `Bearer <token>` → `Bearer <redacted>` (case-insensitive on the
///     keyword; the canonical replacement spelling is always "Bearer").
///     The keyword must appear at the start of the input OR immediately
///     after one of `[ \t:,;("'<\n\r]`, so it never matches inside a
///     longer word such as "forbearer x". Free-standing prose ("the
///     bearer of bad news") still matches and is over-redacted on
///     purpose: safer than leaking a real token. Additional spaces / tabs
///     between the keyword and the token are consumed, so a token
///     separated by more than one space is redacted too.
///   - `Authorization:` followed by any value through end-of-line →
///     `Authorization: <redacted>` (case-insensitive on the header name).
///     The entire header value is consumed so `Authorization: Bearer X`
///     doesn't leak `X` after the header redaction.
///
/// All text outside a redacted span is copied through unchanged,
/// including non-ASCII characters.
///
/// Use as a wrapper around any remote-supplied body text being interpolated
/// into an error message or log line. The bare token (no scheme prefix)
/// remains untouched — for that, rely on `string(..., env)` matching the
/// env-var-based heuristics.
pub fn redact_bearer_tokens(input: &str) -> String {
    let bytes = input.as_bytes();
    let mut out = String::with_capacity(input.len());
    // Start of the verbatim run that has been scanned but not yet copied
    // into `out`. Verbatim text is copied as `&str` slices (never byte by
    // byte) so multi-byte UTF-8 sequences survive intact. Every slice
    // edge is a char boundary: spans start on an ASCII keyword byte and
    // end either at end-of-input or on an ASCII whitespace byte.
    let mut pending = 0;
    let mut i = 0;
    while i < bytes.len() {
        // Authorization: <rest-of-line>
        // Always allowed to match at i (the header name itself is unambiguous
        // when followed by a `:`). Consume through the next \n / \r so a
        // multi-line body with subsequent normal text isn't redacted past
        // the header's terminator.
        if let Some(name_len) = match_authorization_prefix(&bytes[i..]) {
            out.push_str(&input[pending..i]);
            out.push_str("Authorization: <redacted>");
            i += name_len;
            while i < bytes.len() && bytes[i] != b'\n' && bytes[i] != b'\r' {
                i += 1;
            }
            pending = i;
            continue;
        }
        // Bearer <token>
        // Require the preceding byte (if any) to be a token-boundary
        // character so the keyword cannot match in the middle of a word.
        let preceded_by_boundary = i == 0
            || matches!(
                bytes[i - 1],
                b' ' | b'\t' | b':' | b',' | b';' | b'(' | b'"' | b'\'' | b'<' | b'\n' | b'\r'
            );
        let bearer_len = if preceded_by_boundary {
            match_bearer_prefix(&bytes[i..])
        } else {
            None
        };
        if let Some(kw_len) = bearer_len {
            out.push_str(&input[pending..i]);
            out.push_str("Bearer <redacted>");
            i += kw_len;
            // Tolerate extra padding between the keyword and the token;
            // otherwise "Bearer  <token>" would redact an empty run and
            // leave the token in place.
            while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
                i += 1;
            }
            // Skip the token value: a run of non-whitespace characters.
            while i < bytes.len() && !bytes[i].is_ascii_whitespace() {
                i += 1;
            }
            pending = i;
            continue;
        }
        // Nothing matched here; this byte stays in the pending verbatim run.
        i += 1;
    }
    out.push_str(&input[pending..]);
    out
}

/// Returns Some(keyword.len()) if `bytes` starts with `keyword`, compared
/// ASCII-case-insensitively; None otherwise (including when `bytes` is
/// shorter than `keyword`).
fn match_prefix_ignore_ascii_case(bytes: &[u8], keyword: &[u8]) -> Option<usize> {
    let head = bytes.get(..keyword.len())?;
    head.eq_ignore_ascii_case(keyword)
        .then_some(keyword.len())
}

/// Returns Some(prefix_len) if `bytes` starts with case-insensitive
/// "Bearer " (the trailing space is required so we don't match "Bearertown").
fn match_bearer_prefix(bytes: &[u8]) -> Option<usize> {
    match_prefix_ignore_ascii_case(bytes, b"Bearer ")
}

/// Returns Some(prefix_len) if `bytes` starts with case-insensitive
/// "Authorization:" (the trailing colon is required to disambiguate from
/// prose mentioning the word "authorization").
fn match_authorization_prefix(bytes: &[u8]) -> Option<usize> {
    match_prefix_ignore_ascii_case(bytes, b"Authorization:")
}
209
// Unit tests for the redaction helpers in this module: env-driven
// replacement (`string` / `is_secret`), URL userinfo stripping
// (`redact_url_credentials`), and Bearer / Authorization scrubbing
// (`redact_bearer_tokens`).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_redact_by_key_suffix() {
        // Only DOCKER_PASSWORD has a secret-looking key; PLAIN_VAR is ignored.
        let env = vec![
            (
                "DOCKER_PASSWORD".to_string(),
                "mysecretpassword123".to_string(),
            ),
            ("PLAIN_VAR".to_string(), "not-a-secret".to_string()),
        ];
        let result = string("Login with mysecretpassword123 succeeded", &env);
        assert_eq!(result, "Login with $DOCKER_PASSWORD succeeded");
        assert!(!result.contains("mysecretpassword123"));
    }

    #[test]
    fn test_redact_by_value_prefix() {
        let env = vec![("MY_TOKEN".to_string(), "ghp_abc123def456ghi789".to_string())];
        let result = string("Using token ghp_abc123def456ghi789", &env);
        assert_eq!(result, "Using token $MY_TOKEN");
    }

    #[test]
    fn test_redact_includes_short_secret_when_key_looks_secret() {
        // Mirrors upstream rename in `internal/redact/redact_test.go` after
        // the length-floor was removed: a 5-char value under a `*_KEY` key
        // must still be redacted.
        let env = vec![("API_KEY".to_string(), "short".to_string())];
        let result = string("Value is short", &env);
        assert_eq!(result, "Value is $API_KEY");
    }

    #[test]
    fn test_redact_skips_empty_value() {
        // The empty string is the only excluded value: an unset env var
        // would otherwise replace every empty substring in the input,
        // turning "abc" into "$API_KEY a$API_KEY b$API_KEY c$API_KEY".
        let env = vec![("API_KEY".to_string(), String::new())];
        let result = string("Value is short", &env);
        assert_eq!(result, "Value is short");
    }

    #[test]
    fn test_redact_longer_values_first() {
        // SHORT_TOKEN's value is a prefix of LONG_TOKEN's value.
        let env = vec![
            ("SHORT_TOKEN".to_string(), "abcdefghij".to_string()),
            ("LONG_TOKEN".to_string(), "abcdefghijklmnop".to_string()),
        ];
        let result = string("secret: abcdefghijklmnop", &env);
        // Longer match should be replaced first
        assert_eq!(result, "secret: $LONG_TOKEN");
    }

    #[test]
    fn test_redact_no_secrets() {
        let env = vec![("PATH".to_string(), "/usr/bin:/usr/local/bin".to_string())];
        let result = string("PATH is set", &env);
        assert_eq!(result, "PATH is set");
    }

    #[test]
    fn test_redact_multiple_occurrences() {
        let env = vec![(
            "REGISTRY_PASSWORD".to_string(),
            "supersecret123".to_string(),
        )];
        let result = string("auth supersecret123 retry supersecret123", &env);
        assert_eq!(result, "auth $REGISTRY_PASSWORD retry $REGISTRY_PASSWORD");
    }

    #[test]
    fn test_is_secret_key_suffixes() {
        assert!(is_secret("DOCKER_PASSWORD", "longvalue1234"));
        assert!(is_secret("API_TOKEN", "longvalue1234"));
        assert!(is_secret("signing_key", "longvalue1234")); // case insensitive
        assert!(is_secret("MY_SECRET", "longvalue1234"));
        assert!(!is_secret("MY_CONFIG", "longvalue1234"));
    }

    #[test]
    fn test_is_secret_value_prefixes() {
        // The key "ANYTHING" has no secret suffix, so only the value decides.
        assert!(is_secret("ANYTHING", "ghp_1234567890"));
        assert!(is_secret("ANYTHING", "sk-1234567890"));
        assert!(is_secret("ANYTHING", "dckr_pat_1234567890"));
        assert!(is_secret("ANYTHING", "glpat-1234567890"));
        assert!(!is_secret("ANYTHING", "regular_value1234"));
    }

    #[test]
    fn test_redact_sort_stability_same_length() {
        // When two secrets have the same value length, sort by key name
        // for deterministic output regardless of HashMap iteration order.
        let env = vec![
            ("B_SECRET".to_string(), "same_length_val".to_string()),
            ("A_SECRET".to_string(), "same_length_val".to_string()),
        ];
        // Both keys map to the same value, so whichever sorts first by
        // key name should win — A_SECRET comes before B_SECRET.
        let result = string("found same_length_val here", &env);
        assert_eq!(result, "found $A_SECRET here");
    }

    #[test]
    fn test_redact_deterministic_with_different_lengths() {
        // Longer values still replaced first, secondary sort by key is tiebreaker
        let env = vec![
            ("Z_TOKEN".to_string(), "short_secret_val".to_string()),
            (
                "A_TOKEN".to_string(),
                "a_longer_secret_value_here".to_string(),
            ),
        ];
        let result = string("prefix a_longer_secret_value_here suffix", &env);
        assert_eq!(result, "prefix $A_TOKEN suffix");
    }

    #[test]
    fn test_redact_url_credentials_https_with_token() {
        let input = "remote: https://ghp_abc123def@github.com/owner/repo.git";
        let result = redact_url_credentials(input);
        assert_eq!(
            result,
            "remote: https://<redacted>@github.com/owner/repo.git"
        );
        assert!(!result.contains("ghp_abc123def"));
    }

    #[test]
    fn test_redact_url_credentials_user_pass_pair() {
        // The password itself contains a literal `@`; the last `@` in the
        // authority is the one treated as the host separator.
        let input = "pushing to https://user:p@ssw0rd@gitlab.example.com/foo/bar";
        let result = redact_url_credentials(input);
        assert_eq!(
            result, "pushing to https://<redacted>@gitlab.example.com/foo/bar",
            "userinfo must cover the entire user:pass segment up to the host-@"
        );
    }

    #[test]
    fn test_redact_url_credentials_no_userinfo_unchanged() {
        let input = "fetching https://github.com/owner/repo.git";
        assert_eq!(redact_url_credentials(input), input);
    }

    #[test]
    fn test_redact_url_credentials_ssh_unchanged() {
        // SSH-style `git@github.com:owner/repo.git` has no `://`, so the
        // helper leaves it alone. The `git@` is part of the SSH user, not
        // an embedded credential.
        let input = "fetching git@github.com:owner/repo.git";
        assert_eq!(redact_url_credentials(input), input);
    }

    #[test]
    fn test_redact_url_credentials_multiple_urls_in_one_line() {
        let input = "from https://token1@a.com/x to https://token2@b.com/y";
        let result = redact_url_credentials(input);
        assert_eq!(
            result, "from https://<redacted>@a.com/x to https://<redacted>@b.com/y",
            "both URLs must be redacted, leaving the connecting prose intact"
        );
    }

    #[test]
    fn test_redact_url_credentials_does_not_consume_path_at_sign() {
        // `@` in a path segment (after the first `/`) must NOT be treated
        // as a userinfo terminator.
        let input = "GET https://api.example.com/users/foo@bar.com/profile";
        assert_eq!(
            redact_url_credentials(input),
            input,
            "an `@` after the first `/` is part of the path, not userinfo"
        );
    }

    #[test]
    fn test_redact_url_credentials_empty_input() {
        assert_eq!(redact_url_credentials(""), "");
    }

    #[test]
    fn test_redact_url_credentials_plain_text() {
        let input = "no URLs here, just words";
        assert_eq!(redact_url_credentials(input), input);
    }

    #[test]
    fn test_redact_url_credentials_percent_encoded_userinfo() {
        // A percent-encoded `@` in the userinfo (e.g. an account name like
        // `user@name`) does not break the terminator scan: the function
        // looks for the LAST `@` before the path / query / fragment /
        // whitespace boundary, so both `@`s collapse into a single
        // `<redacted>` replacement.
        let input = "https://user%40name:pass@host.example.com/path";
        let result = redact_url_credentials(input);
        assert_eq!(result, "https://<redacted>@host.example.com/path");
        assert!(!result.contains("user%40name"));
        assert!(!result.contains("pass"));
    }

    #[test]
    fn test_redact_url_credentials_trailing_query() {
        // A `?` after the host begins the query string; userinfo must still
        // be stripped, and the query is preserved verbatim.
        let input = "https://user:pass@host.example.com?foo=bar";
        let result = redact_url_credentials(input);
        assert_eq!(result, "https://<redacted>@host.example.com?foo=bar");
        assert!(!result.contains("user:pass"));
        assert!(result.ends_with("?foo=bar"));
    }

    #[test]
    fn test_redact_url_credentials_trailing_fragment() {
        // A `#` after the host begins the fragment; userinfo must still
        // be stripped, and the fragment is preserved verbatim.
        let input = "https://user:pass@host.example.com#frag";
        let result = redact_url_credentials(input);
        assert_eq!(result, "https://<redacted>@host.example.com#frag");
        assert!(!result.contains("user:pass"));
        assert!(result.ends_with("#frag"));
    }

    #[test]
    fn test_redact_url_credentials_whitespace_boundary() {
        // Whitespace following the host terminates the authority. The
        // userinfo is redacted and the trailing prose is preserved.
        let input = "https://user:pass@host.example.com then more";
        let result = redact_url_credentials(input);
        assert_eq!(result, "https://<redacted>@host.example.com then more");
        assert!(!result.contains("user:pass"));
        assert!(result.ends_with(" then more"));
    }

    #[test]
    fn test_redact_bearer_tokens_basic() {
        // "Bearer" follows a space here, which is one of the accepted
        // boundary characters.
        let input = "auth header: Bearer ghp_abcdef123456 expires soon";
        let result = redact_bearer_tokens(input);
        assert_eq!(result, "auth header: Bearer <redacted> expires soon");
        assert!(!result.contains("ghp_abcdef123456"));
    }

    #[test]
    fn test_redact_bearer_tokens_case_insensitive() {
        // The keyword "Bearer" is case-insensitive but the canonical
        // output form is always "Bearer".
        let input = "bearer ghp_lowercase_token";
        assert_eq!(
            redact_bearer_tokens(input),
            "Bearer <redacted>",
            "lowercase 'bearer' must still redact"
        );
        let input = "BEARER ghp_uppercase_token";
        assert_eq!(redact_bearer_tokens(input), "Bearer <redacted>");
    }

    #[test]
    fn test_redact_bearer_tokens_authorization_header() {
        // "Authorization:" consumes through end-of-line, so the entire
        // header value is redacted as one unit. Trailing content after
        // a newline is preserved verbatim.
        let input = "request: Authorization: Bearer ghp_xyz\nresponse: 401";
        let result = redact_bearer_tokens(input);
        assert_eq!(
            result, "request: Authorization: <redacted>\nresponse: 401",
            "header value (including the inner Bearer token) must be redacted as one"
        );
        assert!(!result.contains("ghp_xyz"));
    }

    #[test]
    fn test_redact_bearer_tokens_authorization_header_single_line() {
        // No newline → the header value runs to end-of-input; that's fine,
        // the entire tail is redacted (defensive: better one over-redaction
        // than one leaked token).
        let input = "Authorization: Bearer ghp_xyz";
        let result = redact_bearer_tokens(input);
        assert_eq!(result, "Authorization: <redacted>");
        assert!(!result.contains("ghp_xyz"));
    }

    #[test]
    fn test_redact_bearer_tokens_no_match_unchanged() {
        // No "Bearer " / "Authorization:" tokens → string unchanged.
        // Note: we cannot distinguish prose use of "bearer" from a real
        // header; the redactor errs on the side of over-redaction (it
        // would treat "bearer of bad news" as "Bearer <redacted> bad
        // news"). Both branches are still safer than leaking a token.
        let input = "some random text with no relevant tokens here";
        assert_eq!(redact_bearer_tokens(input), input);
    }

    #[test]
    fn test_redact_bearer_tokens_over_redacts_prose_use() {
        // Documents the known over-redaction behavior: "bearer of bad
        // news" looks like a Bearer-token construct because the redactor
        // can't tell prose from a header. The trade-off is intentional —
        // safer to over-redact a prose word than to leak a real token.
        // The word follows a space, so the boundary requirement is met and
        // the next word ("of") is consumed as if it were the token.
        let input = "the bearer of bad news arrived";
        let result = redact_bearer_tokens(input);
        assert_eq!(result, "the Bearer <redacted> bad news arrived");
    }

    #[test]
    fn test_redact_bearer_tokens_empty_input() {
        assert_eq!(redact_bearer_tokens(""), "");
    }

    #[test]
    fn test_redact_bearer_tokens_handles_multiple_occurrences() {
        let input = "first Bearer ghp_aaa and second Bearer ghp_bbb done";
        let result = redact_bearer_tokens(input);
        assert_eq!(
            result,
            "first Bearer <redacted> and second Bearer <redacted> done"
        );
        assert!(!result.contains("ghp_aaa"));
        assert!(!result.contains("ghp_bbb"));
    }
}