anodizer-core 0.2.0

Core configuration, context, and template engine for the anodizer release tool
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
//! Secret redaction for command output.
//!
//! Mirrors GoReleaser's `internal/redact/redact.go`: scans environment
//! variables for secret-looking entries and replaces their values in
//! output strings with `$KEY_NAME`.

/// Key suffixes that indicate a secret value.
///
/// `is_secret` upper-cases the env key before testing these suffixes, so
/// the comparison is effectively case-insensitive (`signing_key` matches
/// `_KEY`).
const SECRET_KEY_SUFFIXES: &[&str] = &["_KEY", "_SECRET", "_PASSWORD", "_TOKEN"];

/// Value prefixes that indicate a secret regardless of key name.
///
/// `is_secret` compares these with `str::starts_with`, i.e. the match is
/// case-sensitive and anchored at the very first byte of the env value.
const SECRET_VALUE_PREFIXES: &[&str] = &[
    "sk-",
    "ghp_",
    "ghs_",
    "gho_",
    "ghu_",
    "dckr_pat_",
    "glpat-",
    // Google API keys are issued with the mixed-case prefix `AIza`. Because
    // the comparison is case-sensitive, the all-caps spelling alone never
    // matches a real key; it is kept so existing behaviour is a subset of
    // the new one.
    "AIza",
    "AIZA",
    "xox",
];

/// Decides whether a single `(key, value)` env entry should be redacted.
///
/// An entry is a secret when its value is non-empty AND either
///   - the upper-cased key ends with one of [`SECRET_KEY_SUFFIXES`], or
///   - the value starts with one of [`SECRET_VALUE_PREFIXES`].
///
/// There is deliberately no minimum length: the empty string is the only
/// value that is never treated as a secret, mirroring upstream
/// GoReleaser's `internal/redact/redact.go::isSecret` after the
/// length-floor was removed (commit `d1cdbb2`).
fn is_secret(key: &str, value: &str) -> bool {
    // Evaluated lazily so the key is only upper-cased for non-empty values.
    let key_looks_secret = || {
        let normalized = key.to_uppercase();
        SECRET_KEY_SUFFIXES
            .iter()
            .any(|suffix| normalized.ends_with(*suffix))
    };
    let value_looks_secret = || {
        SECRET_VALUE_PREFIXES
            .iter()
            .any(|prefix| value.starts_with(*prefix))
    };

    !value.is_empty() && (key_looks_secret() || value_looks_secret())
}

/// Replace every secret value occurring in `input` with `$KEY_NAME`.
///
/// `env` is a list of `(key, value)` pairs; only pairs accepted by
/// [`is_secret`] take part. Candidates are processed by descending value
/// length so a short secret that is a substring of a longer one cannot
/// clobber the longer match; ties are broken by ascending key name so the
/// output does not depend on the order of `env`.
///
/// Mirrors GoReleaser's `redact.String(s, env)` API.
pub fn string(input: &str, env: &[(String, String)]) -> String {
    let mut candidates: Vec<(&str, &str)> = Vec::new();
    for (name, secret) in env {
        if is_secret(name, secret) {
            candidates.push((name.as_str(), secret.as_str()));
        }
    }
    // Longest value first, then alphabetical by key.
    candidates.sort_by_key(|&(name, secret)| (std::cmp::Reverse(secret.len()), name));

    candidates
        .into_iter()
        .fold(input.to_string(), |text, (name, secret)| {
            text.replace(secret, &format!("${}", name))
        })
}

/// Convenience wrapper: redact secrets in `input` using the current
/// process env PLUS strip inline URL credentials.
///
/// Used by modules that don't have a `Context` in scope (e.g. the `git/`
/// shell-out helpers) and still want the same redaction surface as the
/// `StageLogger`. Equivalent to `redact_url_credentials(input)` followed
/// by `string(..., &process_env_vec)`.
///
/// The environment is read with `std::env::vars_os()` rather than
/// `std::env::vars()`: the latter panics as soon as any variable name or
/// value is not valid Unicode, which would turn a stray non-UTF-8 variable
/// into a crash inside a logging helper. Entries that are not valid
/// Unicode are skipped — `input` is a `&str`, so such a value cannot occur
/// in it verbatim anyway.
pub fn redact_process_env(input: &str) -> String {
    let env: Vec<(String, String)> = std::env::vars_os()
        .filter_map(|(key, value)| Some((key.into_string().ok()?, value.into_string().ok()?)))
        .collect();
    let stripped = redact_url_credentials(input);
    string(&stripped, &env)
}

/// Strip embedded userinfo (credentials) from any URLs found in `input`.
///
/// For each occurrence of `<scheme>://<authority>`, where the authority
/// runs up to the next `/`, `?`, `#`, whitespace character or end of
/// input, everything before the LAST `@` inside that authority is replaced
/// with `<redacted>`. Non-URL text is left untouched, and URLs whose
/// authority contains no `@` are unchanged. Any `<scheme>://` form is
/// handled, not just `http` / `https`.
///
/// Use this as a defense-in-depth complement to [`string`] when the secret
/// is inlined in a URL but the bare token value is not necessarily exported
/// as an env var (e.g. a `git_url` config string the user templated with a
/// literal `https://user:pass@host`).
pub fn redact_url_credentials(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut remaining = input;

    while let Some(separator) = remaining.find("://") {
        // Copy everything up to and including the `://` verbatim.
        let (head, tail) = remaining.split_at(separator + 3);
        out.push_str(head);

        // The authority ends at the first path / query / fragment /
        // whitespace boundary, or at end of input.
        let authority_len = tail
            .find(|c: char| c == '/' || c == '?' || c == '#' || c.is_whitespace())
            .unwrap_or(tail.len());
        let (authority, after_authority) = tail.split_at(authority_len);

        // RFC 3986 §3.2.1 wants a literal `@` in the password to be
        // percent-encoded, but real-world tokens contain raw `@` often
        // enough that the last `@` is treated as the host separator.
        match authority.rsplit_once('@') {
            Some((_userinfo, host)) => {
                out.push_str("<redacted>@");
                out.push_str(host);
            }
            None => out.push_str(authority),
        }

        remaining = after_authority;
    }

    out.push_str(remaining);
    out
}

/// Strip bearer / authorization tokens that may have been echoed by a
/// remote endpoint into a response body before that body lands in an
/// error message. Defense in depth — if a misbehaving registry mirrors
/// the request's `Authorization` header back in an error response, this
/// helper prevents the token from showing up in user-visible logs.
///
/// Replaces:
///   - `Bearer <token>` → `Bearer <redacted>` (case-insensitive on the
///     keyword; the canonical replacement spelling is always "Bearer").
///     The keyword must appear at the start of the input OR immediately
///     after one of `[ \t:,;("'<\n\r]`, so it is never matched in the
///     middle of a longer word. Prose such as "the bearer of bad news" is
///     still matched (a space is a boundary) — over-redacting a prose word
///     is preferred to leaking a token. Any additional spaces / tabs
///     between the keyword and the token are consumed as well, so
///     `Bearer   X` does not leak `X`.
///   - `Authorization:` followed by any value through end-of-line →
///     `Authorization: <redacted>` (case-insensitive on the header name,
///     matched at any position). The entire header value is consumed so
///     `Authorization: Bearer X` doesn't leak `X` after the header
///     redaction.
///
/// All other text — including non-ASCII text — is copied through
/// unchanged.
///
/// Use as a wrapper around any remote-supplied body text being interpolated
/// into an error message or log line. The bare token (no scheme prefix)
/// remains untouched — for that, rely on `string(..., env)` matching the
/// env-var-based heuristics.
pub fn redact_bearer_tokens(input: &str) -> String {
    let bytes = input.as_bytes();
    let mut out = String::with_capacity(input.len());
    // `input[copied..i]` is the pending run of unmatched text. It is copied
    // as a `&str` slice (never byte-by-byte) so multi-byte UTF-8 sequences
    // survive intact. Both ends are always char boundaries: a match can only
    // begin on an ASCII byte, and every skip below stops on an ASCII byte or
    // at end of input.
    let mut copied = 0;
    let mut i = 0;
    while i < bytes.len() {
        // Authorization: <rest-of-line>
        // Always allowed to match at i (the header name itself is unambiguous
        // when followed by a `:`). Consume up to the next \n / \r so text on
        // subsequent lines is not redacted.
        if let Some(name_len) = match_authorization_prefix(&bytes[i..]) {
            out.push_str(&input[copied..i]);
            out.push_str("Authorization: <redacted>");
            i += name_len;
            i += run_length(&bytes[i..], |b| b != b'\n' && b != b'\r');
            copied = i;
            continue;
        }
        // Bearer <token>
        // Require the preceding byte (if any) to be a token-boundary
        // character so the keyword is not matched inside a longer word.
        let preceded_by_boundary = i == 0
            || matches!(
                bytes[i - 1],
                b' ' | b'\t' | b':' | b',' | b';' | b'(' | b'"' | b'\'' | b'<' | b'\n' | b'\r'
            );
        if preceded_by_boundary {
            if let Some(kw_len) = match_bearer_prefix(&bytes[i..]) {
                out.push_str(&input[copied..i]);
                out.push_str("Bearer <redacted>");
                i += kw_len;
                // HTTP allows more than one space between scheme and token;
                // skip the extra padding so the token itself is what gets
                // dropped next.
                i += run_length(&bytes[i..], |b| b == b' ' || b == b'\t');
                // Skip the token value: a run of non-whitespace bytes.
                i += run_length(&bytes[i..], |b| !b.is_ascii_whitespace());
                copied = i;
                continue;
            }
        }
        i += 1;
    }
    out.push_str(&input[copied..]);
    out
}

/// Length of the leading run of `bytes` for which `keep_going` holds.
fn run_length(bytes: &[u8], keep_going: impl Fn(u8) -> bool) -> usize {
    bytes
        .iter()
        .position(|&b| !keep_going(b))
        .unwrap_or(bytes.len())
}

/// Returns Some(keyword.len()) if `bytes` starts with `keyword`, compared
/// ASCII-case-insensitively; None otherwise (including when `bytes` is
/// shorter than `keyword`).
fn match_keyword_prefix(bytes: &[u8], keyword: &[u8]) -> Option<usize> {
    let head = bytes.get(..keyword.len())?;
    if head.eq_ignore_ascii_case(keyword) {
        Some(keyword.len())
    } else {
        None
    }
}

/// Returns Some(prefix_len) if `bytes` starts with case-insensitive
/// "Bearer " (the trailing space is required so we don't match "Bearertown").
fn match_bearer_prefix(bytes: &[u8]) -> Option<usize> {
    match_keyword_prefix(bytes, b"Bearer ")
}

/// Returns Some(prefix_len) if `bytes` starts with case-insensitive
/// "Authorization:" (the trailing colon is required to disambiguate from
/// prose mentioning the word "authorization").
fn match_authorization_prefix(bytes: &[u8]) -> Option<usize> {
    match_keyword_prefix(bytes, b"Authorization:")
}

// Unit tests for the redaction helpers in this module: the env-based
// `string` / `is_secret` pair, `redact_url_credentials`, and
// `redact_bearer_tokens`.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- string(): env-based redaction ----

    #[test]
    fn test_redact_by_key_suffix() {
        // `DOCKER_PASSWORD` ends in `_PASSWORD`, so its value is replaced;
        // `PLAIN_VAR` matches no heuristic and is ignored.
        let env = vec![
            (
                "DOCKER_PASSWORD".to_string(),
                "mysecretpassword123".to_string(),
            ),
            ("PLAIN_VAR".to_string(), "not-a-secret".to_string()),
        ];
        let result = string("Login with mysecretpassword123 succeeded", &env);
        assert_eq!(result, "Login with $DOCKER_PASSWORD succeeded");
        assert!(!result.contains("mysecretpassword123"));
    }

    #[test]
    fn test_redact_by_value_prefix() {
        // Note: this entry qualifies on both heuristics — the key ends in
        // `_TOKEN` and the value starts with `ghp_`.
        let env = vec![("MY_TOKEN".to_string(), "ghp_abc123def456ghi789".to_string())];
        let result = string("Using token ghp_abc123def456ghi789", &env);
        assert_eq!(result, "Using token $MY_TOKEN");
    }

    #[test]
    fn test_redact_includes_short_secret_when_key_looks_secret() {
        // Mirrors upstream rename in `internal/redact/redact_test.go` after
        // the length-floor was removed: a 5-char value under a `*_KEY` key
        // must still be redacted.
        let env = vec![("API_KEY".to_string(), "short".to_string())];
        let result = string("Value is short", &env);
        assert_eq!(result, "Value is $API_KEY");
    }

    #[test]
    fn test_redact_skips_empty_value() {
        // The empty string is the only excluded value: an unset env var
        // would otherwise replace every empty substring in the input,
        // turning "abc" into "$API_KEY a$API_KEY b$API_KEY c$API_KEY".
        let env = vec![("API_KEY".to_string(), String::new())];
        let result = string("Value is short", &env);
        assert_eq!(result, "Value is short");
    }

    #[test]
    fn test_redact_longer_values_first() {
        // The short value is a prefix of the long one; replacing it first
        // would leave a partial `$SHORT_TOKENklmnop` behind.
        let env = vec![
            ("SHORT_TOKEN".to_string(), "abcdefghij".to_string()),
            ("LONG_TOKEN".to_string(), "abcdefghijklmnop".to_string()),
        ];
        let result = string("secret: abcdefghijklmnop", &env);
        // Longer match should be replaced first
        assert_eq!(result, "secret: $LONG_TOKEN");
    }

    #[test]
    fn test_redact_no_secrets() {
        // `PATH` matches neither a key suffix nor a value prefix.
        let env = vec![("PATH".to_string(), "/usr/bin:/usr/local/bin".to_string())];
        let result = string("PATH is set", &env);
        assert_eq!(result, "PATH is set");
    }

    #[test]
    fn test_redact_multiple_occurrences() {
        // Every occurrence of the value is replaced, not just the first.
        let env = vec![(
            "REGISTRY_PASSWORD".to_string(),
            "supersecret123".to_string(),
        )];
        let result = string("auth supersecret123 retry supersecret123", &env);
        assert_eq!(result, "auth $REGISTRY_PASSWORD retry $REGISTRY_PASSWORD");
    }

    // ---- is_secret(): classification heuristics ----

    #[test]
    fn test_is_secret_key_suffixes() {
        assert!(is_secret("DOCKER_PASSWORD", "longvalue1234"));
        assert!(is_secret("API_TOKEN", "longvalue1234"));
        assert!(is_secret("signing_key", "longvalue1234")); // case insensitive
        assert!(is_secret("MY_SECRET", "longvalue1234"));
        assert!(!is_secret("MY_CONFIG", "longvalue1234"));
    }

    #[test]
    fn test_is_secret_value_prefixes() {
        // The key `ANYTHING` matches no suffix, so only the value prefix
        // decides the outcome here.
        assert!(is_secret("ANYTHING", "ghp_1234567890"));
        assert!(is_secret("ANYTHING", "sk-1234567890"));
        assert!(is_secret("ANYTHING", "dckr_pat_1234567890"));
        assert!(is_secret("ANYTHING", "glpat-1234567890"));
        assert!(!is_secret("ANYTHING", "regular_value1234"));
    }

    #[test]
    fn test_redact_sort_stability_same_length() {
        // When two secrets have the same value length, sort by key name
        // for deterministic output regardless of HashMap iteration order.
        let env = vec![
            ("B_SECRET".to_string(), "same_length_val".to_string()),
            ("A_SECRET".to_string(), "same_length_val".to_string()),
        ];
        // Both keys map to the same value, so whichever sorts first by
        // key name should win — A_SECRET comes before B_SECRET.
        let result = string("found same_length_val here", &env);
        assert_eq!(result, "found $A_SECRET here");
    }

    #[test]
    fn test_redact_deterministic_with_different_lengths() {
        // Longer values still replaced first, secondary sort by key is tiebreaker
        let env = vec![
            ("Z_TOKEN".to_string(), "short_secret_val".to_string()),
            (
                "A_TOKEN".to_string(),
                "a_longer_secret_value_here".to_string(),
            ),
        ];
        let result = string("prefix a_longer_secret_value_here suffix", &env);
        assert_eq!(result, "prefix $A_TOKEN suffix");
    }

    // ---- redact_url_credentials(): userinfo stripping ----

    #[test]
    fn test_redact_url_credentials_https_with_token() {
        let input = "remote: https://ghp_abc123def@github.com/owner/repo.git";
        let result = redact_url_credentials(input);
        assert_eq!(
            result,
            "remote: https://<redacted>@github.com/owner/repo.git"
        );
        assert!(!result.contains("ghp_abc123def"));
    }

    #[test]
    fn test_redact_url_credentials_user_pass_pair() {
        // The password itself contains a literal `@`; the LAST `@` in the
        // authority is the host separator, so the whole pair is removed.
        let input = "pushing to https://user:p@ssw0rd@gitlab.example.com/foo/bar";
        let result = redact_url_credentials(input);
        assert_eq!(
            result, "pushing to https://<redacted>@gitlab.example.com/foo/bar",
            "userinfo must cover the entire user:pass segment up to the host-@"
        );
    }

    #[test]
    fn test_redact_url_credentials_no_userinfo_unchanged() {
        let input = "fetching https://github.com/owner/repo.git";
        assert_eq!(redact_url_credentials(input), input);
    }

    #[test]
    fn test_redact_url_credentials_ssh_unchanged() {
        // SSH-style `git@github.com:owner/repo.git` has no `://`, so the
        // helper leaves it alone. The `git@` is part of the SSH user, not
        // an embedded credential. Mirrors `git::remote::strip_url_credentials`.
        let input = "fetching git@github.com:owner/repo.git";
        assert_eq!(redact_url_credentials(input), input);
    }

    #[test]
    fn test_redact_url_credentials_multiple_urls_in_one_line() {
        let input = "from https://token1@a.com/x to https://token2@b.com/y";
        let result = redact_url_credentials(input);
        assert_eq!(
            result, "from https://<redacted>@a.com/x to https://<redacted>@b.com/y",
            "both URLs must be redacted, leaving the connecting prose intact"
        );
    }

    #[test]
    fn test_redact_url_credentials_does_not_consume_path_at_sign() {
        // `@` in a path segment (after the first `/`) must NOT be treated
        // as a userinfo terminator.
        let input = "GET https://api.example.com/users/foo@bar.com/profile";
        assert_eq!(
            redact_url_credentials(input),
            input,
            "an `@` after the first `/` is part of the path, not userinfo"
        );
    }

    #[test]
    fn test_redact_url_credentials_empty_input() {
        assert_eq!(redact_url_credentials(""), "");
    }

    #[test]
    fn test_redact_url_credentials_plain_text() {
        let input = "no URLs here, just words";
        assert_eq!(redact_url_credentials(input), input);
    }

    #[test]
    fn test_redact_url_credentials_percent_encoded_userinfo() {
        // A percent-encoded `@` (`%40`) in the userinfo (e.g. an account
        // name like `user@name`) is not a literal `@`, so the authority
        // contains exactly one `@` — the host separator. Everything before
        // it, including the `%40` escape, collapses into a single
        // `<redacted>` replacement.
        let input = "https://user%40name:pass@host.example.com/path";
        let result = redact_url_credentials(input);
        assert_eq!(result, "https://<redacted>@host.example.com/path");
        assert!(!result.contains("user%40name"));
        assert!(!result.contains("pass"));
    }

    #[test]
    fn test_redact_url_credentials_trailing_query() {
        // A `?` after the host begins the query string; userinfo must still
        // be stripped, and the query is preserved verbatim.
        let input = "https://user:pass@host.example.com?foo=bar";
        let result = redact_url_credentials(input);
        assert_eq!(result, "https://<redacted>@host.example.com?foo=bar");
        assert!(!result.contains("user:pass"));
        assert!(result.ends_with("?foo=bar"));
    }

    #[test]
    fn test_redact_url_credentials_trailing_fragment() {
        // A `#` after the host begins the fragment; userinfo must still
        // be stripped, and the fragment is preserved verbatim.
        let input = "https://user:pass@host.example.com#frag";
        let result = redact_url_credentials(input);
        assert_eq!(result, "https://<redacted>@host.example.com#frag");
        assert!(!result.contains("user:pass"));
        assert!(result.ends_with("#frag"));
    }

    #[test]
    fn test_redact_url_credentials_whitespace_boundary() {
        // Whitespace following the host terminates the authority. The
        // userinfo is redacted and the trailing prose is preserved.
        let input = "https://user:pass@host.example.com then more";
        let result = redact_url_credentials(input);
        assert_eq!(result, "https://<redacted>@host.example.com then more");
        assert!(!result.contains("user:pass"));
        assert!(result.ends_with(" then more"));
    }

    // ---- redact_bearer_tokens(): Bearer / Authorization scrubbing ----

    #[test]
    fn test_redact_bearer_tokens_basic() {
        let input = "auth header: Bearer ghp_abcdef123456 expires soon";
        let result = redact_bearer_tokens(input);
        assert_eq!(result, "auth header: Bearer <redacted> expires soon");
        assert!(!result.contains("ghp_abcdef123456"));
    }

    #[test]
    fn test_redact_bearer_tokens_case_insensitive() {
        // The keyword "Bearer" is case-insensitive but the canonical
        // output form is always "Bearer".
        let input = "bearer ghp_lowercase_token";
        assert_eq!(
            redact_bearer_tokens(input),
            "Bearer <redacted>",
            "lowercase 'bearer' must still redact"
        );
        let input = "BEARER ghp_uppercase_token";
        assert_eq!(redact_bearer_tokens(input), "Bearer <redacted>");
    }

    #[test]
    fn test_redact_bearer_tokens_authorization_header() {
        // "Authorization:" consumes through end-of-line, so the entire
        // header value is redacted as one unit. Trailing content after
        // a newline is preserved verbatim.
        let input = "request: Authorization: Bearer ghp_xyz\nresponse: 401";
        let result = redact_bearer_tokens(input);
        assert_eq!(
            result, "request: Authorization: <redacted>\nresponse: 401",
            "header value (including the inner Bearer token) must be redacted as one"
        );
        assert!(!result.contains("ghp_xyz"));
    }

    #[test]
    fn test_redact_bearer_tokens_authorization_header_single_line() {
        // No newline → the header value runs to end-of-input; that's fine,
        // the entire tail is redacted (defensive: better one over-redaction
        // than one leaked token).
        let input = "Authorization: Bearer ghp_xyz";
        let result = redact_bearer_tokens(input);
        assert_eq!(result, "Authorization: <redacted>");
        assert!(!result.contains("ghp_xyz"));
    }

    #[test]
    fn test_redact_bearer_tokens_no_match_unchanged() {
        // No "Bearer " / "Authorization:" tokens → string unchanged.
        // Note: we cannot distinguish prose use of "bearer" from a real
        // header; the redactor errs on the side of over-redaction (it
        // would treat "bearer of bad news" as "Bearer <redacted> bad
        // news"). Both branches are still safer than leaking a token.
        let input = "some random text with no relevant tokens here";
        assert_eq!(redact_bearer_tokens(input), input);
    }

    #[test]
    fn test_redact_bearer_tokens_over_redacts_prose_use() {
        // Documents the known over-redaction behavior: "bearer of bad
        // news" looks like a Bearer-token construct because the redactor
        // can't tell prose from a header. The trade-off is intentional —
        // safer to over-redact a prose word than to leak a real token.
        let input = "the bearer of bad news arrived";
        let result = redact_bearer_tokens(input);
        assert_eq!(result, "the Bearer <redacted> bad news arrived");
    }

    #[test]
    fn test_redact_bearer_tokens_empty_input() {
        assert_eq!(redact_bearer_tokens(""), "");
    }

    #[test]
    fn test_redact_bearer_tokens_handles_multiple_occurrences() {
        // Scanning resumes after each redacted token, so later matches on
        // the same line are still found.
        let input = "first Bearer ghp_aaa and second Bearer ghp_bbb done";
        let result = redact_bearer_tokens(input);
        assert_eq!(
            result,
            "first Bearer <redacted> and second Bearer <redacted> done"
        );
        assert!(!result.contains("ghp_aaa"));
        assert!(!result.contains("ghp_bbb"));
    }
}