Skip to main content

rivet/
redact.rs

1//! **Layer: Cross-cutting helper** (credential redaction invariant, v0.7.2 P0.3)
2//!
3//! Single chokepoint for stripping plaintext credential material out of
4//! strings that are about to land in operator-visible artifacts: logs,
5//! `summary.json` / `summary.md`, the run journal, Slack/webhook payloads,
6//! and hard-failure error messages bubbling out of any subcommand.
7//!
8//! The invariant this module backs:
9//!
10//! > A credential that the operator passed through `password`,
11//! > `*_env`, `*_file`, `credentials_file`, or as an embedded
12//! > `user:password@host` URL MUST NOT round-trip into any persisted or
13//! > emitted artifact.  When in doubt, redact.
14//!
15//! Scope:
16//! - **Embedded-URL passwords**: `scheme://user:password@host…` →
17//!   `scheme://REDACTED@host…`.  This is the only pattern Rivet
18//!   round-trips through driver/error context, so it is the single
19//!   high-value rewrite.  Patches expand here.
20//! - **Known token-shape secrets** (AWS access keys etc.) are *not*
21//!   matched on shape today — they shouldn't be in stringified error
22//!   context unless the operator passed `--source 'aws_access_key_id=AKIA…'`
23//!   by mistake.  If a leak vector is discovered, add it here, write a
24//!   regression test, and roll a patch release.
25//!
26//! What this module does NOT guarantee (documented in [`SECURITY.md`]):
27//! - Third-party driver/library output that bypasses our error wrappers.
28//! - In-memory secrets — `Zeroizing<String>` is used at the source-config
29//!   boundary, but anything copied into a `String` along the way may
30//!   linger in process memory until allocator reuse.
31//! - Secrets the operator captured *outside* Rivet (shell history, env
32//!   var dumps, `ps` snapshots) — out of scope.
33
34/// Replace `user:password@host` userinfo segments in any URL-like
35/// substring with `REDACTED@host`.
36///
37/// Conservative match:
38/// - scheme is `[A-Za-z][A-Za-z0-9+.\-]*`
39/// - followed by `://`
40/// - then a userinfo run of non-whitespace, non-`/`, non-`?`, non-`#`
41///   characters containing `:` (i.e. `user:password`)
42/// - terminated by `@`
43///
44/// A bare `user@host` (no `:`) is preserved verbatim — there's no
45/// password to redact, and stripping the username makes log lines
46/// harder to triage.  Operators wanting full userinfo redaction can
47/// continue to rely on `SourceConfig::redact_for_artifact` for the
48/// structural path.
49///
50/// Idempotent: once-redacted strings pass through unchanged.
51pub fn redact_url_passwords(s: &str) -> String {
52    // Find `scheme://userinfo@` segments.  We don't pull in a regex
53    // crate just for this one pattern — a hand-rolled walk is faster
54    // and avoids a dep that grows the binary.
55    //
56    // F-NEW-C (0.7.5 audit): the previous version copied non-matching
57    // bytes one at a time via `out.push(bytes[i] as char)`, which
58    // re-interpreted each UTF-8 byte as a Unicode code point and
59    // re-encoded it.  Every multi-byte glyph (em-dash, Cyrillic, …)
60    // became double-encoded mojibake (`—` → `â\u{80}\u{94}`) in any
61    // error message that hit the redactor.  Correct fix: copy the
62    // next UTF-8 codepoint as a whole slice of `s`, not byte-by-byte.
63    let bytes = s.as_bytes();
64    let mut out = String::with_capacity(s.len());
65    let mut i = 0;
66    while i < bytes.len() {
67        if let Some((rewritten, advance)) = try_redact_at(bytes, i) {
68            out.push_str(&rewritten);
69            i = advance;
70            continue;
71        }
72        let b = bytes[i];
73        if b.is_ascii() {
74            out.push(b as char);
75            i += 1;
76        } else {
77            // Multi-byte UTF-8 codepoint starting at `i`; continuation
78            // bytes have the form 10xxxxxx.  Copy the whole codepoint
79            // verbatim from the source string.
80            let start = i;
81            i += 1;
82            while i < bytes.len() && (bytes[i] & 0xC0) == 0x80 {
83                i += 1;
84            }
85            out.push_str(&s[start..i]);
86        }
87    }
88    out
89}
90
/// Try to match a `scheme://userinfo@` pattern whose userinfo carries a
/// password, starting exactly at `bytes[i]`.
///
/// Returns `Some((rewritten, cursor))` on a match, where `rewritten` is
/// `scheme://REDACTED` and `cursor` is the index of the `@` that ends
/// the userinfo, so the caller resumes copying from `@host…`.  Returns
/// `None` when `bytes[i..]` does not start such a pattern.
///
/// Matching rules:
/// - the scheme is `[A-Za-z][A-Za-z0-9+.\-]*` and is followed by `://`;
/// - the authority runs up to the first `/`, `?`, `#`, ASCII whitespace
///   or the end of input;
/// - the userinfo is everything before the **last** `@` of that
///   authority, and must contain a `:` (a password segment).
///
/// Splitting at the last `@` matters: a password containing a raw `@`
/// (`scheme://alice:p@ss@host`) would otherwise be cut at the first
/// `@` and its tail (`ss`) would survive redaction.  The flip side is
/// that anything between the first and last `@` is treated as secret,
/// which follows the module's "when in doubt, redact" rule.
fn try_redact_at(bytes: &[u8], i: usize) -> Option<(String, usize)> {
    // Scheme: must start with an ASCII letter.
    if !bytes.get(i).is_some_and(|b| b.is_ascii_alphabetic()) {
        return None;
    }
    let scheme_tail_len = bytes[i + 1..]
        .iter()
        .take_while(|&&b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'.' | b'-'))
        .count();
    let sep = i + 1 + scheme_tail_len;

    // `://`
    if !bytes[sep..].starts_with(b"://") {
        return None;
    }
    let userinfo_start = sep + 3;

    // Authority: everything up to the first path/query/fragment
    // delimiter or whitespace.  An `@` beyond that point belongs to
    // some other part of the text, not to this URL's userinfo.
    let authority_len = bytes[userinfo_start..]
        .iter()
        .take_while(|&&b| !(matches!(b, b'/' | b'?' | b'#') || b.is_ascii_whitespace()))
        .count();
    let authority = &bytes[userinfo_start..userinfo_start + authority_len];

    // Userinfo ends at the LAST `@` of the authority; no `@` means no
    // userinfo at all.
    let at = authority.iter().rposition(|&b| b == b'@')?;

    // Only rewrite when a password segment is present.  A bare
    // `user@host` — and an already redacted `REDACTED@host` — has no
    // `:` before the `@` and is left untouched.
    if !authority[..at].contains(&b':') {
        return None;
    }

    // Keep `scheme://`, replace the whole userinfo with `REDACTED`.
    let scheme_part = std::str::from_utf8(&bytes[i..userinfo_start]).ok()?;
    Some((format!("{scheme_part}REDACTED"), userinfo_start + at))
}
137
138/// Compose every redactor.  Use this at every boundary that turns a
139/// driver/library error (or any operator-untrusted string) into a
140/// persisted or emitted artifact.
141pub fn redact_secrets(s: &str) -> String {
142    redact_url_passwords(s)
143}
144
145/// Convenience: format an `anyhow::Error` with `{:#}` and redact the
146/// result in one call.  Use at the boundary of every error-to-artifact
147/// path (`summary.error_message = ...`, `log::error!(... e ...)`).
148pub fn redact_error(e: &anyhow::Error) -> String {
149    redact_secrets(&format!("{e:#}"))
150}
151
152/// Render one log record into a redacted, operator-visible line.
153///
154/// The module scope names **logs** as a redaction target, but the `log::*`
155/// macros bypass the artifact-path redaction that is wired by hand at the
156/// error/summary call sites — a `log::warn!("…{e}", e)` whose `e` captured a
157/// `scheme://user:password@host` connect error would otherwise print the
158/// password to stderr. `main`'s `env_logger` formatter delegates here so the
159/// log **sink** itself is the chokepoint: every line, present and future,
160/// passes through [`redact_secrets`] with no reliance on each call site
161/// remembering to redact. Kept in this module (beside the other redactors) and
162/// log-crate-agnostic (`level` is a pre-rendered `&str`) so the wiring is
163/// unit-testable without capturing global stderr.
164pub fn redacted_log_line(timestamp: &str, level: &str, target: &str, message: &str) -> String {
165    redact_secrets(&format!("[{timestamp} {level} {target}] {message}"))
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    // ── redact_url_passwords ───────────────────────────────────────────────

    /// A PostgreSQL URL inside prose loses its userinfo but keeps host,
    /// port and path.
    #[test]
    fn rewrites_postgres_userinfo_with_password() {
        let redacted = redact_url_passwords(
            "connection failed to postgresql://alice:s3cret@db.prod:5432/orders: timeout",
        );
        let password_gone = !redacted.contains("s3cret");
        assert!(password_gone, "password must be stripped: {out}", out = redacted);
        let host_kept = redacted.contains("postgresql://REDACTED@db.prod:5432/orders");
        assert!(host_kept, "expected REDACTED@host, got: {out}", out = redacted);
    }

    /// Same rewrite for a MySQL URL pointing at an IP address.
    #[test]
    fn rewrites_mysql_userinfo_with_password() {
        let redacted = redact_url_passwords("auth error: mysql://root:hunter2@10.0.0.5:3306/billing");
        assert!(!redacted.contains("hunter2"));
        assert!(redacted.contains("mysql://REDACTED@10.0.0.5"));
    }

    /// `user@host` carries no password; rewriting it would only throw
    /// away triage signal, so the conservative behaviour is pinned.
    #[test]
    fn preserves_bare_user_at_host_without_password() {
        let input = "connection: postgresql://alice@db.prod:5432/orders";
        let redacted = redact_url_passwords(input);
        assert_eq!(redacted, input);
    }

    /// Redacting an already redacted string is a no-op.
    #[test]
    fn idempotent_on_already_redacted_string() {
        let input = "postgresql://REDACTED@db.prod:5432/orders";
        let redacted = redact_url_passwords(input);
        assert_eq!(redacted, input);
    }

    /// `email@example.com` is not a URL and must not be rewritten.
    #[test]
    fn preserves_non_url_text_with_at_sign() {
        let input = "user alice@example.com reported failure";
        let redacted = redact_url_passwords(input);
        assert_eq!(redacted, input);
    }

    /// Every credential URL in a string is rewritten, not just the first.
    #[test]
    fn handles_multiple_urls_in_one_string() {
        let redacted =
            redact_url_passwords("primary postgresql://a:b@h1/d failed, retrying mysql://c:d@h2/d");
        for leaked in ["a:b@", "c:d@"] {
            assert!(!redacted.contains(leaked));
        }
        for expected in ["postgresql://REDACTED@h1/d", "mysql://REDACTED@h2/d"] {
            assert!(redacted.contains(expected));
        }
    }

    /// Userinfo cannot contain whitespace — a defensive guard against
    /// matching stray `://foo bar@…` substrings inside prose.
    #[test]
    fn stops_at_whitespace_in_userinfo() {
        let input = "scheme://broken token@host";
        let redacted = redact_url_passwords(input);
        assert_eq!(redacted, input);
    }

    /// Text without any URL comes back untouched.
    #[test]
    fn preserves_strings_without_urls() {
        let input = "export 'orders' failed: relation does not exist";
        let redacted = redact_url_passwords(input);
        assert_eq!(redacted, input);
    }

    // ── redact_error ───────────────────────────────────────────────────────

    /// The rendered `anyhow` error is scrubbed like any other string.
    #[test]
    fn redact_error_strips_password_from_anyhow_chain() {
        let err = anyhow::anyhow!("connect failed to postgresql://alice:s3cret@db.prod/orders");
        let redacted = redact_error(&err);
        assert!(!redacted.contains("s3cret"));
        assert!(redacted.contains("REDACTED@db.prod"));
    }

    // ── F-NEW-C: multi-byte UTF-8 must round-trip ─────────────────────────────

    /// Before the F-NEW-C fix the byte-by-byte loop in
    /// `redact_url_passwords` double-encoded every non-ASCII codepoint:
    /// the em-dash `—` (UTF-8 e2 80 94) came out as `â\u{80}\u{94}`
    /// (c3 a2 c2 80 c2 94).  This test pins the round-trip so the
    /// regression cannot return silently in any error message.
    #[test]
    fn preserves_em_dash_and_other_multibyte_glyphs() {
        let with_em_dash = "export 'orders': --resume refused — destination prefix has _SUCCESS";
        assert_eq!(
            redact_url_passwords(with_em_dash),
            with_em_dash,
            "non-URL text containing an em-dash must pass through unchanged"
        );

        let cyrillic = "сообщение об ошибке: cannot connect";
        assert_eq!(
            redact_url_passwords(cyrillic),
            cyrillic,
            "Cyrillic text must pass through unchanged"
        );

        // Redaction must still work when multi-byte glyphs and a
        // redactable URL share one string.
        let redacted = redact_url_passwords("ошибка — postgresql://u:p@host/db: dropped");
        assert!(redacted.contains("ошибка — postgresql://REDACTED@host/db"));
        assert!(!redacted.contains("u:p@"));
    }
}