rivet/redact.rs
1//! **Layer: Cross-cutting helper** (credential redaction invariant, v0.7.2 P0.3)
2//!
3//! Single chokepoint for stripping plaintext credential material out of
4//! strings that are about to land in operator-visible artifacts: logs,
5//! `summary.json` / `summary.md`, the run journal, Slack/webhook payloads,
6//! and hard-failure error messages bubbling out of any subcommand.
7//!
8//! The invariant this module backs:
9//!
10//! > A credential that the operator passed through `password`,
11//! > `*_env`, `*_file`, `credentials_file`, or as an embedded
12//! > `user:password@host` URL MUST NOT round-trip into any persisted or
13//! > emitted artifact. When in doubt, redact.
14//!
15//! Scope:
//! - **Embedded-URL passwords**: `scheme://user:password@host…` →
//!   `scheme://REDACTED@host…`. This is the only pattern Rivet
//!   round-trips through driver/error context, so it is the single
//!   high-value rewrite. Future patches that cover additional
//!   patterns should extend this module.
20//! - **Known token-shape secrets** (AWS access keys etc.) are *not*
21//! matched on shape today — they shouldn't be in stringified error
22//! context unless the operator passed `--source 'aws_access_key_id=AKIA…'`
23//! by mistake. If a leak vector is discovered, add it here, write a
24//! regression test, and roll a patch release.
25//!
//! What this module does NOT guarantee (documented in `SECURITY.md`):
27//! - Third-party driver/library output that bypasses our error wrappers.
28//! - In-memory secrets — `Zeroizing<String>` is used at the source-config
29//! boundary, but anything copied into a `String` along the way may
30//! linger in process memory until allocator reuse.
31//! - Secrets the operator captured *outside* Rivet (shell history, env
32//! var dumps, `ps` snapshots) — out of scope.
33
/// Replace `user:password@host` userinfo segments in any URL-like
/// substring of `s` with `REDACTED@host`.
///
/// A substring is rewritten when all of the following hold:
/// - it starts with a scheme matching `[A-Za-z][A-Za-z0-9+.\-]*`,
/// - the scheme is immediately followed by `://`,
/// - the authority that follows (everything up to the first `/`, `?`,
///   `#`, ASCII whitespace, or end of input) contains an `@`,
/// - the userinfo — everything before the *last* `@` of that authority —
///   contains a `:` (i.e. there is a password segment).
///
/// The whole userinfo (username included) is replaced by the literal
/// `REDACTED`; the `@`, host, port and everything after are copied
/// verbatim. The last `@` is used because a password may itself contain
/// `@` (`user:p@ssw0rd@host`); splitting at the first one would leak the
/// tail.
///
/// A bare `user@host` (no `:`) is preserved verbatim — there is no
/// password to redact, and stripping the username makes log lines
/// harder to triage.
///
/// Idempotent: once-redacted strings pass through unchanged, because
/// `REDACTED` contains no `:`.
///
/// Returns a newly allocated `String`; `s` is never modified.
pub fn redact_url_passwords(s: &str) -> String {
    // No regex crate for one pattern: a hand-rolled scan keeps the binary
    // small. Instead of probing every byte offset (which re-scans long
    // alphanumeric runs and goes quadratic on e.g. base64 blobs), jump
    // straight to each `://` and look backwards for its scheme. Untouched
    // text is copied as whole `&str` slices, so multi-byte UTF-8 glyphs
    // are never split or re-encoded.
    let bytes = s.as_bytes();
    let mut out = String::with_capacity(s.len());
    // `s[..copied]` has already been emitted (verbatim or rewritten).
    let mut copied = 0;
    // Offset at which the search for the next `://` resumes.
    let mut search = 0;

    while let Some(offset) = s[search..].find("://") {
        let sep = search + offset;
        // Default: continue after this separator. `://` cannot overlap
        // itself, so nothing is skipped.
        search = sep + 3;

        // Start of the maximal run of scheme bytes ending right before
        // `://`. Never reaches back into text that was already emitted.
        let run_start = bytes[copied..sep]
            .iter()
            .rposition(|&b| !is_scheme_byte(b))
            .map_or(copied, |p| copied + p + 1);

        // A scheme must begin with a letter; the leftmost letter in the
        // run gives the longest valid scheme (`1pg://` → scheme `pg`).
        let Some(scheme_start) = (run_start..sep).find(|&p| bytes[p].is_ascii_alphabetic())
        else {
            continue;
        };
        let Some((rewritten, resume)) = try_redact_at(bytes, scheme_start) else {
            continue;
        };

        out.push_str(&s[copied..scheme_start]);
        out.push_str(&rewritten);
        // `resume` is the index of the terminating `@` (ASCII, hence a
        // char boundary); it and everything after it is copied later.
        copied = resume;
        search = resume;
    }

    out.push_str(&s[copied..]);
    out
}

/// Whether `b` may appear in a URL scheme after its first letter:
/// ASCII alphanumerics plus `+`, `.` and `-`.
fn is_scheme_byte(b: u8) -> bool {
    b.is_ascii_alphanumeric() || matches!(b, b'+' | b'.' | b'-')
}

/// If `bytes[i..]` starts a `scheme://userinfo@` pattern with a `:` in
/// the userinfo (a password segment), return the rewritten prefix
/// (`scheme://REDACTED`) and the index of the terminating `@`, from which
/// the caller resumes copying. Otherwise return `None`.
fn try_redact_at(bytes: &[u8], i: usize) -> Option<(String, usize)> {
    // The scheme must start with an ASCII letter (also guards `i` being
    // out of bounds).
    if !bytes.get(i).is_some_and(|b| b.is_ascii_alphabetic()) {
        return None;
    }
    // End of the scheme: first byte after `i` that is not a scheme byte.
    let scheme_end = bytes[i + 1..]
        .iter()
        .position(|&b| !is_scheme_byte(b))
        .map_or(bytes.len(), |p| i + 1 + p);
    if !bytes[scheme_end..].starts_with(b"://") {
        return None;
    }
    let userinfo_start = scheme_end + 3;

    // Walk the authority up to the path/query/fragment/whitespace
    // terminator, remembering the LAST `@` crossed: a password may contain
    // `@`, so the userinfo ends at the last one, not the first.
    let mut last_at = None;
    for (offset, &b) in bytes[userinfo_start..].iter().enumerate() {
        if b == b'@' {
            last_at = Some(userinfo_start + offset);
        } else if matches!(b, b'/' | b'?' | b'#') || b.is_ascii_whitespace() {
            break;
        }
    }
    let at = last_at?;

    // The `:` must sit inside the userinfo (before the chosen `@`); a
    // `host:port` colon after it does not indicate a password.
    if !bytes[userinfo_start..at].contains(&b':') {
        return None;
    }

    // `scheme://` is pure ASCII, so this conversion cannot fail in
    // practice; `.ok()?` keeps the function panic-free regardless.
    let scheme_part = std::str::from_utf8(&bytes[i..userinfo_start]).ok()?;
    Some((format!("{scheme_part}REDACTED"), at))
}
141
142/// Compose every redactor. Use this at every boundary that turns a
143/// driver/library error (or any operator-untrusted string) into a
144/// persisted or emitted artifact.
145pub fn redact_secrets(s: &str) -> String {
146 redact_url_passwords(s)
147}
148
149/// Convenience: format an `anyhow::Error` with `{:#}` and redact the
150/// result in one call. Use at the boundary of every error-to-artifact
151/// path (`summary.error_message = ...`, `log::error!(... e ...)`).
152pub fn redact_error(e: &anyhow::Error) -> String {
153 redact_secrets(&format!("{e:#}"))
154}
155
156/// Render one log record into a redacted, operator-visible line.
157///
158/// The module scope names **logs** as a redaction target, but the `log::*`
159/// macros bypass the artifact-path redaction that is wired by hand at the
160/// error/summary call sites — a `log::warn!("…{e}", e)` whose `e` captured a
161/// `scheme://user:password@host` connect error would otherwise print the
162/// password to stderr. `main`'s `env_logger` formatter delegates here so the
163/// log **sink** itself is the chokepoint: every line, present and future,
164/// passes through [`redact_secrets`] with no reliance on each call site
165/// remembering to redact. Kept in this module (beside the other redactors) and
166/// log-crate-agnostic (`level` is a pre-rendered `&str`) so the wiring is
167/// unit-testable without capturing global stderr.
168pub fn redacted_log_line(timestamp: &str, level: &str, target: &str, message: &str) -> String {
169 redact_secrets(&format!("[{timestamp} {level} {target}] {message}"))
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `input` comes out of `redact_url_passwords` untouched.
    fn assert_passthrough(input: &str) {
        assert_eq!(redact_url_passwords(input), input);
    }

    // ── redact_url_passwords ───────────────────────────────────────────────

    #[test]
    fn rewrites_postgres_userinfo_with_password() {
        let redacted = redact_url_passwords(
            "connection failed to postgresql://alice:s3cret@db.prod:5432/orders: timeout",
        );
        assert!(
            !redacted.contains("s3cret"),
            "password must be stripped: {redacted}"
        );
        assert!(
            redacted.contains("postgresql://REDACTED@db.prod:5432/orders"),
            "expected REDACTED@host, got: {redacted}",
        );
    }

    #[test]
    fn rewrites_mysql_userinfo_with_password() {
        let redacted = redact_url_passwords("auth error: mysql://root:hunter2@10.0.0.5:3306/billing");
        assert!(!redacted.contains("hunter2"));
        assert!(redacted.contains("mysql://REDACTED@10.0.0.5"));
    }

    #[test]
    fn preserves_bare_user_at_host_without_password() {
        // With no password present there is nothing to strip, and keeping
        // the username helps triage. This pins the conservative behaviour.
        assert_passthrough("connection: postgresql://alice@db.prod:5432/orders");
    }

    #[test]
    fn idempotent_on_already_redacted_string() {
        assert_passthrough("postgresql://REDACTED@db.prod:5432/orders");
    }

    #[test]
    fn preserves_non_url_text_with_at_sign() {
        // An e-mail address is not a URL and must be left alone.
        assert_passthrough("user alice@example.com reported failure");
    }

    #[test]
    fn handles_multiple_urls_in_one_string() {
        let redacted =
            redact_url_passwords("primary postgresql://a:b@h1/d failed, retrying mysql://c:d@h2/d");
        for leaked in ["a:b@", "c:d@"] {
            assert!(!redacted.contains(leaked));
        }
        for expected in ["postgresql://REDACTED@h1/d", "mysql://REDACTED@h2/d"] {
            assert!(redacted.contains(expected));
        }
    }

    #[test]
    fn stops_at_whitespace_in_userinfo() {
        // Whitespace ends the authority, so prose such as
        // `://foo bar@…` must never be treated as userinfo.
        assert_passthrough("scheme://broken token@host");
    }

    #[test]
    fn preserves_strings_without_urls() {
        assert_passthrough("export 'orders' failed: relation does not exist");
    }

    // ── redact_error ───────────────────────────────────────────────────────

    #[test]
    fn redact_error_strips_password_from_anyhow_chain() {
        let err = anyhow::anyhow!("connect failed to postgresql://alice:s3cret@db.prod/orders");
        let redacted = redact_error(&err);
        assert!(!redacted.contains("s3cret"));
        assert!(redacted.contains("REDACTED@db.prod"));
    }

    // ── F-NEW-C: multi-byte UTF-8 must round-trip ─────────────────────────────

    #[test]
    fn preserves_em_dash_and_other_multibyte_glyphs() {
        // Regression guard for F-NEW-C: an earlier byte-by-byte copy loop
        // double-encoded every non-ASCII codepoint, turning the em-dash `—`
        // (UTF-8 e2 80 94) into `â\u{80}\u{94}` (c3 a2 c2 80 c2 94).
        let em_dash = "export 'orders': --resume refused — destination prefix has _SUCCESS";
        assert_eq!(
            redact_url_passwords(em_dash),
            em_dash,
            "non-URL text containing an em-dash must pass through unchanged"
        );

        let cyrillic = "сообщение об ошибке: cannot connect";
        assert_eq!(
            redact_url_passwords(cyrillic),
            cyrillic,
            "Cyrillic text must pass through unchanged"
        );

        // Multi-byte glyphs and a redactable URL in the same string: the
        // glyphs survive and the URL is still rewritten.
        let redacted = redact_url_passwords("ошибка — postgresql://u:p@host/db: dropped");
        assert!(redacted.contains("ошибка — postgresql://REDACTED@host/db"));
        assert!(!redacted.contains("u:p@"));
    }

    // ── SEC-RED: embedded `@` in password must not leak ───────────────────────

    #[test]
    fn sec_redact_url_password_with_at() {
        // Regression guard for SEC-RED V8: splitting the userinfo at the
        // FIRST `@` would treat only `p` as the password of `p@ssw0rd` and
        // let the tail `ssw0rd` through. The userinfo terminator must be
        // the LAST `@` before the path/query (rfind semantics).
        let with_at = redact_url_passwords(
            "connect failed to postgresql://rivet:p@ssw0rd@db.example.com:5432/orders",
        );
        // No fragment of the password may survive.
        assert!(
            !with_at.contains("ssw0rd"),
            "password tail after embedded @ must not leak: {with_at}"
        );
        assert!(
            !with_at.contains("p@ssw0rd"),
            "full password must not leak: {with_at}"
        );
        // Host and path are kept, behind `REDACTED@`.
        assert!(
            with_at.contains("postgresql://REDACTED@db.example.com:5432/orders"),
            "expected REDACTED@host with embedded-@ password stripped, got: {with_at}"
        );

        // Control: an ordinary password (no embedded `@`) still redacts the
        // same way, so this test pins the specific behaviour.
        let plain = redact_url_passwords(
            "connect failed to postgresql://rivet:s3cret@db.example.com:5432/orders",
        );
        assert!(
            !plain.contains("s3cret"),
            "normal password must still be redacted: {plain}"
        );
        assert!(
            plain.contains("postgresql://REDACTED@db.example.com:5432/orders"),
            "normal password redaction unchanged: {plain}"
        );
    }
}
325}