rivet/redact.rs
1//! **Layer: Cross-cutting helper** (credential redaction invariant, v0.7.2 P0.3)
2//!
3//! Single chokepoint for stripping plaintext credential material out of
4//! strings that are about to land in operator-visible artifacts: logs,
5//! `summary.json` / `summary.md`, the run journal, Slack/webhook payloads,
6//! and hard-failure error messages bubbling out of any subcommand.
7//!
8//! The invariant this module backs:
9//!
10//! > A credential that the operator passed through `password`,
11//! > `*_env`, `*_file`, `credentials_file`, or as an embedded
12//! > `user:password@host` URL MUST NOT round-trip into any persisted or
13//! > emitted artifact. When in doubt, redact.
14//!
15//! Scope:
16//! - **Embedded-URL passwords**: `scheme://user:password@host…` →
17//! `scheme://REDACTED@host…`. This is the only pattern Rivet
18//! round-trips through driver/error context, so it is the single
19//! high-value rewrite. Patches expand here.
20//! - **Known token-shape secrets** (AWS access keys etc.) are *not*
21//! matched on shape today — they shouldn't be in stringified error
22//! context unless the operator passed `--source 'aws_access_key_id=AKIA…'`
23//! by mistake. If a leak vector is discovered, add it here, write a
24//! regression test, and roll a patch release.
25//!
26//! What this module does NOT guarantee (documented in [`SECURITY.md`]):
27//! - Third-party driver/library output that bypasses our error wrappers.
28//! - In-memory secrets — `Zeroizing<String>` is used at the source-config
29//! boundary, but anything copied into a `String` along the way may
30//! linger in process memory until allocator reuse.
31//! - Secrets the operator captured *outside* Rivet (shell history, env
32//! var dumps, `ps` snapshots) — out of scope.
33
34/// Replace `user:password@host` userinfo segments in any URL-like
35/// substring with `REDACTED@host`.
36///
37/// Conservative match:
38/// - scheme is `[A-Za-z][A-Za-z0-9+.\-]*`
39/// - followed by `://`
40/// - then a userinfo run of non-whitespace, non-`/`, non-`?`, non-`#`
41/// characters containing `:` (i.e. `user:password`)
42/// - terminated by `@`
43///
44/// A bare `user@host` (no `:`) is preserved verbatim — there's no
45/// password to redact, and stripping the username makes log lines
46/// harder to triage. Operators wanting full userinfo redaction can
47/// continue to rely on `SourceConfig::redact_for_artifact` for the
48/// structural path.
49///
50/// Idempotent: once-redacted strings pass through unchanged.
51pub fn redact_url_passwords(s: &str) -> String {
52 // Find `scheme://userinfo@` segments. We don't pull in a regex
53 // crate just for this one pattern — a hand-rolled walk is faster
54 // and avoids a dep that grows the binary.
55 //
56 // F-NEW-C (0.7.5 audit): the previous version copied non-matching
57 // bytes one at a time via `out.push(bytes[i] as char)`, which
58 // re-interpreted each UTF-8 byte as a Unicode code point and
59 // re-encoded it. Every multi-byte glyph (em-dash, Cyrillic, …)
60 // became double-encoded mojibake (`—` → `â\u{80}\u{94}`) in any
61 // error message that hit the redactor. Correct fix: copy the
62 // next UTF-8 codepoint as a whole slice of `s`, not byte-by-byte.
63 let bytes = s.as_bytes();
64 let mut out = String::with_capacity(s.len());
65 let mut i = 0;
66 while i < bytes.len() {
67 if let Some((rewritten, advance)) = try_redact_at(bytes, i) {
68 out.push_str(&rewritten);
69 i = advance;
70 continue;
71 }
72 let b = bytes[i];
73 if b.is_ascii() {
74 out.push(b as char);
75 i += 1;
76 } else {
77 // Multi-byte UTF-8 codepoint starting at `i`; continuation
78 // bytes have the form 10xxxxxx. Copy the whole codepoint
79 // verbatim from the source string.
80 let start = i;
81 i += 1;
82 while i < bytes.len() && (bytes[i] & 0xC0) == 0x80 {
83 i += 1;
84 }
85 out.push_str(&s[start..i]);
86 }
87 }
88 out
89}
90
/// Try to match a `scheme://userinfo@` pattern starting exactly at
/// `bytes[i]`, where the userinfo contains a `:` (i.e. carries a
/// password segment).
///
/// On a match, returns `(rewritten_prefix, next)`:
/// - `rewritten_prefix` is `scheme://REDACTED` (the `@` is NOT included);
/// - `next` is the index of the `@` that terminates the userinfo, so the
///   caller resumes copying from the `@` onwards.
///
/// Returns `None` when `bytes[i]` does not start such a pattern: no
/// ASCII-letter scheme start, no `://`, no `:` ahead of an `@`, or a
/// `/`, `?`, `#` or ASCII whitespace is reached before any qualifying
/// `@`.
///
/// The userinfo is taken to end at the LAST `@` of the authority (the
/// run up to the first `/`, `?`, `#`, whitespace, or end of input), not
/// the first. A password containing an unescaped `@`
/// (`user:p@ss@host`) would otherwise leak its tail (`ss`) into the
/// output, because only the part before the first `@` would be
/// replaced. Only an `@` that has a `:` somewhere before it qualifies,
/// so a bare `user@host:port` is still left alone.
fn try_redact_at(bytes: &[u8], i: usize) -> Option<(String, usize)> {
    // Scheme: must start with an ASCII letter.
    if !bytes.get(i).is_some_and(|b| b.is_ascii_alphabetic()) {
        return None;
    }
    // Remaining scheme characters: `[A-Za-z0-9+.\-]*`.
    let scheme_len = bytes[i..]
        .iter()
        .position(|b| !(b.is_ascii_alphanumeric() || matches!(b, b'+' | b'.' | b'-')))
        .unwrap_or(bytes.len() - i);
    let scheme_end = i + scheme_len;

    // `://`
    if !bytes[scheme_end..].starts_with(b"://") {
        return None;
    }
    let userinfo_start = scheme_end + 3;

    // Walk the authority. Every `@` seen after a `:` is a candidate end
    // of the userinfo; the last candidate wins.
    let mut has_colon = false;
    let mut userinfo_end: Option<usize> = None;
    for (offset, &b) in bytes[userinfo_start..].iter().enumerate() {
        match b {
            // Path / query / fragment start: the authority is over.
            b'/' | b'?' | b'#' => break,
            // Userinfo cannot contain whitespace; stop matching in prose.
            _ if b.is_ascii_whitespace() => break,
            b':' => has_colon = true,
            b'@' if has_colon => userinfo_end = Some(userinfo_start + offset),
            _ => {}
        }
    }
    let userinfo_end = userinfo_end?;

    // Keep `scheme://` verbatim, replace the userinfo with `REDACTED`.
    let scheme_part = std::str::from_utf8(&bytes[i..userinfo_start]).ok()?;
    Some((format!("{scheme_part}REDACTED"), userinfo_end))
}
137
138/// Compose every redactor. Use this at every boundary that turns a
139/// driver/library error (or any operator-untrusted string) into a
140/// persisted or emitted artifact.
141pub fn redact_secrets(s: &str) -> String {
142 redact_url_passwords(s)
143}
144
145/// Convenience: format an `anyhow::Error` with `{:#}` and redact the
146/// result in one call. Use at the boundary of every error-to-artifact
147/// path (`summary.error_message = ...`, `log::error!(... e ...)`).
148pub fn redact_error(e: &anyhow::Error) -> String {
149 redact_secrets(&format!("{e:#}"))
150}
151
152/// Render one log record into a redacted, operator-visible line.
153///
154/// The module scope names **logs** as a redaction target, but the `log::*`
155/// macros bypass the artifact-path redaction that is wired by hand at the
156/// error/summary call sites — a `log::warn!("…{e}", e)` whose `e` captured a
157/// `scheme://user:password@host` connect error would otherwise print the
158/// password to stderr. `main`'s `env_logger` formatter delegates here so the
159/// log **sink** itself is the chokepoint: every line, present and future,
160/// passes through [`redact_secrets`] with no reliance on each call site
161/// remembering to redact. Kept in this module (beside the other redactors) and
162/// log-crate-agnostic (`level` is a pre-rendered `&str`) so the wiring is
163/// unit-testable without capturing global stderr.
164pub fn redacted_log_line(timestamp: &str, level: &str, target: &str, message: &str) -> String {
165 redact_secrets(&format!("[{timestamp} {level} {target}] {message}"))
166}
167
/// Unit tests for the redaction helpers. Each test pins one guarantee of
/// the credential-redaction invariant so a regression cannot land
/// silently.
#[cfg(test)]
mod tests {
    use super::*;

    // ── redact_url_passwords ───────────────────────────────────────────────

    #[test]
    fn rewrites_postgres_userinfo_with_password() {
        let s = "connection failed to postgresql://alice:s3cret@db.prod:5432/orders: timeout";
        let out = redact_url_passwords(s);
        assert!(!out.contains("s3cret"), "password must be stripped: {out}");
        assert!(
            out.contains("postgresql://REDACTED@db.prod:5432/orders"),
            "expected REDACTED@host, got: {out}",
        );
    }

    #[test]
    fn rewrites_mysql_userinfo_with_password() {
        let s = "auth error: mysql://root:hunter2@10.0.0.5:3306/billing";
        let out = redact_url_passwords(s);
        assert!(!out.contains("hunter2"));
        assert!(out.contains("mysql://REDACTED@10.0.0.5"));
    }

    #[test]
    fn preserves_bare_user_at_host_without_password() {
        // `user@host` has no password to strip; rewriting it would lose
        // useful triage signal. Pin the conservative behaviour.
        let s = "connection: postgresql://alice@db.prod:5432/orders";
        assert_eq!(redact_url_passwords(s), s);
    }

    #[test]
    fn idempotent_on_already_redacted_string() {
        let s = "postgresql://REDACTED@db.prod:5432/orders";
        assert_eq!(redact_url_passwords(s), s);
    }

    #[test]
    fn preserves_non_url_text_with_at_sign() {
        // `email@example.com` is not a URL — must not be rewritten.
        let s = "user alice@example.com reported failure";
        assert_eq!(redact_url_passwords(s), s);
    }

    #[test]
    fn handles_multiple_urls_in_one_string() {
        let s = "primary postgresql://a:b@h1/d failed, retrying mysql://c:d@h2/d";
        let out = redact_url_passwords(s);
        assert!(!out.contains("a:b@"));
        assert!(!out.contains("c:d@"));
        assert!(out.contains("postgresql://REDACTED@h1/d"));
        assert!(out.contains("mysql://REDACTED@h2/d"));
    }

    #[test]
    fn stops_at_whitespace_in_userinfo() {
        // Userinfo cannot contain whitespace — defensive guard against
        // matching wild `://foo bar@…` substrings inside prose.
        let s = "scheme://broken token@host";
        assert_eq!(redact_url_passwords(s), s);
    }

    #[test]
    fn preserves_strings_without_urls() {
        let s = "export 'orders' failed: relation does not exist";
        assert_eq!(redact_url_passwords(s), s);
    }

    #[test]
    fn handles_empty_input() {
        assert_eq!(redact_url_passwords(""), "");
    }

    // ── redact_secrets ─────────────────────────────────────────────────────

    #[test]
    fn redact_secrets_applies_url_password_redaction() {
        let s = "auth error: mysql://root:hunter2@10.0.0.5:3306/billing";
        let out = redact_secrets(s);
        assert!(!out.contains("hunter2"));
        assert_eq!(out, "auth error: mysql://REDACTED@10.0.0.5:3306/billing");
    }

    #[test]
    fn redact_secrets_leaves_clean_text_unchanged() {
        let s = "export 'orders' failed: relation does not exist";
        assert_eq!(redact_secrets(s), s);
    }

    // ── redact_error ───────────────────────────────────────────────────────

    #[test]
    fn redact_error_strips_password_from_anyhow_chain() {
        let e = anyhow::anyhow!("connect failed to postgresql://alice:s3cret@db.prod/orders");
        let out = redact_error(&e);
        assert!(!out.contains("s3cret"));
        assert!(out.contains("REDACTED@db.prod"));
    }

    #[test]
    fn redact_error_strips_password_from_inner_cause() {
        // `{:#}` renders the whole context chain on one line, so a
        // secret buried in the root cause must be redacted as well.
        let e = anyhow::anyhow!("connect failed to postgresql://alice:s3cret@db.prod/orders")
            .context("export 'orders' failed");
        let out = redact_error(&e);
        assert!(out.starts_with("export 'orders' failed"), "got: {out}");
        assert!(!out.contains("s3cret"), "password must be stripped: {out}");
        assert!(out.contains("postgresql://REDACTED@db.prod/orders"));
    }

    // ── redacted_log_line ──────────────────────────────────────────────────

    #[test]
    fn redacted_log_line_strips_password_from_message() {
        let out = redacted_log_line(
            "2024-01-01T00:00:00Z",
            "WARN",
            "rivet::export",
            "connect failed: postgresql://alice:s3cret@db.prod/orders",
        );
        assert!(!out.contains("s3cret"), "password must be stripped: {out}");
        assert_eq!(
            out,
            "[2024-01-01T00:00:00Z WARN rivet::export] connect failed: postgresql://REDACTED@db.prod/orders",
        );
    }

    #[test]
    fn redacted_log_line_keeps_clean_record_verbatim() {
        let out = redacted_log_line(
            "2024-01-01T00:00:00Z",
            "INFO",
            "rivet::export",
            "export 'orders' finished",
        );
        assert_eq!(
            out,
            "[2024-01-01T00:00:00Z INFO rivet::export] export 'orders' finished",
        );
    }

    // ── F-NEW-C: multi-byte UTF-8 must round-trip ─────────────────────────────

    #[test]
    fn preserves_em_dash_and_other_multibyte_glyphs() {
        // Before the F-NEW-C fix, the byte-by-byte loop in
        // `redact_url_passwords` double-encoded every non-ASCII codepoint:
        // the em-dash `—` (UTF-8 e2 80 94) came out as `â\u{80}\u{94}`
        // (c3 a2 c2 80 c2 94). This test pins the round-trip so the
        // regression cannot return silently in any error message.
        let s = "export 'orders': --resume refused — destination prefix has _SUCCESS";
        assert_eq!(
            redact_url_passwords(s),
            s,
            "non-URL text containing an em-dash must pass through unchanged"
        );

        let s2 = "сообщение об ошибке: cannot connect";
        assert_eq!(
            redact_url_passwords(s2),
            s2,
            "Cyrillic text must pass through unchanged"
        );

        // And it must still redact correctly when the string contains
        // BOTH multi-byte glyphs and a redactable URL.
        let s3 = "ошибка — postgresql://u:p@host/db: dropped";
        let out = redact_url_passwords(s3);
        assert!(out.contains("ошибка — postgresql://REDACTED@host/db"));
        assert!(!out.contains("u:p@"));
    }
}
278}