reddb_wire/sanitizer.rs
1//! Connection-string sanitizer + typed taint guard (issue #179, ADR 0010).
2//!
3//! The Whiz / Babeld disclosure (March 2026) is the canonical example
4//! of the failure mode this module exists to prevent: caller-supplied
5//! strings concatenated into a structured serialization format whose
6//! delimiter the caller controls let the caller smuggle a forged field
7//! past the producer and into the consumer's parser. The
8//! [serialization-boundary audit][audit] enumerated 15 instances of
9//! the pattern in this codebase. F-04 is the broadest: ~141
10//! `tracing::*!` call sites that interpolate user-supplied strings via
11//! `Display`, so a CR/LF in a connection-string-derived token, a
12//! tenant name, or a collection name forges a log line.
13//!
14//! [audit]: ../../docs/security/serialization-boundary-audit-2026-05-06.md
15//!
16//! This module ships three things:
17//!
18//! 1. [`Tainted<T>`] — a non-`Display` wrapper. The only way to
19//! project the inner string into a structured serialization
20//! boundary is [`Tainted::escape_for`], which forces the caller to
21//! name the [`Boundary`] and returns an [`EscapedFor`] under the
22//! boundary's escape contract.
23//! 2. [`ConnStringSanitizer`] — a deep module wrapping the existing
24//! [`crate::conn_string::parse`]. It returns a
25//! [`ParsedConnString`] whose host / cert-path / endpoint / query
26//! fields are exposed as `Tainted<String>` so downstream consumers
27//! cannot accidentally route a tainted byte through `Display`.
//! 3. [`audit_safe_log_field`] — a thin `Display` adapter that
//!    percent-encodes CR/LF/NUL and the other control bytes in a `&str`
//!    for log emission. The
30//! structured fix is [`Tainted::escape_for(Boundary::LogField)`];
31//! the helper exists because the codebase has hundreds of call
32//! sites where a full type-system migration is mechanical work
33//! that CI lint #180 tracks separately, and the helper unblocks
34//! incremental migration without expanding the attack surface.
35
36use std::fmt;
37
38use crate::conn_string::{
39 parse as parse_conn_string, ConnectionTarget, ParseError as ConnParseError,
40};
41
42// ---------------------------------------------------------------------------
43// Boundary + escape error
44// ---------------------------------------------------------------------------
45
/// Serialization boundaries that [`Tainted::escape_for`] can target.
///
/// Every variant pins down one escape contract. The contract itself
/// lives in [`Tainted::escape_for`] and is exercised by this crate's
/// test suite, so a new variant means extending both.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Boundary {
    /// HTTP/1.1 + HTTP/2 header value. CR, LF, NUL, and tab are
    /// removed; everything else is passed through. This is the
    /// producer-side guard only — the downstream header constructor
    /// remains the authoritative gate for the remaining bytes.
    HttpHeader,
    /// gRPC metadata value. gRPC metadata rides on HTTP/2 headers, so
    /// it shares the [`Boundary::HttpHeader`] contract.
    GrpcMetadata,
    /// Plain-text log line emitted via `tracing` or another
    /// line-oriented formatter. Control bytes (0x00-0x1F and 0x7F,
    /// which includes CR / LF / NUL) are percent-encoded rather than
    /// removed, so a smuggled `\nlevel=ERROR` shows up as
    /// `%0Alevel=ERROR` — visibly tampered, never authoritative.
    LogField,
    /// Structured audit field. Pass-through: the value is returned
    /// unchanged and only tagged with the boundary. The module notes
    /// assign the on-disk encoding to the `AuditFieldEscaper` (#177,
    /// slice AC).
    AuditField,
    /// JSON value. Pass-through: the value is returned unchanged and
    /// only tagged with the boundary. The module notes assign the
    /// escaping to the `SerializedJsonField` (#178, slice AB), which
    /// goes through `serde_json`.
    JsonValue,
}

impl Boundary {
    /// Returns the variant's name as a static string; used when
    /// rendering [`EscapeError`] messages.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::HttpHeader => "HttpHeader",
            Self::GrpcMetadata => "GrpcMetadata",
            Self::LogField => "LogField",
            Self::AuditField => "AuditField",
            Self::JsonValue => "JsonValue",
        }
    }
}
96
97/// Stable error code returned by [`Tainted::escape_for`].
98///
99/// The escape paths in this module are total — they always produce a
100/// safe value — so today the only failure mode is "input was so long
101/// that escaping would exceed [`Tainted::MAX_ESCAPED_LEN`]". Future
102/// boundaries (e.g. an MTLS SAN slot with a 256-byte cap) get their
103/// own variants here without breaking existing callers.
104#[derive(Debug, Clone, PartialEq, Eq)]
105pub enum EscapeError {
106 /// Escaping the input would produce a value longer than
107 /// [`Tainted::MAX_ESCAPED_LEN`].
108 TooLong { boundary: Boundary, bytes: usize },
109}
110
111impl fmt::Display for EscapeError {
112 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113 match self {
114 EscapeError::TooLong { boundary, bytes } => write!(
115 f,
116 "escape_for({}) would emit {} bytes (limit {})",
117 boundary.as_str(),
118 bytes,
119 Tainted::<String>::MAX_ESCAPED_LEN,
120 ),
121 }
122 }
123}
124
125impl std::error::Error for EscapeError {}
126
127// ---------------------------------------------------------------------------
128// EscapedFor — boundary-tagged result of `Tainted::escape_for`
129// ---------------------------------------------------------------------------
130
131/// Output of [`Tainted::escape_for`]. Carries the boundary it was
132/// escaped for so a header setter can statically refuse a value that
133/// was escaped for a log line, and vice versa.
134#[derive(Debug, Clone, PartialEq, Eq)]
135pub struct EscapedFor {
136 boundary: Boundary,
137 value: String,
138}
139
140impl EscapedFor {
141 pub fn boundary(&self) -> Boundary {
142 self.boundary
143 }
144 pub fn as_str(&self) -> &str {
145 &self.value
146 }
147 pub fn into_string(self) -> String {
148 self.value
149 }
150}
151
152impl fmt::Display for EscapedFor {
153 /// `EscapedFor` is `Display`-able by design — once the value has
154 /// crossed the [`Tainted::escape_for`] gate the boundary's escape
155 /// contract has been applied and the bytes are safe to render
156 /// against that boundary's parser.
157 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158 f.write_str(&self.value)
159 }
160}
161
162// ---------------------------------------------------------------------------
163// Tainted<T> — non-`Display` wrapper around caller-supplied bytes
164// ---------------------------------------------------------------------------
165
/// Caller-controlled value that has not yet crossed a serialization
/// boundary. It intentionally has **no** [`fmt::Display`] impl, so it
/// cannot be interpolated with `{}` / `%value`; the sanctioned way to
/// render a `Tainted<String>` is [`Tainted::escape_for`].
///
/// [`fmt::Debug`] is implemented and delegates to the inner value's
/// `Debug`, which for strings quotes the text and escapes control
/// characters.
///
/// The inner field is `pub(crate)`, so code outside this crate cannot
/// reach it directly. Unwrapping from outside goes through the loudly
/// named [`Tainted::expose_secret`] or [`Tainted::into_inner`], which
/// are meant to stand out in review.
#[derive(Clone, PartialEq, Eq)]
pub struct Tainted<T>(pub(crate) T);

impl<T> Tainted<T> {
    /// Wrap a caller-supplied value. Call sites of this constructor
    /// are where taint tracking begins and should be easy to audit.
    pub fn new(value: T) -> Self {
        Tainted(value)
    }

    /// Borrow the raw inner value. Deliberately conspicuous name so a
    /// grep / review / lint can flag it; prefer
    /// [`Tainted::escape_for`].
    pub fn expose_secret(&self) -> &T {
        let Tainted(inner) = self;
        inner
    }

    /// Consuming counterpart of [`Tainted::expose_secret`].
    pub fn into_inner(self) -> T {
        let Tainted(inner) = self;
        inner
    }
}

impl<T> fmt::Debug for Tainted<T>
where
    T: fmt::Debug,
{
    /// Renders as `Tainted(<inner Debug>)`, keeping the inner value's
    /// `Debug` quoting and escaping.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut tuple = f.debug_tuple("Tainted");
        tuple.field(&self.0);
        tuple.finish()
    }
}
213
214impl Tainted<String> {
215 /// Hard cap on the size of any escaped boundary projection.
216 /// Mirrors the `max_uri_bytes` default in [`crate::conn_string`]
217 /// (8 KiB) so a tainted value that fits the parser also fits the
218 /// boundary projection.
219 pub const MAX_ESCAPED_LEN: usize = 8 * 1024;
220
221 /// Project the tainted value into the named [`Boundary`]'s
222 /// escape contract. Returns [`EscapedFor`] tagged with the
223 /// boundary, so a header setter can statically refuse a value
224 /// that was escaped for a log line.
225 pub fn escape_for(&self, boundary: Boundary) -> Result<EscapedFor, EscapeError> {
226 let escaped = match boundary {
227 Boundary::HttpHeader | Boundary::GrpcMetadata => escape_http_header(&self.0),
228 Boundary::LogField => escape_log_field(&self.0),
229 // AuditField + JsonValue are pass-through: their downstream
230 // guard owns the encoder. Returning the inner string
231 // tagged with the boundary lets a typed setter (`fn
232 // set(field: EscapedFor)` whose `boundary` matches
233 // `AuditField`) consume it without a re-escape.
234 Boundary::AuditField | Boundary::JsonValue => self.0.clone(),
235 };
236 if escaped.len() > Self::MAX_ESCAPED_LEN {
237 return Err(EscapeError::TooLong {
238 boundary,
239 bytes: escaped.len(),
240 });
241 }
242 Ok(EscapedFor {
243 boundary,
244 value: escaped,
245 })
246 }
247}
248
249impl From<String> for Tainted<String> {
250 fn from(s: String) -> Self {
251 Tainted(s)
252 }
253}
254
255impl From<&str> for Tainted<String> {
256 fn from(s: &str) -> Self {
257 Tainted(s.to_string())
258 }
259}
260
261// ---------------------------------------------------------------------------
262// Boundary-specific escapers
263// ---------------------------------------------------------------------------
264
/// `HeaderValue` / gRPC metadata contract: drop the four header
/// terminators (CR, LF, NUL, tab) and pass every other character
/// through unchanged.
///
/// The filter runs over `char`s rather than raw bytes so multi-byte
/// UTF-8 sequences survive intact. Casting each byte to `char` would
/// reinterpret bytes 0x80-0xFF as U+0080-U+00FF and corrupt (and
/// inflate) non-ASCII input. All four stripped characters are
/// single-byte ASCII, so ASCII input is handled identically either way.
///
/// This is the producer-side guard only: other control characters are
/// not removed here, and the downstream header constructor remains the
/// authoritative gate.
fn escape_http_header(s: &str) -> String {
    // The output is never longer than the input, so one allocation suffices.
    let mut out = String::with_capacity(s.len());
    out.extend(
        s.chars()
            .filter(|&c| !matches!(c, '\r' | '\n' | '\0' | '\t')),
    );
    out
}
281
/// Log-field contract: percent-encode ASCII control characters
/// (0x00-0x1F and 0x7F, which covers CR / LF / NUL) as `%XX` with
/// upper-case hex digits. Every other character passes through
/// unchanged. Encoding rather than removing keeps evidence of
/// tampering visible in the captured log line.
///
/// The loop runs over `char`s rather than raw bytes so multi-byte
/// UTF-8 sequences are copied intact; casting each byte to `char`
/// would reinterpret bytes 0x80-0xFF as U+0080-U+00FF and corrupt
/// non-ASCII input.
fn escape_log_field(s: &str) -> String {
    // Lower bound only: each encoded control character grows by two bytes.
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        if c.is_ascii_control() {
            // An ASCII control character always fits in a single byte.
            let byte = c as u8;
            out.push('%');
            out.push(hex_nibble(byte >> 4));
            out.push(hex_nibble(byte & 0x0F));
        } else {
            out.push(c);
        }
    }
    out
}

/// Maps a nibble (0..=15) to its upper-case hexadecimal digit.
///
/// # Panics
///
/// Panics if `n` is greater than 15; callers in this module only pass
/// shifted or masked nibbles.
fn hex_nibble(n: u8) -> char {
    match n {
        0..=9 => char::from(b'0' + n),
        10..=15 => char::from(b'A' + (n - 10)),
        _ => unreachable!(),
    }
}
307
308// ---------------------------------------------------------------------------
309// audit_safe_log_field — incremental-migration helper for F-04
310// ---------------------------------------------------------------------------
311
/// `Display` adapter that percent-encodes ASCII control bytes
/// (0x00-0x1F and 0x7F, which covers CR / LF / NUL) of a borrowed
/// `&str` as `%XX` for log emission. All other text, including
/// non-ASCII UTF-8, is written through unchanged.
///
/// This is the F-04 incremental-migration helper. The structural fix
/// is [`Tainted<String>::escape_for(Boundary::LogField)`]; the helper
/// lets a contributor migrate a single
/// `tracing::info!(user = %username, ...)` call site to
/// `tracing::info!(user = %audit_safe_log_field(&username), ...)`
/// without first re-typing the upstream variable as `Tainted<String>`.
///
/// The output is intended to match `escape_for(Boundary::LogField)`
/// byte for byte. The two implementations are separate (this one
/// formats without allocating), so they have to be kept in sync by
/// hand; the test suite compares them.
pub fn audit_safe_log_field(value: &str) -> impl fmt::Display + '_ {
    AuditSafeLogField(value)
}

/// Borrowing wrapper whose `Display` impl performs the log-field
/// escaping lazily, at format time.
struct AuditSafeLogField<'a>(&'a str);

impl fmt::Display for AuditSafeLogField<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.0;
        // Start of the current run of bytes that need no escaping.
        let mut run_start = 0;
        for (idx, byte) in text.bytes().enumerate() {
            if byte.is_ascii_control() {
                // An ASCII byte is always a complete character, so both
                // `idx` and `idx + 1` are valid UTF-8 slice boundaries.
                f.write_str(&text[run_start..idx])?;
                write!(f, "%{:02X}", byte)?;
                run_start = idx + 1;
            }
        }
        // Flush the tail; multi-byte characters are emitted intact
        // instead of being replaced byte-by-byte.
        f.write_str(&text[run_start..])
    }
}
343
344// ---------------------------------------------------------------------------
345// ConnStringSanitizer + ParsedConnString
346// ---------------------------------------------------------------------------
347
348/// Parse a connection URI and surface its caller-controlled fields
349/// inside [`Tainted<String>`].
350///
351/// Wraps [`crate::conn_string::parse`]. The underlying
352/// [`ConnectionTarget`] is preserved verbatim — this is purely an
353/// additive layer that lets new code paths consume tainted values
354/// without forcing a breaking change on existing consumers
355/// ([`reddb_client::connect`], `red_client`, the driver crates).
356pub struct ConnStringSanitizer;
357
358impl ConnStringSanitizer {
359 /// Parse `uri` into a [`ParsedConnString`]. Same DoS guardrails
360 /// as [`crate::conn_string::parse`].
361 pub fn parse(uri: &str) -> Result<ParsedConnString, ConnParseError> {
362 let target = parse_conn_string(uri)?;
363 Ok(ParsedConnString { target })
364 }
365}
366
367/// Sanitized view of a parsed connection string.
368///
369/// Holds the raw [`ConnectionTarget`] (preserves backward
370/// compatibility with [`crate::conn_string::parse`]) plus typed
371/// accessors that hand caller-supplied fields out as
372/// [`Tainted<String>`]. Downstream consumers that re-emit the values
373/// (gRPC `Endpoint::from`, log lines, error messages, audit fields)
374/// route through [`Tainted::escape_for`] rather than the raw inner.
375#[derive(Debug, Clone, PartialEq, Eq)]
376pub struct ParsedConnString {
377 target: ConnectionTarget,
378}
379
380impl ParsedConnString {
381 /// Typed view of the parsed target. Each variant carries
382 /// [`Tainted<String>`] for every caller-influenced field.
383 pub fn target(&self) -> TaintedTarget<'_> {
384 match &self.target {
385 ConnectionTarget::Memory => TaintedTarget::Memory,
386 ConnectionTarget::File { path } => TaintedTarget::File { path },
387 ConnectionTarget::Grpc { endpoint } => TaintedTarget::Grpc {
388 endpoint: TaintedRef(endpoint),
389 },
390 ConnectionTarget::GrpcCluster {
391 primary,
392 replicas,
393 force_primary,
394 } => TaintedTarget::GrpcCluster {
395 primary: TaintedRef(primary),
396 replicas,
397 force_primary: *force_primary,
398 },
399 ConnectionTarget::Http { base_url } => TaintedTarget::Http {
400 base_url: TaintedRef(base_url),
401 },
402 ConnectionTarget::RedWire { host, port, tls } => TaintedTarget::RedWire {
403 host: TaintedRef(host),
404 port: *port,
405 tls: *tls,
406 },
407 }
408 }
409
410 /// Hand the underlying [`ConnectionTarget`] back. Backward-compat
411 /// hatch for callers that have not yet been migrated to consume
412 /// [`TaintedTarget`].
413 pub fn into_connection_target(self) -> ConnectionTarget {
414 self.target
415 }
416
417 /// Borrow the underlying [`ConnectionTarget`].
418 pub fn as_connection_target(&self) -> &ConnectionTarget {
419 &self.target
420 }
421}
422
423/// Borrowed-side analogue of [`Tainted<String>`]. Same escape API,
424/// no allocation when the caller only wants `expose_secret`.
425#[derive(Debug, Clone, Copy, PartialEq, Eq)]
426pub struct TaintedRef<'a>(&'a String);
427
428impl<'a> TaintedRef<'a> {
429 pub fn expose_secret(&self) -> &'a str {
430 self.0.as_str()
431 }
432 pub fn to_owned_tainted(&self) -> Tainted<String> {
433 Tainted(self.0.clone())
434 }
435 pub fn escape_for(&self, boundary: Boundary) -> Result<EscapedFor, EscapeError> {
436 Tainted(self.0.clone()).escape_for(boundary)
437 }
438}
439
440/// Typed view of [`ConnectionTarget`] with [`TaintedRef`] in place
441/// of the bare `String` fields.
442#[derive(Debug)]
443pub enum TaintedTarget<'a> {
444 Memory,
445 File {
446 path: &'a std::path::Path,
447 },
448 Grpc {
449 endpoint: TaintedRef<'a>,
450 },
451 GrpcCluster {
452 primary: TaintedRef<'a>,
453 replicas: &'a [String],
454 force_primary: bool,
455 },
456 Http {
457 base_url: TaintedRef<'a>,
458 },
459 RedWire {
460 host: TaintedRef<'a>,
461 port: u16,
462 tls: bool,
463 },
464}
465
466// ---------------------------------------------------------------------------
467// Tests
468// ---------------------------------------------------------------------------
469
#[cfg(test)]
mod tests {
    use super::*;

    /// Fails if `haystack` contains any of the `forbidden` characters.
    fn assert_free_of(haystack: &str, forbidden: &[char]) {
        for &c in forbidden {
            assert!(!haystack.contains(c));
        }
    }

    #[test]
    fn header_strip_crlf_nul_tab() {
        let escaped = Tainted::<String>::from("v1\r\nX-Forged: yes\0\there")
            .escape_for(Boundary::HttpHeader)
            .unwrap();
        assert_eq!(escaped.boundary(), Boundary::HttpHeader);
        assert_free_of(escaped.as_str(), &['\r', '\n', '\0', '\t']);
        assert_eq!(escaped.as_str(), "v1X-Forged: yeshere");
    }

    #[test]
    fn grpc_metadata_matches_http_header_contract() {
        let payload = "alice\r\nx-trace-id: forged";
        let tainted = Tainted::<String>::from(payload);
        let header = tainted.escape_for(Boundary::HttpHeader).unwrap();
        let metadata = tainted.escape_for(Boundary::GrpcMetadata).unwrap();
        assert_eq!(header.as_str(), metadata.as_str());
    }

    #[test]
    fn log_field_percent_encodes_control_bytes() {
        let escaped = Tainted::<String>::from(
            "alice\nlevel=ERROR\rcluster_breach=true\ttab\0nul\x07bel\x1bescape\x7fdel",
        )
        .escape_for(Boundary::LogField)
        .unwrap();
        let line = escaped.as_str();
        // Control bytes must be encoded, not dropped, so tampering
        // stays visible in the log line.
        assert_free_of(line, &['\n', '\r', '\0', '\t', '\x07', '\x1b', '\x7f']);
        for encoded in ["%0A", "%0D", "%00", "%09", "%07", "%1B", "%7F"] {
            assert!(line.contains(encoded));
        }
    }

    #[test]
    fn audit_and_json_pass_through() {
        // Audit + JSON are pass-through; only the boundary tag is
        // attached, so a typed setter can reject a mismatched escape.
        let raw = "alice\nbob";
        let tainted = Tainted::<String>::from(raw);
        for boundary in [Boundary::AuditField, Boundary::JsonValue] {
            let escaped = tainted.escape_for(boundary).unwrap();
            assert_eq!(escaped.as_str(), raw);
            assert_eq!(escaped.boundary(), boundary);
        }
    }

    #[test]
    fn audit_safe_log_field_strips_crlf() {
        let evil = "alice\nlevel=ERROR cluster_breach=true";
        let rendered = audit_safe_log_field(evil).to_string();
        assert_free_of(&rendered, &['\n', '\r']);
        assert!(rendered.contains("%0A"));
    }

    #[test]
    fn audit_safe_log_field_matches_log_field_boundary() {
        // The helper must emit the same bytes as
        // escape_for(LogField), so migrating a call site from the
        // helper to Tainted<String> later does not change output.
        let evil = "user\rname\nrow=1\0nul\x1Besc\x7Fdel";
        let via_helper = audit_safe_log_field(evil).to_string();
        let via_type = Tainted::<String>::from(evil)
            .escape_for(Boundary::LogField)
            .unwrap()
            .into_string();
        assert_eq!(via_helper, via_type);
    }

    #[test]
    fn tainted_is_not_display() {
        // The absence of a Display impl cannot be asserted at
        // runtime; what can be checked is that Debug is present and
        // escapes control bytes.
        let tainted = Tainted::<String>::from("alice\nbob");
        let rendered = format!("{:?}", tainted);
        assert!(rendered.contains("\\n"), "Debug must escape control bytes");
    }

    #[test]
    fn parser_round_trip_grpc() {
        let parsed = ConnStringSanitizer::parse("grpc://node-1:5055").unwrap();
        match parsed.target() {
            TaintedTarget::Grpc { endpoint } => {
                assert_eq!(endpoint.expose_secret(), "http://node-1:5055");
                let header = endpoint.escape_for(Boundary::HttpHeader).unwrap();
                assert_free_of(header.as_str(), &['\n']);
            }
            other => panic!("unexpected variant: {:?}", other),
        }
    }

    #[test]
    fn parser_round_trip_redwire() {
        let parsed = ConnStringSanitizer::parse("reds://example.com:9999").unwrap();
        match parsed.target() {
            TaintedTarget::RedWire { host, port, tls } => {
                assert_eq!(host.expose_secret(), "example.com");
                assert_eq!(port, 9999);
                assert!(tls);
            }
            other => panic!("unexpected variant: {:?}", other),
        }
    }

    #[test]
    fn parser_into_connection_target_compat() {
        // Backward-compat hatch: the raw ConnectionTarget comes back
        // unchanged, so existing consumers keep working.
        let raw_target = ConnStringSanitizer::parse("memory://")
            .unwrap()
            .into_connection_target();
        assert_eq!(raw_target, ConnectionTarget::Memory);
    }

    #[test]
    fn escape_too_long_surfaces_typed_error() {
        let limit = Tainted::<String>::MAX_ESCAPED_LEN;
        let oversized = "a".repeat(limit + 1);
        let err = Tainted::<String>::from(oversized.as_str())
            .escape_for(Boundary::LogField)
            .unwrap_err();
        let EscapeError::TooLong { boundary, bytes } = err;
        assert_eq!(boundary, Boundary::LogField);
        assert!(bytes > limit);
    }
}
620}