Skip to main content

reddb_wire/
sanitizer.rs

1//! Connection-string sanitizer + typed taint guard (issue #179, ADR 0010).
2//!
3//! The Whiz / Babeld disclosure (March 2026) is the canonical example
4//! of the failure mode this module exists to prevent: caller-supplied
5//! strings concatenated into a structured serialization format whose
6//! delimiter the caller controls let the caller smuggle a forged field
7//! past the producer and into the consumer's parser. The
8//! [serialization-boundary audit][audit] enumerated 15 instances of
9//! the pattern in this codebase. F-04 is the broadest: ~141
10//! `tracing::*!` call sites that interpolate user-supplied strings via
11//! `Display`, so a CR/LF in a connection-string-derived token, a
12//! tenant name, or a collection name forges a log line.
13//!
14//! [audit]: ../../docs/security/serialization-boundary-audit-2026-05-06.md
15//!
16//! This module ships three things:
17//!
18//! 1. [`Tainted<T>`] — a non-`Display` wrapper. The only way to
19//!    project the inner string into a structured serialization
20//!    boundary is [`Tainted::escape_for`], which forces the caller to
21//!    name the [`Boundary`] and returns an [`EscapedFor`] under the
22//!    boundary's escape contract.
23//! 2. [`ConnStringSanitizer`] — a deep module wrapping the existing
24//!    [`crate::conn_string::parse`]. It returns a
25//!    [`ParsedConnString`] whose host / cert-path / endpoint / query
26//!    fields are exposed as `Tainted<String>` so downstream consumers
27//!    cannot accidentally route a tainted byte through `Display`.
//! 3. [`audit_safe_log_field`] — a thin `Display` adapter that
//!    percent-encodes CR/LF/NUL/control bytes of a `&str` for log emission. The
30//!    structured fix is [`Tainted::escape_for(Boundary::LogField)`];
31//!    the helper exists because the codebase has hundreds of call
32//!    sites where a full type-system migration is mechanical work
33//!    that CI lint #180 tracks separately, and the helper unblocks
34//!    incremental migration without expanding the attack surface.
35
36use std::fmt;
37
38use crate::conn_string::{
39    parse as parse_conn_string, ConnectionTarget, ParseError as ConnParseError,
40};
41
42// ---------------------------------------------------------------------------
43// Boundary + escape error
44// ---------------------------------------------------------------------------
45
/// The serialization surfaces that [`Tainted::escape_for`] can target.
///
/// Every variant stands for one escape rule. The rule itself lives in
/// [`Tainted::escape_for`]; a new variant therefore needs a new arm
/// there (and matching coverage in the test suite).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Boundary {
    /// HTTP/1.1 and HTTP/2 header value. CR, LF, NUL and tab are
    /// removed; everything else is handed through untouched, so the
    /// downstream `http::HeaderValue::from_str` constructor remains
    /// the authoritative gate for the remaining bytes.
    HttpHeader,
    /// gRPC metadata value. Metadata travels as HTTP/2 headers, hence
    /// the rule is the same one used for [`Boundary::HttpHeader`].
    GrpcMetadata,
    /// One field of a plain-text, line-oriented log record (for
    /// example a `tracing` event). Control bytes (0x00-0x1F and 0x7F,
    /// which covers CR / LF / NUL) are percent-encoded rather than
    /// dropped: a smuggled `\nlevel=ERROR` shows up as
    /// `%0Alevel=ERROR` — visibly tampered, never authoritative.
    LogField,
    /// Structured audit field. The value is passed through as-is; the
    /// `AuditFieldEscaper` (#177, slice AC) owns the on-disk encoding
    /// and is the component that rejects control bytes. The variant
    /// exists so the audit lane can accept a typed value instead of a
    /// bare string.
    AuditField,
    /// JSON string value. Passed through as-is; the
    /// `SerializedJsonField` (#178, slice AB) performs the escaping
    /// via `serde_json`. The variant exists so the JSON lane can
    /// accept a typed value instead of a bare string.
    JsonValue,
}

impl Boundary {
    /// Name of the variant as a static string (identical to the
    /// identifier used in source).
    pub fn as_str(self) -> &'static str {
        let name: &'static str = match self {
            Self::HttpHeader => "HttpHeader",
            Self::GrpcMetadata => "GrpcMetadata",
            Self::LogField => "LogField",
            Self::AuditField => "AuditField",
            Self::JsonValue => "JsonValue",
        };
        name
    }
}
96
97/// Stable error code returned by [`Tainted::escape_for`].
98///
99/// The escape paths in this module are total — they always produce a
100/// safe value — so today the only failure mode is "input was so long
101/// that escaping would exceed [`Tainted::MAX_ESCAPED_LEN`]". Future
102/// boundaries (e.g. an MTLS SAN slot with a 256-byte cap) get their
103/// own variants here without breaking existing callers.
104#[derive(Debug, Clone, PartialEq, Eq)]
105pub enum EscapeError {
106    /// Escaping the input would produce a value longer than
107    /// [`Tainted::MAX_ESCAPED_LEN`].
108    TooLong { boundary: Boundary, bytes: usize },
109}
110
111impl fmt::Display for EscapeError {
112    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113        match self {
114            EscapeError::TooLong { boundary, bytes } => write!(
115                f,
116                "escape_for({}) would emit {} bytes (limit {})",
117                boundary.as_str(),
118                bytes,
119                Tainted::<String>::MAX_ESCAPED_LEN,
120            ),
121        }
122    }
123}
124
125impl std::error::Error for EscapeError {}
126
127// ---------------------------------------------------------------------------
128// EscapedFor — boundary-tagged result of `Tainted::escape_for`
129// ---------------------------------------------------------------------------
130
131/// Output of [`Tainted::escape_for`]. Carries the boundary it was
132/// escaped for so a header setter can statically refuse a value that
133/// was escaped for a log line, and vice versa.
134#[derive(Debug, Clone, PartialEq, Eq)]
135pub struct EscapedFor {
136    boundary: Boundary,
137    value: String,
138}
139
140impl EscapedFor {
141    pub fn boundary(&self) -> Boundary {
142        self.boundary
143    }
144    pub fn as_str(&self) -> &str {
145        &self.value
146    }
147    pub fn into_string(self) -> String {
148        self.value
149    }
150}
151
152impl fmt::Display for EscapedFor {
153    /// `EscapedFor` is `Display`-able by design — once the value has
154    /// crossed the [`Tainted::escape_for`] gate the boundary's escape
155    /// contract has been applied and the bytes are safe to render
156    /// against that boundary's parser.
157    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158        f.write_str(&self.value)
159    }
160}
161
162// ---------------------------------------------------------------------------
163// Tainted<T> — non-`Display` wrapper around caller-supplied bytes
164// ---------------------------------------------------------------------------
165
/// A caller-controlled value that has not crossed a serialization
/// boundary yet.
///
/// There is intentionally **no** [`fmt::Display`] impl. To put a
/// `Tainted<String>` on a serialized surface, go through
/// [`Tainted::escape_for`].
///
/// [`fmt::Debug`] *is* implemented (this is what `tracing`'s `?value`
/// uses). `Debug` on a string quotes it and escapes control bytes, so
/// `?value` is safe where `%value` would not be.
///
/// Wrapping a value is unrestricted: [`Tainted::new`] and the `From`
/// conversions are public, and the inner field is `pub(crate)`. What
/// the type polices is how the value gets back *out*: via
/// [`Tainted::escape_for`], or via the deliberately conspicuous
/// [`Tainted::expose_secret`] / [`Tainted::into_inner`] hatches that
/// are meant to stand out in review.
#[derive(Clone, PartialEq, Eq)]
pub struct Tainted<T>(pub(crate) T);

impl<T> Tainted<T> {
    /// Wrap a caller-supplied value. Each call site marks a point
    /// where untrusted data enters the typed world and should be easy
    /// to review.
    pub fn new(value: T) -> Self {
        Tainted(value)
    }

    /// Borrow the raw inner value. The name is loud on purpose so that
    /// grep, review and lints notice it; prefer
    /// [`Tainted::escape_for`] where possible.
    pub fn expose_secret(&self) -> &T {
        let Tainted(inner) = self;
        inner
    }

    /// Owning counterpart of [`Tainted::expose_secret`]: unwraps and
    /// returns the raw inner value.
    pub fn into_inner(self) -> T {
        let Tainted(inner) = self;
        inner
    }
}

impl<T: fmt::Debug> fmt::Debug for Tainted<T> {
    /// Formats as `Tainted(<inner Debug>)`. Delegating to the inner
    /// `Debug` keeps the quoting and control-byte escaping that F-04
    /// recommends as the mechanical fix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Tainted(inner) = self;
        let mut tuple = f.debug_tuple("Tainted");
        tuple.field(inner);
        tuple.finish()
    }
}
213
214impl Tainted<String> {
215    /// Hard cap on the size of any escaped boundary projection.
216    /// Mirrors the `max_uri_bytes` default in [`crate::conn_string`]
217    /// (8 KiB) so a tainted value that fits the parser also fits the
218    /// boundary projection.
219    pub const MAX_ESCAPED_LEN: usize = 8 * 1024;
220
221    /// Project the tainted value into the named [`Boundary`]'s
222    /// escape contract. Returns [`EscapedFor`] tagged with the
223    /// boundary, so a header setter can statically refuse a value
224    /// that was escaped for a log line.
225    pub fn escape_for(&self, boundary: Boundary) -> Result<EscapedFor, EscapeError> {
226        let escaped = match boundary {
227            Boundary::HttpHeader | Boundary::GrpcMetadata => escape_http_header(&self.0),
228            Boundary::LogField => escape_log_field(&self.0),
229            // AuditField + JsonValue are pass-through: their downstream
230            // guard owns the encoder. Returning the inner string
231            // tagged with the boundary lets a typed setter (`fn
232            // set(field: EscapedFor)` whose `boundary` matches
233            // `AuditField`) consume it without a re-escape.
234            Boundary::AuditField | Boundary::JsonValue => self.0.clone(),
235        };
236        if escaped.len() > Self::MAX_ESCAPED_LEN {
237            return Err(EscapeError::TooLong {
238                boundary,
239                bytes: escaped.len(),
240            });
241        }
242        Ok(EscapedFor {
243            boundary,
244            value: escaped,
245        })
246    }
247}
248
249impl From<String> for Tainted<String> {
250    fn from(s: String) -> Self {
251        Tainted(s)
252    }
253}
254
255impl From<&str> for Tainted<String> {
256    fn from(s: &str) -> Self {
257        Tainted(s.to_string())
258    }
259}
260
261// ---------------------------------------------------------------------------
262// Boundary-specific escapers
263// ---------------------------------------------------------------------------
264
/// Producer-side guard for the `HeaderValue` / gRPC metadata contract.
///
/// Returns a copy of `s` with every CR, LF, NUL and tab removed. All
/// other characters — including non-ASCII ones — are copied through
/// unchanged. The downstream constructor
/// (`http::HeaderValue::from_str`) remains the authoritative gate for
/// whatever is left.
fn escape_http_header(s: &str) -> String {
    // Walk `char`s, not bytes. Casting each UTF-8 byte to `char` would
    // reinterpret the bytes of a multi-byte character as separate
    // Latin-1 code points ("é" -> "Ã©"), corrupting the value and
    // inflating its length. The four stripped characters are ASCII, so
    // filtering at `char` level removes exactly the same bytes.
    s.chars()
        .filter(|&ch| !matches!(ch, '\r' | '\n' | '\0' | '\t'))
        .collect()
}
281
/// Log-field contract: every ASCII control character (0x00-0x1F and
/// 0x7F, which includes CR / LF / NUL) is replaced by `%XX` with two
/// upper-case hex digits. All other characters — including non-ASCII
/// ones — are copied through unchanged. Encoding instead of stripping
/// keeps the tampering attempt visible in the captured log line.
fn escape_log_field(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // Walk `char`s, not bytes: casting each UTF-8 byte to `char` would
    // turn one multi-byte character into several Latin-1 code points
    // ("é" -> "Ã©") and corrupt the logged value.
    for ch in s.chars() {
        if ch.is_ascii_control() {
            // ASCII control characters fit in one byte, so the cast is lossless.
            let byte = ch as u8;
            out.push('%');
            out.push(hex_nibble(byte >> 4));
            out.push(hex_nibble(byte & 0x0F));
        } else {
            out.push(ch);
        }
    }
    out
}

/// Upper-case hex digit for a 4-bit value.
///
/// # Panics
///
/// Panics if `n` is greater than 15; callers pass a masked or shifted
/// nibble, so that branch indicates a bug.
fn hex_nibble(n: u8) -> char {
    match n {
        0..=9 => (b'0' + n) as char,
        10..=15 => (b'A' + (n - 10)) as char,
        _ => unreachable!("hex_nibble expects a value in 0..=15"),
    }
}
307
308// ---------------------------------------------------------------------------
309// audit_safe_log_field — incremental-migration helper for F-04
310// ---------------------------------------------------------------------------
311
/// `Display` adapter that percent-encodes CR / LF / NUL / control
/// bytes of a borrowed `&str` for log emission.
///
/// This is the F-04 incremental-migration helper. The structural fix
/// is [`Tainted<String>::escape_for(Boundary::LogField)`]; the helper
/// exists so contributors can mechanically migrate a single
/// `tracing::info!(user = %username, ...)` call site to
/// `tracing::info!(user = %audit_safe_log_field(&username), ...)`
/// without first re-typing the upstream variable as `Tainted<String>`.
///
/// Escaping rule (the same one documented for `Boundary::LogField`):
/// every byte below 0x20, and 0x7F, is rendered as `%XX` with two
/// upper-case hex digits; all other text — including non-ASCII
/// characters — is written unchanged. Nothing is allocated; the output
/// is produced while formatting.
pub fn audit_safe_log_field(value: &str) -> impl fmt::Display + '_ {
    AuditSafeLogField(value)
}

/// Borrowing adapter behind [`audit_safe_log_field`].
struct AuditSafeLogField<'a>(&'a str);

impl fmt::Display for AuditSafeLogField<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.0;
        // Start of the current run of bytes that need no escaping.
        let mut clean_from = 0;
        for (idx, byte) in text.bytes().enumerate() {
            if byte < 0x20 || byte == 0x7F {
                // Control bytes are ASCII, so `idx` and `idx + 1` are
                // always char boundaries and the slices below are
                // valid UTF-8. Writing whole runs (instead of one byte
                // at a time) keeps multi-byte characters intact.
                f.write_str(&text[clean_from..idx])?;
                write!(f, "%{:02X}", byte)?;
                clean_from = idx + 1;
            }
        }
        f.write_str(&text[clean_from..])
    }
}
343
344// ---------------------------------------------------------------------------
345// ConnStringSanitizer + ParsedConnString
346// ---------------------------------------------------------------------------
347
348/// Parse a connection URI and surface its caller-controlled fields
349/// inside [`Tainted<String>`].
350///
351/// Wraps [`crate::conn_string::parse`]. The underlying
352/// [`ConnectionTarget`] is preserved verbatim — this is purely an
353/// additive layer that lets new code paths consume tainted values
354/// without forcing a breaking change on existing consumers
355/// ([`reddb_client::connect`], `red_client`, the driver crates).
356pub struct ConnStringSanitizer;
357
358impl ConnStringSanitizer {
359    /// Parse `uri` into a [`ParsedConnString`]. Same DoS guardrails
360    /// as [`crate::conn_string::parse`].
361    pub fn parse(uri: &str) -> Result<ParsedConnString, ConnParseError> {
362        let target = parse_conn_string(uri)?;
363        Ok(ParsedConnString { target })
364    }
365}
366
367/// Sanitized view of a parsed connection string.
368///
369/// Holds the raw [`ConnectionTarget`] (preserves backward
370/// compatibility with [`crate::conn_string::parse`]) plus typed
371/// accessors that hand caller-supplied fields out as
372/// [`Tainted<String>`]. Downstream consumers that re-emit the values
373/// (gRPC `Endpoint::from`, log lines, error messages, audit fields)
374/// route through [`Tainted::escape_for`] rather than the raw inner.
375#[derive(Debug, Clone, PartialEq, Eq)]
376pub struct ParsedConnString {
377    target: ConnectionTarget,
378}
379
380impl ParsedConnString {
381    /// Typed view of the parsed target. Each variant carries
382    /// [`Tainted<String>`] for every caller-influenced field.
383    pub fn target(&self) -> TaintedTarget<'_> {
384        match &self.target {
385            ConnectionTarget::Memory => TaintedTarget::Memory,
386            ConnectionTarget::File { path } => TaintedTarget::File { path },
387            ConnectionTarget::Grpc { endpoint } => TaintedTarget::Grpc {
388                endpoint: TaintedRef(endpoint),
389            },
390            ConnectionTarget::GrpcCluster {
391                primary,
392                replicas,
393                force_primary,
394            } => TaintedTarget::GrpcCluster {
395                primary: TaintedRef(primary),
396                replicas,
397                force_primary: *force_primary,
398            },
399            ConnectionTarget::Http { base_url } => TaintedTarget::Http {
400                base_url: TaintedRef(base_url),
401            },
402            ConnectionTarget::RedWire { host, port, tls } => TaintedTarget::RedWire {
403                host: TaintedRef(host),
404                port: *port,
405                tls: *tls,
406            },
407        }
408    }
409
410    /// Hand the underlying [`ConnectionTarget`] back. Backward-compat
411    /// hatch for callers that have not yet been migrated to consume
412    /// [`TaintedTarget`].
413    pub fn into_connection_target(self) -> ConnectionTarget {
414        self.target
415    }
416
417    /// Borrow the underlying [`ConnectionTarget`].
418    pub fn as_connection_target(&self) -> &ConnectionTarget {
419        &self.target
420    }
421}
422
423/// Borrowed-side analogue of [`Tainted<String>`]. Same escape API,
424/// no allocation when the caller only wants `expose_secret`.
425#[derive(Debug, Clone, Copy, PartialEq, Eq)]
426pub struct TaintedRef<'a>(&'a String);
427
428impl<'a> TaintedRef<'a> {
429    pub fn expose_secret(&self) -> &'a str {
430        self.0.as_str()
431    }
432    pub fn to_owned_tainted(&self) -> Tainted<String> {
433        Tainted(self.0.clone())
434    }
435    pub fn escape_for(&self, boundary: Boundary) -> Result<EscapedFor, EscapeError> {
436        Tainted(self.0.clone()).escape_for(boundary)
437    }
438}
439
/// Typed view of [`ConnectionTarget`] with [`TaintedRef`] in place
/// of the bare `String` fields.
///
/// Produced by [`ParsedConnString::target`]; every reference borrows
/// from the [`ParsedConnString`] it was created from (lifetime `'a`).
///
/// NOTE(review): `File::path` and `GrpcCluster::replicas` are exposed
/// as plain borrows, not as [`TaintedRef`] — if these originate from
/// the caller-supplied URI they bypass the taint guard; confirm
/// whether that is intended.
#[derive(Debug)]
pub enum TaintedTarget<'a> {
    /// Target without any caller-supplied fields.
    Memory,
    /// File-backed target.
    File {
        // Plain `Path` borrow; not wrapped in `TaintedRef`.
        path: &'a std::path::Path,
    },
    /// Single gRPC endpoint.
    Grpc {
        endpoint: TaintedRef<'a>,
    },
    /// gRPC cluster: one primary plus a list of replicas.
    GrpcCluster {
        primary: TaintedRef<'a>,
        // Plain `String` slice; not wrapped in `TaintedRef`.
        replicas: &'a [String],
        force_primary: bool,
    },
    /// HTTP target identified by its base URL.
    Http {
        base_url: TaintedRef<'a>,
    },
    /// RedWire target: host, port and a TLS flag.
    RedWire {
        host: TaintedRef<'a>,
        port: u16,
        tls: bool,
    },
}
465
466// ---------------------------------------------------------------------------
467// Tests
468// ---------------------------------------------------------------------------
469
// Unit tests for the escapers, the `Tainted` wrapper and the
// `ConnStringSanitizer` round trip. All escape inputs used here are
// ASCII; non-ASCII input is not exercised by this module.
#[cfg(test)]
mod tests {
    use super::*;

    // HttpHeader: CR, LF, NUL and tab are removed, the rest is kept.
    #[test]
    fn header_strip_crlf_nul_tab() {
        let t = Tainted::<String>::from("v1\r\nX-Forged: yes\0\there");
        let e = t.escape_for(Boundary::HttpHeader).unwrap();
        assert_eq!(e.boundary(), Boundary::HttpHeader);
        assert!(!e.as_str().contains('\r'));
        assert!(!e.as_str().contains('\n'));
        assert!(!e.as_str().contains('\0'));
        assert!(!e.as_str().contains('\t'));
        assert_eq!(e.as_str(), "v1X-Forged: yeshere");
    }

    // GrpcMetadata must produce the same text as HttpHeader.
    #[test]
    fn grpc_metadata_matches_http_header_contract() {
        let payload = "alice\r\nx-trace-id: forged";
        let h = Tainted::from(payload)
            .escape_for(Boundary::HttpHeader)
            .unwrap();
        let g = Tainted::from(payload)
            .escape_for(Boundary::GrpcMetadata)
            .unwrap();
        assert_eq!(h.as_str(), g.as_str());
    }

    // LogField: control bytes are percent-encoded, not dropped.
    #[test]
    fn log_field_percent_encodes_control_bytes() {
        let t = Tainted::<String>::from(
            "alice\nlevel=ERROR\rcluster_breach=true\ttab\0nul\x07bel\x1bescape\x7fdel",
        );
        let e = t.escape_for(Boundary::LogField).unwrap();
        let s = e.as_str();
        // Every control byte must be escaped, not stripped, so
        // tampering remains visible in the log line.
        assert!(!s.contains('\n'));
        assert!(!s.contains('\r'));
        assert!(!s.contains('\0'));
        assert!(!s.contains('\t'));
        assert!(!s.contains('\x07'));
        assert!(!s.contains('\x1b'));
        assert!(!s.contains('\x7f'));
        assert!(s.contains("%0A"));
        assert!(s.contains("%0D"));
        assert!(s.contains("%00"));
        assert!(s.contains("%09"));
        assert!(s.contains("%07"));
        assert!(s.contains("%1B"));
        assert!(s.contains("%7F"));
    }

    #[test]
    fn audit_and_json_pass_through() {
        // Audit + JSON are pass-through; their downstream guard owns
        // the encoder. The boundary tag travels with the value so a
        // typed setter can refuse a mismatched escape.
        let raw = "alice\nbob";
        let a = Tainted::from(raw).escape_for(Boundary::AuditField).unwrap();
        let j = Tainted::from(raw).escape_for(Boundary::JsonValue).unwrap();
        assert_eq!(a.as_str(), raw);
        assert_eq!(j.as_str(), raw);
        assert_eq!(a.boundary(), Boundary::AuditField);
        assert_eq!(j.boundary(), Boundary::JsonValue);
    }

    // Despite the name, the helper percent-encodes the newline
    // (`%0A`) rather than removing it; the assertions check both.
    #[test]
    fn audit_safe_log_field_strips_crlf() {
        let evil = "alice\nlevel=ERROR cluster_breach=true";
        let rendered = format!("{}", audit_safe_log_field(evil));
        assert!(!rendered.contains('\n'));
        assert!(!rendered.contains('\r'));
        assert!(rendered.contains("%0A"));
    }

    #[test]
    fn audit_safe_log_field_matches_log_field_boundary() {
        // Same bytes out as escape_for(LogField). This is the
        // contract: the helper is a `Display` adapter for the same
        // escaper, so an incremental migration via the helper does
        // not change behaviour when the call site later upgrades to
        // Tainted<String>.
        let evil = "user\rname\nrow=1\0nul\x1Besc\x7Fdel";
        let helper = format!("{}", audit_safe_log_field(evil));
        let typed = Tainted::from(evil)
            .escape_for(Boundary::LogField)
            .unwrap()
            .into_string();
        assert_eq!(helper, typed);
    }

    #[test]
    fn tainted_is_not_display() {
        // Compile-time check: the caller cannot accidentally write a
        // Tainted<String> through `{}`. We can't *test* the absence
        // of an impl with a unit test, but Debug-quoting *is* present
        // and round-trips control bytes through the standard escape.
        let t = Tainted::from("alice\nbob");
        let dbg = format!("{:?}", t);
        assert!(dbg.contains("\\n"), "Debug must escape control bytes");
    }

    // grpc:// URI: the endpoint comes back behind a TaintedRef and can
    // be escaped for a header.
    #[test]
    fn parser_round_trip_grpc() {
        let parsed = ConnStringSanitizer::parse("grpc://node-1:5055").unwrap();
        match parsed.target() {
            TaintedTarget::Grpc { endpoint } => {
                assert_eq!(endpoint.expose_secret(), "http://node-1:5055");
                let h = endpoint.escape_for(Boundary::HttpHeader).unwrap();
                assert!(!h.as_str().contains('\n'));
            }
            other => panic!("unexpected variant: {:?}", other),
        }
    }

    // reds:// URI: host, port and the TLS flag are surfaced.
    #[test]
    fn parser_round_trip_redwire() {
        let parsed = ConnStringSanitizer::parse("reds://example.com:9999").unwrap();
        match parsed.target() {
            TaintedTarget::RedWire { host, port, tls } => {
                assert_eq!(host.expose_secret(), "example.com");
                assert_eq!(port, 9999);
                assert!(tls);
            }
            other => panic!("unexpected variant: {:?}", other),
        }
    }

    #[test]
    fn parser_into_connection_target_compat() {
        // Backward-compat hatch: the underlying ConnectionTarget is
        // unchanged so existing consumers keep working.
        let parsed = ConnStringSanitizer::parse("memory://").unwrap();
        assert_eq!(parsed.into_connection_target(), ConnectionTarget::Memory);
    }

    // One byte over the cap must yield the typed TooLong error.
    #[test]
    fn escape_too_long_surfaces_typed_error() {
        let big = "a".repeat(Tainted::<String>::MAX_ESCAPED_LEN + 1);
        let err = Tainted::from(big.as_str())
            .escape_for(Boundary::LogField)
            .unwrap_err();
        match err {
            EscapeError::TooLong { boundary, bytes } => {
                assert_eq!(boundary, Boundary::LogField);
                assert!(bytes > Tainted::<String>::MAX_ESCAPED_LEN);
            }
        }
    }
}