Skip to main content

reddb_server/
json_field.rs

1//! `SerializedJsonField` — typed guard for JSON-envelope construction.
2//!
3//! Issue [#178](https://github.com/reddb-io/reddb/issues/178), enforcing
4//! [ADR 0010 §3](../../../docs/adr/0010-serialization-boundary-discipline.md#3-serializedjsonfield--helloack--payloadreply--topology-json).
5//!
6//! # The boundary
7//!
8//! Every JSON envelope this server emits — HelloAck (issue #166), gRPC
9//! `PayloadReply` (`crates/reddb-server/src/grpc/service_impl.rs`), and
10//! HTTP response bodies (`crates/reddb-server/src/server/handlers_*.rs`)
11//! — is a structured serialization format whose delimiter (`"`,
12//! control bytes, `{`/`}`, `:`) the untrusted caller can attempt to
13//! inject. The Whiz / Babeld pattern is `serialize(trusted ++
14//! untrusted)` without escape: the producer emits attacker-controlled
15//! bytes verbatim and the downstream parser sees a forged field.
16//!
17//! `SerializedJsonField` is the typed point of crossing for that
18//! boundary on the producer side. Caller-influenced data does not get
19//! formatted into a JSON envelope as raw bytes; it round-trips through
20//! [`crate::serde_json::Value`] first, picking up the canonical
21//! RFC-8259-compliant escape contract from
22//! [`crate::serde_json::Value::to_string_compact`] (the F-01 hotfix
23//! shipped in #181).
24//!
25//! # Public surface
26//!
27//! - [`SerializedJsonField::tainted`] — wrap an untrusted, caller-
28//!   influenced string. Returns a [`crate::serde_json::Value::String`]
29//!   that, when serialized, will have every control byte and JSON
30//!   delimiter escaped per RFC 8259 §7. Use this for error messages,
31//!   user-supplied identifiers, free-form text, and anything reaching
32//!   the envelope from a parser, header, or request body.
33//! - [`SerializedJsonField::typed`] — wrap a known-typed value that
34//!   implements [`crate::serde_json::JsonEncode`] (the in-house
35//!   counterpart to `serde::Serialize`). Returns the value's
36//!   canonical [`crate::serde_json::Value`] representation. Use this
37//!   for structs and enums whose schema is owned by the server; it
38//!   guarantees the round-trip even for nested string fields.
39//!
40//! Both forms produce a [`crate::serde_json::Value`] that the rest of
41//! the envelope assembly (`Map::insert`, `to_string_compact`)
42//! consumes uniformly. A caller never hands raw bytes to the JSON
43//! emitter; everything goes through `Value`.
44//!
45//! # F-05 — SQL parser error message routing
46//!
47//! Audit finding F-05 (see
48//! `docs/security/serialization-boundary-audit-2026-05-06.md`)
49//! observes that SQL parser errors interpolate user-supplied SQL
50//! fragments into their `Display` strings via bare `format!`. When
51//! such an error message reaches an HTTP response body via
52//! [`crate::server::transport::json_error`], the F-05 fix on the JSON
53//! wire side is to route the message through
54//! [`SerializedJsonField::tainted`] before embedding it. That fix
55//! lands in this slice via the
56//! [`crate::server::transport::json_error`] retrofit, which now wraps
57//! every error message with the guard regardless of upstream origin.
58//! The parser-side F-05 cleanup (avoiding `format!` for the offending
59//! fragment in the first place) is a separate concern tracked under
60//! Lane AG / issue #184.
61//!
62//! # Why `tainted` does not return an error
63//!
64//! Unlike [`crate::server::header_escape_guard::HeaderEscapeGuard`],
65//! which rejects CR/LF/NUL/tab outright, `SerializedJsonField` cannot
66//! reject anything: every Unicode string is a legal JSON string under
67//! RFC 8259 §7 once escaped. The contract is "round-trip", not
68//! "validate". The result is always emittable.
69
70use crate::serde_json::{JsonEncode, Value};
71
72/// Typed guard for JSON-envelope field construction. See module docs.
73///
74/// Zero-sized; the type exists only to namespace the constructors and
75/// to make audit grep (`SerializedJsonField::tainted`,
76/// `SerializedJsonField::typed`) trivially locatable.
77pub struct SerializedJsonField;
78
79impl SerializedJsonField {
80    /// Wrap an untrusted, caller-influenced string as a JSON value.
81    ///
82    /// The returned [`Value::String`] will, on serialization through
83    /// [`Value::to_string_compact`], have every control byte
84    /// (`U+0000..U+001F`), embedded `"`, `\`, and other JSON
85    /// delimiters escaped per RFC 8259 §7. The downstream parser sees
86    /// the original bytes verbatim — it does *not* see the bytes as
87    /// envelope structure.
88    ///
89    /// This function is the canonical entry point for caller-
90    /// influenced JSON-envelope fields. Examples:
91    ///
92    /// - Error messages reaching `json_error` (HTTP body)
93    /// - SQL parser error fragments (F-05 fix)
94    /// - User-supplied identifiers reflected back into a response
95    /// - Connection-string fragments arriving via
96    ///   [`reddb_wire::Tainted`] after `escape_for(Boundary::JsonValue)`
97    pub fn tainted(s: &str) -> Value {
98        Value::String(s.to_string())
99    }
100
101    /// Wrap a server-owned, typed value as a JSON value.
102    ///
103    /// Use this for structs and enums whose schema the server owns
104    /// (configuration, status snapshots, typed view-models). The
105    /// `JsonEncode` impl walks the type and produces the canonical
106    /// [`Value`] tree; any nested string fields automatically inherit
107    /// the round-trip guarantee.
108    ///
109    /// Note: `JsonEncode` is this workspace's in-house counterpart to
110    /// `serde::Serialize`; the dependency-free split is documented in
111    /// [`crate::serde_json`].
112    pub fn typed<T: JsonEncode + ?Sized>(value: &T) -> Value {
113        value.to_json_value()
114    }
115}
116
#[cfg(test)]
mod tests {
    use super::*;
    use crate::serde_json::{from_str, Map};

    /// Serialize the single-entry JSON object `{key: value}` in
    /// compact form and return the resulting text.
    fn envelope_with(key: &str, value: Value) -> String {
        let mut fields = Map::new();
        fields.insert(key.to_string(), value);
        Value::Object(fields).to_string_compact()
    }

    /// Push `input` through the guard and back again.
    ///
    /// The string is wrapped with `SerializedJsonField::tainted`,
    /// stored under the key `field`, serialized, re-parsed, and the
    /// recovered string is returned. Panics when the serialized text
    /// is not valid JSON, is not an object, or `field` is missing or
    /// not a string.
    fn round_trip(input: &str) -> String {
        let wire = envelope_with("field", SerializedJsonField::tainted(input));
        let parsed: Value = from_str(&wire).expect("envelope must be valid JSON");
        let fields = match parsed {
            Value::Object(fields) => fields,
            other => panic!("expected object, got {other:?}"),
        };
        match fields.get("field").cloned() {
            Some(Value::String(recovered)) => recovered,
            other => panic!("field missing or wrong shape: {other:?}"),
        }
    }

    #[test]
    fn json_field_tainted_round_trips_quote_smuggling_attempt() {
        // Textbook smuggling attempt: close the string early, then
        // append a forged sibling key.
        let payload = r#"val"; "injected": true"#;
        assert_eq!(round_trip(payload), payload);
    }

    #[test]
    fn json_field_tainted_round_trips_crlf_in_value() {
        // A CRLF inside the value has to come back intact after the
        // round trip, so a line-oriented log shipper that re-splits
        // the body cannot be fooled by it.
        let payload = "first line\r\n\"injected_key\": \"x";
        assert_eq!(round_trip(payload), payload);
    }

    #[test]
    fn json_field_tainted_escapes_all_control_bytes() {
        // Walk every control byte 0x00..=0x1F: each one must be
        // escaped in the envelope (never dropped, never raw) and must
        // come back unchanged after parsing.
        for byte in 0x00u8..0x20 {
            let payload = char::from(byte).to_string();
            let envelope = envelope_with("k", SerializedJsonField::tainted(&payload));

            let raw_present = envelope.as_bytes().contains(&byte);
            // NOTE(review): a raw newline is tolerated as long as the
            // escaped form `\n` is also present. It is unclear why
            // this exemption is needed for a compact serializer —
            // confirm it is intentional.
            let newline_exempt = byte == b'\n' && envelope.contains("\\n");
            assert!(
                !raw_present || newline_exempt,
                "byte 0x{byte:02x} appeared raw in envelope: {envelope:?}"
            );

            assert_eq!(round_trip(&payload), payload);
        }
    }

    #[test]
    fn json_field_tainted_round_trips_existing_escape_sequences() {
        // A literal backslash followed by `n` (two characters) must
        // stay two characters; the guard must not turn it into a
        // real newline.
        let payload = r#"contains \n and \t as literal chars"#;
        assert_eq!(round_trip(payload), payload);
    }

    #[test]
    fn json_field_tainted_round_trips_deeply_nested_escapes() {
        // The payload is itself shaped like JSON nested inside JSON.
        // It must come back as one opaque string, not be parsed as a
        // nested object.
        let payload =
            r#"{"outer":"{\"inner\":\"{\\\"deepest\\\":\\\"\\\\\\\"end\\\\\\\"\\\"}\"}"}"#;
        assert_eq!(round_trip(payload), payload);
    }

    #[test]
    fn json_field_tainted_round_trips_when_used_as_object_key() {
        // Keys are JSON strings too, so a hostile key must be escaped
        // on the way out and found again under the same name.
        let key = "key\"with\\quotes\nand-newlines";
        let envelope = envelope_with(key, SerializedJsonField::tainted("v"));
        let parsed: Value = from_str(&envelope).expect("envelope must be valid JSON");
        let map = match parsed {
            Value::Object(map) => map,
            other => panic!("expected object, got {other:?}"),
        };
        assert!(
            map.get(key).is_some(),
            "key did not round-trip; map keys: {:?}",
            map.keys().collect::<Vec<_>>()
        );
    }

    #[test]
    fn json_field_tainted_round_trips_unicode_and_emoji() {
        let payload = "café — naïve façade — 日本語 — 🦀";
        assert_eq!(round_trip(payload), payload);
    }

    /// Regression for issue #191. The in-house JSON parser used to
    /// decode each UTF-8 continuation byte with `ch as char`,
    /// truncating multi-byte sequences to Latin-1. The corpus covers
    /// the four payload classes listed in the issue: extended Latin,
    /// CJK, emoji (including supplementary-plane code points, which a
    /// JSON `\u` escape can only express as a UTF-16 surrogate pair),
    /// and mixed RTL+LTR text.
    #[test]
    fn json_field_tainted_round_trips_multibyte_utf8_corpus() {
        let corpus: &[&str] = &[
            // Extended Latin: 2-byte UTF-8 sequences.
            "café — naïve façade — Œuvre",
            // CJK: 3-byte UTF-8 sequences.
            "日本語テスト — 中文 — 한국어",
            // Supplementary-plane emoji: 4-byte UTF-8 sequences. As a
            // JSON escape, U+1F4A9 would be the surrogate pair
            // `\ud83d\udca9`.
            "🦀🚀💩🌍 family: 👨‍👩‍👧‍👦",
            // Right-to-left scripts (Arabic, Hebrew) mixed with
            // left-to-right text.
            "Hello مرحبا שלום — mix",
        ];
        for &sample in corpus {
            assert_eq!(round_trip(sample), sample, "payload {sample:?}");
        }
    }

    #[test]
    fn json_field_typed_emits_canonical_representation() {
        // Known-good primitives pass through their `JsonEncode` impl
        // and come out as the matching `Value` variant.
        assert_eq!(SerializedJsonField::typed(&42_i64).as_i64(), Some(42));
        assert_eq!(SerializedJsonField::typed(&true).as_bool(), Some(true));
        assert_eq!(SerializedJsonField::typed(&"hello").as_str(), Some("hello"));
    }

    /// Regression: a hostile corpus that stacks the smuggling tricks
    /// together. Each entry must survive the guard unchanged.
    #[test]
    fn json_field_tainted_handles_full_malicious_corpus() {
        let corpus: &[&str] = &[
            r#"{"key": "val"; "injected": true}"#,
            "line1\r\nContent-Length: 0\r\n\r\nhost: evil",
            "control\x00bytes\x01\x02\x03\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x1fend",
            "escapes \\n \\r \\t \\u0041 \\\\ \\\" literal",
            r#"deeply{"nested":{"json":{"inside":"of","a":"string"}}}deeply"#,
            "trailing-newline\n",
            "\"-prefixed",
            "back\\slash-suffix\\",
            // F-05 style: an SQL parser error whose message embeds a
            // user fragment that itself resembles JSON.
            r#"sql parse error: unexpected token "}" near "select * from t where j = '{\"x\":1}'""#,
        ];
        for &sample in corpus {
            assert_eq!(round_trip(sample), sample, "corpus payload: {sample:?}");
        }
    }
}