pmcp_server_toolkit/workbook/render_uri.rs
1//! The `workbook://` render-pointer URI codec (WBSV-05, V12).
2//!
3//! `render_workbook` does NOT return the `.xlsx` bytes. It validates the inputs,
4//! then returns a `workbook://` URI that encodes the (canonical) inputs PLUS the
5//! bundle provenance stamp. The bytes are recomputed per `resources/read` by
6//! decoding the URI, re-verifying provenance, re-validating the inputs, re-running
7//! the executor, and rendering (see [`super::render_resource`]). This keeps the
8//! server STATELESS (Lambda-safe — no session, no server-side render cache, V3).
9//!
10//! # The URI as an attacker-controlled payload
11//!
12//! The pointer round-trips through the client, so the URI handed back to
13//! `resources/read` is UNTRUSTED — an attacker may forge, truncate, oversize, or
14//! cross-wire it. The codec is hardened accordingly:
15//!
16//! - **Size guard FIRST (T-92-14 / V12):** [`decode`] rejects any URI longer than
17//! [`MAX_ENCODED_URI_LEN`] BEFORE any base64 work — an oversized payload never
18//! reaches the allocator-heavy decode path (DoS mitigation).
//! - **Total, panic-free decode (T-92-17):** every malformed / truncated / garbage
//!   input returns `Err(WorkbookToolError)`, NEVER a panic. The module-level
//!   `deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)` attribute
//!   (active in non-test builds) plus the `prop_decode_total` proptest in this
//!   file's `tests` module enforce totality over arbitrary/adversarial input.
23//!
24//! Provenance verification (decoded stamp == bundle stamp) and input re-validation
25//! happen on the READ side ([`super::render_resource`]), not here — this module is
26//! purely the codec.
27//!
28//! # Privacy note (Codex MEDIUM #10)
29//!
30//! The `workbook://` URI ENCODES the caller's inputs in its payload. A client,
31//! proxy, or gateway that logs resource URIs will therefore log the inputs.
32//! Operators handling sensitive inputs must treat the URI as sensitive. See
33//! `docs/workbook-uri-spec.md` for the published contract + privacy warning.
34
35// Compiler/clippy-enforced panic-freedom on the value path (mirrors the runtime).
36#![cfg_attr(
37 not(test),
38 deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)
39)]
40
41use base64::Engine;
42use serde::{Deserialize, Serialize};
43use serde_json::Value;
44
45use super::error::WorkbookToolError;
46use super::ProvStamp;
47
/// Leading segment shared by every render pointer: the `workbook://` scheme
/// followed by the fixed `render/` path component. [`encode`] prepends it to the
/// base64 body and [`decode`] refuses any URI that does not start with it.
pub const RENDER_URI_PREFIX: &str = "workbook://render/";
50
/// MIME type for the rendered `.xlsx` file — the OOXML spreadsheet media type.
///
/// `render_workbook` advertises it and the `resources/read` content carries it,
/// which tells the client that the base64 payload is a downloadable spreadsheet.
pub const WORKBOOK_XLSX_MIME: &str =
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
56
57/// The hard upper bound on an encoded `workbook://` URI length, in bytes.
58///
59/// [`decode`] rejects any URI longer than this BEFORE doing any base64 decode —
60/// the size guard is the first thing checked, so an oversized attacker payload
61/// never reaches the allocating decode path (T-92-14 / V12, DoS mitigation).
62///
63/// 64 KiB is generous for a tax-style input map (a handful of scalars + a small
64/// provenance triple) while bounding the per-read decode cost. It is part of the
65/// published `workbook://` contract (`docs/workbook-uri-spec.md`).
66pub const MAX_ENCODED_URI_LEN: usize = 64 * 1024;
67
68/// The decoded render payload: the canonical input DTO plus the provenance stamp
69/// that was bound into the URI at `render_workbook` time.
70///
71/// The read side ([`super::render_resource`]) VERIFIES `provenance` against the
72/// live bundle stamp and RE-VALIDATES `dto` through
73/// [`super::input::validate_input`] before re-running — neither is trusted as-is.
74#[derive(Debug, Clone, PartialEq, Eq)]
75pub struct DecodedRender {
76 /// The canonical wire DTO (`{ inputs, overrides }`) — the SAME shape
77 /// [`super::input::validate_input`] accepts, so it re-validates on read.
78 pub dto: Value,
79 /// The provenance stamp bound into the URI at encode time. The read side
80 /// rejects the URI if this does not equal the live bundle stamp
81 /// (cross-provenance spoofing guard, T-92-15).
82 pub provenance: ProvStamp,
83}
84
85/// The on-wire JSON payload (pre-base64). Kept private — callers go through
86/// [`encode`] / [`decode`] which own the scheme prefix + size guard.
87///
88/// The `provenance` triple `{ bundle_id, version, combined_hash }` is
89/// [`ProvStamp`] itself (its serde derives ARE the wire contract — Codex
90/// HIGH #3: the `combined_hash` field, NEVER a source-workbook hash).
91#[derive(Debug, Deserialize)]
92struct RenderPayload {
93 /// The canonical input DTO.
94 dto: Value,
95 /// The provenance stamp bound into the URI at encode time.
96 provenance: ProvStamp,
97}
98
99/// Borrowing serialize-only twin of [`RenderPayload`] — same field names and
100/// order, so the encoded bytes are identical without cloning the DTO + stamp.
101#[derive(Serialize)]
102struct RenderPayloadRef<'a> {
103 dto: &'a Value,
104 provenance: &'a ProvStamp,
105}
106
107/// Encode a validated input DTO + provenance stamp into a `workbook://` render
108/// pointer URI.
109///
110/// The payload `{ dto, provenance }` is serialized to canonical JSON then
111/// base64-encoded with the URL-safe, unpadded alphabet (so the result is a clean
112/// URI path segment). The bytes are NOT here — they are recomputed on
113/// `resources/read` from this URI.
114///
115/// # Errors
116///
117/// Returns [`WorkbookToolError::invalid_input`] only if the canonical DTO cannot
118/// be serialized (it always can for a [`super::input::ValidatedInput`] DTO; the
119/// fallible signature keeps the call site `?`-chained and panic-free).
120#[allow(clippy::result_large_err)]
121pub fn encode(dto: &Value, provenance: &ProvStamp) -> Result<String, WorkbookToolError> {
122 let payload = RenderPayloadRef { dto, provenance };
123 let json = serde_json::to_vec(&payload).map_err(|e| {
124 WorkbookToolError::invalid_input(format!("could not encode render payload: {e}"))
125 })?;
126 let b64 = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(json);
127 Ok(format!("{RENDER_URI_PREFIX}{b64}"))
128}
129
130/// Decode a `workbook://` render pointer URI back into its [`DecodedRender`]
131/// payload — TOTAL and panic-free over arbitrary/adversarial input.
132///
133/// The size guard is checked FIRST (T-92-14 / V12): a URI longer than
134/// [`MAX_ENCODED_URI_LEN`] is rejected BEFORE any base64 decode, so an oversized
135/// attacker payload never reaches the allocating decode path.
136///
137/// # Errors
138///
139/// Returns [`WorkbookToolError::invalid_input`] for ANY malformed input — an
140/// oversized URI, a wrong/absent scheme prefix, non-base64 body, non-UTF-8 or
141/// non-JSON decoded bytes, or a payload missing the `dto`/`provenance` fields.
142/// NEVER panics (T-92-17, `deny(panic)` + proptest-proven).
143#[allow(clippy::result_large_err)]
144pub fn decode(uri: &str) -> Result<DecodedRender, WorkbookToolError> {
145 // 1. SIZE GUARD FIRST (T-92-14 / V12) — reject oversized BEFORE any decode.
146 if uri.len() > MAX_ENCODED_URI_LEN {
147 return Err(WorkbookToolError::invalid_input(format!(
148 "workbook:// URI exceeds the {MAX_ENCODED_URI_LEN}-byte limit ({} bytes)",
149 uri.len()
150 )));
151 }
152 // 2. Scheme prefix (a non-workbook URI is not ours).
153 let body = uri.strip_prefix(RENDER_URI_PREFIX).ok_or_else(|| {
154 WorkbookToolError::invalid_input(
155 "not a workbook://render/ URI (missing scheme prefix)".to_string(),
156 )
157 })?;
158 // 3. base64 (URL-safe, unpadded) — total: a garbage body is an Err.
159 let bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
160 .decode(body)
161 .map_err(|e| {
162 WorkbookToolError::invalid_input(format!("workbook:// URI body is not base64: {e}"))
163 })?;
164 // 4. JSON parse — total: non-UTF-8 / non-JSON / wrong-shape is an Err.
165 let payload: RenderPayload = serde_json::from_slice(&bytes).map_err(|e| {
166 WorkbookToolError::invalid_input(format!("workbook:// URI payload is not valid: {e}"))
167 })?;
168 Ok(DecodedRender {
169 dto: payload.dto,
170 provenance: payload.provenance,
171 })
172}
173
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;
    use serde_json::json;

    /// A fixed provenance stamp shared by every test.
    fn stamp() -> ProvStamp {
        ProvStamp {
            bundle_id: "tax-calc".to_string(),
            version: "1.1.0".to_string(),
            combined_hash: "a".repeat(64),
        }
    }

    /// A small `{ inputs, overrides }` DTO shared by every test.
    fn dto() -> Value {
        json!({
            "inputs": { "gross_income": 60000.0, "filing_status": "single" },
            "overrides": {},
        })
    }

    #[test]
    fn round_trip_yields_same_dto_and_provenance() {
        let uri = encode(&dto(), &stamp()).expect("encode");
        assert!(uri.starts_with(RENDER_URI_PREFIX), "carries the scheme");
        let decoded = decode(&uri).expect("decode");
        assert_eq!(decoded.dto, dto(), "dto round-trips");
        assert_eq!(decoded.provenance, stamp(), "provenance round-trips");
    }

    #[test]
    fn encode_is_deterministic() {
        // The same (dto, provenance) always encodes to the SAME URI — required for
        // stateless regen-on-read byte-identity downstream.
        let a = encode(&dto(), &stamp()).expect("encode a");
        let b = encode(&dto(), &stamp()).expect("encode b");
        assert_eq!(a, b, "encode is deterministic");
    }

    #[test]
    fn oversized_uri_is_rejected_before_decode() {
        // A URI longer than MAX_ENCODED_URI_LEN is rejected by the size guard
        // FIRST, before any base64 work (T-92-14 / V12). Build a body that is
        // valid base64 so the ONLY thing that can reject it is the size guard.
        let big_body = "A".repeat(MAX_ENCODED_URI_LEN + 1);
        let uri = format!("{RENDER_URI_PREFIX}{big_body}");
        assert!(uri.len() > MAX_ENCODED_URI_LEN);
        let err = decode(&uri).expect_err("oversized rejected");
        assert_eq!(err.code, "invalid_input");
        assert!(
            err.reason.contains("limit"),
            "rejected by the size guard, not by base64: {}",
            err.reason
        );
    }

    #[test]
    fn corrupted_uri_decodes_to_err_never_panics() {
        // A truncated / garbage body is an Err, never a panic.
        let uri = encode(&dto(), &stamp()).expect("encode");
        // Dropping the last 5 base64 characters removes at least 3 payload bytes,
        // i.e. at least the closing `"}}` of the JSON object, so the result is
        // either invalid base64 or truncated JSON — an Err in both cases.
        let truncated = &uri[..uri.len() - 5];
        assert!(decode(truncated).is_err(), "truncated URI is an Err");
        let garbage = format!("{RENDER_URI_PREFIX}!!!not base64!!!");
        assert!(decode(&garbage).is_err(), "garbage base64 is an Err");
        let wrong_scheme = "https://example.com/evil";
        assert!(decode(wrong_scheme).is_err(), "wrong scheme is an Err");
        // valid base64 of non-JSON bytes
        let not_json = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode([0xff, 0xfe, 0x00]);
        assert!(
            decode(&format!("{RENDER_URI_PREFIX}{not_json}")).is_err(),
            "valid base64 of non-JSON is an Err"
        );
    }

    proptest! {
        /// Round-trip + determinism over arbitrary valid input maps: any
        /// string-keyed scalar input map encodes then decodes to the SAME dto +
        /// provenance, and encode is deterministic.
        #[test]
        fn prop_encode_decode_identity(
            keys in proptest::collection::vec("[a-z_]{1,12}", 0..6),
            nums in proptest::collection::vec(any::<i32>(), 0..6),
        ) {
            let mut inputs = serde_json::Map::new();
            for (k, n) in keys.iter().zip(nums.iter()) {
                inputs.insert(k.clone(), json!(n));
            }
            let d = json!({ "inputs": inputs, "overrides": {} });
            let uri = encode(&d, &stamp()).expect("encode");
            let again = encode(&d, &stamp()).expect("encode again");
            prop_assert_eq!(&uri, &again, "encode deterministic");
            let decoded = decode(&uri).expect("decode");
            prop_assert_eq!(decoded.dto, d, "dto identity");
            prop_assert_eq!(decoded.provenance, stamp(), "provenance identity");
        }

        /// Decode totality (the CLAUDE.md ALWAYS-fuzz requirement, via proptest):
        /// `decode` over ARBITRARY/adversarial strings — random text, truncated and
        /// garbage base64, oversized payloads past MAX_ENCODED_URI_LEN, prefixed and
        /// unprefixed — is TOTAL: it NEVER panics and ALWAYS returns Ok or
        /// Err(WorkbookToolError) (T-92-17). For the bare and prefixed shapes the
        /// assertion is reaching the next line without unwinding; for the oversized
        /// shape the result must additionally be an Err from the size guard.
        #[test]
        fn prop_decode_total(s in ".{0,2048}") {
            // bare arbitrary string
            let _ = decode(&s);
            // with our scheme prefix (drives the base64/JSON arms)
            let _ = decode(&format!("{RENDER_URI_PREFIX}{s}"));
            // an oversized variant carrying the arbitrary content (drives the size
            // guard arm): the padding alone already exceeds the limit, so the guard
            // must fire regardless of what `s` contains.
            let oversized = format!(
                "{RENDER_URI_PREFIX}{s}{}",
                "A".repeat(MAX_ENCODED_URI_LEN + 1)
            );
            let outcome = decode(&oversized);
            prop_assert!(outcome.is_err(), "oversized URI must be rejected");
            if let Err(err) = outcome {
                prop_assert!(
                    err.reason.contains("limit"),
                    "rejected by the size guard, not a later stage: {}",
                    err.reason
                );
            }
        }
    }
}