vyre_foundation/serial/wire.rs
1// Stable binary IR wire format for serialized IR programs.
2
3use crate::ir::{BufferDecl, DataType, Expr, Node, Program};
4
/// Decoding half of the wire format. Provides `decode::from_wire`, which
/// [`Program::from_wire`] delegates to after its size and version pre-checks.
pub mod decode;
/// Encoding half of the wire format. Provides `encode::to_wire`,
/// `encode::to_wire_into`, and `encode::put_nodes`, which back
/// [`Program::to_wire`], [`Program::to_wire_into`], and
/// [`append_node_list_fingerprint`] respectively.
pub mod encode;
/// Envelope-level definitions: the `MAGIC` prefix and `WIRE_FORMAT_VERSION`
/// that [`Program::from_wire`] checks before decoding.
pub mod framing;
/// Wire tag helpers. Contains `data_type_tag::put_data_type`, used by
/// [`append_data_type_fingerprint`] to encode a [`DataType`].
pub mod tags;
13
/// Upper bound on the number of buffers a single IR wire-format program may
/// declare.
///
/// Invariant I10 demands bounded allocation ahead of semantic validation, so
/// a hostile wire blob is refused on this cap before the buffer table is
/// allocated.
pub const MAX_BUFFERS: usize = 16 * 1024;

/// Upper bound on the statement nodes carried by any one wire-format node
/// list.
///
/// Invariant I10 demands that node vectors are bounded before they are
/// allocated. Nested lists are not pooled: each list is checked against this
/// budget on its own as it is decoded.
pub const MAX_NODES: usize = 1_000_000;

/// Upper bound on the arguments of a single wire-format call expression.
///
/// Invariant I10 demands that expression argument vectors are bounded before
/// they are allocated.
pub const MAX_ARGS: usize = 4 * 1024;

/// Upper bound, in bytes, on any UTF-8 string read from the IR wire format.
///
/// Names and operation identifiers arrive in attacker-controlled wire bytes;
/// this is the I10 allocation bound for them.
pub const MAX_STRING_LEN: usize = 1024 * 1024;

/// Upper bound, in bytes, on an opaque payload read from the IR wire format.
///
/// This is the I10 allocation bound for the extension-defined `Expr::Opaque`
/// and `Node::Opaque` payloads carried by attacker-controlled wire bytes.
/// Must match the encoder limit in `put_node.rs` and `put_expr.rs`.
///
/// The value is derived from [`MAX_ARGS`] (currently 4 MiB), so changing
/// `MAX_ARGS` changes this limit as well.
pub const MAX_OPAQUE_PAYLOAD_LEN: usize = 1024 * MAX_ARGS;

/// Deepest recursion the IR wire-format decoder will follow.
///
/// The cap is enforced on the single recursion counter held by `Reader`,
/// which `Reader::node` and `Reader::expr` both bump on entry and release on
/// exit. Because the counter is **shared**, alternating statement and
/// expression nesting does not buy a hostile blob extra depth: descending
/// into a `Node::If`/`Loop`/`Block` body and descending into a nested
/// [`Expr`] argument tree draw from the same budget. Once the depth reaches
/// `MAX_DECODE_DEPTH` the decode is refused with a `Fix:`-prefixed error
/// before another stack frame is pushed, which closes the stack-overflow
/// `DoS` from a blob nesting `Block(Block(... Block(...) ...))` a million
/// levels deep.
///
/// Covers audit L.1.35 (HIGH).
pub const MAX_DECODE_DEPTH: u32 = 64;

/// Largest wire-encoded Program, in bytes, that will be accepted.
///
/// Anything bigger is turned away by the framing layer before any decode
/// allocation happens, so attacker-controlled input cannot drive unbounded
/// memory growth.
pub const MAX_PROGRAM_BYTES: usize = 64 << 20;
64
/// Decoder state for one wire-format blob: the borrowed input, a position,
/// and the recursion-depth counter bounded by [`MAX_DECODE_DEPTH`].
///
/// NOTE(review): the methods that drive this struct are not in this file
/// (presumably under `decode`); the roles of `bytes` and `pos` below are
/// inferred from their names and types — confirm against the decoder.
pub(crate) struct Reader<'a> {
    /// Borrowed input bytes being decoded.
    pub bytes: &'a [u8],
    /// Presumably the offset into `bytes` of the next unread byte — TODO confirm.
    pub pos: usize,
    /// Current recursion depth on the decode call stack. Incremented by
    /// every `node()` and `expr()` call and compared against
    /// [`MAX_DECODE_DEPTH`] before any nested decode proceeds.
    pub depth: u32,
}
73
74impl Program {
75 /// Serialize this IR program into the stable `VIR0` IR wire format.
76 ///
77 /// # Errors
78 ///
79 /// Returns [`crate::error::Error::WireFormatValidation`] when a count
80 /// cannot be represented in the versioned wire format or when a public
81 /// enum variant has no registered stable wire tag. The `message` field
82 /// carries the actionable diagnostic prose including a `Fix:` hint.
83 #[inline]
84 #[must_use]
85 pub fn to_wire(&self) -> Result<Vec<u8>, crate::error::Error> {
86 encode::to_wire(self).map_err(wire_err)
87 }
88
89 /// Serialize this IR program into the stable `VIR0` IR wire format,
90 /// appending to an existing buffer.
91 ///
92 /// # Errors
93 ///
94 /// Returns [`crate::error::Error::WireFormatValidation`] when a count
95 /// cannot be represented in the versioned wire format or when a public
96 /// enum variant has no registered stable wire tag. The `message` field
97 /// carries the actionable diagnostic prose including a `Fix:` hint.
98 #[inline]
99 pub fn to_wire_into(&self, dst: &mut Vec<u8>) -> Result<(), crate::error::Error> {
100 encode::to_wire_into(self, dst).map_err(wire_err)
101 }
102
103 /// Serialize this IR program into bytes.
104 ///
105 /// This compatibility wrapper preserves the pre-`to_wire` API name.
106 ///
107 /// On an encoding error, an empty vector is returned after logging the
108 /// failure. Use [`Program::to_wire`] when the caller needs to handle the
109 /// error explicitly.
110 #[must_use]
111 #[inline]
112 pub fn to_bytes(&self) -> Vec<u8> {
113 match self.to_wire() {
114 Ok(bytes) => bytes,
115 Err(error) => {
116 tracing::error!(
117 error = %error,
118 "Program::to_bytes: wire encoding failed; returning empty bytes. \
119 Fix: call Program::to_wire and handle the validation error explicitly."
120 );
121 Vec::new()
122 }
123 }
124 }
125
126 /// Deserialize an IR program from the stable `VYRE` IR wire format.
127 ///
128 /// # Errors
129 ///
130 /// Returns [`crate::error::Error::VersionMismatch`] when the
131 /// payload advertises a schema version this runtime does not
132 /// understand. Returns [`crate::error::Error::WireFormatValidation`]
133 /// for any other decode failure - truncated bytes, unknown enum
134 /// tag, integrity digest mismatch, or malformed structural
135 /// section.
136 #[inline]
137 #[must_use]
138 pub fn from_wire(bytes: &[u8]) -> Result<Self, crate::error::Error> {
139 if bytes.len() > MAX_PROGRAM_BYTES {
140 return Err(wire_err(format!(
141 "Fix: wire blob is {} bytes, exceeding the {}-byte IR framing cap. Reject this input or split the Program before serialization.",
142 bytes.len(),
143 MAX_PROGRAM_BYTES
144 )));
145 }
146 // The version field is validated before the string-based
147 // decoder so that an out-of-range version surfaces as the
148 // typed `VersionMismatch` variant instead of being absorbed
149 // into the generic `WireFormatValidation` bucket. Tooling
150 // that hangs off the diagnostic code `E-WIRE-VERSION` relies
151 // on this distinction.
152 if bytes.len() >= framing::MAGIC.len() + 2
153 && &bytes[..framing::MAGIC.len()] == framing::MAGIC
154 {
155 let version = u16::from_le_bytes([bytes[4], bytes[5]]);
156 if version != framing::WIRE_FORMAT_VERSION {
157 return Err(crate::error::Error::VersionMismatch {
158 expected: u32::from(framing::WIRE_FORMAT_VERSION),
159 found: u32::from(version),
160 });
161 }
162 }
163 decode::from_wire(bytes).map_err(wire_err)
164 }
165
166 /// Deserialize an IR program from bytes.
167 ///
168 /// This compatibility wrapper preserves the pre-`from_wire` API name.
169 ///
170 /// # Errors
171 ///
172 /// Returns the same actionable decode errors as [`Program::from_wire`].
173 #[inline]
174 #[must_use]
175 pub fn from_bytes(bytes: &[u8]) -> Result<Self, crate::error::Error> {
176 Self::from_wire(bytes)
177 }
178
179 /// Stable content hash of this Program, used as a cache identity.
180 ///
181 /// Computed as BLAKE3 of the canonical wire-format encoding. This is the
182 /// exact-match identity for persistent-cache consumers that need a
183 /// deterministic key per Program without re-implementing canonicalization.
184 /// On canonical wire-encoding failure, the value is a domain-separated
185 /// error digest rather than an all-zero sentinel, so malformed programs do
186 /// not collapse into the same cache identity.
187 #[must_use]
188 pub fn content_hash(&self) -> [u8; 32] {
189 self.fingerprint()
190 }
191}
192
193/// Wrap an internal wire-format error string in the typed [`crate::error::Error`]
194/// so every public boundary of this module returns a structured variant
195/// callers can match on.
196fn wire_err(message: String) -> crate::error::Error {
197 crate::error::Error::WireFormatValidation { message }
198}
199
200/// Append stable VIR0 wire bytes for a [`DataType`] (tag + any payload) into
201/// `buf`. Used by disk-cache fingerprinting where `Debug` output would be
202/// the wrong contract.
203///
204/// # Errors
205///
206/// Returns a wire-format diagnostic when `value` contains a datatype variant
207/// without a stable tag or a payload that cannot fit the VIR0 encoding.
208pub fn append_data_type_fingerprint(buf: &mut Vec<u8>, value: &DataType) -> Result<(), String> {
209 tags::data_type_tag::put_data_type(buf, value).map_err(String::from)
210}
211
212/// Append stable VIR0 wire bytes for a `Node` statement list (count + each
213/// node). Matches the statement encoding used in full program wire (`to_wire`)
214/// (without the file envelope, metadata, or buffer table).
215///
216/// # Errors
217///
218/// Returns a wire-format diagnostic when the node list or any nested payload
219/// cannot be represented in VIR0.
220pub fn append_node_list_fingerprint(buf: &mut Vec<u8>, nodes: &[Node]) -> Result<(), String> {
221 encode::put_nodes(buf, nodes).map_err(String::from)
222}
223
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::{BufferAccess, BufferDecl, DataType, Node, Program};

    /// `Program::to_bytes` has no error channel: when `Program::to_wire`
    /// fails, it must return an empty vector instead.
    #[test]
    fn to_bytes_returns_empty_on_wire_error() {
        // A buffer name one byte past `MAX_STRING_LEN` is the trigger used
        // here to make encoding fail.
        let long_name = "x".repeat(MAX_STRING_LEN + 1);
        let program = Program::wrapped(
            vec![BufferDecl::storage(
                &long_name,
                0,
                BufferAccess::ReadOnly,
                DataType::U32,
            )],
            [1, 1, 1],
            vec![],
        );
        assert!(program.to_wire().is_err());
        assert!(program.to_bytes().is_empty());
    }

    /// EDGE-001 regression: `MAX_DECODE_DEPTH` covers **both** Node and Expr
    /// recursion through the same counter. A blob that nests statement
    /// bodies past the depth limit must be rejected at decode time,
    /// preventing stack-overflow DoS on untrusted input.
    ///
    /// The test runs on a dedicated thread with an 8 MiB stack because
    /// the encode/decode walk down a `MAX_DECODE_DEPTH + 1`-deep Block
    /// tree uses ~3–4× the native frames the default 2 MiB test stack
    /// allocates. Without the explicit stack, the test itself
    /// stack-overflows before the decode guard ever fires - masking
    /// the real assertion.
    #[test]
    fn decode_depth_cap_rejects_deeply_nested_blocks() {
        std::thread::Builder::new()
            .stack_size(8 * 1024 * 1024)
            .spawn(run_decode_depth_cap)
            .expect("Fix: spawn test worker")
            .join()
            .expect("Fix: decode-depth-cap worker panicked");
    }

    /// Body of the depth-cap test; runs on the worker thread spawned above.
    fn run_decode_depth_cap() {
        // Build the nested program iteratively so the test thread's
        // stack only owns the tree, not a recursion chain the depth
        // of the tree.
        let mut inner = Node::Block(vec![]);
        for _ in 0..MAX_DECODE_DEPTH {
            inner = Node::Block(vec![inner]);
        }
        let program = Program::wrapped(
            vec![BufferDecl::read_write("out", 0, DataType::U32)],
            [1, 1, 1],
            vec![inner],
        );
        let bytes = program
            .to_wire()
            .expect("Fix: building a (MAX_DEPTH+1)-nested program must still encode");

        // One match both asserts the failure and extracts the error.
        let err = match Program::from_wire(&bytes) {
            Ok(_) => panic!("decoding a program deeper than MAX_DECODE_DEPTH must fail; got Ok"),
            Err(error) => error.to_string(),
        };
        assert!(
            err.contains("Fix:"),
            "depth-exceed error must carry a `Fix:` hint, got: {err}"
        );
    }
}
296
/// OPAQUE-001 regression: the encoder must enforce the same opaque payload
/// cap the decoder is documented to use. A payload of exactly
/// `MAX_OPAQUE_PAYLOAD_LEN` bytes has to encode; a payload one byte longer
/// has to be refused by `to_wire`.
///
/// Only the encode side is exercised here; nothing in this test decodes.
#[test]
pub(crate) fn opaque_payload_limit_is_symmetric() {
    use crate::ir::{Expr, ExprNode};
    use std::any::Any;

    /// Minimal extension expression whose wire payload is the wrapped bytes.
    #[derive(Debug)]
    struct BigOpaque(Vec<u8>);

    impl ExprNode for BigOpaque {
        fn extension_kind(&self) -> &'static str {
            "test.big"
        }
        fn debug_identity(&self) -> &str {
            "test.big"
        }
        fn result_type(&self) -> Option<DataType> {
            Some(DataType::U32)
        }
        fn cse_safe(&self) -> bool {
            false
        }
        fn stable_fingerprint(&self) -> [u8; 32] {
            [0; 32]
        }
        fn validate_extension(&self) -> Result<(), String> {
            Ok(())
        }
        fn as_any(&self) -> &dyn Any {
            self
        }
        fn wire_payload(&self) -> Vec<u8> {
            self.0.clone()
        }
    }

    // Wrap an all-zero opaque payload of `payload_len` bytes in a
    // one-statement program; both cases below differ only in that length.
    let program_with_payload = |payload_len: usize| {
        let opaque = Expr::opaque(BigOpaque(vec![0u8; payload_len]));
        Program::wrapped(
            vec![BufferDecl::read_write("out", 0, DataType::U32)],
            [1, 1, 1],
            vec![Node::let_bind("_", opaque)],
        )
    };

    // At the limit: must encode successfully.
    let at_limit = program_with_payload(MAX_OPAQUE_PAYLOAD_LEN).to_wire();
    assert!(
        at_limit.is_ok(),
        "at-limit opaque payload ({MAX_OPAQUE_PAYLOAD_LEN} bytes) must encode"
    );

    // One byte over: must fail at encode time, naming the limit.
    let msg = program_with_payload(MAX_OPAQUE_PAYLOAD_LEN + 1)
        .to_wire()
        .expect_err("opaque payload exceeding MAX_OPAQUE_PAYLOAD_LEN must fail at encode")
        .to_string();
    assert!(
        msg.contains("MAX_OPAQUE_PAYLOAD_LEN") || msg.contains(&MAX_OPAQUE_PAYLOAD_LEN.to_string()),
        "error should mention the limit, got: {msg}"
    );
}
361}