// vyre_foundation/serial/wire.rs
// Stable binary IR wire format for serialized IR programs.
2
3use crate::ir::{BufferDecl, DataType, Expr, Node, Program};
4
/// Decoding side of the wire format; provides the `from_wire` routine that
/// [`Program::from_wire`] delegates to.
6pub mod decode;
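/// Encoding side of the wire format; provides the `to_wire`, `to_wire_into`,
/// and `put_nodes` routines used by this module.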
8pub mod encode;
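/// Envelope framing for wire blobs; provides the `MAGIC` prefix and the
/// `WIRE_FORMAT_VERSION` that [`Program::from_wire`] checks first.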
10pub mod framing;
/// Stable wire tags; provides `data_type_tag::put_data_type` for encoding a
/// [`DataType`].
12pub mod tags;
13
/// Upper bound on the number of buffers one IR wire-format program may declare.
///
/// I10 demands that allocation be bounded before semantics are validated, so
/// a hostile wire blob is turned away by this limit before the buffer table
/// is ever allocated.
pub const MAX_BUFFERS: usize = 1 << 14;
19
/// Upper bound on the statement nodes carried by any single wire-format node
/// list.
///
/// I10 demands that node vectors be bounded before allocation. The budget is
/// per list: every nested list is checked against it as that list is decoded.
pub const MAX_NODES: usize = 1_000 * 1_000;
25
/// Upper bound on the arguments of one wire-format call expression.
///
/// I10 demands that expression argument vectors be bounded before allocation.
pub const MAX_ARGS: usize = 1 << 12;
30
/// Upper bound, in bytes, on a UTF-8 string read from the IR wire format
/// (1 MiB).
///
/// Names and operation identifiers arrive in attacker-controlled wire bytes;
/// I10 bounds the allocation made for them.
pub const MAX_STRING_LEN: usize = 1024 * 1024;
36
/// Upper bound on recursion depth while decoding the IR wire format.
///
/// The cap governs the single recursion counter held by `Reader`, which is
/// **shared**: `Reader::node` and `Reader::expr` each increment it on entry
/// and decrement it on exit. Alternating statement and expression nesting
/// therefore buys a hostile blob nothing — a descent into a
/// `Node::If`/`Loop`/`Block` body and a descent into a nested [`Expr`]
/// argument tree both draw from the same budget. Once depth reaches
/// `MAX_DECODE_DEPTH` the decode is rejected with a `Fix:`-prefixed error
/// before another stack frame is pushed, which prevents a stack-overflow DoS
/// from input nested like `Block(Block(... Block(...) ...))` a million levels
/// deep.
///
/// Covers audit L.1.35 (HIGH).
pub const MAX_DECODE_DEPTH: u32 = 1 << 8;
51
/// Largest wire-encoded Program, in bytes, that will be accepted (64 MiB).
///
/// Bigger blobs are refused by the framing layer before any decode
/// allocation happens, so attacker-controlled input cannot drive unbounded
/// memory growth.
pub const MAX_PROGRAM_BYTES: usize = 64 << 20;
57
/// Decoding state over a borrowed byte slice.
///
/// Only the data layout lives here; the decoding methods referenced by the
/// field docs (`node()`, `expr()`) are defined outside this section of the
/// file.
pub(crate) struct Reader<'a> {
    /// Borrowed input bytes. Presumably the wire payload being decoded —
    /// confirm against the `decode` module.
    pub bytes: &'a [u8],
    /// Offset into `bytes`. Presumably the next unread position — confirm
    /// against the `decode` module.
    pub pos: usize,
    /// How deep the decode call stack currently is. Every `node()` and
    /// `expr()` call bumps this counter, and it is checked against
    /// [`MAX_DECODE_DEPTH`] before a nested decode is allowed to proceed.
    pub depth: u32,
}
66
67impl Program {
68 /// Serialize this IR program into the stable `VIR0` IR wire format.
69 ///
70 /// # Errors
71 ///
72 /// Returns [`crate::error::Error::WireFormatValidation`] when a count
73 /// cannot be represented in the versioned wire format or when a public
74 /// enum variant has no registered stable wire tag. The `message` field
75 /// carries the actionable diagnostic prose including a `Fix:` hint.
76 #[inline]
77 #[must_use]
78 pub fn to_wire(&self) -> Result<Vec<u8>, crate::error::Error> {
79 encode::to_wire(self).map_err(wire_err)
80 }
81
82 /// Serialize this IR program into the stable `VIR0` IR wire format,
83 /// appending to an existing buffer.
84 ///
85 /// # Errors
86 ///
87 /// Returns [`crate::error::Error::WireFormatValidation`] when a count
88 /// cannot be represented in the versioned wire format or when a public
89 /// enum variant has no registered stable wire tag. The `message` field
90 /// carries the actionable diagnostic prose including a `Fix:` hint.
91 #[inline]
92 pub fn to_wire_into(&self, dst: &mut Vec<u8>) -> Result<(), crate::error::Error> {
93 encode::to_wire_into(self, dst).map_err(wire_err)
94 }
95
96 /// Serialize this IR program into bytes.
97 ///
98 /// This compatibility wrapper preserves the pre-`to_wire` API name.
99 ///
100 /// On an encoding error, an empty vector is returned after logging the
101 /// failure. Use [`Program::to_wire`] when the caller needs to handle the
102 /// error explicitly.
103 #[must_use]
104 #[inline]
105 pub fn to_bytes(&self) -> Vec<u8> {
106 match self.to_wire() {
107 Ok(bytes) => bytes,
108 Err(error) => {
109 tracing::error!(
110 error = %error,
111 "Program::to_bytes: wire encoding failed; returning empty bytes. \
112 Fix: call Program::to_wire and handle the validation error explicitly."
113 );
114 Vec::new()
115 }
116 }
117 }
118
119 /// Deserialize an IR program from the stable `VYRE` IR wire format.
120 ///
121 /// # Errors
122 ///
123 /// Returns [`crate::error::Error::VersionMismatch`] when the
124 /// payload advertises a schema version this runtime does not
125 /// understand. Returns [`crate::error::Error::WireFormatValidation`]
126 /// for any other decode failure — truncated bytes, unknown enum
127 /// tag, integrity digest mismatch, or malformed structural
128 /// section.
129 #[inline]
130 #[must_use]
131 pub fn from_wire(bytes: &[u8]) -> Result<Self, crate::error::Error> {
132 if bytes.len() > MAX_PROGRAM_BYTES {
133 return Err(wire_err(format!(
134 "Fix: wire blob is {} bytes, exceeding the {}-byte IR framing cap. Reject this input or split the Program before serialization.",
135 bytes.len(),
136 MAX_PROGRAM_BYTES
137 )));
138 }
139 // The version field is validated before the string-based
140 // decoder so that an out-of-range version surfaces as the
141 // typed `VersionMismatch` variant instead of being absorbed
142 // into the generic `WireFormatValidation` bucket. Tooling
143 // that hangs off the diagnostic code `E-WIRE-VERSION` relies
144 // on this distinction.
145 if bytes.len() >= framing::MAGIC.len() + 2
146 && &bytes[..framing::MAGIC.len()] == framing::MAGIC
147 {
148 let version = u16::from_le_bytes([bytes[4], bytes[5]]);
149 if version != framing::WIRE_FORMAT_VERSION {
150 return Err(crate::error::Error::VersionMismatch {
151 expected: u32::from(framing::WIRE_FORMAT_VERSION),
152 found: u32::from(version),
153 });
154 }
155 }
156 decode::from_wire(bytes).map_err(wire_err)
157 }
158
159 /// Deserialize an IR program from bytes.
160 ///
161 /// This compatibility wrapper preserves the pre-`from_wire` API name.
162 ///
163 /// # Errors
164 ///
165 /// Returns the same actionable decode errors as [`Program::from_wire`].
166 #[inline]
167 #[must_use]
168 pub fn from_bytes(bytes: &[u8]) -> Result<Self, crate::error::Error> {
169 Self::from_wire(bytes)
170 }
171
172 /// Stable content hash of this Program, used as a cache identity.
173 ///
174 /// Computed as BLAKE3 of the canonical wire-format encoding. This is the
175 /// exact-match identity for persistent-cache consumers that need a
176 /// deterministic key per Program without re-implementing canonicalization.
177 /// On canonical wire-encoding failure, the value is a domain-separated
178 /// error digest rather than an all-zero sentinel, so malformed programs do
179 /// not collapse into the same cache identity.
180 #[must_use]
181 pub fn content_hash(&self) -> [u8; 32] {
182 self.fingerprint()
183 }
184}
185
186/// Wrap an internal wire-format error string in the typed [`crate::error::Error`]
187/// so every public boundary of this module returns a structured variant
188/// callers can match on.
189fn wire_err(message: String) -> crate::error::Error {
190 crate::error::Error::WireFormatValidation { message }
191}
192
193/// Append stable VIR0 wire bytes for a [`DataType`] (tag + any payload) into
194/// `buf`. Used by disk-cache fingerprinting where `Debug` output would be
195/// the wrong contract.
196pub fn append_data_type_fingerprint(buf: &mut Vec<u8>, value: &DataType) -> Result<(), String> {
197 tags::data_type_tag::put_data_type(buf, value).map_err(String::from)
198}
199
200/// Append stable VIR0 wire bytes for a `Node` statement list (count + each
201/// node). Matches the statement encoding used in full program wire (`to_wire`)
202/// (without the file envelope, metadata, or buffer table).
203pub fn append_node_list_fingerprint(buf: &mut Vec<u8>, nodes: &[Node]) -> Result<(), String> {
204 encode::put_nodes(buf, nodes).map_err(String::from)
205}
206
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::{BufferAccess, BufferDecl, DataType, Node, Program};

    /// A buffer name one byte longer than `MAX_STRING_LEN` must make
    /// `to_wire` fail, and the `to_bytes` compatibility wrapper must then
    /// report that failure as an empty vector.
    #[test]
    #[inline]
    pub(crate) fn to_bytes_returns_empty_on_wire_error() {
        let oversized_name = "x".repeat(MAX_STRING_LEN + 1);
        let buffers = vec![BufferDecl::storage(
            &oversized_name,
            0,
            BufferAccess::ReadOnly,
            DataType::U32,
        )];
        let program = Program::wrapped(buffers, [1, 1, 1], vec![]);

        assert!(program.to_wire().is_err());
        assert!(program.to_bytes().is_empty());
    }

    /// EDGE-001 regression: one counter enforces `MAX_DECODE_DEPTH` for
    /// **both** Node and Expr recursion. Statement bodies nested beyond the
    /// limit have to be refused at decode time, which is what prevents a
    /// stack-overflow DoS on untrusted input.
    ///
    /// The body runs on a dedicated thread with an 8 MiB stack: walking a
    /// `MAX_DECODE_DEPTH + 1`-deep Block tree during encode/decode needs
    /// roughly 3–4× the native frames that fit in the default 2 MiB test
    /// stack. Without the larger stack the test would overflow on its own
    /// before the decode guard fires, hiding the assertion that matters.
    #[test]
    pub(crate) fn decode_depth_cap_rejects_deeply_nested_blocks() {
        let worker = std::thread::Builder::new()
            .stack_size(8 * 1024 * 1024)
            .spawn(run_decode_depth_cap)
            .expect("Fix: spawn test worker");
        worker
            .join()
            .expect("Fix: decode-depth-cap worker panicked");
    }

    /// Worker body for `decode_depth_cap_rejects_deeply_nested_blocks`.
    fn run_decode_depth_cap() {
        // Wrap iteratively rather than recursively, so the thread's stack
        // holds only the finished tree and not a call chain as deep as it.
        let nested = (0..MAX_DECODE_DEPTH)
            .fold(Node::Block(vec![]), |body, _| Node::Block(vec![body]));
        let program = Program::wrapped(
            vec![BufferDecl::read_write("out", 0, DataType::U32)],
            [1, 1, 1],
            vec![nested],
        );
        let wire = program
            .to_wire()
            .expect("Fix: building a (MAX_DEPTH+1)-nested program must still encode");

        let err = match Program::from_wire(&wire) {
            Ok(_) => panic!("decoding a program deeper than MAX_DECODE_DEPTH must fail; got Ok"),
            Err(error) => error.to_string(),
        };
        assert!(
            err.contains("Fix:"),
            "depth-exceed error must carry a `Fix:` hint, got: {err}"
        );
    }
}