Skip to main content

vyre_foundation/serial/
wire.rs

1// Stable binary IR wire format for serialized IR programs.
2
3use crate::ir::{BufferDecl, DataType, Expr, Node, Program};
4
/// Decoding half of the wire format; provides the `from_wire` entry point
/// that [`Program::from_wire`] delegates to.
pub mod decode;
/// Encoding half of the wire format; provides `to_wire`, `to_wire_into`, and
/// `put_nodes`, which the serializers in this file delegate to.
pub mod encode;
/// Envelope constants consulted by this file: `MAGIC` and
/// `WIRE_FORMAT_VERSION`.
pub mod framing;
/// Wire tag helpers, including `data_type_tag::put_data_type`.
pub mod tags;
13
/// Maximum buffers accepted from one IR wire-format program.
///
/// I10 requires bounded allocation before validating semantics. This limit
/// rejects hostile wire blobs before allocating the buffer table.
pub const MAX_BUFFERS: usize = 16_384;

/// Maximum statement nodes accepted from any single wire-format node list.
///
/// I10 requires node vectors to be bounded before allocation; nested lists are
/// each checked against this budget as they are decoded.
pub const MAX_NODES: usize = 1_000_000;

/// Maximum call arguments accepted from one wire-format call expression.
///
/// I10 requires expression argument vectors to be bounded before allocation.
pub const MAX_ARGS: usize = 4_096;

/// Maximum UTF-8 string length accepted from the IR wire format.
///
/// I10 bounds allocation for names and operation identifiers carried by
/// attacker-controlled wire bytes. The value is `1 << 20` bytes (1 MiB).
pub const MAX_STRING_LEN: usize = 1 << 20;

/// Maximum opaque payload length accepted from the IR wire format.
///
/// I10 bounds allocation for extension-defined `Expr::Opaque` and
/// `Node::Opaque` payloads carried by attacker-controlled wire bytes.
/// Must match the encoder limit in `put_node.rs` and `put_expr.rs`.
///
/// The value is derived from [`MAX_ARGS`]: `4_096 * 1024` bytes (4 MiB), so
/// changing `MAX_ARGS` also changes this limit.
pub const MAX_OPAQUE_PAYLOAD_LEN: usize = MAX_ARGS * 1024;

/// Maximum recursive decode depth for the IR wire format.
///
/// The limit is applied to the **shared** recursion counter in `Reader`
/// that `Reader::node` and `Reader::expr` both increment on entry and
/// decrement on exit. A hostile blob cannot evade the cap by alternating
/// statement and expression nesting: every nested decode call, whether it
/// descends into a `Node::If`/`Loop`/`Block` body or into a nested
/// [`Expr`] argument tree, counts against the same budget. Depth ≥
/// `MAX_DECODE_DEPTH` is rejected with a `Fix:`-prefixed error before any
/// stack frame is pushed, preventing stack-overflow `DoS` from a blob that
/// nests `Block(Block(... Block(...) ...))` a million times deep.
///
/// Covers audit L.1.35 (HIGH).
pub const MAX_DECODE_DEPTH: u32 = 64;

/// Hard ceiling on the size of a single wire-encoded Program in bytes.
///
/// The framing layer rejects larger blobs before any decode allocation so
/// attacker-controlled input cannot force unbounded memory growth. The value
/// is 64 MiB; `Program::from_wire` checks it before doing anything else.
pub const MAX_PROGRAM_BYTES: usize = 64 * 1024 * 1024;
64
/// Decoder state over a borrowed wire blob.
///
/// NOTE(review): the `node()` / `expr()` methods mentioned on `depth` are not
/// defined in this file; presumably they live in the `decode` module.
pub(crate) struct Reader<'a> {
    /// Input bytes being decoded, borrowed for `'a`.
    pub bytes: &'a [u8],
    /// Presumably the offset of the next unread byte in `bytes` (TODO confirm
    /// against the decoder implementation).
    pub pos: usize,
    /// Current recursion depth on the decode call stack. Incremented by
    /// every `node()` and `expr()` call and compared against
    /// [`MAX_DECODE_DEPTH`] before any nested decode proceeds.
    pub depth: u32,
}
73
74impl Program {
75    /// Serialize this IR program into the stable `VIR0` IR wire format.
76    ///
77    /// # Errors
78    ///
79    /// Returns [`crate::error::Error::WireFormatValidation`] when a count
80    /// cannot be represented in the versioned wire format or when a public
81    /// enum variant has no registered stable wire tag. The `message` field
82    /// carries the actionable diagnostic prose including a `Fix:` hint.
83    #[inline]
84    #[must_use]
85    pub fn to_wire(&self) -> Result<Vec<u8>, crate::error::Error> {
86        encode::to_wire(self).map_err(wire_err)
87    }
88
89    /// Serialize this IR program into the stable `VIR0` IR wire format,
90    /// appending to an existing buffer.
91    ///
92    /// # Errors
93    ///
94    /// Returns [`crate::error::Error::WireFormatValidation`] when a count
95    /// cannot be represented in the versioned wire format or when a public
96    /// enum variant has no registered stable wire tag. The `message` field
97    /// carries the actionable diagnostic prose including a `Fix:` hint.
98    #[inline]
99    pub fn to_wire_into(&self, dst: &mut Vec<u8>) -> Result<(), crate::error::Error> {
100        encode::to_wire_into(self, dst).map_err(wire_err)
101    }
102
103    /// Serialize this IR program into bytes.
104    ///
105    /// This compatibility wrapper preserves the pre-`to_wire` API name.
106    ///
107    /// On an encoding error, an empty vector is returned after logging the
108    /// failure. Use [`Program::to_wire`] when the caller needs to handle the
109    /// error explicitly.
110    #[must_use]
111    #[inline]
112    pub fn to_bytes(&self) -> Vec<u8> {
113        match self.to_wire() {
114            Ok(bytes) => bytes,
115            Err(error) => {
116                tracing::error!(
117                    error = %error,
118                    "Program::to_bytes: wire encoding failed; returning empty bytes. \
119                     Fix: call Program::to_wire and handle the validation error explicitly."
120                );
121                Vec::new()
122            }
123        }
124    }
125
126    /// Deserialize an IR program from the stable `VYRE` IR wire format.
127    ///
128    /// # Errors
129    ///
130    /// Returns [`crate::error::Error::VersionMismatch`] when the
131    /// payload advertises a schema version this runtime does not
132    /// understand. Returns [`crate::error::Error::WireFormatValidation`]
133    /// for any other decode failure  -  truncated bytes, unknown enum
134    /// tag, integrity digest mismatch, or malformed structural
135    /// section.
136    #[inline]
137    #[must_use]
138    pub fn from_wire(bytes: &[u8]) -> Result<Self, crate::error::Error> {
139        if bytes.len() > MAX_PROGRAM_BYTES {
140            return Err(wire_err(format!(
141                "Fix: wire blob is {} bytes, exceeding the {}-byte IR framing cap. Reject this input or split the Program before serialization.",
142                bytes.len(),
143                MAX_PROGRAM_BYTES
144            )));
145        }
146        // The version field is validated before the string-based
147        // decoder so that an out-of-range version surfaces as the
148        // typed `VersionMismatch` variant instead of being absorbed
149        // into the generic `WireFormatValidation` bucket. Tooling
150        // that hangs off the diagnostic code `E-WIRE-VERSION` relies
151        // on this distinction.
152        if bytes.len() >= framing::MAGIC.len() + 2
153            && &bytes[..framing::MAGIC.len()] == framing::MAGIC
154        {
155            let version = u16::from_le_bytes([bytes[4], bytes[5]]);
156            if version != framing::WIRE_FORMAT_VERSION {
157                return Err(crate::error::Error::VersionMismatch {
158                    expected: u32::from(framing::WIRE_FORMAT_VERSION),
159                    found: u32::from(version),
160                });
161            }
162        }
163        decode::from_wire(bytes).map_err(wire_err)
164    }
165
166    /// Deserialize an IR program from bytes.
167    ///
168    /// This compatibility wrapper preserves the pre-`from_wire` API name.
169    ///
170    /// # Errors
171    ///
172    /// Returns the same actionable decode errors as [`Program::from_wire`].
173    #[inline]
174    #[must_use]
175    pub fn from_bytes(bytes: &[u8]) -> Result<Self, crate::error::Error> {
176        Self::from_wire(bytes)
177    }
178
179    /// Stable content hash of this Program, used as a cache identity.
180    ///
181    /// Computed as BLAKE3 of the canonical wire-format encoding. This is the
182    /// exact-match identity for persistent-cache consumers that need a
183    /// deterministic key per Program without re-implementing canonicalization.
184    /// On canonical wire-encoding failure, the value is a domain-separated
185    /// error digest rather than an all-zero sentinel, so malformed programs do
186    /// not collapse into the same cache identity.
187    #[must_use]
188    pub fn content_hash(&self) -> [u8; 32] {
189        self.fingerprint()
190    }
191}
192
193/// Wrap an internal wire-format error string in the typed [`crate::error::Error`]
194/// so every public boundary of this module returns a structured variant
195/// callers can match on.
196fn wire_err(message: String) -> crate::error::Error {
197    crate::error::Error::WireFormatValidation { message }
198}
199
200/// Append stable VIR0 wire bytes for a [`DataType`] (tag + any payload) into
201/// `buf`. Used by disk-cache fingerprinting where `Debug` output would be
202/// the wrong contract.
203///
204/// # Errors
205///
206/// Returns a wire-format diagnostic when `value` contains a datatype variant
207/// without a stable tag or a payload that cannot fit the VIR0 encoding.
208pub fn append_data_type_fingerprint(buf: &mut Vec<u8>, value: &DataType) -> Result<(), String> {
209    tags::data_type_tag::put_data_type(buf, value).map_err(String::from)
210}
211
212/// Append stable VIR0 wire bytes for a `Node` statement list (count + each
213/// node). Matches the statement encoding used in full program wire (`to_wire`)
214/// (without the file envelope, metadata, or buffer table).
215///
216/// # Errors
217///
218/// Returns a wire-format diagnostic when the node list or any nested payload
219/// cannot be represented in VIR0.
220pub fn append_node_list_fingerprint(buf: &mut Vec<u8>, nodes: &[Node]) -> Result<(), String> {
221    encode::put_nodes(buf, nodes).map_err(String::from)
222}
223
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::{BufferAccess, BufferDecl, DataType, Node, Program};

    /// A buffer name one byte past `MAX_STRING_LEN` must make `to_wire`
    /// fail, and the legacy `to_bytes` wrapper must then yield no bytes.
    #[test]
    #[inline]
    pub(crate) fn to_bytes_returns_empty_on_wire_error() {
        let oversized_name = "x".repeat(MAX_STRING_LEN + 1);
        let buffers = vec![BufferDecl::storage(
            &oversized_name,
            0,
            BufferAccess::ReadOnly,
            DataType::U32,
        )];
        let program = Program::wrapped(buffers, [1, 1, 1], vec![]);

        assert!(program.to_wire().is_err());
        assert!(program.to_bytes().is_empty());
    }

    /// EDGE-001 regression: `MAX_DECODE_DEPTH` covers **both** Node and Expr
    /// recursion through the same counter. A blob that nests statement
    /// bodies past the depth limit must be rejected at decode time,
    /// preventing stack-overflow DoS on untrusted input.
    ///
    /// The body runs on a dedicated thread with an 8 MiB stack because
    /// the encode/decode walk down a `MAX_DECODE_DEPTH + 1`-deep Block
    /// tree uses ~3–4× the native frames the default 2 MiB test stack
    /// allocates. Without the explicit stack, the test itself
    /// stack-overflows before the decode guard ever fires, masking
    /// the real assertion.
    #[test]
    pub(crate) fn decode_depth_cap_rejects_deeply_nested_blocks() {
        let worker = std::thread::Builder::new()
            .stack_size(8 * 1024 * 1024)
            .spawn(run_decode_depth_cap)
            .expect("Fix: spawn test worker");
        worker
            .join()
            .expect("Fix: decode-depth-cap worker panicked");
    }

    /// Worker body for `decode_depth_cap_rejects_deeply_nested_blocks`.
    fn run_decode_depth_cap() {
        // Wrap an empty block `MAX_DECODE_DEPTH` times without recursion, so
        // building the tree does not itself consume stack proportional to
        // its depth.
        let nested = (0..MAX_DECODE_DEPTH)
            .fold(Node::Block(vec![]), |inner, _| Node::Block(vec![inner]));
        let buffers = vec![BufferDecl::read_write("out", 0, DataType::U32)];
        let program = Program::wrapped(buffers, [1, 1, 1], vec![nested]);

        let bytes = program
            .to_wire()
            .expect("Fix: building a (MAX_DEPTH+1)-nested program must still encode");

        let decoded = Program::from_wire(&bytes);
        assert!(
            decoded.is_err(),
            "decoding a program deeper than MAX_DECODE_DEPTH must fail; got Ok"
        );

        let err = decoded.unwrap_err().to_string();
        assert!(
            err.contains("Fix:"),
            "depth-exceed error must carry a `Fix:` hint, got: {err}"
        );
    }
}
296
/// OPAQUE-001 regression: encoder and decoder must agree on the
/// maximum opaque payload length. A payload at MAX_OPAQUE_PAYLOAD_LEN
/// must encode; a payload one byte larger must fail at encode time.
///
/// NOTE(review): only `to_wire` is exercised below; nothing is decoded, so
/// the decoder half of the "symmetric" claim is not verified by this test.
///
/// NOTE(review): this test sits outside `mod tests`, so it resolves
/// `BufferDecl`, `DataType`, `Node`, and `Program` through the file-level
/// `use crate::ir::{...}` line rather than the test module's imports.
#[test]
pub(crate) fn opaque_payload_limit_is_symmetric() {
    use crate::ir::{Expr, ExprNode};
    use std::any::Any;

    /// Minimal extension expression whose wire payload is the wrapped bytes,
    /// letting the test pick an exact payload length.
    #[derive(Debug)]
    struct BigOpaque(Vec<u8>);
    impl ExprNode for BigOpaque {
        fn extension_kind(&self) -> &'static str {
            "test.big"
        }
        fn debug_identity(&self) -> &str {
            "test.big"
        }
        fn result_type(&self) -> Option<DataType> {
            Some(DataType::U32)
        }
        fn cse_safe(&self) -> bool {
            false
        }
        fn stable_fingerprint(&self) -> [u8; 32] {
            [0; 32]
        }
        fn validate_extension(&self) -> Result<(), String> {
            Ok(())
        }
        fn as_any(&self) -> &dyn Any {
            self
        }
        // The payload is the wrapped vector verbatim, so its length is
        // exactly what the test constructs.
        fn wire_payload(&self) -> Vec<u8> {
            self.0.clone()
        }
    }

    // At the limit: must encode successfully.
    let expr_ok = Expr::opaque(BigOpaque(vec![0u8; MAX_OPAQUE_PAYLOAD_LEN]));
    let program_ok = Program::wrapped(
        vec![BufferDecl::read_write("out", 0, DataType::U32)],
        [1, 1, 1],
        vec![Node::let_bind("_", expr_ok)],
    );
    assert!(
        program_ok.to_wire().is_ok(),
        "at-limit opaque payload ({MAX_OPAQUE_PAYLOAD_LEN} bytes) must encode"
    );

    // One byte over: must fail at encode time.
    let expr_over = Expr::opaque(BigOpaque(vec![0u8; MAX_OPAQUE_PAYLOAD_LEN + 1]));
    let program_over = Program::wrapped(
        vec![BufferDecl::read_write("out", 0, DataType::U32)],
        [1, 1, 1],
        vec![Node::let_bind("_", expr_over)],
    );
    let err = program_over
        .to_wire()
        .expect_err("opaque payload exceeding MAX_OPAQUE_PAYLOAD_LEN must fail at encode");
    let msg = err.to_string();
    // Accept either the constant's name or its numeric value in the message.
    assert!(
        msg.contains("MAX_OPAQUE_PAYLOAD_LEN") || msg.contains(&MAX_OPAQUE_PAYLOAD_LEN.to_string()),
        "error should mention the limit, got: {msg}"
    );
}
357    assert!(
358        msg.contains("MAX_OPAQUE_PAYLOAD_LEN") || msg.contains(&MAX_OPAQUE_PAYLOAD_LEN.to_string()),
359        "error should mention the limit, got: {msg}"
360    );
361}