Skip to main content

vyre_foundation/serial/
wire.rs

1// Stable binary IR wire format for serialized IR programs.
2
3use crate::ir::{BufferDecl, DataType, Expr, Node, Program};
4
/// Wire-format decoder. Provides the `from_wire` entry point that
/// [`Program::from_wire`] delegates to.
pub mod decode;
/// Wire-format encoder. Provides `to_wire`, `to_wire_into`, and the
/// `put_nodes` statement-list encoder used by this module.
pub mod encode;
/// Framing constants (`MAGIC`, `WIRE_FORMAT_VERSION`) that
/// [`Program::from_wire`] checks at the head of a wire blob.
pub mod framing;
/// Wire tag helpers, including `data_type_tag::put_data_type` for
/// [`DataType`] values.
pub mod tags;
13
/// Upper bound on the number of buffers one IR wire-format program may
/// declare.
///
/// I10 requires allocation to be bounded before semantics are validated, so
/// a hostile wire blob is rejected against this limit before the buffer
/// table is allocated.
pub const MAX_BUFFERS: usize = 1 << 14;
19
/// Upper bound on the statement nodes accepted from any single wire-format
/// node list.
///
/// I10 requires node vectors to be bounded before they are allocated. The
/// budget applies per list: each nested list is checked against it as it is
/// decoded.
pub const MAX_NODES: usize = 1_000 * 1_000;
25
/// Upper bound on the arguments accepted from one wire-format call
/// expression.
///
/// I10 requires expression argument vectors to be bounded before they are
/// allocated.
pub const MAX_ARGS: usize = 1 << 12;
30
/// Upper bound on the length of a UTF-8 string accepted from the IR wire
/// format.
///
/// Names and operation identifiers arrive in attacker-controlled wire
/// bytes; I10 bounds the allocation made for them.
pub const MAX_STRING_LEN: usize = 1024 * 1024;
36
/// Upper bound on recursive decode depth for the IR wire format.
///
/// The cap applies to the single recursion counter held by `Reader`, which
/// both `Reader::node` and `Reader::expr` increment on entry and decrement
/// on exit. Because the counter is **shared**, alternating statement and
/// expression nesting does not evade the limit: descending into a
/// `Node::If`/`Loop`/`Block` body and descending into a nested [`Expr`]
/// argument tree draw on the same budget.
///
/// A depth ≥ `MAX_DECODE_DEPTH` is rejected with a `Fix:`-prefixed error
/// before any further stack frame is pushed. This prevents a stack-overflow
/// DoS from a blob that nests `Block(Block(... Block(...) ...))` a million
/// levels deep.
///
/// Covers audit L.1.35 (HIGH).
pub const MAX_DECODE_DEPTH: u32 = 1 << 8;
51
/// Largest size, in bytes, of a single wire-encoded Program (64 MiB).
///
/// Blobs above this ceiling are rejected at the framing layer before any
/// decode allocation happens, so attacker-controlled input cannot drive
/// unbounded memory growth.
pub const MAX_PROGRAM_BYTES: usize = 64 << 20;
57
/// Decoder state over a borrowed slice of wire bytes.
///
/// NOTE(review): the `node()` / `expr()` methods referenced below are not
/// defined in this part of the file — presumably they live in the `decode`
/// module; confirm.
pub(crate) struct Reader<'a> {
    /// Borrowed wire bytes being decoded.
    pub bytes: &'a [u8],
    /// Presumably the offset of the next unread byte in `bytes` — confirm
    /// against the decode implementation.
    pub pos: usize,
    /// Current recursion depth on the decode call stack. Incremented by
    /// every `node()` and `expr()` call and compared against
    /// [`MAX_DECODE_DEPTH`] before any nested decode proceeds.
    pub depth: u32,
}
66
67impl Program {
68    /// Serialize this IR program into the stable `VIR0` IR wire format.
69    ///
70    /// # Errors
71    ///
72    /// Returns [`crate::error::Error::WireFormatValidation`] when a count
73    /// cannot be represented in the versioned wire format or when a public
74    /// enum variant has no registered stable wire tag. The `message` field
75    /// carries the actionable diagnostic prose including a `Fix:` hint.
76    #[inline]
77    #[must_use]
78    pub fn to_wire(&self) -> Result<Vec<u8>, crate::error::Error> {
79        encode::to_wire(self).map_err(wire_err)
80    }
81
82    /// Serialize this IR program into the stable `VIR0` IR wire format,
83    /// appending to an existing buffer.
84    ///
85    /// # Errors
86    ///
87    /// Returns [`crate::error::Error::WireFormatValidation`] when a count
88    /// cannot be represented in the versioned wire format or when a public
89    /// enum variant has no registered stable wire tag. The `message` field
90    /// carries the actionable diagnostic prose including a `Fix:` hint.
91    #[inline]
92    pub fn to_wire_into(&self, dst: &mut Vec<u8>) -> Result<(), crate::error::Error> {
93        encode::to_wire_into(self, dst).map_err(wire_err)
94    }
95
96    /// Serialize this IR program into bytes.
97    ///
98    /// This compatibility wrapper preserves the pre-`to_wire` API name.
99    ///
100    /// On an encoding error, an empty vector is returned after logging the
101    /// failure. Use [`Program::to_wire`] when the caller needs to handle the
102    /// error explicitly.
103    #[must_use]
104    #[inline]
105    pub fn to_bytes(&self) -> Vec<u8> {
106        match self.to_wire() {
107            Ok(bytes) => bytes,
108            Err(error) => {
109                tracing::error!(
110                    error = %error,
111                    "Program::to_bytes: wire encoding failed; returning empty bytes. \
112                     Fix: call Program::to_wire and handle the validation error explicitly."
113                );
114                Vec::new()
115            }
116        }
117    }
118
119    /// Deserialize an IR program from the stable `VYRE` IR wire format.
120    ///
121    /// # Errors
122    ///
123    /// Returns [`crate::error::Error::VersionMismatch`] when the
124    /// payload advertises a schema version this runtime does not
125    /// understand. Returns [`crate::error::Error::WireFormatValidation`]
126    /// for any other decode failure — truncated bytes, unknown enum
127    /// tag, integrity digest mismatch, or malformed structural
128    /// section.
129    #[inline]
130    #[must_use]
131    pub fn from_wire(bytes: &[u8]) -> Result<Self, crate::error::Error> {
132        if bytes.len() > MAX_PROGRAM_BYTES {
133            return Err(wire_err(format!(
134                "Fix: wire blob is {} bytes, exceeding the {}-byte IR framing cap. Reject this input or split the Program before serialization.",
135                bytes.len(),
136                MAX_PROGRAM_BYTES
137            )));
138        }
139        // The version field is validated before the string-based
140        // decoder so that an out-of-range version surfaces as the
141        // typed `VersionMismatch` variant instead of being absorbed
142        // into the generic `WireFormatValidation` bucket. Tooling
143        // that hangs off the diagnostic code `E-WIRE-VERSION` relies
144        // on this distinction.
145        if bytes.len() >= framing::MAGIC.len() + 2
146            && &bytes[..framing::MAGIC.len()] == framing::MAGIC
147        {
148            let version = u16::from_le_bytes([bytes[4], bytes[5]]);
149            if version != framing::WIRE_FORMAT_VERSION {
150                return Err(crate::error::Error::VersionMismatch {
151                    expected: u32::from(framing::WIRE_FORMAT_VERSION),
152                    found: u32::from(version),
153                });
154            }
155        }
156        decode::from_wire(bytes).map_err(wire_err)
157    }
158
159    /// Deserialize an IR program from bytes.
160    ///
161    /// This compatibility wrapper preserves the pre-`from_wire` API name.
162    ///
163    /// # Errors
164    ///
165    /// Returns the same actionable decode errors as [`Program::from_wire`].
166    #[inline]
167    #[must_use]
168    pub fn from_bytes(bytes: &[u8]) -> Result<Self, crate::error::Error> {
169        Self::from_wire(bytes)
170    }
171
172    /// Stable content hash of this Program, used as a cache identity.
173    ///
174    /// Computed as BLAKE3 of the canonical wire-format encoding. This is the
175    /// exact-match identity for persistent-cache consumers that need a
176    /// deterministic key per Program without re-implementing canonicalization.
177    /// On canonical wire-encoding failure, the value is a domain-separated
178    /// error digest rather than an all-zero sentinel, so malformed programs do
179    /// not collapse into the same cache identity.
180    #[must_use]
181    pub fn content_hash(&self) -> [u8; 32] {
182        self.fingerprint()
183    }
184}
185
/// Wrap an internal wire-format error string in the typed [`crate::error::Error`]
/// so every public boundary of this module returns a structured variant
/// callers can match on.
///
/// The string is moved, unchanged, into the `message` field of
/// [`crate::error::Error::WireFormatValidation`].
fn wire_err(message: String) -> crate::error::Error {
    crate::error::Error::WireFormatValidation { message }
}
192
193/// Append stable VIR0 wire bytes for a [`DataType`] (tag + any payload) into
194/// `buf`. Used by disk-cache fingerprinting where `Debug` output would be
195/// the wrong contract.
196pub fn append_data_type_fingerprint(buf: &mut Vec<u8>, value: &DataType) -> Result<(), String> {
197    tags::data_type_tag::put_data_type(buf, value).map_err(String::from)
198}
199
200/// Append stable VIR0 wire bytes for a `Node` statement list (count + each
201/// node). Matches the statement encoding used in full program wire (`to_wire`)
202/// (without the file envelope, metadata, or buffer table).
203pub fn append_node_list_fingerprint(buf: &mut Vec<u8>, nodes: &[Node]) -> Result<(), String> {
204    encode::put_nodes(buf, nodes).map_err(String::from)
205}
206
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::{BufferAccess, BufferDecl, DataType, Node, Program};

    /// `Program::to_bytes` must yield an empty vector for a program that
    /// `Program::to_wire` refuses to encode.
    // `#[inline]` is intentionally absent: it has no effect on a `#[test]`
    // function, which is only ever invoked by the test harness.
    #[test]
    pub(crate) fn to_bytes_returns_empty_on_wire_error() {
        // A buffer name one past `MAX_STRING_LEN` is expected to make the
        // encoder fail; the first assertion below pins that expectation.
        let long_name = "x".repeat(MAX_STRING_LEN + 1);
        let program = Program::wrapped(
            vec![BufferDecl::storage(
                &long_name,
                0,
                BufferAccess::ReadOnly,
                DataType::U32,
            )],
            [1, 1, 1],
            vec![],
        );
        assert!(program.to_wire().is_err());
        assert!(program.to_bytes().is_empty());
    }

    /// EDGE-001 regression: `MAX_DECODE_DEPTH` covers **both** Node and Expr
    /// recursion through the same counter. A blob that nests statement
    /// bodies past the depth limit must be rejected at decode time,
    /// preventing stack-overflow DoS on untrusted input.
    ///
    /// The test runs on a dedicated thread with an 8 MiB stack because
    /// the encode/decode walk down a `MAX_DECODE_DEPTH + 1`-deep Block
    /// tree uses ~3–4× the native frames the default 2 MiB test stack
    /// allocates. Without the explicit stack, the test itself
    /// stack-overflows before the decode guard ever fires — masking
    /// the real assertion.
    #[test]
    pub(crate) fn decode_depth_cap_rejects_deeply_nested_blocks() {
        std::thread::Builder::new()
            .stack_size(8 * 1024 * 1024)
            .spawn(run_decode_depth_cap)
            .expect("Fix: spawn test worker")
            .join()
            .expect("Fix: decode-depth-cap worker panicked");
    }

    /// Worker body for [`decode_depth_cap_rejects_deeply_nested_blocks`]:
    /// encodes a `MAX_DECODE_DEPTH + 1`-deep `Block` tree and checks that
    /// decoding it fails with a `Fix:`-carrying error.
    fn run_decode_depth_cap() {
        // Build the nested program iteratively so the test thread's
        // stack only owns the tree, not a recursion chain the depth
        // of the tree. Starting from one empty Block and wrapping it
        // `MAX_DECODE_DEPTH` times gives `MAX_DECODE_DEPTH + 1` levels.
        let nested = (0..MAX_DECODE_DEPTH)
            .fold(Node::Block(vec![]), |inner, _| Node::Block(vec![inner]));
        let program = Program::wrapped(
            vec![BufferDecl::read_write("out", 0, DataType::U32)],
            [1, 1, 1],
            vec![nested],
        );
        let bytes = program
            .to_wire()
            .expect("Fix: building a (MAX_DEPTH+1)-nested program must still encode");
        // Matching instead of `unwrap_err` keeps the check independent of
        // `Program: Debug` and never formats the decoded tree on failure.
        let err = match Program::from_wire(&bytes) {
            Ok(_) => panic!("decoding a program deeper than MAX_DECODE_DEPTH must fail; got Ok"),
            Err(error) => error.to_string(),
        };
        assert!(
            err.contains("Fix:"),
            "depth-exceed error must carry a `Fix:` hint, got: {err}"
        );
    }
}