Skip to main content

vyre_foundation/serial/
text.rs

1// Canonical text format for vyre IR programs (VYRE_RELEASE_PLAN Phase 2.3-2.4).
2//
3// The text format is a *stable* human-diffable encoding of the IR
4// that round-trips byte-for-byte through the binary wire format.
5//
6// ```text
7// vyre_ir v0.1
8// wire_bytes 42
// 5649523000050001... (hex-encoded wire body, no separators between bytes)
10// ```
11//
12// # Format
13//
14// ```ebnf
// program       = header body
// header        = "vyre_ir v0.1\n" wire_byte_line
// wire_byte_line = "wire_bytes " uint32 "\n"
// body          = hex_line { hex_line }
// hex_line      = hex_byte{1..32} "\n"
20// hex_byte      = hex_digit hex_digit
21// hex_digit     = "0".."9" | "a".."f"
22// uint32        = ("0".."9")+
23// ```
24//
25// The header's `wire_bytes` line carries the body length in bytes so
26// the parser can reject truncation before allocating the decode
27// buffer. The body is the exact output of [`Program::to_wire`]
28// rendered as lowercase hex, chunked at 32 bytes per line (64 hex
29// characters) for diffability. The last line may be shorter.
30//
31// # Why route through the binary wire format?
32//
33// The binary wire format is already a stable canonical encoding of
34// every `Program` variant, already has bounds checks, already has
35// cross-crate parity tests, and is already the thing the runtime
36// uses. Building a second parser that reads a handwritten
37// hierarchical syntax (S-expressions, JSON, TOML, etc.) would
38// duplicate every escape/bounds/unicode check while providing no
39// additional safety. The text format layered on top of the binary
40// format is:
41//
42// - **Deterministic** — same program always produces the same bytes
43//   because `to_wire` is deterministic and hex encoding is
44//   deterministic.
45// - **Human-diffable** — `git diff` on two `.vyre` files shows
46//   exactly which bytes changed, which in the wire format usually
47//   corresponds to specific node/buffer changes.
48// - **Round-trippable** — the round-trip property
49//   `from_text(to_text(p)) == p` holds by construction because the
50//   inner `to_wire`/`from_wire` already round-trips. This file only
51//   adds the hex envelope.
52// - **Small** — ~150 LOC of parser and serializer total, fits in
53//   one file, one set of tests.
54//
55// A richer S-expression form can be layered on top later if a
56// reader wants op-by-op pretty printing; the stable format for
57// persistence and CI diff is this one.
58
59use crate::ir_inner::model::program::Program;
60
/// First line of every text-format program.
///
/// The parser compares the first input line against this string and
/// rejects anything else, so changing the version is a format
/// migration.
pub const TEXT_FORMAT_HEADER: &str = "vyre_ir v0.1";

/// Upper bound, in bytes, on the body length a text-format program may
/// declare (64 MiB).
///
/// Mirrors the I10 bound on `Program::from_wire`: comfortably larger
/// than any legitimate program, yet small enough that a hostile
/// `wire_bytes` declaration cannot drive the parser into an OOM.
pub const MAX_TEXT_WIRE_BYTES: usize = 64 << 20;

/// Number of wire bytes rendered on each hex line.
///
/// 32 bytes become 64 hex characters, which keeps every body line
/// inside the usual 80-column budget even with its trailing newline.
pub const WIRE_BYTES_PER_LINE: usize = 32;
78
/// Error returned when a text-format program fails to parse or, for
/// [`TextParseError::WireEncodeFailed`], fails to serialize.
///
/// Every variant carries an actionable `Fix:` message rendered via
/// [`TextParseError::fix_hint`]. Parsing never panics.
///
/// Line numbers stored in the variants are 1-indexed over the whole
/// input, so the first body line is line 3 (after the magic header and
/// the `wire_bytes` line).
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum TextParseError {
    /// The program did not start with the `vyre_ir v0.1` header.
    MissingHeader {
        /// Snippet of the first line, truncated to 64 characters.
        /// Empty when the input has no lines at all.
        observed: String,
    },
    /// The header was present but the second line was not the
    /// expected `wire_bytes N` declaration: either the `wire_bytes `
    /// prefix was missing or `N` did not parse as an unsigned integer.
    MissingWireBytesLine {
        /// Snippet of the second line, truncated to 64 characters.
        observed: String,
    },
    /// `wire_bytes` parsed but exceeded `MAX_TEXT_WIRE_BYTES`.
    WireBytesTooLarge {
        /// The declared length.
        declared: usize,
    },
    /// A hex line contained a non-hex character.
    InvalidHexCharacter {
        /// Offending line number (1-indexed).
        line: usize,
        /// The character that broke the parse.
        character: char,
    },
    /// A hex line's length was odd, which cannot round-trip
    /// to whole bytes.
    OddHexLineLength {
        /// Offending line number (1-indexed).
        line: usize,
        /// The observed line length in bytes, after stripping any
        /// trailing `\r`; identical to the character count for ASCII
        /// input.
        observed: usize,
    },
    /// Total decoded bytes did not match the declared `wire_bytes`.
    DeclaredLengthMismatch {
        /// Declared byte count from the header.
        declared: usize,
        /// Actual decoded byte count.
        actual: usize,
    },
    /// The inner binary wire decoder rejected the byte payload.
    ///
    /// The carried error is whatever [`Program::from_wire`] emitted —
    /// a typed [`crate::error::Error`] whose `Display` impl already
    /// carries the `Fix:`-prefixed diagnostic prose.
    WireDecodeFailed {
        /// The inner decoder error.
        inner: crate::error::Error,
    },
    /// The inner binary wire encoder rejected the program when
    /// we tried to serialize it. Only emitted by `to_text`.
    WireEncodeFailed {
        /// The inner encoder error.
        inner: crate::error::Error,
    },
}
140
141impl TextParseError {
142    /// Human-readable one-line rendering.
143    #[must_use]
144    #[inline]
145    pub fn message(&self) -> String {
146        match self {
147            Self::MissingHeader { observed } => format!(
148                "text format must start with `{TEXT_FORMAT_HEADER}` but saw `{observed}`. {}",
149                self.fix_hint()
150            ),
151            Self::MissingWireBytesLine { observed } => format!(
152                "text format header must be followed by `wire_bytes <N>` but saw `{observed}`. {}",
153                self.fix_hint()
154            ),
155            Self::WireBytesTooLarge { declared } => format!(
156                "declared wire_bytes = {declared} exceeds MAX_TEXT_WIRE_BYTES = {MAX_TEXT_WIRE_BYTES}. {}",
157                self.fix_hint()
158            ),
159            Self::InvalidHexCharacter { line, character } => format!(
160                "invalid hex character `{character}` on body line {line}. {}",
161                self.fix_hint()
162            ),
163            Self::OddHexLineLength { line, observed } => format!(
164                "hex body line {line} has {observed} characters, must be even. {}",
165                self.fix_hint()
166            ),
167            Self::DeclaredLengthMismatch { declared, actual } => format!(
168                "declared wire_bytes = {declared} but decoded {actual}. {}",
169                self.fix_hint()
170            ),
171            Self::WireDecodeFailed { inner } => {
172                format!("inner binary wire decoder rejected the body: {inner}")
173            }
174            Self::WireEncodeFailed { inner } => {
175                format!("inner binary wire encoder rejected the program: {inner}")
176            }
177        }
178    }
179
180    /// Actionable `Fix:`-prefixed hint for the caller.
181    #[must_use]
182    #[inline]
183    pub fn fix_hint(&self) -> &'static str {
184        match self {
185            Self::MissingHeader { .. } => {
186                "Fix: re-emit the program with Program::to_text, or manually prepend `vyre_ir v0.1\\n`."
187            }
188            Self::MissingWireBytesLine { .. } => {
189                "Fix: re-emit the program with Program::to_text; the second line must read `wire_bytes N`."
190            }
191            Self::WireBytesTooLarge { .. } => {
192                "Fix: the program is too large to round-trip through the text format; use Program::to_wire directly or split the program."
193            }
194            Self::InvalidHexCharacter { .. } | Self::OddHexLineLength { .. } => {
195                "Fix: the text body must be lowercase hex with 64 characters per line (32 bytes). Re-emit with Program::to_text."
196            }
197            Self::DeclaredLengthMismatch { .. } => {
198                "Fix: the wire_bytes header does not match the body length. Recompute wire_bytes or re-emit with Program::to_text."
199            }
200            Self::WireDecodeFailed { .. } | Self::WireEncodeFailed { .. } => {
201                "Fix: see the wrapped error message for the underlying wire-format problem."
202            }
203        }
204    }
205}
206
207impl std::fmt::Display for TextParseError {
208    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209        f.write_str(&self.message())
210    }
211}
212
213impl std::error::Error for TextParseError {}
214
215impl Program {
216    /// Serialize to the canonical vyre IR text format.
217    ///
218    /// # Errors
219    ///
220    /// Returns `TextParseError::WireEncodeFailed` when the inner
221    /// binary wire encoder fails. This cannot happen for a program
222    /// produced by a successful `Program::new` because every field
223    /// of `Program` is a valid wire input by construction; the
224    /// error path exists only for programs synthesized through
225    /// unsafe means or a wire-format breaking change.
226    #[inline]
227    #[must_use]
228    pub fn to_text(&self) -> Result<String, TextParseError> {
229        let bytes = self
230            .to_wire()
231            .map_err(|error| TextParseError::WireEncodeFailed { inner: error })?;
232        Ok(encode_text_body(&bytes))
233    }
234
235    /// Parse the canonical vyre IR text format.
236    ///
237    /// # Errors
238    ///
239    /// Returns a [`TextParseError`] describing the first parse
240    /// failure. Parsing is total — no panic path.
241    #[inline]
242    #[must_use]
243    pub fn from_text(input: &str) -> Result<Self, TextParseError> {
244        let mut lines = input.lines();
245        let header = lines.next().unwrap_or("");
246        if header != TEXT_FORMAT_HEADER {
247            return Err(TextParseError::MissingHeader {
248                observed: truncate(header, 64),
249            });
250        }
251        let wire_line = lines.next().unwrap_or("");
252        let declared_bytes = parse_wire_bytes_line(wire_line)?;
253        if declared_bytes > MAX_TEXT_WIRE_BYTES {
254            return Err(TextParseError::WireBytesTooLarge {
255                declared: declared_bytes,
256            });
257        }
258        let mut body = Vec::with_capacity(declared_bytes);
259        for (offset, line) in lines.enumerate() {
260            let trimmed = line.trim_end_matches('\r');
261            if trimmed.is_empty() {
262                continue;
263            }
264            if trimmed.len() % 2 != 0 {
265                return Err(TextParseError::OddHexLineLength {
266                    line: offset + 3,
267                    observed: trimmed.len(),
268                });
269            }
270            let mut bytes = trimmed.as_bytes().chunks_exact(2);
271            for pair in &mut bytes {
272                let high =
273                    hex_nibble(pair[0]).ok_or_else(|| TextParseError::InvalidHexCharacter {
274                        line: offset + 3,
275                        character: pair[0] as char,
276                    })?;
277                let low =
278                    hex_nibble(pair[1]).ok_or_else(|| TextParseError::InvalidHexCharacter {
279                        line: offset + 3,
280                        character: pair[1] as char,
281                    })?;
282                body.push((high << 4) | low);
283            }
284        }
285        if body.len() != declared_bytes {
286            return Err(TextParseError::DeclaredLengthMismatch {
287                declared: declared_bytes,
288                actual: body.len(),
289            });
290        }
291        Program::from_wire(&body).map_err(|inner| TextParseError::WireDecodeFailed { inner })
292    }
293}
294
295/// Hex-encode wire bytes into the canonical vyre IR text format.
296///
297/// Prepends the `vyre_ir v0.1` header and `wire_bytes N` line, then
298/// writes the body as lowercase hex chunked at [`WIRE_BYTES_PER_LINE`].
299#[inline]
300#[must_use]
301pub(crate) fn encode_text_body(bytes: &[u8]) -> String {
302    let hex_chars = bytes.len() * 2;
303    let line_count = bytes.len().div_ceil(WIRE_BYTES_PER_LINE);
304    // header + wire_bytes line + body lines + trailing newline
305    let capacity = TEXT_FORMAT_HEADER.len() + 32 + hex_chars + line_count + 1;
306    let mut out = String::with_capacity(capacity);
307    out.push_str(TEXT_FORMAT_HEADER);
308    out.push('\n');
309    out.push_str("wire_bytes ");
310    push_usize(&mut out, bytes.len());
311    out.push('\n');
312    for chunk in bytes.chunks(WIRE_BYTES_PER_LINE) {
313        for byte in chunk {
314            push_hex_byte(&mut out, *byte);
315        }
316        out.push('\n');
317    }
318    out
319}
320
/// Append the decimal rendering of `value` to `out`.
///
/// Digits are staged in a fixed stack buffer, so the only possible
/// allocation is `out` growing.
#[inline]
pub(crate) fn push_usize(out: &mut String, value: usize) {
    // 20 digits are enough for a 64-bit `usize`.
    let mut buffer = [b'0'; 20];
    let mut start = buffer.len();
    let mut remaining = value;
    // Fill from the back so the digits end up in reading order; the
    // loop body runs at least once, which also renders `0`.
    loop {
        start -= 1;
        // `remaining % 10` is always below 10, so the cast cannot truncate.
        buffer[start] = b'0' + (remaining % 10) as u8;
        remaining /= 10;
        if remaining == 0 {
            break;
        }
    }
    out.extend(buffer[start..].iter().map(|&digit| char::from(digit)));
}
341
/// Append `byte` to `out` as exactly two lowercase hex characters,
/// high nibble first.
#[inline]
pub(crate) fn push_hex_byte(out: &mut String, byte: u8) {
    // Map a value in 0..=15 to its lowercase hex digit.
    let digit = |nibble: u8| -> char {
        match nibble {
            0..=9 => char::from(b'0' + nibble),
            _ => char::from(b'a' + (nibble - 10)),
        }
    };
    out.push(digit(byte >> 4));
    out.push(digit(byte & 0x0f));
}
349
350/// Parse the `wire_bytes N` header line from the text format.
351#[inline]
352#[must_use]
353pub(crate) fn parse_wire_bytes_line(line: &str) -> Result<usize, TextParseError> {
354    let trimmed = line.trim_end_matches('\r');
355    let Some(rest) = trimmed.strip_prefix("wire_bytes ") else {
356        return Err(TextParseError::MissingWireBytesLine {
357            observed: truncate(trimmed, 64),
358        });
359    };
360    rest.parse::<usize>()
361        .map_err(|_| TextParseError::MissingWireBytesLine {
362            observed: truncate(trimmed, 64),
363        })
364}
365
/// Decode one ASCII hex digit into its value in `0..=15`.
///
/// Both lowercase (`a`-`f`) and uppercase (`A`-`F`) digits are
/// accepted; every other byte, including any non-ASCII byte, yields
/// `None`.
#[inline]
#[must_use]
pub(crate) fn hex_nibble(byte: u8) -> Option<u8> {
    char::from(byte)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
377
/// Truncate a string to at most `max` characters, replacing the tail
/// with an ellipsis if anything was cut.
///
/// Lengths are counted in `char`s, not bytes, so multi-byte characters
/// are never split. When the input fits it is returned unchanged;
/// otherwise the result is the first `max - 1` characters followed by
/// `…`. With `max == 0` there is no room even for the ellipsis, so the
/// result is the empty string.
#[inline]
#[must_use]
pub(crate) fn truncate(input: &str, max: usize) -> String {
    // A character at index `max` exists only if the input is longer
    // than `max`; this stops scanning there instead of counting the
    // whole input.
    if input.chars().nth(max).is_none() {
        return input.to_string();
    }
    // `max - 1` would underflow for `max == 0`.
    let Some(kept) = max.checked_sub(1) else {
        return String::new();
    };
    let mut out: String = input.chars().take(kept).collect();
    out.push('…');
    out
}
390#[cfg(test)]
391mod tests;