vyre_foundation/serial/text.rs
1// Canonical text format for vyre IR programs (VYRE_RELEASE_PLAN Phase 2.3-2.4).
2//
3// The text format is a *stable* human-diffable encoding of the IR
4// that round-trips byte-for-byte through the binary wire format.
5//
6// ```text
7// vyre_ir v0.1
8// wire_bytes 42
9// 56495230 00050001 ... (hex-encoded wire body)
10// ```
11//
12// # Format
13//
14// ```ebnf
// program = header body
// header = "vyre_ir v0.1\n" wire_byte_line
// wire_byte_line = "wire_bytes " uint32 "\n"
// body = { hex_line }
// hex_line = hex_byte{1..32} "\n"
// hex_byte = hex_digit hex_digit
// hex_digit = "0".."9" | "a".."f"
// uint32 = ("0".."9")+
23// ```
24//
// The header's `wire_bytes` line carries the body length in bytes so
// the parser can reject an oversized declaration before allocating
// the decode buffer, and can detect a truncated or padded body once
// the hex lines have been decoded. The body is the exact output of
// [`Program::to_wire`] rendered as lowercase hex, chunked at 32 bytes
// per line (64 hex characters) for diffability. The last line may be
// shorter.
30//
31// # Why route through the binary wire format?
32//
33// The binary wire format is already a stable canonical encoding of
34// every `Program` variant, already has bounds checks, already has
35// cross-crate parity tests, and is already the thing the runtime
36// uses. Building a second parser that reads a handwritten
37// hierarchical syntax (S-expressions, JSON, TOML, etc.) would
38// duplicate every escape/bounds/unicode check while providing no
39// additional safety. The text format layered on top of the binary
40// format is:
41//
42// - **Deterministic** — same program always produces the same bytes
43// because `to_wire` is deterministic and hex encoding is
44// deterministic.
45// - **Human-diffable** — `git diff` on two `.vyre` files shows
46// exactly which bytes changed, which in the wire format usually
47// corresponds to specific node/buffer changes.
48// - **Round-trippable** — the round-trip property
49// `from_text(to_text(p)) == p` holds by construction because the
50// inner `to_wire`/`from_wire` already round-trips. This file only
51// adds the hex envelope.
52// - **Small** — ~150 LOC of parser and serializer total, fits in
53// one file, one set of tests.
54//
55// A richer S-expression form can be layered on top later if a
56// reader wants op-by-op pretty printing; the stable format for
57// persistence and CI diff is this one.
58
59use crate::ir_inner::model::program::Program;
60
/// First line of every text-format program.
///
/// The parser compares the first input line against this string
/// exactly and rejects anything else, so bumping the version requires
/// a migration.
pub const TEXT_FORMAT_HEADER: &str = "vyre_ir v0.1";

/// Largest `wire_bytes` declaration, in bytes, that the parser accepts.
///
/// Larger declarations are rejected before the decode buffer is
/// allocated. Mirrors the I10 bound on `Program::from_wire`: 64 MiB
/// exceeds any legitimate program while still being small enough that
/// a hostile input cannot trigger an OOM.
pub const MAX_TEXT_WIRE_BYTES: usize = 64 << 20;

/// Number of wire bytes the serializer renders on each hex line.
///
/// 32 bytes become 64 hex characters, which keeps every body line
/// inside the usual 80-column budget, trailing newline included.
pub const WIRE_BYTES_PER_LINE: usize = 32;
78
79/// Error returned when a text-format program fails to parse.
80///
81/// Every variant carries an actionable `Fix:` message rendered via
82/// [`TextParseError::fix_hint`]. Parsing never panics.
83#[derive(Debug, Clone, PartialEq, Eq)]
84#[non_exhaustive]
85pub enum TextParseError {
86 /// The program did not start with the `vyre_ir v0.1` header.
87 MissingHeader {
88 /// Snippet of the first line, truncated to 64 characters.
89 observed: String,
90 },
91 /// The header was present but the second line was not the
92 /// expected `wire_bytes N` declaration.
93 MissingWireBytesLine {
94 /// Snippet of the second line, truncated to 64 characters.
95 observed: String,
96 },
97 /// `wire_bytes` parsed but exceeded `MAX_TEXT_WIRE_BYTES`.
98 WireBytesTooLarge {
99 /// The declared length.
100 declared: usize,
101 },
102 /// A hex line contained a non-hex character.
103 InvalidHexCharacter {
104 /// Offending line number (1-indexed).
105 line: usize,
106 /// The character that broke the parse.
107 character: char,
108 },
109 /// A hex line's character count was odd, which cannot round-trip
110 /// to whole bytes.
111 OddHexLineLength {
112 /// Offending line number (1-indexed).
113 line: usize,
114 /// The observed character count.
115 observed: usize,
116 },
117 /// Total decoded bytes did not match the declared `wire_bytes`.
118 DeclaredLengthMismatch {
119 /// Declared byte count from the header.
120 declared: usize,
121 /// Actual decoded byte count.
122 actual: usize,
123 },
124 /// The inner binary wire decoder rejected the byte payload.
125 ///
126 /// The carried error is whatever [`Program::from_wire`] emitted —
127 /// a typed [`crate::error::Error`] whose `Display` impl already
128 /// carries the `Fix:`-prefixed diagnostic prose.
129 WireDecodeFailed {
130 /// The inner decoder error.
131 inner: crate::error::Error,
132 },
133 /// The inner binary wire encoder rejected the program when
134 /// we tried to serialize it. Only emitted by `to_text`.
135 WireEncodeFailed {
136 /// The inner encoder error.
137 inner: crate::error::Error,
138 },
139}
140
141impl TextParseError {
142 /// Human-readable one-line rendering.
143 #[must_use]
144 #[inline]
145 pub fn message(&self) -> String {
146 match self {
147 Self::MissingHeader { observed } => format!(
148 "text format must start with `{TEXT_FORMAT_HEADER}` but saw `{observed}`. {}",
149 self.fix_hint()
150 ),
151 Self::MissingWireBytesLine { observed } => format!(
152 "text format header must be followed by `wire_bytes <N>` but saw `{observed}`. {}",
153 self.fix_hint()
154 ),
155 Self::WireBytesTooLarge { declared } => format!(
156 "declared wire_bytes = {declared} exceeds MAX_TEXT_WIRE_BYTES = {MAX_TEXT_WIRE_BYTES}. {}",
157 self.fix_hint()
158 ),
159 Self::InvalidHexCharacter { line, character } => format!(
160 "invalid hex character `{character}` on body line {line}. {}",
161 self.fix_hint()
162 ),
163 Self::OddHexLineLength { line, observed } => format!(
164 "hex body line {line} has {observed} characters, must be even. {}",
165 self.fix_hint()
166 ),
167 Self::DeclaredLengthMismatch { declared, actual } => format!(
168 "declared wire_bytes = {declared} but decoded {actual}. {}",
169 self.fix_hint()
170 ),
171 Self::WireDecodeFailed { inner } => {
172 format!("inner binary wire decoder rejected the body: {inner}")
173 }
174 Self::WireEncodeFailed { inner } => {
175 format!("inner binary wire encoder rejected the program: {inner}")
176 }
177 }
178 }
179
180 /// Actionable `Fix:`-prefixed hint for the caller.
181 #[must_use]
182 #[inline]
183 pub fn fix_hint(&self) -> &'static str {
184 match self {
185 Self::MissingHeader { .. } => {
186 "Fix: re-emit the program with Program::to_text, or manually prepend `vyre_ir v0.1\\n`."
187 }
188 Self::MissingWireBytesLine { .. } => {
189 "Fix: re-emit the program with Program::to_text; the second line must read `wire_bytes N`."
190 }
191 Self::WireBytesTooLarge { .. } => {
192 "Fix: the program is too large to round-trip through the text format; use Program::to_wire directly or split the program."
193 }
194 Self::InvalidHexCharacter { .. } | Self::OddHexLineLength { .. } => {
195 "Fix: the text body must be lowercase hex with 64 characters per line (32 bytes). Re-emit with Program::to_text."
196 }
197 Self::DeclaredLengthMismatch { .. } => {
198 "Fix: the wire_bytes header does not match the body length. Recompute wire_bytes or re-emit with Program::to_text."
199 }
200 Self::WireDecodeFailed { .. } | Self::WireEncodeFailed { .. } => {
201 "Fix: see the wrapped error message for the underlying wire-format problem."
202 }
203 }
204 }
205}
206
207impl std::fmt::Display for TextParseError {
208 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209 f.write_str(&self.message())
210 }
211}
212
213impl std::error::Error for TextParseError {}
214
215impl Program {
216 /// Serialize to the canonical vyre IR text format.
217 ///
218 /// # Errors
219 ///
220 /// Returns `TextParseError::WireEncodeFailed` when the inner
221 /// binary wire encoder fails. This cannot happen for a program
222 /// produced by a successful `Program::new` because every field
223 /// of `Program` is a valid wire input by construction; the
224 /// error path exists only for programs synthesized through
225 /// unsafe means or a wire-format breaking change.
226 #[inline]
227 #[must_use]
228 pub fn to_text(&self) -> Result<String, TextParseError> {
229 let bytes = self
230 .to_wire()
231 .map_err(|error| TextParseError::WireEncodeFailed { inner: error })?;
232 Ok(encode_text_body(&bytes))
233 }
234
235 /// Parse the canonical vyre IR text format.
236 ///
237 /// # Errors
238 ///
239 /// Returns a [`TextParseError`] describing the first parse
240 /// failure. Parsing is total — no panic path.
241 #[inline]
242 #[must_use]
243 pub fn from_text(input: &str) -> Result<Self, TextParseError> {
244 let mut lines = input.lines();
245 let header = lines.next().unwrap_or("");
246 if header != TEXT_FORMAT_HEADER {
247 return Err(TextParseError::MissingHeader {
248 observed: truncate(header, 64),
249 });
250 }
251 let wire_line = lines.next().unwrap_or("");
252 let declared_bytes = parse_wire_bytes_line(wire_line)?;
253 if declared_bytes > MAX_TEXT_WIRE_BYTES {
254 return Err(TextParseError::WireBytesTooLarge {
255 declared: declared_bytes,
256 });
257 }
258 let mut body = Vec::with_capacity(declared_bytes);
259 for (offset, line) in lines.enumerate() {
260 let trimmed = line.trim_end_matches('\r');
261 if trimmed.is_empty() {
262 continue;
263 }
264 if trimmed.len() % 2 != 0 {
265 return Err(TextParseError::OddHexLineLength {
266 line: offset + 3,
267 observed: trimmed.len(),
268 });
269 }
270 let mut bytes = trimmed.as_bytes().chunks_exact(2);
271 for pair in &mut bytes {
272 let high =
273 hex_nibble(pair[0]).ok_or_else(|| TextParseError::InvalidHexCharacter {
274 line: offset + 3,
275 character: pair[0] as char,
276 })?;
277 let low =
278 hex_nibble(pair[1]).ok_or_else(|| TextParseError::InvalidHexCharacter {
279 line: offset + 3,
280 character: pair[1] as char,
281 })?;
282 body.push((high << 4) | low);
283 }
284 }
285 if body.len() != declared_bytes {
286 return Err(TextParseError::DeclaredLengthMismatch {
287 declared: declared_bytes,
288 actual: body.len(),
289 });
290 }
291 Program::from_wire(&body).map_err(|inner| TextParseError::WireDecodeFailed { inner })
292 }
293}
294
295/// Hex-encode wire bytes into the canonical vyre IR text format.
296///
297/// Prepends the `vyre_ir v0.1` header and `wire_bytes N` line, then
298/// writes the body as lowercase hex chunked at [`WIRE_BYTES_PER_LINE`].
299#[inline]
300#[must_use]
301pub(crate) fn encode_text_body(bytes: &[u8]) -> String {
302 let hex_chars = bytes.len() * 2;
303 let line_count = bytes.len().div_ceil(WIRE_BYTES_PER_LINE);
304 // header + wire_bytes line + body lines + trailing newline
305 let capacity = TEXT_FORMAT_HEADER.len() + 32 + hex_chars + line_count + 1;
306 let mut out = String::with_capacity(capacity);
307 out.push_str(TEXT_FORMAT_HEADER);
308 out.push('\n');
309 out.push_str("wire_bytes ");
310 push_usize(&mut out, bytes.len());
311 out.push('\n');
312 for chunk in bytes.chunks(WIRE_BYTES_PER_LINE) {
313 for byte in chunk {
314 push_hex_byte(&mut out, *byte);
315 }
316 out.push('\n');
317 }
318 out
319}
320
/// Append the decimal rendering of `value` to `out`.
///
/// Digits are staged in a fixed stack buffer, so the only allocation
/// that can occur is `out` growing its own storage.
#[inline]
pub(crate) fn push_usize(out: &mut String, value: usize) {
    // 20 digits are enough for the largest 64-bit value.
    let mut scratch = [b'0'; 20];
    let mut start = scratch.len();
    let mut rest = value;

    // Fill from the back so the digits end up most-significant first.
    // Running the body once before testing also covers `value == 0`.
    loop {
        start -= 1;
        // `rest % 10` is always below 10, so the narrowing is lossless.
        scratch[start] = b'0' + (rest % 10) as u8;
        rest /= 10;
        if rest == 0 {
            break;
        }
    }

    for &digit in &scratch[start..] {
        out.push(char::from(digit));
    }
}
341
/// Append `byte` to `out` as exactly two lowercase hex digits, high
/// nibble first.
#[inline]
pub(crate) fn push_hex_byte(out: &mut String, byte: u8) {
    // Map a value in 0..=15 to its lowercase ASCII hex digit.
    let digit = |nibble: u8| -> char {
        let ascii = if nibble < 10 {
            b'0' + nibble
        } else {
            b'a' + (nibble - 10)
        };
        char::from(ascii)
    };
    out.push(digit(byte >> 4));
    out.push(digit(byte & 0x0f));
}
349
350/// Parse the `wire_bytes N` header line from the text format.
351#[inline]
352#[must_use]
353pub(crate) fn parse_wire_bytes_line(line: &str) -> Result<usize, TextParseError> {
354 let trimmed = line.trim_end_matches('\r');
355 let Some(rest) = trimmed.strip_prefix("wire_bytes ") else {
356 return Err(TextParseError::MissingWireBytesLine {
357 observed: truncate(trimmed, 64),
358 });
359 };
360 rest.parse::<usize>()
361 .map_err(|_| TextParseError::MissingWireBytesLine {
362 observed: truncate(trimmed, 64),
363 })
364}
365
/// Decode one ASCII hex digit into its value in `0..=15`.
///
/// Accepts `0-9`, `a-f` and `A-F`; any other byte yields `None`.
#[inline]
#[must_use]
pub(crate) fn hex_nibble(byte: u8) -> Option<u8> {
    // `char::from(u8)` maps the byte to U+0000..=U+00FF, and within
    // that range only the ASCII hex digits have a base-16 value.
    char::from(byte)
        .to_digit(16)
        .and_then(|value| u8::try_from(value).ok())
}
377
/// Shorten `input` to at most `max` characters.
///
/// Input that already fits is returned unchanged. Otherwise the result
/// is the first `max - 1` characters followed by `…`, so it is exactly
/// `max` characters long. With `max == 0` there is no room even for
/// the ellipsis and the result is empty.
///
/// Lengths are counted in Unicode scalar values, not bytes, so the cut
/// never lands inside a multi-byte character.
#[inline]
#[must_use]
pub(crate) fn truncate(input: &str, max: usize) -> String {
    // Probing for the (max + 1)-th character answers "does it fit?"
    // without walking an arbitrarily long input to count it all.
    if input.chars().nth(max).is_none() {
        return input.to_string();
    }
    // `max == 0` would underflow `max - 1`; nothing fits, so emit nothing.
    let Some(keep) = max.checked_sub(1) else {
        return String::new();
    };
    let mut out: String = input.chars().take(keep).collect();
    out.push('…');
    out
}
390#[cfg(test)]
391mod tests;