wolfram_serialize/lib.rs
1//! Serialize and deserialize Wolfram Language expressions
2//! to and from the WXF binary wire format.
3//!
4//! Two layers:
5//!
6//! * Byte level — [`Reader`] / [`Writer`]. [`Reader`] lends zero-copy
7//! buffer-lifetime views (`&'de`), so the default [`SliceReader`] reads
8//! straight out of an in-memory buffer; the default writer is `Vec<u8>`.
9//! * WXF level — [`WxfReader`] / [`WxfWriter`], typed sugar over the byte layer
10//! built on the WXF token enums.
11//!
12//! Per-Rust-type encoding/decoding is [`ToWXF`] / [`FromWXF`], both generic over
13//! the byte layer (monomorphized, no `dyn`, streaming). Top-level entry points:
14//! [`to_wxf`][fn@to_wxf] (compression optional), [`from_wxf`][fn@from_wxf], [`read_wxf`].
15
16#![warn(missing_docs)]
17
18// Lets the derive macros' absolute `::wolfram_serialize::…` paths resolve while
19// compiling this crate itself — so `#[derive(ToWXF)]` works on our own types.
20extern crate self as wolfram_serialize;
21
22pub mod complex;
23pub mod constants;
24pub(crate) mod errors;
25// `from_wxf`, `numeric_in`, and `strategy` stay `pub`: the derive macros emit
26// fully-qualified calls into them (`wolfram_serialize::from_wxf::err_at`,
27// `wolfram_serialize::numeric_in::read_fixed`, `wolfram_serialize::strategy::*`)
28// from *downstream* crates, so those paths must resolve outside this crate.
29pub mod from_wxf;
30pub mod numeric_in;
31pub(crate) mod reader;
32pub mod strategy;
33pub(crate) mod to_wxf;
34pub(crate) mod writer;
35pub(crate) mod wxf;
36
37pub use crate::errors::Error;
38
/// Largest capacity we are willing to pre-allocate on the strength of a
/// length/count prefix taken from untrusted input.
///
/// Counts such as array rank, association size and function arity are read
/// directly off the wire, so a malformed prefix could otherwise demand a
/// multi-gigabyte allocation before a single element has been validated.
/// Only the `with_capacity` *hint* is limited: the container still grows to
/// its true size while elements are read, but a bogus count can no longer
/// exhaust memory up front.
pub(crate) const PREALLOC_CAP: usize = 4096;

/// Limit a wire-derived capacity hint to at most [`PREALLOC_CAP`].
///
/// Hints at or below the cap are returned unchanged. Route every
/// `with_capacity` call whose argument comes from wire data through this
/// function.
pub(crate) fn capped_capacity(hint: usize) -> usize {
    if hint > PREALLOC_CAP {
        PREALLOC_CAP
    } else {
        hint
    }
}
52
53pub use crate::complex::{Complex, Complex32, Complex64};
54
55pub use crate::constants::{
56 ExpressionEnum, HeaderEnum, NumericArrayEnum, PackedArrayEnum,
57};
58pub use crate::from_wxf::FromWXF;
59pub use crate::reader::{Reader, SliceReader};
60pub use crate::to_wxf::{ToWXF, WxfStruct};
61pub use crate::writer::Writer;
62pub use crate::wxf::reader::WxfReader;
63pub use crate::wxf::writer::WxfWriter;
64// Procedural derives — same names as the traits, resolved by Rust's separate
65// macro / type namespaces.
66pub use wolfram_serialize_macros::{Failure, FromWXF, ToWXF};
67
/// How hard zlib should work when [`to_wxf`][fn@to_wxf] compresses its output.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum CompressionLevel {
    /// zlib level 1 — fastest, lowest ratio.
    Fastest,
    /// zlib level 6 — balanced (zlib default; matches `BinarySerialize[…, PerformanceGoal -> "Size"]`).
    Default,
    /// zlib level 9 — slowest, highest ratio.
    Best,
    /// Explicit zlib level. Values above 9 are clamped to 9.
    Level(u8),
}

impl CompressionLevel {
    /// Numeric zlib level for this setting.
    ///
    /// The named variants map to fixed levels (1, 6 and 9); an explicit
    /// [`CompressionLevel::Level`] is passed through, limited to 9.
    pub(crate) fn to_u8(self) -> u8 {
        // Highest level this type will ever hand to zlib.
        const MAX_LEVEL: u8 = 9;

        match self {
            Self::Level(requested) if requested > MAX_LEVEL => MAX_LEVEL,
            Self::Level(requested) => requested,
            Self::Best => MAX_LEVEL,
            Self::Default => 6,
            Self::Fastest => 1,
        }
    }
}
91
92//==============================================================================
93// Top-level API
94//==============================================================================
95
96/// Serialize `value` to WXF.
97///
98/// `compression` is `impl Into<Option<CompressionLevel>>`: pass `None` for plain
99/// uncompressed WXF (`8:` header), or a [`CompressionLevel`] for zlib-compressed
100/// WXF (`8C:` header) — e.g. `to_wxf(&v, None)` or
101/// `to_wxf(&v, CompressionLevel::Default)`.
102///
103/// The compressed path streams the token body directly through the
104/// [`ZlibEncoder`][flate2::write::ZlibEncoder] — no intermediate uncompressed
105/// buffer.
106///
107/// ```
108/// use wolfram_serialize::{to_wxf, from_wxf, CompressionLevel};
109///
110/// let bytes = to_wxf(&vec![1_i64, 2, 3], None).unwrap();
111/// assert_eq!(&bytes[..2], b"8:"); // uncompressed header
112///
113/// let compressed = to_wxf(&vec![1_i64, 2, 3], CompressionLevel::Default).unwrap();
114/// assert_eq!(&compressed[..3], b"8C:"); // zlib-compressed header
115///
116/// // Both forms decode the same way — `from_wxf` auto-detects the header.
117/// assert_eq!(from_wxf::<Vec<i64>>(&bytes).unwrap(), vec![1, 2, 3]);
118/// assert_eq!(from_wxf::<Vec<i64>>(&compressed).unwrap(), vec![1, 2, 3]);
119/// ```
120pub fn to_wxf<T: ToWXF + ?Sized>(
121 value: &T,
122 compression: impl Into<Option<CompressionLevel>>,
123) -> Result<Vec<u8>, Error> {
124 use crate::constants::HeaderEnum;
125
126 // The header (`8:` / `8C:`) is framing, written here — uncompressed and at
127 // the front — mirroring `strip_header` on the read side. The token body is
128 // then written through the appropriate sink (the Vec directly, or a
129 // streaming ZlibEncoder over it for `8C:`).
130 let ver = HeaderEnum::Version as u8;
131 let sep = HeaderEnum::Separator as u8;
132 match compression.into() {
133 None => {
134 let out = vec![ver, sep];
135 let mut w = WxfWriter::new(out);
136 value.to_wxf(&mut w)?;
137 Ok(w.into_inner())
138 },
139 Some(level) => {
140 use flate2::write::ZlibEncoder;
141 use flate2::Compression;
142
143 let out = vec![ver, HeaderEnum::Compress as u8, sep];
144 let encoder =
145 ZlibEncoder::new(out, Compression::new(u32::from(level.to_u8())));
146 let mut w = WxfWriter::new(encoder);
147 value.to_wxf(&mut w)?;
148 Ok(w.into_inner().finish()?)
149 },
150 }
151}
152
153/// Strip the WXF header, returning the raw token stream. `8:` payloads are
154/// borrowed; `8C:` payloads are zlib-decompressed into an owned buffer.
155fn strip_header(bytes: &[u8]) -> Result<std::borrow::Cow<'_, [u8]>, Error> {
156 use std::io::Read;
157
158 use crate::constants::HeaderEnum;
159
160 if bytes.len() < 2 {
161 return Err(Error::invalid(
162 "byte stream too short for WXF header".into(),
163 ));
164 }
165 if bytes[0] != HeaderEnum::Version as u8 {
166 return Err(Error::invalid(format!(
167 "WXF header version mismatch: expected {:?}, got {:?}",
168 HeaderEnum::Version as u8 as char,
169 bytes[0] as char
170 )));
171 }
172 if bytes[1] == HeaderEnum::Compress as u8 {
173 if bytes.len() < 3 || bytes[2] != HeaderEnum::Separator as u8 {
174 return Err(Error::invalid("WXF compressed header truncated".into()));
175 }
176 let mut decoded = Vec::new();
177 flate2::read::ZlibDecoder::new(&bytes[3..])
178 .read_to_end(&mut decoded)
179 .map_err(|e| Error::invalid(format!("zlib decompress failed: {}", e)))?;
180 Ok(std::borrow::Cow::Owned(decoded))
181 } else if bytes[1] == HeaderEnum::Separator as u8 {
182 Ok(std::borrow::Cow::Borrowed(&bytes[2..]))
183 } else {
184 Err(Error::invalid(format!(
185 "WXF header separator mismatch: expected ':' or 'C', got {:?}",
186 bytes[1] as char
187 )))
188 }
189}
190
191/// Strip the WXF header (`8:` / `8C:` auto-detected, decompressing if needed)
192/// and hand the closure a [`WxfReader`] positioned at the start of the token
193/// stream, so it can drive the cursor directly.
194///
195/// [`from_wxf`][fn@from_wxf] only fits when the *entire* wire value decodes as
196/// one [`FromWXF`] type. Reach for `read_wxf` instead when you need to:
197///
198/// * decode several **positional** values off one cursor — e.g. a LibraryLink
199/// argument list arrives as `Function[<head>, arg0, arg1, …]`, where each
200/// argument has its own Rust type and must be read in order (this is exactly
201/// how `#[export(wxf)]` unpacks its arguments);
202/// * inspect a token (via [`WxfReader::read_expr_token`]) before deciding how
203/// to decode the rest, since [`from_wxf`][fn@from_wxf] commits to a single
204/// `T` up front;
205/// * read **borrowed** (`&str` / `&[u8]`) data — the borrow is tied to the
206/// input buffer, so it must be consumed *inside* the closure instead of
207/// escaping the call (see [`FromWXF`] for the zero-copy story).
208///
209/// ```
210/// use wolfram_serialize::{read_wxf, ExpressionEnum, FromWXF, WxfWriter};
211///
212/// // Hand-build the wire form of `{1, "two", 3.0}`:
213/// // `Function[System`List, 1, "two", 3.0]`.
214/// let mut w = WxfWriter::new(vec![b'8', b':']);
215/// w.write_function(3).unwrap();
216/// w.write_symbol("System`List").unwrap();
217/// w.write_integer(1).unwrap();
218/// w.write_string("two").unwrap();
219/// w.write_real(3.0).unwrap();
220/// let bytes = w.into_inner();
221///
222/// // Decode the three arguments positionally, each with its own Rust type —
223/// // there is no single `FromWXF` type spanning all three, so `from_wxf`
224/// // alone can't do this.
225/// let (a, b, c) = read_wxf(&bytes, |r| {
226/// assert_eq!(r.read_expr_token()?, ExpressionEnum::Function);
227/// let arity = r.read_varint()?;
228/// r.skip()?; // discard the head (`System`List`)
229/// assert_eq!(arity, 3);
230/// Ok((i64::from_wxf(r)?, String::from_wxf(r)?, f64::from_wxf(r)?))
231/// })
232/// .unwrap();
233///
234/// assert_eq!((a, b, c), (1, "two".to_string(), 3.0));
235/// ```
236pub fn read_wxf<T>(
237 bytes: &[u8],
238 f: impl for<'a> FnOnce(&mut WxfReader<SliceReader<'a>>) -> Result<T, Error>,
239) -> Result<T, Error> {
240 let payload = strip_header(bytes)?;
241 let mut r = WxfReader::new(SliceReader::new(&payload));
242 f(&mut r)
243}
244
245/// Deserialize `bytes` (WXF; `8:` or `8C:` auto-detected) into a typed `T`.
246///
247/// Use `T = Expr` for an untyped tree, or any [`FromWXF`] type — including those
248/// produced by `#[derive(FromWXF)]` — for typed deserialization with no
249/// intermediate `Expr`.
250///
251/// ```
252/// use wolfram_serialize::{to_wxf, from_wxf, FromWXF, ToWXF};
253///
254/// #[derive(ToWXF, FromWXF, Debug, PartialEq)]
255/// struct Point { x: f64, y: f64 }
256///
257/// let bytes = to_wxf(&Point { x: 1.0, y: 2.0 }, None).unwrap();
258/// let point: Point = from_wxf(&bytes).unwrap();
259/// assert_eq!(point, Point { x: 1.0, y: 2.0 });
260/// ```
261///
262/// Downstream, `wolfram_expr::Expr` also implements [`FromWXF`], so `T = Expr`
263/// decodes into an untyped tree when the shape isn't known ahead of time.
264pub fn from_wxf<T: for<'de> FromWXF<'de>>(bytes: &[u8]) -> Result<T, Error> {
265 read_wxf(bytes, |r| T::from_wxf(r))
266}