Skip to main content

wolfram_serialize/
lib.rs

1//! Serialize and deserialize Wolfram Language expressions
2//! to and from the WXF binary wire format.
3//!
4//! Two layers:
5//!
6//! * Byte level — [`Reader`] / [`Writer`]. [`Reader`] lends zero-copy
7//!   buffer-lifetime views (`&'de`), so the default [`SliceReader`] reads
8//!   straight out of an in-memory buffer; the default writer is `Vec<u8>`.
9//! * WXF level — [`WxfReader`] / [`WxfWriter`], typed sugar over the byte layer
10//!   built on the WXF token enums.
11//!
12//! Per-Rust-type encoding/decoding is [`ToWXF`] / [`FromWXF`], both generic over
13//! the byte layer (monomorphized, no `dyn`, streaming). Top-level entry points:
14//! [`to_wxf`][fn@to_wxf] (compression optional), [`from_wxf`][fn@from_wxf], [`read_wxf`].
15
16#![warn(missing_docs)]
17
18// Lets the derive macros' absolute `::wolfram_serialize::…` paths resolve while
19// compiling this crate itself — so `#[derive(ToWXF)]` works on our own types.
20extern crate self as wolfram_serialize;
21
22pub mod complex;
23pub mod constants;
24pub(crate) mod errors;
25// `from_wxf`, `numeric_in`, and `strategy` stay `pub`: the derive macros emit
26// fully-qualified calls into them (`wolfram_serialize::from_wxf::err_at`,
27// `wolfram_serialize::numeric_in::read_fixed`, `wolfram_serialize::strategy::*`)
28// from *downstream* crates, so those paths must resolve outside this crate.
29pub mod from_wxf;
30pub mod numeric_in;
31pub(crate) mod reader;
32pub mod strategy;
33pub(crate) mod to_wxf;
34pub(crate) mod writer;
35pub(crate) mod wxf;
36
37pub use crate::errors::Error;
38
/// Ceiling applied to any container capacity that is pre-allocated from a
/// length/count prefix read off the wire.
///
/// Counts such as array rank, association size, and function arity come
/// straight from the input, so a malformed prefix could otherwise ask for a
/// multi-gigabyte allocation before a single byte has been validated. Only the
/// `with_capacity` *hint* is limited: the container still grows to its real
/// size while elements are read, but a bogus count can no longer exhaust
/// memory up front.
pub(crate) const PREALLOC_CAP: usize = 4096;

/// Limit a capacity hint taken from an untrusted length prefix to at most
/// [`PREALLOC_CAP`]. Every `with_capacity` call driven by wire data should go
/// through this helper.
pub(crate) fn capped_capacity(hint: usize) -> usize {
    // Hints at or below the cap pass through unchanged; larger ones collapse
    // to the cap.
    std::cmp::min(hint, PREALLOC_CAP)
}
52
53pub use crate::complex::{Complex, Complex32, Complex64};
54
55pub use crate::constants::{
56    ExpressionEnum, HeaderEnum, NumericArrayEnum, PackedArrayEnum,
57};
58pub use crate::from_wxf::FromWXF;
59pub use crate::reader::{Reader, SliceReader};
60pub use crate::to_wxf::{ToWXF, WxfStruct};
61pub use crate::writer::Writer;
62pub use crate::wxf::reader::WxfReader;
63pub use crate::wxf::writer::WxfWriter;
64// Procedural derives — same names as the traits, resolved by Rust's separate
65// macro / type namespaces.
66pub use wolfram_serialize_macros::{Failure, FromWXF, ToWXF};
67
/// How hard zlib should work when [`to_wxf`][fn@to_wxf] emits compressed WXF.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum CompressionLevel {
    /// zlib level 1: quickest to produce, weakest compression ratio.
    Fastest,
    /// zlib level 6: the balanced zlib default (matches
    /// `BinarySerialize[…, PerformanceGoal -> "Size"]`).
    Default,
    /// zlib level 9: slowest to produce, strongest compression ratio.
    Best,
    /// A caller-chosen zlib level. Anything above 9 is treated as 9.
    Level(u8),
}

impl CompressionLevel {
    /// Translate this setting into the numeric zlib level it stands for.
    ///
    /// The named variants map to fixed levels (1, 6, 9); an explicit
    /// [`CompressionLevel::Level`] is passed through, saturating at 9.
    pub(crate) fn to_u8(self) -> u8 {
        // Highest level zlib defines; also what `Best` resolves to.
        const MAX_ZLIB_LEVEL: u8 = 9;

        match self {
            Self::Fastest => 1,
            Self::Default => 6,
            Self::Best => MAX_ZLIB_LEVEL,
            Self::Level(requested) => std::cmp::min(requested, MAX_ZLIB_LEVEL),
        }
    }
}
91
92//==============================================================================
93// Top-level API
94//==============================================================================
95
96/// Serialize `value` to WXF.
97///
98/// `compression` is `impl Into<Option<CompressionLevel>>`: pass `None` for plain
99/// uncompressed WXF (`8:` header), or a [`CompressionLevel`] for zlib-compressed
100/// WXF (`8C:` header) — e.g. `to_wxf(&v, None)` or
101/// `to_wxf(&v, CompressionLevel::Default)`.
102///
103/// The compressed path streams the token body directly through the
104/// [`ZlibEncoder`][flate2::write::ZlibEncoder] — no intermediate uncompressed
105/// buffer.
106///
107/// ```
108/// use wolfram_serialize::{to_wxf, from_wxf, CompressionLevel};
109///
110/// let bytes = to_wxf(&vec![1_i64, 2, 3], None).unwrap();
111/// assert_eq!(&bytes[..2], b"8:"); // uncompressed header
112///
113/// let compressed = to_wxf(&vec![1_i64, 2, 3], CompressionLevel::Default).unwrap();
114/// assert_eq!(&compressed[..3], b"8C:"); // zlib-compressed header
115///
116/// // Both forms decode the same way — `from_wxf` auto-detects the header.
117/// assert_eq!(from_wxf::<Vec<i64>>(&bytes).unwrap(), vec![1, 2, 3]);
118/// assert_eq!(from_wxf::<Vec<i64>>(&compressed).unwrap(), vec![1, 2, 3]);
119/// ```
120pub fn to_wxf<T: ToWXF + ?Sized>(
121    value: &T,
122    compression: impl Into<Option<CompressionLevel>>,
123) -> Result<Vec<u8>, Error> {
124    use crate::constants::HeaderEnum;
125
126    // The header (`8:` / `8C:`) is framing, written here — uncompressed and at
127    // the front — mirroring `strip_header` on the read side. The token body is
128    // then written through the appropriate sink (the Vec directly, or a
129    // streaming ZlibEncoder over it for `8C:`).
130    let ver = HeaderEnum::Version as u8;
131    let sep = HeaderEnum::Separator as u8;
132    match compression.into() {
133        None => {
134            let out = vec![ver, sep];
135            let mut w = WxfWriter::new(out);
136            value.to_wxf(&mut w)?;
137            Ok(w.into_inner())
138        },
139        Some(level) => {
140            use flate2::write::ZlibEncoder;
141            use flate2::Compression;
142
143            let out = vec![ver, HeaderEnum::Compress as u8, sep];
144            let encoder =
145                ZlibEncoder::new(out, Compression::new(u32::from(level.to_u8())));
146            let mut w = WxfWriter::new(encoder);
147            value.to_wxf(&mut w)?;
148            Ok(w.into_inner().finish()?)
149        },
150    }
151}
152
153/// Strip the WXF header, returning the raw token stream. `8:` payloads are
154/// borrowed; `8C:` payloads are zlib-decompressed into an owned buffer.
155fn strip_header(bytes: &[u8]) -> Result<std::borrow::Cow<'_, [u8]>, Error> {
156    use std::io::Read;
157
158    use crate::constants::HeaderEnum;
159
160    if bytes.len() < 2 {
161        return Err(Error::invalid(
162            "byte stream too short for WXF header".into(),
163        ));
164    }
165    if bytes[0] != HeaderEnum::Version as u8 {
166        return Err(Error::invalid(format!(
167            "WXF header version mismatch: expected {:?}, got {:?}",
168            HeaderEnum::Version as u8 as char,
169            bytes[0] as char
170        )));
171    }
172    if bytes[1] == HeaderEnum::Compress as u8 {
173        if bytes.len() < 3 || bytes[2] != HeaderEnum::Separator as u8 {
174            return Err(Error::invalid("WXF compressed header truncated".into()));
175        }
176        let mut decoded = Vec::new();
177        flate2::read::ZlibDecoder::new(&bytes[3..])
178            .read_to_end(&mut decoded)
179            .map_err(|e| Error::invalid(format!("zlib decompress failed: {}", e)))?;
180        Ok(std::borrow::Cow::Owned(decoded))
181    } else if bytes[1] == HeaderEnum::Separator as u8 {
182        Ok(std::borrow::Cow::Borrowed(&bytes[2..]))
183    } else {
184        Err(Error::invalid(format!(
185            "WXF header separator mismatch: expected ':' or 'C', got {:?}",
186            bytes[1] as char
187        )))
188    }
189}
190
191/// Strip the WXF header (`8:` / `8C:` auto-detected, decompressing if needed)
192/// and hand the closure a [`WxfReader`] positioned at the start of the token
193/// stream, so it can drive the cursor directly.
194///
195/// [`from_wxf`][fn@from_wxf] only fits when the *entire* wire value decodes as
196/// one [`FromWXF`] type. Reach for `read_wxf` instead when you need to:
197///
198/// * decode several **positional** values off one cursor — e.g. a LibraryLink
199///   argument list arrives as `Function[<head>, arg0, arg1, …]`, where each
200///   argument has its own Rust type and must be read in order (this is exactly
201///   how `#[export(wxf)]` unpacks its arguments);
202/// * inspect a token (via [`WxfReader::read_expr_token`]) before deciding how
203///   to decode the rest, since [`from_wxf`][fn@from_wxf] commits to a single
204///   `T` up front;
205/// * read **borrowed** (`&str` / `&[u8]`) data — the borrow is tied to the
206///   input buffer, so it must be consumed *inside* the closure instead of
207///   escaping the call (see [`FromWXF`] for the zero-copy story).
208///
209/// ```
210/// use wolfram_serialize::{read_wxf, ExpressionEnum, FromWXF, WxfWriter};
211///
212/// // Hand-build the wire form of `{1, "two", 3.0}`:
213/// // `Function[System`List, 1, "two", 3.0]`.
214/// let mut w = WxfWriter::new(vec![b'8', b':']);
215/// w.write_function(3).unwrap();
216/// w.write_symbol("System`List").unwrap();
217/// w.write_integer(1).unwrap();
218/// w.write_string("two").unwrap();
219/// w.write_real(3.0).unwrap();
220/// let bytes = w.into_inner();
221///
222/// // Decode the three arguments positionally, each with its own Rust type —
223/// // there is no single `FromWXF` type spanning all three, so `from_wxf`
224/// // alone can't do this.
225/// let (a, b, c) = read_wxf(&bytes, |r| {
226///     assert_eq!(r.read_expr_token()?, ExpressionEnum::Function);
227///     let arity = r.read_varint()?;
228///     r.skip()?; // discard the head (`System`List`)
229///     assert_eq!(arity, 3);
230///     Ok((i64::from_wxf(r)?, String::from_wxf(r)?, f64::from_wxf(r)?))
231/// })
232/// .unwrap();
233///
234/// assert_eq!((a, b, c), (1, "two".to_string(), 3.0));
235/// ```
236pub fn read_wxf<T>(
237    bytes: &[u8],
238    f: impl for<'a> FnOnce(&mut WxfReader<SliceReader<'a>>) -> Result<T, Error>,
239) -> Result<T, Error> {
240    let payload = strip_header(bytes)?;
241    let mut r = WxfReader::new(SliceReader::new(&payload));
242    f(&mut r)
243}
244
245/// Deserialize `bytes` (WXF; `8:` or `8C:` auto-detected) into a typed `T`.
246///
247/// Use `T = Expr` for an untyped tree, or any [`FromWXF`] type — including those
248/// produced by `#[derive(FromWXF)]` — for typed deserialization with no
249/// intermediate `Expr`.
250///
251/// ```
252/// use wolfram_serialize::{to_wxf, from_wxf, FromWXF, ToWXF};
253///
254/// #[derive(ToWXF, FromWXF, Debug, PartialEq)]
255/// struct Point { x: f64, y: f64 }
256///
257/// let bytes = to_wxf(&Point { x: 1.0, y: 2.0 }, None).unwrap();
258/// let point: Point = from_wxf(&bytes).unwrap();
259/// assert_eq!(point, Point { x: 1.0, y: 2.0 });
260/// ```
261///
262/// Downstream, `wolfram_expr::Expr` also implements [`FromWXF`], so `T = Expr`
263/// decodes into an untyped tree when the shape isn't known ahead of time.
264pub fn from_wxf<T: for<'de> FromWXF<'de>>(bytes: &[u8]) -> Result<T, Error> {
265    read_wxf(bytes, |r| T::from_wxf(r))
266}