Skip to main content

wolfram_serialize/wxf/
reader.rs

1//! Typed, pull-based WXF reader — sugar over a raw [`Reader`].
2//!
3//! Each WXF enum in [`crate::constants`] gets a reader that consumes its byte
4//! and does the `TryFrom` (failing if the byte isn't that enum). There is **no
5//! peek**: a token is read exactly once via [`WxfReader::read_expr_token`] and
6//! the caller dispatches on it, then reads the matching payload.
7//!
8//! Methods deal only in primitives and raw parts — higher-level value types
9//! (`Symbol`, `NumericArray`, …) are assembled by the consumer (`wolfram-expr`).
10
11use crate::constants::{ExpressionEnum, NumericArrayEnum, PackedArrayEnum};
12use crate::reader::Reader;
13use crate::Error;
14
/// Typed WXF reader wrapping a raw byte [`Reader`].
///
/// `R` is the underlying byte source. The struct itself places no bound on
/// `R`; the reading methods are provided where `R` implements [`Reader`].
/// `Debug` is derived so the reader can appear in diagnostics and inside
/// other `Debug` types (available whenever `R: Debug`).
#[derive(Debug)]
pub struct WxfReader<R> {
    /// The wrapped raw byte source every read is forwarded to.
    inner: R,
}
19
20impl<'de, R: Reader<'de>> WxfReader<R> {
21    /// Wrap a raw reader. The reader is assumed to be positioned at the start of
22    /// the WXF payload (header already consumed — see [`crate::from_wxf`]).
23    pub fn new(inner: R) -> Self {
24        WxfReader { inner }
25    }
26
27    //---- raw passthrough ------------------------------------------------
28
29    /// Consume one raw byte.
30    pub fn read_byte(&mut self) -> Result<u8, Error> {
31        self.inner.read_byte()
32    }
33
34    /// Consume `n` raw bytes as a zero-copy, buffer-lifetime view.
35    pub fn read_bytes(&mut self, n: usize) -> Result<&'de [u8], Error> {
36        self.inner.read_bytes(n)
37    }
38
39    /// Read a WXF varint (LEB128, 7-bit groups, little-endian).
40    pub fn read_varint(&mut self) -> Result<u64, Error> {
41        let mut result: u64 = 0;
42        let mut shift: u32 = 0;
43        loop {
44            let b = self.inner.read_byte()?;
45            result |= u64::from(b & 0x7F) << shift;
46            if b & 0x80 == 0 {
47                return Ok(result);
48            }
49            shift += 7;
50            if shift >= 64 {
51                return Err(Error::invalid("varint exceeds 64 bits".into()));
52            }
53        }
54    }
55
56    //---- enum tags (consume one byte, TryFrom) --------------------------
57
58    /// Consume the next expression token byte.
59    pub fn read_expr_token(&mut self) -> Result<ExpressionEnum, Error> {
60        let b = self.inner.read_byte()?;
61        ExpressionEnum::try_from(b)
62            .map_err(|_| Error::invalid(format!("unknown WXF token byte 0x{:02X}", b)))
63    }
64
65    /// Consume a NumericArray element-type byte.
66    pub fn read_numeric_type(&mut self) -> Result<NumericArrayEnum, Error> {
67        let b = self.inner.read_byte()?;
68        NumericArrayEnum::try_from(b).map_err(|_| {
69            Error::invalid(format!("unknown NumericArray element type 0x{:02X}", b))
70        })
71    }
72
73    /// Consume a PackedArray element-type byte (numeric subset).
74    pub fn read_packed_type(&mut self) -> Result<PackedArrayEnum, Error> {
75        let b = self.inner.read_byte()?;
76        PackedArrayEnum::try_from(b).map_err(|_| {
77            Error::invalid(format!("unknown PackedArray element type 0x{:02X}", b))
78        })
79    }
80
81    //---- fixed-width integer / real payloads (tag already consumed) -----
82
83    /// Read an `Integer8` payload.
84    pub fn read_i8(&mut self) -> Result<i8, Error> {
85        Ok(self.inner.read_byte()? as i8)
86    }
87
88    /// Read an `Integer16` payload.
89    pub fn read_i16(&mut self) -> Result<i16, Error> {
90        let b = self.inner.read_bytes(2)?;
91        Ok(i16::from_le_bytes(b.try_into().unwrap()))
92    }
93
94    /// Read an `Integer32` payload.
95    pub fn read_i32(&mut self) -> Result<i32, Error> {
96        let b = self.inner.read_bytes(4)?;
97        Ok(i32::from_le_bytes(b.try_into().unwrap()))
98    }
99
100    /// Read an `Integer64` payload.
101    pub fn read_i64(&mut self) -> Result<i64, Error> {
102        let b = self.inner.read_bytes(8)?;
103        Ok(i64::from_le_bytes(b.try_into().unwrap()))
104    }
105
106    /// Read a `Real64` payload.
107    pub fn read_f64(&mut self) -> Result<f64, Error> {
108        let b = self.inner.read_bytes(8)?;
109        Ok(f64::from_le_bytes(b.try_into().unwrap()))
110    }
111
112    //---- length-prefixed payloads (tag already consumed) ----------------
113
114    /// Read a `String`/`Symbol`-shaped payload: varint length + UTF-8 bytes.
115    /// Zero-copy — returns a `&'de str` view into the underlying buffer, so it
116    /// serves both the owned path (`.to_owned()`) and borrowed fields (`&'de str`).
117    pub fn read_str(&mut self) -> Result<&'de str, Error> {
118        let len = self.read_varint()? as usize;
119        let bytes = self.inner.read_bytes(len)?;
120        std::str::from_utf8(bytes)
121            .map_err(|_| Error::invalid("payload not valid UTF-8".into()))
122    }
123
124    /// Read a complete `String` value (token + payload) into an owned `String`.
125    /// Used for keys/labels where the token has not been pre-consumed.
126    pub fn read_string(&mut self) -> Result<String, Error> {
127        match self.read_expr_token()? {
128            ExpressionEnum::String => Ok(self.read_str()?.to_owned()),
129            other => Err(Error::unexpected_token(&["String"], other)),
130        }
131    }
132
133    /// Read a `Symbol`/`BigInteger`/`BigReal` payload as an owned name/digit
134    /// string (`varint` length + UTF-8). The consumer parses it into the
135    /// appropriate value type.
136    pub fn read_symbol_name(&mut self) -> Result<String, Error> {
137        Ok(self.read_str()?.to_owned())
138    }
139
140    /// Read a `ByteArray` payload: varint length + raw bytes. Zero-copy — returns
141    /// a `&'de [u8]` view into the underlying buffer (owned path copies via
142    /// `.to_vec()`; borrowed `&'de [u8]` fields keep it).
143    pub fn read_byte_array(&mut self) -> Result<&'de [u8], Error> {
144        let len = self.read_varint()? as usize;
145        self.inner.read_bytes(len)
146    }
147
148    //---- arrays (tag already consumed) ----------------------------------
149
150    /// Read the body of a `NumericArray`/`PackedArray` token (tag already
151    /// consumed): element type + rank + dims + flat little-endian buffer.
152    /// Returns the element type, the dims, and the owned byte buffer.
153    pub fn read_numeric_array_parts(
154        &mut self,
155    ) -> Result<(NumericArrayEnum, Vec<usize>, Vec<u8>), Error> {
156        let dt = self.read_numeric_type()?;
157        let (dims, bytes) = self.read_array_body(dt.size_in_bytes())?;
158        Ok((dt, dims, bytes))
159    }
160
161    /// Shared array tail: rank varint, `rank` dim varints, then the flat
162    /// little-endian byte buffer (`prod(dims) * elem_size` bytes).
163    pub fn read_array_body(
164        &mut self,
165        elem_size: usize,
166    ) -> Result<(Vec<usize>, Vec<u8>), Error> {
167        let rank = self.read_varint()? as usize;
168        let mut dims = Vec::with_capacity(rank);
169        for _ in 0..rank {
170            dims.push(self.read_varint()? as usize);
171        }
172        let byte_count = dims.iter().product::<usize>() * elem_size;
173        let bytes = self.inner.read_bytes(byte_count)?.to_vec();
174        Ok((dims, bytes))
175    }
176
177    //---- association rules ----------------------------------------------
178
179    /// Read one `Rule` / `RuleDelayed` token; returns the `delayed` flag.
180    pub fn read_rule(&mut self) -> Result<bool, Error> {
181        match self.read_expr_token()? {
182            ExpressionEnum::Rule => Ok(false),
183            ExpressionEnum::RuleDelayed => Ok(true),
184            other => Err(Error::unexpected_token(&["Rule", "RuleDelayed"], other)),
185        }
186    }
187
188    //---- skip -----------------------------------------------------------
189
190    /// Read one complete value at the current position and discard it. Used to
191    /// drop an unknown Association key's value, or a Function head whose shape
192    /// isn't validated.
193    pub fn skip(&mut self) -> Result<(), Error> {
194        let tok = self.read_expr_token()?;
195        self.skip_body(tok)
196    }
197
198    fn skip_body(&mut self, tok: ExpressionEnum) -> Result<(), Error> {
199        match tok {
200            ExpressionEnum::Integer8 => {
201                self.read_i8()?;
202            },
203            ExpressionEnum::Integer16 => {
204                self.read_i16()?;
205            },
206            ExpressionEnum::Integer32 => {
207                self.read_i32()?;
208            },
209            ExpressionEnum::Integer64 => {
210                self.read_i64()?;
211            },
212            ExpressionEnum::Real64 => {
213                self.read_f64()?;
214            },
215            ExpressionEnum::String
216            | ExpressionEnum::Symbol
217            | ExpressionEnum::ByteArray
218            | ExpressionEnum::BigInteger
219            | ExpressionEnum::BigReal => {
220                let len = self.read_varint()? as usize;
221                self.inner.read_bytes(len)?;
222            },
223            ExpressionEnum::NumericArray | ExpressionEnum::PackedArray => {
224                // element-type byte (numeric subset shares wire bytes)
225                let dt = self.read_numeric_type()?;
226                let rank = self.read_varint()? as usize;
227                let mut count = 1usize;
228                for _ in 0..rank {
229                    count *= self.read_varint()? as usize;
230                }
231                self.inner.read_bytes(count * dt.size_in_bytes())?;
232            },
233            ExpressionEnum::Function => {
234                let n = self.read_varint()?;
235                self.skip()?; // head
236                for _ in 0..n {
237                    self.skip()?;
238                }
239            },
240            ExpressionEnum::Association => {
241                let n = self.read_varint()?;
242                for _ in 0..n {
243                    self.read_rule()?;
244                    self.skip()?; // key
245                    self.skip()?; // value
246                }
247            },
248            // A Rule where a value was expected: "any token but this".
249            other @ (ExpressionEnum::Rule | ExpressionEnum::RuleDelayed) => {
250                return Err(Error::unexpected_token(&[], other))
251            },
252        }
253        Ok(())
254    }
255}