marshal_parser/
objects.rs

1use std::fmt::{self, Display, Write};
2
3use num_bigint::BigInt;
4
/// ## Object type flag in the binary "marshal" format
///
/// This enum represents the type of objects as determined by the first byte of
/// their representation in the binary "marshal" format. The flag byte that is
/// converted into each variant is noted in that variant's documentation.
///
/// *Note*: Some types are not handled in this implementation, since they were
/// replaced with other types and are not written by recent versions of Python:
///
/// - `'I'` (`TYPE_INT64`)
/// - `'f'` (`TYPE_FLOAT`)
/// - `'x'` (`TYPE_COMPLEX`)
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ObjectType {
    /// Type of a null pointer (flag byte `'0'`)
    Null,
    /// Type of the `None` singleton object (flag byte `'N'`)
    None,
    /// Type of `False` (flag byte `'F'`)
    False,
    /// Type of `True` (flag byte `'T'`)
    True,
    /// Type of the `StopIteration` singleton object (flag byte `'S'`)
    StopIteration,
    /// Type of the `...` (ellipsis) singleton object (flag byte `'.'`)
    Ellipsis,
    /// Type of 32-bit integers (flag byte `'i'`)
    Int,
    /// Legacy `TYPE_INT64` flag (flag byte `'I'`), see the note on the enum
    #[doc(hidden)]
    Int64,
    /// Legacy `TYPE_FLOAT` flag (flag byte `'f'`), see the note on the enum
    #[doc(hidden)]
    Float,
    /// Type of 64-bit floating-point numbers (flag byte `'g'`)
    BinaryFloat,
    /// Legacy `TYPE_COMPLEX` flag (flag byte `'x'`), see the note on the enum
    #[doc(hidden)]
    Complex,
    /// Type of 64-bit floating-point complex numbers (flag byte `'y'`)
    BinaryComplex,
    /// Type of dynamically sized integers (flag byte `'l'`)
    Long,
    /// Type of strings (flag byte `'s'`)
    String,
    /// Type of interned strings (flag byte `'t'`)
    Interned,
    /// Type of object references (flag byte `'r'`)
    Ref,
    /// Type of tuples (flag byte `'('`)
    Tuple,
    /// Type of lists (flag byte `'['`)
    List,
    /// Type of dicts (flag byte `'{'`)
    Dict,
    /// Type of code objects (flag byte `'c'`)
    Code,
    /// Type of unicode strings (flag byte `'u'`)
    Unicode,
    /// Type of unknown objects (flag byte `'?'`)
    Unknown,
    /// Type of sets (flag byte `'<'`)
    Set,
    /// Type of frozensets (flag byte `'>'`)
    FrozenSet,
    /// Type of ASCII strings (flag byte `'a'`)
    Ascii,
    /// Type of interned ASCII strings (flag byte `'A'`)
    AsciiInterned,
    /// Type of small tuples (flag byte `')'`)
    SmallTuple,
    /// Type of short ASCII strings (flag byte `'z'`)
    ShortAscii,
    /// Type of short interned ASCII strings (flag byte `'Z'`)
    ShortAsciiInterned,
}
78
79impl Display for ObjectType {
80    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
81        write!(f, "{:?}", self)
82    }
83}
84
85impl TryFrom<u8> for ObjectType {
86    type Error = ();
87
88    fn try_from(value: u8) -> Result<Self, Self::Error> {
89        use ObjectType as T;
90
91        Ok(match value {
92            b'0' => T::Null,
93            b'N' => T::None,
94            b'F' => T::False,
95            b'T' => T::True,
96            b'S' => T::StopIteration,
97            b'.' => T::Ellipsis,
98            b'i' => T::Int,
99            b'I' => T::Int64,
100            b'f' => T::Float,
101            b'g' => T::BinaryFloat,
102            b'x' => T::Complex,
103            b'y' => T::BinaryComplex,
104            b'l' => T::Long,
105            b's' => T::String,
106            b't' => T::Interned,
107            b'r' => T::Ref,
108            b'(' => T::Tuple,
109            b'[' => T::List,
110            b'{' => T::Dict,
111            b'c' => T::Code,
112            b'u' => T::Unicode,
113            b'?' => T::Unknown,
114            b'<' => T::Set,
115            b'>' => T::FrozenSet,
116            b'a' => T::Ascii,
117            b'A' => T::AsciiInterned,
118            b')' => T::SmallTuple,
119            b'z' => T::ShortAscii,
120            b'Z' => T::ShortAsciiInterned,
121            _ => return Err(()),
122        })
123    }
124}
125
/// String type flag for string-like objects
///
/// Distinguishes the flavors of payload carried by `Object::String`. Each
/// variant is rendered by the `Display` implementation as the upper-case label
/// given in its documentation.
///
/// NOTE(review): the variant names mirror the string-like variants of
/// `ObjectType`; how the "short" ASCII flags are mapped onto these variants is
/// decided by the parser, which is not part of this file — confirm there.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum StringType {
    /// String flavor displayed as `STRING`
    String,
    /// String flavor displayed as `INTERNED`
    Interned,
    /// String flavor displayed as `UNICODE`
    Unicode,
    /// String flavor displayed as `ASCII`; pretty-printed as escaped text
    Ascii,
    /// String flavor displayed as `ASCII_INTERNED`; pretty-printed as escaped text
    AsciiInterned,
}
136
137impl Display for StringType {
138    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139        match self {
140            StringType::String => write!(f, "STRING"),
141            StringType::Interned => write!(f, "INTERNED"),
142            StringType::Unicode => write!(f, "UNICODE"),
143            StringType::Ascii => write!(f, "ASCII"),
144            StringType::AsciiInterned => write!(f, "ASCII_INTERNED"),
145        }
146    }
147}
148
149/// ## Python objects as represented in the binary "marshal" format
150///
151/// This enum represents Python objects as they are represented in the binary
152/// "marshal" format.
153#[derive(Clone, Debug, PartialEq)]
154#[non_exhaustive]
155pub enum Object {
156    /// null object
157    Null,
158    /// `None` singleton object
159    None,
160    /// `False` object
161    False,
162    /// `True` object
163    True,
164    /// `StopIteration` singleton
165    StopIteration,
166    /// `...` (ellipsis) singleton
167    Ellipsis,
168
169    /// 32-bit integer
170    Int(u32),
171    /// 64-bit floating-point number
172    BinaryFloat(f64),
173    /// 64-bit floating-point complex number
174    BinaryComplex((f64, f64)),
175    /// string
176    #[allow(missing_docs)]
177    String { typ: StringType, bytes: Vec<u8> },
178
179    /// tuple object (collection of objects)
180    Tuple(Vec<Object>),
181    /// list object (collection of objects)
182    List(Vec<Object>),
183    /// set object (collection of objects)
184    Set(Vec<Object>),
185    /// frozenset object (collection of objects)
186    FrozenSet(Vec<Object>),
187    /// dict object (collection of objects in key / value pairs)
188    Dict(Vec<(Object, Object)>),
189
190    /// dynamically-sized integer
191    Long(BigInt),
192    /// reference object
193    Ref(u32),
194    /// code object
195    Code(Box<CodeObject>),
196}
197
198impl Display for Object {
199    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
200        self.pretty_print(f, 0, "")
201    }
202}
203
204impl Object {
205    pub(crate) fn pretty_print<W>(&self, writer: &mut W, indent: usize, prefix: &str) -> fmt::Result
206    where
207        W: Write,
208    {
209        let indent_str = " ".repeat(indent) + prefix;
210
211        match self {
212            Object::Null => writeln!(writer, "{}NULL", indent_str),
213            Object::None => writeln!(writer, "{}None", indent_str),
214            Object::False => writeln!(writer, "{}False", indent_str),
215            Object::True => writeln!(writer, "{}True", indent_str),
216            Object::StopIteration => writeln!(writer, "{}StopIteration", indent_str),
217            Object::Ellipsis => writeln!(writer, "{}...", indent_str),
218            Object::Int(x) => writeln!(writer, "{}int: {}", indent_str, x),
219            Object::BinaryFloat(x) => writeln!(writer, "{}float: {}", indent_str, x),
220            Object::BinaryComplex(x) => writeln!(writer, "{}complex: ({}, {})", indent_str, x.0, x.1),
221            Object::String { typ, bytes } => pretty_print_string(writer, indent, prefix, *typ, bytes),
222            Object::Tuple(x) => {
223                writeln!(writer, "{}tuple (length {}):", indent_str, x.len())?;
224                for obj in x {
225                    obj.pretty_print(writer, indent + 2, "- ")?;
226                }
227                Ok(())
228            },
229            Object::List(x) => {
230                writeln!(writer, "{}list (length {}):", indent_str, x.len())?;
231                for obj in x {
232                    obj.pretty_print(writer, indent + 2, "- ")?;
233                }
234                Ok(())
235            },
236            Object::Set(x) => {
237                writeln!(writer, "{}set (length {}):", indent_str, x.len())?;
238                for obj in x {
239                    obj.pretty_print(writer, indent + 2, "- ")?;
240                }
241                Ok(())
242            },
243            Object::FrozenSet(x) => {
244                writeln!(writer, "{}frozenset (length {}):", indent_str, x.len())?;
245                for obj in x {
246                    obj.pretty_print(writer, indent + 2, "- ")?;
247                }
248                Ok(())
249            },
250            Object::Dict(x) => {
251                writeln!(writer, "{}dict (length {}):", indent_str, x.len())?;
252                for (key, value) in x {
253                    key.pretty_print(writer, indent + 2, "- key: ")?;
254                    value.pretty_print(writer, indent + 2, "- value: ")?;
255                }
256                Ok(())
257            },
258            Object::Long(x) => writeln!(writer, "{}long: {}", indent_str, x),
259            Object::Ref(x) => writeln!(writer, "{}ref: {}", indent_str, x),
260            Object::Code(x) => {
261                writeln!(writer, "{}code:", indent_str)?;
262                x.pretty_print(writer, indent + 2, "- ")
263            },
264        }
265    }
266}
267
/// Pretty-prints a string-like object (variant used with the `fancy` feature).
///
/// The first line starts with `indent` spaces followed by `prefix`.
///
/// - `Ascii` / `AsciiInterned` strings are decoded lossily as UTF-8 and written
///   inline, between double quotes, with debug-style escapes.
/// - All other flavors get a header line followed by a hex dump of the raw
///   bytes (8 bytes per row, with an ASCII column, without a title line); the
///   dump is indented through `textwrap::indent` with `indent + 2` spaces and
///   a `| ` marker.
///
/// In both cases the reported length is the number of raw bytes in `bytes`.
///
/// # Errors
///
/// Any error reported by `writer` is returned.
#[cfg(feature = "fancy")]
fn pretty_print_string<W>(writer: &mut W, indent: usize, prefix: &str, typ: StringType, bytes: &[u8]) -> fmt::Result
where
    W: Write,
{
    let indent_str = " ".repeat(indent) + prefix;

    if matches!(typ, StringType::Ascii | StringType::AsciiInterned) {
        let s: String = String::from_utf8_lossy(bytes).escape_debug().collect();
        // Report the payload size, not the size of the escaped rendering: the
        // escaped text grows with every quote, backslash or control character.
        writeln!(
            writer,
            "{}string (type {}, length {}): \"{}\"",
            indent_str,
            typ,
            bytes.len(),
            s
        )
    } else {
        let mut indent_str_dump = " ".repeat(indent + 2);
        indent_str_dump.push_str("| ");
        let hex_dump = pretty_hex::config_hex(
            &bytes,
            pretty_hex::HexConfig {
                title: false,
                ascii: true,
                width: 8,
                ..Default::default()
            },
        );

        writeln!(writer, "{}string (type {}, length {}):", indent_str, typ, bytes.len(),)?;
        writeln!(writer, "{}", textwrap::indent(&hex_dump, &indent_str_dump))
    }
}
302
303#[cfg(not(feature = "fancy"))]
304fn pretty_print_string<W>(writer: &mut W, indent: usize, prefix: &str, typ: StringType, bytes: &[u8]) -> fmt::Result
305where
306    W: Write,
307{
308    let indent_str = " ".repeat(indent) + prefix;
309
310    if matches!(typ, StringType::Ascii | StringType::AsciiInterned) {
311        let s: String = String::from_utf8_lossy(bytes).escape_debug().collect();
312        writeln!(
313            writer,
314            "{}string (type {}, length {}): \"{}\"",
315            indent_str,
316            typ,
317            s.len(),
318            s
319        )
320    } else {
321        writeln!(
322            writer,
323            "{}string (type {}, length {}): {:x?}",
324            indent_str,
325            typ,
326            bytes.len(),
327            bytes
328        )
329    }
330}
331
332/// ## Code objects as represented in the binary "marshal" format
333///
334/// The exact layout of this object in the binary format differs between Python
335/// versions. Some fields are present in all Python versions, some fields have
336/// been added, some fields have been removed.
337#[derive(Clone, Debug, PartialEq)]
338#[allow(missing_docs)]
339#[non_exhaustive]
340pub struct CodeObject {
341    pub argcount: u32,
342    /// added in Python 3.8+
343    pub posonlyargcount: Option<u32>,
344    pub kwonlyargcount: u32,
345    /// removed in Python 3.11+
346    pub nlocals: Option<u32>,
347    pub stacksize: u32,
348    pub flags: u32,
349    pub code: Object,
350    pub consts: Object,
351    pub names: Object,
352    /// removed in Python 3.11+
353    pub varnames: Option<Object>,
354    /// removed in Python 3.11+
355    pub freevars: Option<Object>,
356    /// removed in Python 3.11+
357    pub cellvars: Option<Object>,
358    /// added in Python 3.11+
359    pub localsplusnames: Option<Object>,
360    /// added in Python 3.11+
361    pub localspluskinds: Option<Object>,
362    pub filename: Object,
363    pub name: Object,
364    /// added in Python 3.11+
365    pub qualname: Option<Object>,
366    pub firstlineno: u32,
367    pub linetable: Object,
368    /// added in Python 3.11+
369    pub exceptiontable: Option<Object>,
370}
371
372impl CodeObject {
373    pub(crate) fn pretty_print<W>(&self, writer: &mut W, indent: usize, prefix: &str) -> fmt::Result
374    where
375        W: Write,
376    {
377        let indent_str = " ".repeat(indent) + prefix;
378
379        writeln!(writer, "{}argcount: {}", indent_str, self.argcount)?;
380
381        if let Some(posonlyargcount) = &self.posonlyargcount {
382            writeln!(writer, "{}posonlyargcount: {}", indent_str, posonlyargcount)?;
383        }
384
385        writeln!(writer, "{}kwonlyargcount: {}", indent_str, self.kwonlyargcount)?;
386
387        if let Some(nlocals) = &self.nlocals {
388            writeln!(writer, "{}nlocals: {}", indent_str, nlocals)?;
389        }
390
391        writeln!(writer, "{}stacksize: {}", indent_str, self.stacksize)?;
392        writeln!(writer, "{}flags: {}", indent_str, self.flags)?;
393
394        self.code.pretty_print(writer, indent, "- code: ")?;
395        self.consts.pretty_print(writer, indent, "- consts: ")?;
396        self.names.pretty_print(writer, indent, "- names: ")?;
397
398        if let Some(varnames) = &self.varnames {
399            varnames.pretty_print(writer, indent, "- varnames: ")?;
400        }
401
402        if let Some(freevars) = &self.freevars {
403            freevars.pretty_print(writer, indent, "- freevars: ")?;
404        }
405
406        if let Some(cellvars) = &self.cellvars {
407            cellvars.pretty_print(writer, indent, "- cellvars:  ")?;
408        }
409
410        if let Some(localsplusnames) = &self.localsplusnames {
411            localsplusnames.pretty_print(writer, indent, "- localsplusnames: ")?;
412        }
413
414        if let Some(localspluskinds) = &self.localspluskinds {
415            localspluskinds.pretty_print(writer, indent, "- localspluskinds: ")?;
416        }
417
418        self.filename.pretty_print(writer, indent, "- filename: ")?;
419        self.name.pretty_print(writer, indent, "- name: ")?;
420
421        if let Some(qualname) = &self.qualname {
422            qualname.pretty_print(writer, indent, "- qualname: ")?;
423        }
424
425        writeln!(writer, "{}firstlineno: {}", indent_str, self.firstlineno)?;
426        self.linetable.pretty_print(writer, indent, "- linetable: ")?;
427
428        if let Some(exceptiontable) = &self.exceptiontable {
429            exceptiontable.pretty_print(writer, indent, "- exceptiontable: ")?;
430        }
431        Ok(())
432    }
433}