marshal_parser/
lib.rs

1//! # Parser for the "marshal" binary de/serialization format used by CPython
2//!
3//! This crate implements a parser and some utilities for reading files in the
4//! "marshal" de/serialization format used internally in CPython. The exact
5//! format is not stable and can change between minor versions of CPython.
6//!
7//! This crate supports parsing "marshal" dumps and `pyc` files that were
8//! written by CPython versions `>= 3.6` and `< 3.14`.
9//!
10//! There is a high-level and a low-level API, depending on how much access to
11//! the underlying data structures is needed. The low-level API also provides
12//! more flexibility since it does not require files, but can operate on plain
13//! bytes ([`Vec<u8>`]).
14//!
15//! Reading a `pyc` file from disk:
16//!
17//! ```no_run
18//! use marshal_parser::{MarshalFile, Object};
19//!
20//! let pyc = MarshalFile::from_pyc_path("mod.cpython-310.pyc").unwrap();
21//! let object: Object = pyc.into_inner();
22//! ```
23//!
24//! Reading a "marshal" dump (i.e. a file without `pyc` header):
25//!
26//! ```no_run
27//! use marshal_parser::{MarshalFile, Object};
28//!
29//! let dump = MarshalFile::from_dump_path("dump.marshal", (3, 11)).unwrap();
30//! let object: Object = dump.into_inner();
31//! ```
32
33use std::borrow::Cow;
34use std::fs::{File, OpenOptions};
35use std::io::{Read, Write};
36use std::path::Path;
37
38mod magic;
39mod objects;
40mod parser;
41
42pub use objects::{CodeObject, Object, ObjectType, StringType};
43pub use parser::{Error, MarshalObject};
44
45type Result<T> = std::result::Result<T, Error>;
46
/// High-level parser for `pyc` and "marshal dump" files
///
/// Bundles the raw bytes that were read from disk with the result of parsing
/// them, so that the file can later be inspected or rewritten.
#[derive(Debug)]
pub struct MarshalFile {
    // Complete, unmodified contents of the file as read from disk; passed
    // back to the parser result when writing a normalized copy.
    data: Vec<u8>,
    // Result of parsing `data`; its `object` field is what `inner` and
    // `into_inner` expose.
    marshal: MarshalObject,
}
53
54impl MarshalFile {
55    /// Read and parse a `pyc` file at the specified path
56    pub fn from_pyc_path<S>(path: S) -> Result<Self>
57    where
58        S: AsRef<Path>,
59    {
60        let mut file = OpenOptions::new()
61            .read(true)
62            .write(false)
63            .create_new(false)
64            .open(path)?;
65
66        let mut data = Vec::new();
67        file.read_to_end(&mut data)?;
68
69        let marshal = MarshalObject::parse_pyc(&data)?;
70        Ok(MarshalFile { data, marshal })
71    }
72
73    /// Read and parse a "marshal dump" file at the specified path
74    pub fn from_dump_path<S>(path: S, (major, minor): (u16, u16)) -> Result<Self>
75    where
76        S: AsRef<Path>,
77    {
78        let mut file = OpenOptions::new()
79            .read(true)
80            .write(false)
81            .create_new(false)
82            .open(path)?;
83
84        let mut data = Vec::new();
85        file.read_to_end(&mut data)?;
86
87        let marshal = MarshalObject::parse_dump(&data, (major, minor))?;
88        Ok(MarshalFile { data, marshal })
89    }
90
91    /// Obtain a reference to the inner [`Object`]
92    pub fn inner(&self) -> &Object {
93        &self.marshal.object
94    }
95
96    /// Consume this [`MarshalFile`] to obtain the inner [`Object`]
97    pub fn into_inner(self) -> Object {
98        self.marshal.object
99    }
100
101    /// Print objects with unused reference flags to stdout
102    pub fn print_unused_ref_flags(&self) {
103        self.marshal.print_unused_ref_flags();
104    }
105
106    /// Rewrite file to remove unused reference flags
107    ///
108    /// This can be useful to generate `pyc` files that are reproducible across
109    /// different CPU architectures.
110    ///
111    /// If no unused reference flags are found, no file is written, and `false`
112    /// is returned. If a file is written, `true` is returned.
113    pub fn write_normalized<S>(self, path: S) -> Result<bool>
114    where
115        S: AsRef<Path>,
116    {
117        let marshal = self.marshal;
118        let result = marshal.clear_unused_ref_flags(&self.data)?;
119
120        if let Cow::Owned(x) = result {
121            let mut file = File::create_new(path)?;
122            file.write_all(&x)?;
123
124            Ok(true)
125        } else {
126            Ok(false)
127        }
128    }
129}