1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
//! # Parser for the "marshal" binary de/serialization format used by CPython
//!
//! This crate implements a parser and some utilities for reading files in the
//! "marshal" de/serialization format used internally in CPython. The exact
//! format is not stable and can change between minor versions of CPython.
//!
//! This crate supports parsing "marshal" dumps and `pyc` files that were
//! written by CPython versions `>= 3.6` and `< 3.14`.
//!
//! There is a high-level and a low-level API, depending on how much access to
//! the underlying data structures is needed. The low-level API also provides
//! more flexibility since it does not require files, but can operate on plain
//! bytes ([`Vec<u8>`]).
//!
//! Reading a `pyc` file from disk:
//!
//! ```no_run
//! use marshal_parser::{MarshalFile, Object};
//!
//! let pyc = MarshalFile::from_pyc_path("mod.cpython-310.pyc").unwrap();
//! let object: Object = pyc.into_inner();
//! ```
//!
//! Reading a "marshal" dump (i.e. a file without `pyc` header):
//!
//! ```no_run
//! use marshal_parser::{MarshalFile, Object};
//!
//! let dump = MarshalFile::from_dump_path("dump.marshal", (3, 11)).unwrap();
//! let object: Object = dump.into_inner();
//! ```

use std::borrow::Cow;
use std::fs::{File, OpenOptions};
use std::io::{Read, Write};
use std::path::Path;

mod magic;
mod objects;
mod parser;

pub use objects::{CodeObject, Object, ObjectType, StringType};
pub use parser::{Error, MarshalObject};

/// Crate-local shorthand for a [`std::result::Result`] whose error type is [`Error`]
type Result<T> = std::result::Result<T, Error>;

/// High-level parser for `pyc` and "marshal dump" files
///
/// Holds both the raw bytes that were read from disk and the result of parsing
/// them, so the original contents stay available after parsing.
#[derive(Debug)]
pub struct MarshalFile {
    /// Raw contents of the file, exactly as read from disk
    data: Vec<u8>,
    /// Parse result obtained from `data`
    marshal: MarshalObject,
}

impl MarshalFile {
    /// Read and parse a `pyc` file at the specified path
    pub fn from_pyc_path<S>(path: S) -> Result<Self>
    where
        S: AsRef<Path>,
    {
        let mut file = OpenOptions::new()
            .read(true)
            .write(false)
            .create_new(false)
            .open(path)?;

        let mut data = Vec::new();
        file.read_to_end(&mut data)?;

        let marshal = MarshalObject::parse_pyc(&data)?;
        Ok(MarshalFile { data, marshal })
    }

    /// Read and parse a "marshal dump" file at the specified path
    pub fn from_dump_path<S>(path: S, (major, minor): (u16, u16)) -> Result<Self>
    where
        S: AsRef<Path>,
    {
        let mut file = OpenOptions::new()
            .read(true)
            .write(false)
            .create_new(false)
            .open(path)?;

        let mut data = Vec::new();
        file.read_to_end(&mut data)?;

        let marshal = MarshalObject::parse_dump(&data, (major, minor))?;
        Ok(MarshalFile { data, marshal })
    }

    /// Obtain a reference to the inner [`Object`]
    pub fn inner(&self) -> &Object {
        &self.marshal.object
    }

    /// Consume this [`MarshalFile`] to obtain the inner [`Object`]
    pub fn into_inner(self) -> Object {
        self.marshal.object
    }

    /// Print objects with unused reference flags to stdout
    pub fn print_unused_ref_flags(&self) {
        self.marshal.print_unused_ref_flags();
    }

    /// Rewrite file to remove unused reference flags
    ///
    /// This can be useful to generate `pyc` files that are reproducible across
    /// different CPU architectures.
    ///
    /// If no unused reference flags are found, no file is written, and `false`
    /// is returned. If a file is written, `true` is returned.
    pub fn write_normalized<S>(self, path: S) -> Result<bool>
    where
        S: AsRef<Path>,
    {
        let marshal = self.marshal;
        let result = marshal.clear_unused_ref_flags(&self.data)?;

        if let Cow::Owned(x) = result {
            let mut file = File::create_new(path)?;
            file.write_all(&x)?;

            Ok(true)
        } else {
            Ok(false)
        }
    }
}