Skip to main content

python_assembler/formats/pyc/
marshal.rs

1//! # Marshal 序列化模块
2//!
3//! 本模块提供了对 Python 对象进行 Marshal 序列化的功能,主要用于生成 `.pyc` 文件。
4//! 它实现了将 `PythonObject` 和 `PythonCodeObject` 转换为字节流的逻辑。
5
6use crate::{
7    instructions::PythonInstruction,
8    program::{PythonCodeObject, PythonObject, PythonVersion},
9};
10use byteorder::{LittleEndian, WriteBytesExt};
11use std::io::{self, Write};
12
/// Marshal serializer.
///
/// Wraps an output sink `W` and writes Python objects to it in the marshal
/// byte format used for `.pyc` files.
#[derive(Debug)]
pub struct MarshalWriter<W> {
    /// Destination that receives the serialized bytes.
    writer: W,
    /// Python version supplied to `new`. It is stored but not read by any
    /// method visible in this file, hence the `dead_code` allowance.
    #[allow(dead_code)]
    version: PythonVersion,
}
20
21impl<W: Write> MarshalWriter<W> {
22    /// 创建一个新的 MarshalWriter 实例
23    pub fn new(writer: W, version: PythonVersion) -> Self {
24        Self { writer, version }
25    }
26
27    /// 写入一个 Python 对象
28    pub fn write_object(&mut self, obj: &PythonObject) -> io::Result<()> {
29        match obj {
30            PythonObject::None => {
31                self.writer.write_u8(b'N')?;
32            }
33            PythonObject::Bool(b) => {
34                self.writer.write_u8(if *b { b'T' } else { b'F' })?;
35            }
36            PythonObject::Int(i) => {
37                self.writer.write_u8(b'i' | 0x80)?; // TYPE_INT | FLAG_REF
38                self.writer.write_i32::<LittleEndian>(*i)?;
39            }
40            PythonObject::Integer(i) => {
41                if *i >= i32::MIN as i64 && *i <= i32::MAX as i64 {
42                    self.writer.write_u8(b'i' | 0x80)?;
43                    self.writer.write_i32::<LittleEndian>(*i as i32)?;
44                }
45                else {
46                    self.writer.write_u8(b'I' | 0x80)?; // TYPE_INT64 | FLAG_REF
47                    self.writer.write_i64::<LittleEndian>(*i)?;
48                }
49            }
50            PythonObject::Float(f) => {
51                self.writer.write_u8(b'g' | 0x80)?; // TYPE_BINARY_FLOAT | FLAG_REF
52                self.writer.write_f64::<LittleEndian>(*f)?;
53            }
54            PythonObject::Str(s) | PythonObject::String(s) => {
55                if s.len() <= 255 && s.is_ascii() {
56                    self.writer.write_u8(b'Z' | 0x80)?; // TYPE_SHORT_ASCII_INTERNED | FLAG_REF
57                    self.writer.write_u8(s.len() as u8)?;
58                    self.writer.write_all(s.as_bytes())?;
59                }
60                else {
61                    self.writer.write_u8(b'u' | 0x80)?; // TYPE_UNICODE | FLAG_REF
62                    self.writer.write_u32::<LittleEndian>(s.len() as u32)?;
63                    self.writer.write_all(s.as_bytes())?;
64                }
65            }
66            PythonObject::Bytes(b) => {
67                self.writer.write_u8(b's' | 0x80)?; // TYPE_STRING | FLAG_REF
68                self.writer.write_u32::<LittleEndian>(b.len() as u32)?;
69                self.writer.write_all(b)?;
70            }
71            PythonObject::List(l) => {
72                self.writer.write_u8(b'[' | 0x80)?; // TYPE_LIST | FLAG_REF
73                self.writer.write_u32::<LittleEndian>(l.len() as u32)?;
74                for item in l {
75                    self.write_object(item)?;
76                }
77            }
78            PythonObject::Tuple(t) => {
79                if t.len() <= 255 {
80                    self.writer.write_u8(b')' | 0x80)?; // TYPE_SMALL_TUPLE | FLAG_REF
81                    self.writer.write_u8(t.len() as u8)?;
82                }
83                else {
84                    self.writer.write_u8(b'(' | 0x80)?; // TYPE_TUPLE | FLAG_REF
85                    self.writer.write_u32::<LittleEndian>(t.len() as u32)?;
86                }
87                for item in t {
88                    self.write_object(item)?;
89                }
90            }
91            PythonObject::Code(code) => {
92                self.write_code_object(code)?;
93            }
94        }
95        Ok(())
96    }
97
98    /// 写入一个 Python 代码对象
99    pub fn write_code_object(&mut self, code: &PythonCodeObject) -> io::Result<()> {
100        self.writer.write_u8(b'c' | 0x80)?; // TYPE_CODE | FLAG_REF
101
102        // Python 3.11+ code object fields
103        self.writer.write_u32::<LittleEndian>(code.co_argcount as u32)?;
104        self.writer.write_u32::<LittleEndian>(code.co_posonlyargcount as u32)?;
105        self.writer.write_u32::<LittleEndian>(code.co_kwonlyargcount as u32)?;
106        // nlocals removed in 3.11+ marshal, replaced by stacksize and flags
107        self.writer.write_u32::<LittleEndian>(code.co_stacksize as u32)?;
108        self.writer.write_u32::<LittleEndian>(code.co_flags)?;
109
110        // co_code
111        let bytecode = self.encode_instructions(&code.co_code);
112        self.write_object(&PythonObject::Bytes(bytecode))?;
113
114        // co_consts
115        self.write_object(&PythonObject::Tuple(code.co_consts.clone()))?;
116
117        // co_names
118        self.write_object(&PythonObject::Tuple(code.co_names.iter().map(|s| PythonObject::Str(s.clone())).collect()))?;
119
120        // co_localsplusnames (Python 3.11+)
121        self.write_object(&PythonObject::Tuple(
122            code.co_localsplusnames.iter().map(|s| PythonObject::Str(s.clone())).collect(),
123        ))?;
124
125        // co_localspluskinds
126        self.write_object(&PythonObject::Bytes(code.co_localspluskinds.clone()))?;
127
128        // co_filename
129        self.write_object(&PythonObject::Str(code.source_name.clone()))?;
130
131        // co_name
132        self.write_object(&PythonObject::Str(code.name.clone()))?;
133
134        // co_qualname
135        self.write_object(&PythonObject::Str(code.qualname.clone()))?;
136
137        // co_firstlineno
138        self.writer.write_u32::<LittleEndian>(code.first_line)?;
139
140        // co_linetable
141        self.write_object(&PythonObject::Bytes(code.co_linetable.clone()))?;
142
143        // co_exceptiontable
144        self.write_object(&PythonObject::Bytes(code.co_exceptiontable.clone()))?;
145
146        Ok(())
147    }
148
149    fn encode_instructions(&self, instrs: &[PythonInstruction]) -> Vec<u8> {
150        let mut data = Vec::new();
151        for instr in instrs {
152            let (opcode, arg) = self.map_instruction(instr);
153            data.push(opcode);
154            data.push((arg & 0xFF) as u8);
155        }
156        data
157    }
158
159    fn map_instruction(&self, instr: &PythonInstruction) -> (u8, u32) {
160        match instr {
161            PythonInstruction::RESUME => (151, 0),
162            PythonInstruction::RETURN_CONST(idx) => (121, *idx),
163            PythonInstruction::LOAD_CONST(idx) => (100, *idx),
164            PythonInstruction::STORE_NAME(idx) => (90, *idx),
165            PythonInstruction::LOAD_NAME(idx) => (101, *idx),
166            PythonInstruction::PUSH_NULL => (2, 0),
167            PythonInstruction::CALL(argc) => (171, *argc),
168            PythonInstruction::BINARY_OP(op) => (122, *op),
169            PythonInstruction::POP_TOP => (1, 0),
170            PythonInstruction::RETURN_VALUE => (83, 0),
171            PythonInstruction::BINARY_MODULO => (22, 0),
172            PythonInstruction::INPLACE_MODULO => (59, 0),
173            PythonInstruction::NOP => (9, 0),
174            PythonInstruction::UNARY_NEGATIVE => (11, 0),
175            PythonInstruction::UNARY_NOT => (12, 0),
176            PythonInstruction::UNARY_INVERT => (15, 0),
177            PythonInstruction::GET_LEN => (30, 0),
178            PythonInstruction::MATCH_MAPPING => (31, 0),
179            PythonInstruction::MATCH_SEQUENCE => (32, 0),
180            PythonInstruction::MATCH_KEYS => (33, 0),
181            PythonInstruction::GET_ITER => (68, 0),
182            PythonInstruction::LOAD_BUILD_CLASS => (71, 0),
183            PythonInstruction::LOAD_ASSERTION_ERROR => (74, 0),
184            PythonInstruction::RETURN_GENERATOR => (75, 0),
185            PythonInstruction::YIELD_VALUE => (150, 0),
186            PythonInstruction::LOAD_FAST(idx) => (124, *idx),
187            PythonInstruction::STORE_FAST(idx) => (125, *idx),
188            PythonInstruction::DELETE_FAST(idx) => (126, *idx),
189            PythonInstruction::LOAD_GLOBAL(idx) => (116, *idx),
190            PythonInstruction::STORE_GLOBAL(idx) => (97, *idx),
191            PythonInstruction::DELETE_GLOBAL(idx) => (98, *idx),
192            PythonInstruction::LOAD_ATTR(idx) => (106, *idx),
193            PythonInstruction::STORE_ATTR(idx) => (95, *idx),
194            PythonInstruction::DELETE_ATTR(idx) => (96, *idx),
195            PythonInstruction::COMPARE_OP(idx) => (107, *idx),
196            PythonInstruction::IMPORT_NAME(idx) => (108, *idx),
197            PythonInstruction::IMPORT_FROM(idx) => (109, *idx),
198            PythonInstruction::JUMP_FORWARD(idx) => (110, *idx),
199            PythonInstruction::JUMP_BACKWARD(idx) => (140, *idx),
200            PythonInstruction::POP_JUMP_IF_FALSE(idx) => (114, *idx),
201            PythonInstruction::POP_JUMP_IF_TRUE(idx) => (115, *idx),
202            PythonInstruction::MAKE_FUNCTION(idx) => (132, *idx),
203            PythonInstruction::BUILD_TUPLE(idx) => (102, *idx),
204            PythonInstruction::BUILD_LIST(idx) => (103, *idx),
205            PythonInstruction::BUILD_SET(idx) => (104, *idx),
206            PythonInstruction::BUILD_MAP(idx) => (105, *idx),
207            PythonInstruction::LIST_APPEND(idx) => (145, *idx),
208            PythonInstruction::SET_ADD(idx) => (146, *idx),
209            PythonInstruction::MAP_ADD(idx) => (147, *idx),
210            _ => (0, 0), // Default to NOP or handle error
211        }
212    }
213}