Skip to main content

python_assembler/formats/pyc/
marshal.rs

1//! # Marshal 序列化模块
2//!
3//! 本模块提供了对 Python 对象进行 Marshal 序列化的功能,主要用于生成 `.pyc` 文件。
4//! 它实现了将 `PythonObject` 和 `PythonCodeObject` 转换为字节流的逻辑。
5
6use crate::{
7    instructions::PythonInstruction,
8    program::{PythonCodeObject, PythonObject, PythonVersion},
9};
10use gaia_binary::{BinaryWriter, Fixed, LittleEndian};
11use std::io::{self, Write};
12
13/// Marshal 序列化器
14#[derive(Debug)]
15pub struct MarshalWriter<W: Write> {
16    writer: BinaryWriter<W, Fixed<LittleEndian>>,
17    #[allow(dead_code)]
18    version: PythonVersion,
19}
20
21impl<W: Write> MarshalWriter<W> {
22    /// 创建一个新的 MarshalWriter 实例
23    pub fn new(writer: W, version: PythonVersion) -> Self {
24        Self { writer: BinaryWriter::new(writer), version }
25    }
26
27    /// 写入一个 Python 对象
28    pub fn write_object(&mut self, obj: &PythonObject) -> io::Result<()> {
29        match obj {
30            PythonObject::None => {
31                self.writer.write_u8(b'N').map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
32            }
33            PythonObject::Bool(b) => {
34                self.writer
35                    .write_u8(if *b { b'T' } else { b'F' })
36                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
37            }
38            PythonObject::Int(i) => {
39                self.writer.write_u8(b'i' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; // TYPE_INT | FLAG_REF
40                self.writer.write_i32(*i).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
41            }
42            PythonObject::Integer(i) => {
43                if *i >= i32::MIN as i64 && *i <= i32::MAX as i64 {
44                    self.writer.write_u8(b'i' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
45                    self.writer.write_i32(*i as i32).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
46                }
47                else {
48                    self.writer.write_u8(b'I' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; // TYPE_INT64 | FLAG_REF
49                    self.writer.write_i64(*i).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
50                }
51            }
52            PythonObject::Float(f) => {
53                self.writer.write_u8(b'g' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; // TYPE_BINARY_FLOAT | FLAG_REF
54                self.writer.write_f64(*f).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
55            }
56            PythonObject::Str(s) | PythonObject::String(s) => {
57                if s.len() <= 255 && s.is_ascii() {
58                    self.writer.write_u8(b'Z' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; // TYPE_SHORT_ASCII_INTERNED | FLAG_REF
59                    self.writer.write_u8(s.len() as u8).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
60                    self.writer.write_bytes(s.as_bytes()).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
61                }
62                else {
63                    self.writer.write_u8(b'u' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; // TYPE_UNICODE | FLAG_REF
64                    self.writer.write_u32(s.len() as u32).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
65                    self.writer.write_bytes(s.as_bytes()).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
66                }
67            }
68            PythonObject::Bytes(b) => {
69                self.writer.write_u8(b's' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; // TYPE_STRING | FLAG_REF
70                self.writer.write_u32(b.len() as u32).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
71                self.writer.write_bytes(b).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
72            }
73            PythonObject::List(l) => {
74                self.writer.write_u8(b'[' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; // TYPE_LIST | FLAG_REF
75                self.writer.write_u32(l.len() as u32).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
76                for item in l {
77                    self.write_object(item)?;
78                }
79            }
80            PythonObject::Tuple(t) => {
81                if t.len() <= 255 {
82                    self.writer.write_u8(b')' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; // TYPE_SMALL_TUPLE | FLAG_REF
83                    self.writer.write_u8(t.len() as u8).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
84                }
85                else {
86                    self.writer.write_u8(b'(' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; // TYPE_TUPLE | FLAG_REF
87                    self.writer.write_u32(t.len() as u32).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
88                }
89                for item in t {
90                    self.write_object(item)?;
91                }
92            }
93            PythonObject::Code(code) => {
94                self.write_code_object(code)?;
95            }
96        }
97        Ok(())
98    }
99
100    /// 写入一个 Python 代码对象
101    pub fn write_code_object(&mut self, code: &PythonCodeObject) -> io::Result<()> {
102        self.writer.write_u8(b'c' | 0x80).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; // TYPE_CODE | FLAG_REF
103
104        // Python 3.11+ code object fields
105        self.writer.write_u32(code.co_argcount as u32).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
106        self.writer.write_u32(code.co_posonlyargcount as u32).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
107        self.writer.write_u32(code.co_kwonlyargcount as u32).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
108        // nlocals removed in 3.11+ marshal, replaced by stacksize and flags
109        self.writer.write_u32(code.co_stacksize as u32).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
110        self.writer.write_u32(code.co_flags).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
111
112        // co_code
113        let bytecode = self.encode_instructions(&code.co_code);
114        self.write_object(&PythonObject::Bytes(bytecode))?;
115
116        // co_consts
117        self.write_object(&PythonObject::Tuple(code.co_consts.clone()))?;
118
119        // co_names
120        self.write_object(&PythonObject::Tuple(code.co_names.iter().map(|s| PythonObject::Str(s.clone())).collect()))?;
121
122        // co_localsplusnames (Python 3.11+)
123        self.write_object(&PythonObject::Tuple(
124            code.co_localsplusnames.iter().map(|s| PythonObject::Str(s.clone())).collect(),
125        ))?;
126
127        // co_localspluskinds
128        self.write_object(&PythonObject::Bytes(code.co_localspluskinds.clone()))?;
129
130        // co_filename
131        self.write_object(&PythonObject::Str(code.source_name.clone()))?;
132
133        // co_name
134        self.write_object(&PythonObject::Str(code.name.clone()))?;
135
136        // co_qualname
137        self.write_object(&PythonObject::Str(code.qualname.clone()))?;
138
139        // co_firstlineno
140        self.writer.write_u32(code.first_line).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
141
142        // co_linetable
143        self.write_object(&PythonObject::Bytes(code.co_linetable.clone()))?;
144
145        // co_exceptiontable
146        self.write_object(&PythonObject::Bytes(code.co_exceptiontable.clone()))?;
147
148        Ok(())
149    }
150
151    fn encode_instructions(&self, instrs: &[PythonInstruction]) -> Vec<u8> {
152        let mut data = Vec::new();
153        for instr in instrs {
154            let (opcode, arg) = self.map_instruction(instr);
155            data.push(opcode);
156            data.push((arg & 0xFF) as u8);
157        }
158        data
159    }
160
161    fn map_instruction(&self, instr: &PythonInstruction) -> (u8, u32) {
162        match instr {
163            PythonInstruction::RESUME => (151, 0),
164            PythonInstruction::RETURN_CONST(idx) => (121, *idx),
165            PythonInstruction::LOAD_CONST(idx) => (100, *idx),
166            PythonInstruction::STORE_NAME(idx) => (90, *idx),
167            PythonInstruction::LOAD_NAME(idx) => (101, *idx),
168            PythonInstruction::PUSH_NULL => (2, 0),
169            PythonInstruction::CALL(argc) => (171, *argc),
170            PythonInstruction::BINARY_OP(op) => (122, *op),
171            PythonInstruction::POP_TOP => (1, 0),
172            PythonInstruction::RETURN_VALUE => (83, 0),
173            PythonInstruction::BINARY_MODULO => (22, 0),
174            PythonInstruction::INPLACE_MODULO => (59, 0),
175            PythonInstruction::NOP => (9, 0),
176            PythonInstruction::UNARY_NEGATIVE => (11, 0),
177            PythonInstruction::UNARY_NOT => (12, 0),
178            PythonInstruction::UNARY_INVERT => (15, 0),
179            PythonInstruction::GET_LEN => (30, 0),
180            PythonInstruction::MATCH_MAPPING => (31, 0),
181            PythonInstruction::MATCH_SEQUENCE => (32, 0),
182            PythonInstruction::MATCH_KEYS => (33, 0),
183            PythonInstruction::GET_ITER => (68, 0),
184            PythonInstruction::LOAD_BUILD_CLASS => (71, 0),
185            PythonInstruction::LOAD_ASSERTION_ERROR => (74, 0),
186            PythonInstruction::RETURN_GENERATOR => (75, 0),
187            PythonInstruction::YIELD_VALUE => (150, 0),
188            PythonInstruction::LOAD_FAST(idx) => (124, *idx),
189            PythonInstruction::STORE_FAST(idx) => (125, *idx),
190            PythonInstruction::DELETE_FAST(idx) => (126, *idx),
191            PythonInstruction::LOAD_GLOBAL(idx) => (116, *idx),
192            PythonInstruction::STORE_GLOBAL(idx) => (97, *idx),
193            PythonInstruction::DELETE_GLOBAL(idx) => (98, *idx),
194            PythonInstruction::LOAD_ATTR(idx) => (106, *idx),
195            PythonInstruction::STORE_ATTR(idx) => (95, *idx),
196            PythonInstruction::DELETE_ATTR(idx) => (96, *idx),
197            PythonInstruction::COMPARE_OP(idx) => (107, *idx),
198            PythonInstruction::IMPORT_NAME(idx) => (108, *idx),
199            PythonInstruction::IMPORT_FROM(idx) => (109, *idx),
200            PythonInstruction::JUMP_FORWARD(idx) => (110, *idx),
201            PythonInstruction::JUMP_BACKWARD(idx) => (140, *idx),
202            PythonInstruction::POP_JUMP_IF_FALSE(idx) => (114, *idx),
203            PythonInstruction::POP_JUMP_IF_TRUE(idx) => (115, *idx),
204            PythonInstruction::MAKE_FUNCTION(idx) => (132, *idx),
205            PythonInstruction::BUILD_TUPLE(idx) => (102, *idx),
206            PythonInstruction::BUILD_LIST(idx) => (103, *idx),
207            PythonInstruction::BUILD_SET(idx) => (104, *idx),
208            PythonInstruction::BUILD_MAP(idx) => (105, *idx),
209            PythonInstruction::LIST_APPEND(idx) => (145, *idx),
210            PythonInstruction::SET_ADD(idx) => (146, *idx),
211            PythonInstruction::MAP_ADD(idx) => (147, *idx),
212            _ => (0, 0), // Default to NOP or handle error
213        }
214    }
215}