// python_assembler/formats/pyc/view/to_program.rs

1use crate::{
2    formats::pyc::{view::PycView, PycReadConfig},
3    instructions::PythonInstruction,
4    program::{PythonCodeObject, PythonProgram, PythonVersion},
5};
6use gaia_types::{GaiaDiagnostics, GaiaError};
7
/// Opcode threshold used by the bytecode decoder: every opcode byte that is
/// greater than or equal to this value is decoded together with the byte that
/// follows it as its argument; smaller opcodes are decoded without an argument.
///
/// The value 90 is the same as `HAVE_ARGUMENT` in CPython's `opcode` module.
const HAVE_ARGUMENT: u8 = 90;
9
10impl PycView {
11    pub fn to_program(self, config: &PycReadConfig) -> GaiaDiagnostics<PythonProgram> {
12        let mut convert = Pyc2Program { config, program: PythonProgram::default(), errors: vec![] };
13        match convert.transform(self) {
14            Ok(_) => GaiaDiagnostics { result: Ok(convert.program), diagnostics: convert.errors },
15            Err(e) => GaiaDiagnostics { result: Err(e), diagnostics: convert.errors },
16        }
17    }
18}
19
20struct Pyc2Program<'config> {
21    config: &'config PycReadConfig,
22    program: PythonProgram,
23    errors: Vec<GaiaError>,
24}
25
26impl<'config> Pyc2Program<'config> {
27    pub fn transform(&mut self, view: PycView) -> Result<(), GaiaError> {
28        // 动态判断 Python 版本:优先使用配置,其次用 .pyc 头部 MAGIC_NUMBER
29        let version = if self.config.version != PythonVersion::Unknown {
30            self.config.version
31        }
32        else {
33            PythonVersion::from_magic(view.header.magic)
34        };
35        self.program.code_object = PythonCodeObject {
36            source_name: "<unknown>".to_string(),
37            first_line: 0,
38            last_line: 0,
39            num_params: 0,
40            is_vararg: 0,
41            max_stack_size: 0,
42            nested_functions: vec![],
43            upvalues: vec![],
44            local_vars: vec![],
45            line_info: vec![],
46            co_argcount: 0,
47            co_nlocal: 0,
48            co_stacks: 0,
49            num_upval: 0,
50            co_code: Self::decode(&view.code_object_bytes, version),
51            co_consts: vec![],
52            upvalue_n: 0,
53        };
54        self.program.header = view.header;
55        Ok(())
56    }
57    fn decode(code_bytes: &[u8], version: PythonVersion) -> Vec<PythonInstruction> {
58        let mut instructions = Vec::new();
59        let mut i = 0;
60        let mut ext_arg: u32 = 0; // 处理 EXTENDED_ARG 的累积值(按 8bit 扩展)
61        while i < code_bytes.len() {
62            let opcode_byte = code_bytes[i];
63            let mut arg: Option<u32> = None;
64
65            // 读取参数(1 字节),处理 EXTENDED_ARG 扩展
66            if opcode_byte >= HAVE_ARGUMENT {
67                if i + 1 < code_bytes.len() {
68                    let base = code_bytes[i + 1] as u32;
69                    let full = (ext_arg << 8) | base;
70                    arg = Some(full);
71                }
72                i += 2; // (opcode + arg)
73                // 非 EXTENDED_ARG 指令后重置扩展
74                if opcode_byte != 144 {
75                    ext_arg = 0;
76                }
77            }
78            else {
79                i += 1; // 无参数指令
80                ext_arg = 0;
81            }
82
83            // 版本感知映射(目前以 3.12 为主)
84            let python_opcode = match opcode_byte {
85                // 3.11+ 引入的 CACHE 占位,用于 inline cache
86                0 => PythonInstruction::CACHE,
87                1 => PythonInstruction::POP_TOP,
88                2 => PythonInstruction::PUSH_NULL,
89                4 => PythonInstruction::DUP_TOP,
90                9 => PythonInstruction::NOP,
91                68 => PythonInstruction::GET_ITER,
92                83 => PythonInstruction::RETURN_VALUE,
93                93 => PythonInstruction::FOR_ITER(arg.unwrap_or(0)),
94                107 => PythonInstruction::COMPARE_OP(arg.unwrap_or(0)),
95                // 3.12 常见带参指令
96                90 => PythonInstruction::STORE_NAME(arg.unwrap_or(0)),
97                97 => PythonInstruction::STORE_GLOBAL(arg.unwrap_or(0)),
98                100 => PythonInstruction::LOAD_CONST(arg.unwrap_or(0)),
99                101 => PythonInstruction::LOAD_NAME(arg.unwrap_or(0)),
100                106 => PythonInstruction::LOAD_ATTR(arg.unwrap_or(0)),
101                107 => PythonInstruction::COMPARE_OP(arg.unwrap_or(0)),
102                108 => PythonInstruction::IMPORT_NAME(arg.unwrap_or(0)),
103                110 => PythonInstruction::JUMP_FORWARD(arg.unwrap_or(0)),
104                114 => PythonInstruction::POP_JUMP_IF_FALSE(arg.unwrap_or(0)),
105                115 => PythonInstruction::POP_JUMP_IF_TRUE(arg.unwrap_or(0)),
106                116 => PythonInstruction::LOAD_GLOBAL(arg.unwrap_or(0)),
107                121 => PythonInstruction::RETURN_CONST(arg.unwrap_or(0)),
108                122 => PythonInstruction::BINARY_OP(arg.unwrap_or(0)),
109                124 => PythonInstruction::LOAD_FAST(arg.unwrap_or(0)),
110                125 => PythonInstruction::STORE_FAST(arg.unwrap_or(0)),
111                132 => PythonInstruction::MAKE_FUNCTION(arg.unwrap_or(0)),
112                140 => PythonInstruction::JUMP_BACKWARD(arg.unwrap_or(0)),
113                144 => PythonInstruction::EXTENDED_ARG(arg.unwrap_or(0)),
114                151 => PythonInstruction::RESUME,
115                156 => PythonInstruction::BUILD_CONST_KEY_MAP(arg.unwrap_or(0)),
116                171 => PythonInstruction::CALL(arg.unwrap_or(0)),
117                // 版本差异:对于未知或被移除的指令,回退到 UNKNOWN
118                _ => PythonInstruction::UNKNOWN(opcode_byte, arg),
119            };
120            instructions.push(python_opcode);
121        }
122        instructions
123    }
124}