rustpython_codegen/
ir.rs

1use std::ops;
2
3use crate::IndexSet;
4use rustpython_compiler_core::bytecode::{
5    CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label, OpArg,
6};
7use rustpython_parser_core::source_code::{LineNumber, SourceLocation};
8
9#[derive(Copy, Clone, PartialEq, Eq, Debug)]
10pub struct BlockIdx(pub u32);
11impl BlockIdx {
12    pub const NULL: BlockIdx = BlockIdx(u32::MAX);
13    const fn idx(self) -> usize {
14        self.0 as usize
15    }
16}
17impl ops::Index<BlockIdx> for [Block] {
18    type Output = Block;
19    fn index(&self, idx: BlockIdx) -> &Block {
20        &self[idx.idx()]
21    }
22}
23impl ops::IndexMut<BlockIdx> for [Block] {
24    fn index_mut(&mut self, idx: BlockIdx) -> &mut Block {
25        &mut self[idx.idx()]
26    }
27}
28impl ops::Index<BlockIdx> for Vec<Block> {
29    type Output = Block;
30    fn index(&self, idx: BlockIdx) -> &Block {
31        &self[idx.idx()]
32    }
33}
34impl ops::IndexMut<BlockIdx> for Vec<Block> {
35    fn index_mut(&mut self, idx: BlockIdx) -> &mut Block {
36        &mut self[idx.idx()]
37    }
38}
39
40#[derive(Debug, Copy, Clone)]
41pub struct InstructionInfo {
42    pub instr: Instruction,
43    pub arg: OpArg,
44    pub target: BlockIdx,
45    pub location: SourceLocation,
46}
47
48// spell-checker:ignore petgraph
49// TODO: look into using petgraph for handling blocks and stuff? it's heavier than this, but it
50// might enable more analysis/optimizations
51#[derive(Debug)]
52pub struct Block {
53    pub instructions: Vec<InstructionInfo>,
54    pub next: BlockIdx,
55}
56impl Default for Block {
57    fn default() -> Self {
58        Block {
59            instructions: Vec::new(),
60            next: BlockIdx::NULL,
61        }
62    }
63}
64
65pub struct CodeInfo {
66    pub flags: CodeFlags,
67    pub posonlyarg_count: u32, // Number of positional-only arguments
68    pub arg_count: u32,
69    pub kwonlyarg_count: u32,
70    pub source_path: String,
71    pub first_line_number: LineNumber,
72    pub obj_name: String, // Name of the object that created this code object
73
74    pub blocks: Vec<Block>,
75    pub current_block: BlockIdx,
76    pub constants: IndexSet<ConstantData>,
77    pub name_cache: IndexSet<String>,
78    pub varname_cache: IndexSet<String>,
79    pub cellvar_cache: IndexSet<String>,
80    pub freevar_cache: IndexSet<String>,
81}
82impl CodeInfo {
83    pub fn finalize_code(mut self, optimize: u8) -> CodeObject {
84        if optimize > 0 {
85            self.dce();
86        }
87
88        let max_stackdepth = self.max_stackdepth();
89        let cell2arg = self.cell2arg();
90
91        let CodeInfo {
92            flags,
93            posonlyarg_count,
94            arg_count,
95            kwonlyarg_count,
96            source_path,
97            first_line_number,
98            obj_name,
99
100            mut blocks,
101            current_block: _,
102            constants,
103            name_cache,
104            varname_cache,
105            cellvar_cache,
106            freevar_cache,
107        } = self;
108
109        let mut instructions = Vec::new();
110        let mut locations = Vec::new();
111
112        let mut block_to_offset = vec![Label(0); blocks.len()];
113        loop {
114            let mut num_instructions = 0;
115            for (idx, block) in iter_blocks(&blocks) {
116                block_to_offset[idx.idx()] = Label(num_instructions as u32);
117                for instr in &block.instructions {
118                    num_instructions += instr.arg.instr_size()
119                }
120            }
121
122            instructions.reserve_exact(num_instructions);
123            locations.reserve_exact(num_instructions);
124
125            let mut recompile_extended_arg = false;
126            let mut next_block = BlockIdx(0);
127            while next_block != BlockIdx::NULL {
128                let block = &mut blocks[next_block];
129                for info in &mut block.instructions {
130                    let (op, arg, target) = (info.instr, &mut info.arg, info.target);
131                    if target != BlockIdx::NULL {
132                        let new_arg = OpArg(block_to_offset[target.idx()].0);
133                        recompile_extended_arg |= new_arg.instr_size() != arg.instr_size();
134                        *arg = new_arg;
135                    }
136                    let (extras, lo_arg) = arg.split();
137                    locations.extend(std::iter::repeat(info.location).take(arg.instr_size()));
138                    instructions.extend(
139                        extras
140                            .map(|byte| CodeUnit::new(Instruction::ExtendedArg, byte))
141                            .chain([CodeUnit { op, arg: lo_arg }]),
142                    );
143                }
144                next_block = block.next;
145            }
146
147            if !recompile_extended_arg {
148                break;
149            }
150
151            instructions.clear();
152            locations.clear()
153        }
154
155        CodeObject {
156            flags,
157            posonlyarg_count,
158            arg_count,
159            kwonlyarg_count,
160            source_path,
161            first_line_number: Some(first_line_number),
162            obj_name,
163
164            max_stackdepth,
165            instructions: instructions.into_boxed_slice(),
166            locations: locations.into_boxed_slice(),
167            constants: constants.into_iter().collect(),
168            names: name_cache.into_iter().collect(),
169            varnames: varname_cache.into_iter().collect(),
170            cellvars: cellvar_cache.into_iter().collect(),
171            freevars: freevar_cache.into_iter().collect(),
172            cell2arg,
173        }
174    }
175
176    fn cell2arg(&self) -> Option<Box<[i32]>> {
177        if self.cellvar_cache.is_empty() {
178            return None;
179        }
180
181        let total_args = self.arg_count
182            + self.kwonlyarg_count
183            + self.flags.contains(CodeFlags::HAS_VARARGS) as u32
184            + self.flags.contains(CodeFlags::HAS_VARKEYWORDS) as u32;
185
186        let mut found_cellarg = false;
187        let cell2arg = self
188            .cellvar_cache
189            .iter()
190            .map(|var| {
191                self.varname_cache
192                    .get_index_of(var)
193                    // check that it's actually an arg
194                    .filter(|i| *i < total_args as usize)
195                    .map_or(-1, |i| {
196                        found_cellarg = true;
197                        i as i32
198                    })
199            })
200            .collect::<Box<[_]>>();
201
202        if found_cellarg {
203            Some(cell2arg)
204        } else {
205            None
206        }
207    }
208
209    fn dce(&mut self) {
210        for block in &mut self.blocks {
211            let mut last_instr = None;
212            for (i, ins) in block.instructions.iter().enumerate() {
213                if ins.instr.unconditional_branch() {
214                    last_instr = Some(i);
215                    break;
216                }
217            }
218            if let Some(i) = last_instr {
219                block.instructions.truncate(i + 1);
220            }
221        }
222    }
223
224    fn max_stackdepth(&self) -> u32 {
225        let mut maxdepth = 0u32;
226        let mut stack = Vec::with_capacity(self.blocks.len());
227        let mut start_depths = vec![u32::MAX; self.blocks.len()];
228        start_depths[0] = 0;
229        stack.push(BlockIdx(0));
230        const DEBUG: bool = false;
231        'process_blocks: while let Some(block) = stack.pop() {
232            let mut depth = start_depths[block.idx()];
233            if DEBUG {
234                eprintln!("===BLOCK {}===", block.0);
235            }
236            let block = &self.blocks[block];
237            for ins in &block.instructions {
238                let instr = &ins.instr;
239                let effect = instr.stack_effect(ins.arg, false);
240                if DEBUG {
241                    let display_arg = if ins.target == BlockIdx::NULL {
242                        ins.arg
243                    } else {
244                        OpArg(ins.target.0)
245                    };
246                    let instr_display = instr.display(display_arg, self);
247                    eprint!("{instr_display}: {depth} {effect:+} => ");
248                }
249                let new_depth = depth.checked_add_signed(effect).unwrap();
250                if DEBUG {
251                    eprintln!("{new_depth}");
252                }
253                if new_depth > maxdepth {
254                    maxdepth = new_depth
255                }
256                // we don't want to worry about Break/Continue, they use unwinding to jump to
257                // their targets and as such the stack size is taken care of in frame.rs by setting
258                // it back to the level it was at when SetupLoop was run
259                if ins.target != BlockIdx::NULL
260                    && !matches!(
261                        instr,
262                        Instruction::Continue { .. } | Instruction::Break { .. }
263                    )
264                {
265                    let effect = instr.stack_effect(ins.arg, true);
266                    let target_depth = depth.checked_add_signed(effect).unwrap();
267                    if target_depth > maxdepth {
268                        maxdepth = target_depth
269                    }
270                    stackdepth_push(&mut stack, &mut start_depths, ins.target, target_depth);
271                }
272                depth = new_depth;
273                if instr.unconditional_branch() {
274                    continue 'process_blocks;
275                }
276            }
277            stackdepth_push(&mut stack, &mut start_depths, block.next, depth);
278        }
279        if DEBUG {
280            eprintln!("DONE: {maxdepth}");
281        }
282        maxdepth
283    }
284}
285
286impl InstrDisplayContext for CodeInfo {
287    type Constant = ConstantData;
288    fn get_constant(&self, i: usize) -> &ConstantData {
289        &self.constants[i]
290    }
291    fn get_name(&self, i: usize) -> &str {
292        self.name_cache[i].as_ref()
293    }
294    fn get_varname(&self, i: usize) -> &str {
295        self.varname_cache[i].as_ref()
296    }
297    fn get_cell_name(&self, i: usize) -> &str {
298        self.cellvar_cache
299            .get_index(i)
300            .unwrap_or_else(|| &self.freevar_cache[i - self.cellvar_cache.len()])
301            .as_ref()
302    }
303}
304
305fn stackdepth_push(
306    stack: &mut Vec<BlockIdx>,
307    start_depths: &mut [u32],
308    target: BlockIdx,
309    depth: u32,
310) {
311    let block_depth = &mut start_depths[target.idx()];
312    if *block_depth == u32::MAX || depth > *block_depth {
313        *block_depth = depth;
314        stack.push(target);
315    }
316}
317
318fn iter_blocks(blocks: &[Block]) -> impl Iterator<Item = (BlockIdx, &Block)> + '_ {
319    let mut next = BlockIdx(0);
320    std::iter::from_fn(move || {
321        if next == BlockIdx::NULL {
322            return None;
323        }
324        let (idx, b) = (next, &blocks[next]);
325        next = b.next;
326        Some((idx, b))
327    })
328}