rustpython_bytecode/
bytecode.rs

//! Implements Python as a virtual machine with bytecodes. This module
//! implements the bytecode structure.
3
4use bitflags::bitflags;
5use itertools::Itertools;
6use num_bigint::BigInt;
7use num_complex::Complex64;
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet};
10use std::fmt;
11
12/// Sourcecode location.
13#[derive(Clone, Copy, Debug, Default, PartialEq, Serialize, Deserialize)]
14pub struct Location {
15    row: usize,
16    column: usize,
17}
18
19impl Location {
20    pub fn new(row: usize, column: usize) -> Self {
21        Location { row, column }
22    }
23
24    pub fn row(&self) -> usize {
25        self.row
26    }
27
28    pub fn column(&self) -> usize {
29        self.column
30    }
31}
32
33/// Primary container of a single code object. Each python function has
34/// a codeobject. Also a module has a codeobject.
35#[derive(Clone, PartialEq, Serialize, Deserialize)]
36pub struct CodeObject {
37    pub instructions: Vec<Instruction>,
38    /// Jump targets.
39    pub label_map: HashMap<Label, usize>,
40    pub locations: Vec<Location>,
41    pub flags: CodeFlags,
42    pub posonlyarg_count: usize, // Number of positional-only arguments
43    pub arg_names: Vec<String>,  // Names of positional arguments
44    pub varargs_name: Option<String>, // *args or *
45    pub kwonlyarg_names: Vec<String>,
46    pub varkeywords_name: Option<String>, // **kwargs or **
47    pub source_path: String,
48    pub first_line_number: usize,
49    pub obj_name: String, // Name of the object that created this code object
50}
51
52bitflags! {
53    #[derive(Serialize, Deserialize)]
54    pub struct CodeFlags: u16 {
55        const HAS_DEFAULTS = 0x01;
56        const HAS_KW_ONLY_DEFAULTS = 0x02;
57        const HAS_ANNOTATIONS = 0x04;
58        const NEW_LOCALS = 0x08;
59        const IS_GENERATOR = 0x10;
60        const IS_COROUTINE = 0x20;
61        const HAS_VARARGS = 0x40;
62        const HAS_VARKEYWORDS = 0x80;
63    }
64}
65
66impl Default for CodeFlags {
67    fn default() -> Self {
68        Self::NEW_LOCALS
69    }
70}
71
72impl CodeFlags {
73    pub const NAME_MAPPING: &'static [(&'static str, CodeFlags)] = &[
74        ("GENERATOR", CodeFlags::IS_GENERATOR),
75        ("COROUTINE", CodeFlags::IS_COROUTINE),
76        (
77            "ASYNC_GENERATOR",
78            Self::from_bits_truncate(Self::IS_GENERATOR.bits | Self::IS_COROUTINE.bits),
79        ),
80        ("VARARGS", CodeFlags::HAS_VARARGS),
81        ("VARKEYWORDS", CodeFlags::HAS_VARKEYWORDS),
82    ];
83}
84
85#[derive(Serialize, Debug, Deserialize, Clone, Copy, PartialEq, Eq, Hash)]
86pub struct Label(usize);
87
88impl Label {
89    pub fn new(label: usize) -> Self {
90        Label(label)
91    }
92}
93
94#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
95/// An indication where the name must be accessed.
96pub enum NameScope {
97    /// The name will be in the local scope.
98    Local,
99
100    /// The name will be located in scope surrounding the current scope.
101    NonLocal,
102
103    /// The name will be in global scope.
104    Global,
105
106    /// The name will be located in any scope between the current scope and the top scope.
107    Free,
108}
109
110/// Transforms a value prior to formatting it.
111#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
112pub enum ConversionFlag {
113    /// Converts by calling `str(<value>)`.
114    Str,
115    /// Converts by calling `ascii(<value>)`.
116    Ascii,
117    /// Converts by calling `repr(<value>)`.
118    Repr,
119}
120
121/// A Single bytecode instruction.
122#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
123pub enum Instruction {
124    Import {
125        name: Option<String>,
126        symbols: Vec<String>,
127        level: usize,
128    },
129    ImportStar,
130    ImportFrom {
131        name: String,
132    },
133    LoadName {
134        name: String,
135        scope: NameScope,
136    },
137    StoreName {
138        name: String,
139        scope: NameScope,
140    },
141    DeleteName {
142        name: String,
143    },
144    Subscript,
145    StoreSubscript,
146    DeleteSubscript,
147    StoreAttr {
148        name: String,
149    },
150    DeleteAttr {
151        name: String,
152    },
153    LoadConst {
154        value: Constant,
155    },
156    UnaryOperation {
157        op: UnaryOperator,
158    },
159    BinaryOperation {
160        op: BinaryOperator,
161        inplace: bool,
162    },
163    LoadAttr {
164        name: String,
165    },
166    CompareOperation {
167        op: ComparisonOperator,
168    },
169    Pop,
170    Rotate {
171        amount: usize,
172    },
173    Duplicate,
174    GetIter,
175    Continue,
176    Break,
177    Jump {
178        target: Label,
179    },
180    /// Pop the top of the stack, and jump if this value is true.
181    JumpIfTrue {
182        target: Label,
183    },
184    /// Pop the top of the stack, and jump if this value is false.
185    JumpIfFalse {
186        target: Label,
187    },
188    /// Peek at the top of the stack, and jump if this value is true.
189    /// Otherwise, pop top of stack.
190    JumpIfTrueOrPop {
191        target: Label,
192    },
193    /// Peek at the top of the stack, and jump if this value is false.
194    /// Otherwise, pop top of stack.
195    JumpIfFalseOrPop {
196        target: Label,
197    },
198    MakeFunction,
199    CallFunction {
200        typ: CallType,
201    },
202    ForIter {
203        target: Label,
204    },
205    ReturnValue,
206    YieldValue,
207    YieldFrom,
208    SetupLoop {
209        start: Label,
210        end: Label,
211    },
212
213    /// Setup a finally handler, which will be called whenever one of this events occurs:
214    /// - the block is popped
215    /// - the function returns
216    /// - an exception is returned
217    SetupFinally {
218        handler: Label,
219    },
220
221    /// Enter a finally block, without returning, excepting, just because we are there.
222    EnterFinally,
223
224    /// Marker bytecode for the end of a finally sequence.
225    /// When this bytecode is executed, the eval loop does one of those things:
226    /// - Continue at a certain bytecode position
227    /// - Propagate the exception
228    /// - Return from a function
229    /// - Do nothing at all, just continue
230    EndFinally,
231
232    SetupExcept {
233        handler: Label,
234    },
235    SetupWith {
236        end: Label,
237    },
238    WithCleanupStart,
239    WithCleanupFinish,
240    PopBlock,
241    Raise {
242        argc: usize,
243    },
244    BuildString {
245        size: usize,
246    },
247    BuildTuple {
248        size: usize,
249        unpack: bool,
250    },
251    BuildList {
252        size: usize,
253        unpack: bool,
254    },
255    BuildSet {
256        size: usize,
257        unpack: bool,
258    },
259    BuildMap {
260        size: usize,
261        unpack: bool,
262        for_call: bool,
263    },
264    BuildSlice {
265        size: usize,
266    },
267    ListAppend {
268        i: usize,
269    },
270    SetAdd {
271        i: usize,
272    },
273    MapAdd {
274        i: usize,
275    },
276
277    PrintExpr,
278    LoadBuildClass,
279    UnpackSequence {
280        size: usize,
281    },
282    UnpackEx {
283        before: usize,
284        after: usize,
285    },
286    FormatValue {
287        conversion: Option<ConversionFlag>,
288    },
289    PopException,
290    Reverse {
291        amount: usize,
292    },
293    GetAwaitable,
294    BeforeAsyncWith,
295    SetupAsyncWith {
296        end: Label,
297    },
298    GetAIter,
299    GetANext,
300
301    /// Reverse order evaluation in MapAdd
302    /// required to support named expressions of Python 3.8 in dict comprehension
303    /// today (including Py3.9) only required in dict comprehension.
304    MapAddRev {
305        i: usize,
306    },
307}
308
309use self::Instruction::*;
310
311#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
312pub enum CallType {
313    Positional(usize),
314    Keyword(usize),
315    Ex(bool),
316}
317
318#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
319pub enum Constant {
320    Integer { value: BigInt },
321    Float { value: f64 },
322    Complex { value: Complex64 },
323    Boolean { value: bool },
324    String { value: String },
325    Bytes { value: Vec<u8> },
326    Code { code: Box<CodeObject> },
327    Tuple { elements: Vec<Constant> },
328    None,
329    Ellipsis,
330}
331
332#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
333pub enum ComparisonOperator {
334    Greater,
335    GreaterOrEqual,
336    Less,
337    LessOrEqual,
338    Equal,
339    NotEqual,
340    In,
341    NotIn,
342    Is,
343    IsNot,
344    ExceptionMatch,
345}
346
347#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
348pub enum BinaryOperator {
349    Power,
350    Multiply,
351    MatrixMultiply,
352    Divide,
353    FloorDivide,
354    Modulo,
355    Add,
356    Subtract,
357    Lshift,
358    Rshift,
359    And,
360    Xor,
361    Or,
362}
363
364#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
365pub enum UnaryOperator {
366    Not,
367    Invert,
368    Minus,
369    Plus,
370}
371
372/*
373Maintain a stack of blocks on the VM.
374pub enum BlockType {
375    Loop,
376    Except,
377}
378*/
379
380impl CodeObject {
381    #[allow(clippy::too_many_arguments)]
382    pub fn new(
383        flags: CodeFlags,
384        posonlyarg_count: usize,
385        arg_names: Vec<String>,
386        varargs_name: Option<String>,
387        kwonlyarg_names: Vec<String>,
388        varkeywords_name: Option<String>,
389        source_path: String,
390        first_line_number: usize,
391        obj_name: String,
392    ) -> CodeObject {
393        CodeObject {
394            instructions: Vec::new(),
395            label_map: HashMap::new(),
396            locations: Vec::new(),
397            flags,
398            posonlyarg_count,
399            arg_names,
400            varargs_name,
401            kwonlyarg_names,
402            varkeywords_name,
403            source_path,
404            first_line_number,
405            obj_name,
406        }
407    }
408
409    /// Load a code object from bytes
410    pub fn from_bytes(data: &[u8]) -> Result<Self, Box<dyn std::error::Error>> {
411        let data = lz4_compress::decompress(data)?;
412        bincode::deserialize::<Self>(&data).map_err(|e| e.into())
413    }
414
415    /// Serialize this bytecode to bytes.
416    pub fn to_bytes(&self) -> Vec<u8> {
417        let data = bincode::serialize(&self).expect("Code object must be serializable");
418        lz4_compress::compress(&data)
419    }
420
421    pub fn get_constants(&self) -> impl Iterator<Item = &Constant> {
422        self.instructions.iter().filter_map(|x| {
423            if let Instruction::LoadConst { value } = x {
424                Some(value)
425            } else {
426                None
427            }
428        })
429    }
430
431    pub fn varnames(&self) -> impl Iterator<Item = &str> + '_ {
432        self.arg_names
433            .iter()
434            .map(String::as_str)
435            .chain(self.kwonlyarg_names.iter().map(String::as_str))
436            .chain(
437                self.instructions
438                    .iter()
439                    .filter_map(|i| match i {
440                        Instruction::LoadName {
441                            name,
442                            scope: NameScope::Local,
443                        }
444                        | Instruction::StoreName {
445                            name,
446                            scope: NameScope::Local,
447                        } => Some(name.as_str()),
448                        _ => None,
449                    })
450                    .unique(),
451            )
452    }
453
454    fn display_inner(
455        &self,
456        f: &mut fmt::Formatter,
457        expand_codeobjects: bool,
458        level: usize,
459    ) -> fmt::Result {
460        let label_targets: HashSet<&usize> = self.label_map.values().collect();
461        for (offset, instruction) in self.instructions.iter().enumerate() {
462            let arrow = if label_targets.contains(&offset) {
463                ">>"
464            } else {
465                "  "
466            };
467            for _ in 0..level {
468                write!(f, "          ")?;
469            }
470            write!(f, "{} {:5} ", arrow, offset)?;
471            instruction.fmt_dis(f, &self.label_map, expand_codeobjects, level)?;
472        }
473        Ok(())
474    }
475
476    pub fn display_expand_codeobjects<'a>(&'a self) -> impl fmt::Display + 'a {
477        struct Display<'a>(&'a CodeObject);
478        impl fmt::Display for Display<'_> {
479            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
480                self.0.display_inner(f, true, 1)
481            }
482        }
483        Display(self)
484    }
485}
486
487impl fmt::Display for CodeObject {
488    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
489        self.display_inner(f, false, 1)
490    }
491}
492
493impl Instruction {
494    fn fmt_dis(
495        &self,
496        f: &mut fmt::Formatter,
497        label_map: &HashMap<Label, usize>,
498        expand_codeobjects: bool,
499        level: usize,
500    ) -> fmt::Result {
501        macro_rules! w {
502            ($variant:ident) => {
503                write!(f, "{:20}\n", stringify!($variant))
504            };
505            ($variant:ident, $var:expr) => {
506                write!(f, "{:20} ({})\n", stringify!($variant), $var)
507            };
508            ($variant:ident, $var1:expr, $var2:expr) => {
509                write!(f, "{:20} ({}, {})\n", stringify!($variant), $var1, $var2)
510            };
511            ($variant:ident, $var1:expr, $var2:expr, $var3:expr) => {
512                write!(
513                    f,
514                    "{:20} ({}, {}, {})\n",
515                    stringify!($variant),
516                    $var1,
517                    $var2,
518                    $var3
519                )
520            };
521        }
522
523        match self {
524            Import {
525                name,
526                symbols,
527                level,
528            } => w!(
529                Import,
530                format!("{:?}", name),
531                format!("{:?}", symbols),
532                level
533            ),
534            ImportStar => w!(ImportStar),
535            ImportFrom { name } => w!(ImportFrom, name),
536            LoadName { name, scope } => w!(LoadName, name, format!("{:?}", scope)),
537            StoreName { name, scope } => w!(StoreName, name, format!("{:?}", scope)),
538            DeleteName { name } => w!(DeleteName, name),
539            Subscript => w!(Subscript),
540            StoreSubscript => w!(StoreSubscript),
541            DeleteSubscript => w!(DeleteSubscript),
542            StoreAttr { name } => w!(StoreAttr, name),
543            DeleteAttr { name } => w!(DeleteAttr, name),
544            LoadConst { value } => match value {
545                Constant::Code { code } if expand_codeobjects => {
546                    writeln!(f, "LoadConst ({:?}):", code)?;
547                    code.display_inner(f, true, level + 1)?;
548                    Ok(())
549                }
550                _ => w!(LoadConst, value),
551            },
552            UnaryOperation { op } => w!(UnaryOperation, format!("{:?}", op)),
553            BinaryOperation { op, inplace } => w!(BinaryOperation, format!("{:?}", op), inplace),
554            LoadAttr { name } => w!(LoadAttr, name),
555            CompareOperation { op } => w!(CompareOperation, format!("{:?}", op)),
556            Pop => w!(Pop),
557            Rotate { amount } => w!(Rotate, amount),
558            Duplicate => w!(Duplicate),
559            GetIter => w!(GetIter),
560            Continue => w!(Continue),
561            Break => w!(Break),
562            Jump { target } => w!(Jump, label_map[target]),
563            JumpIfTrue { target } => w!(JumpIfTrue, label_map[target]),
564            JumpIfFalse { target } => w!(JumpIfFalse, label_map[target]),
565            JumpIfTrueOrPop { target } => w!(JumpIfTrueOrPop, label_map[target]),
566            JumpIfFalseOrPop { target } => w!(JumpIfFalseOrPop, label_map[target]),
567            MakeFunction => w!(MakeFunction),
568            CallFunction { typ } => w!(CallFunction, format!("{:?}", typ)),
569            ForIter { target } => w!(ForIter, label_map[target]),
570            ReturnValue => w!(ReturnValue),
571            YieldValue => w!(YieldValue),
572            YieldFrom => w!(YieldFrom),
573            SetupLoop { start, end } => w!(SetupLoop, label_map[start], label_map[end]),
574            SetupExcept { handler } => w!(SetupExcept, label_map[handler]),
575            SetupFinally { handler } => w!(SetupFinally, label_map[handler]),
576            EnterFinally => w!(EnterFinally),
577            EndFinally => w!(EndFinally),
578            SetupWith { end } => w!(SetupWith, label_map[end]),
579            WithCleanupStart => w!(WithCleanupStart),
580            WithCleanupFinish => w!(WithCleanupFinish),
581            BeforeAsyncWith => w!(BeforeAsyncWith),
582            SetupAsyncWith { end } => w!(SetupAsyncWith, label_map[end]),
583            PopBlock => w!(PopBlock),
584            Raise { argc } => w!(Raise, argc),
585            BuildString { size } => w!(BuildString, size),
586            BuildTuple { size, unpack } => w!(BuildTuple, size, unpack),
587            BuildList { size, unpack } => w!(BuildList, size, unpack),
588            BuildSet { size, unpack } => w!(BuildSet, size, unpack),
589            BuildMap {
590                size,
591                unpack,
592                for_call,
593            } => w!(BuildMap, size, unpack, for_call),
594            BuildSlice { size } => w!(BuildSlice, size),
595            ListAppend { i } => w!(ListAppend, i),
596            SetAdd { i } => w!(SetAdd, i),
597            MapAddRev { i } => w!(MapAddRev, i),
598            PrintExpr => w!(PrintExpr),
599            LoadBuildClass => w!(LoadBuildClass),
600            UnpackSequence { size } => w!(UnpackSequence, size),
601            UnpackEx { before, after } => w!(UnpackEx, before, after),
602            FormatValue { .. } => w!(FormatValue), // TODO: write conversion
603            PopException => w!(PopException),
604            Reverse { amount } => w!(Reverse, amount),
605            GetAwaitable => w!(GetAwaitable),
606            GetAIter => w!(GetAIter),
607            GetANext => w!(GetANext),
608            MapAdd { i } => w!(MapAdd, i),
609        }
610    }
611}
612
613impl fmt::Display for Constant {
614    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
615        match self {
616            Constant::Integer { value } => write!(f, "{}", value),
617            Constant::Float { value } => write!(f, "{}", value),
618            Constant::Complex { value } => write!(f, "{}", value),
619            Constant::Boolean { value } => write!(f, "{}", value),
620            Constant::String { value } => write!(f, "{:?}", value),
621            Constant::Bytes { value } => write!(f, "{:?}", value),
622            Constant::Code { code } => write!(f, "{:?}", code),
623            Constant::Tuple { elements } => write!(
624                f,
625                "({})",
626                elements
627                    .iter()
628                    .map(|e| format!("{}", e))
629                    .collect::<Vec<_>>()
630                    .join(", ")
631            ),
632            Constant::None => write!(f, "None"),
633            Constant::Ellipsis => write!(f, "Ellipsis"),
634        }
635    }
636}
637
638impl fmt::Debug for CodeObject {
639    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
640        write!(
641            f,
642            "<code object {} at ??? file {:?}, line {}>",
643            self.obj_name, self.source_path, self.first_line_number
644        )
645    }
646}
647
648pub struct FrozenModule {
649    pub code: CodeObject,
650    pub package: bool,
651}