// mica/ll/bytecode/chunk.rs

//! Chunks of bytecode.

use std::{fmt, mem::size_of, rc::Rc};

use super::{EncodeInstruction, Opcode, Opr24};
use crate::ll::error::Location;

8/// A chunk of bytecode.
9pub struct Chunk {
10    /// The name of the module where the chunk is located.
11    pub module_name: Rc<str>,
12    /// The actual bytecode.
13    bytes: Vec<u8>,
14    /// Locations. These are placed at multiples of four bytes (Opcode::INSTRUCTION_SIZE).
15    locations: Vec<Location>,
16    /// The location emitted for each quad-byte on calls to `push`.
17    pub codegen_location: Location,
18    /// How many stack slots to preallocate with `nil` values for variable lookups.
19    pub preallocate_stack_slots: u32,
20}
21
22impl Chunk {
23    /// Constructs an empty chunk.
24    pub fn new(module_name: Rc<str>) -> Self {
25        Self {
26            module_name,
27            bytes: Vec::new(),
28            locations: Vec::new(),
29            codegen_location: Location::UNINIT,
30            preallocate_stack_slots: 0,
31        }
32    }
33
34    /// Pushes an encodable piece of data into the chunk. Returns where it's located.
35    pub fn emit(&mut self, instruction: impl EncodeInstruction) -> usize {
36        let position = self.bytes.len();
37        self.bytes.extend_from_slice(&instruction.encode_instruction());
38        // Only push one location, since all encoded instructions are 4 bytes long.
39        self.locations.push(self.codegen_location);
40        position
41    }
42
43    /// Pushes a number into the chunk.
44    pub fn emit_number(&mut self, number: f64) {
45        let bytes = number.to_le_bytes();
46        self.bytes.extend_from_slice(&bytes);
47        // 8 bytes, so push twice.
48        self.locations.push(self.codegen_location);
49        self.locations.push(self.codegen_location);
50    }
51
52    /// Pushes a string into the chunk.
53    ///
54    /// The string is padded with zeroes such that opcodes are aligned to four bytes.
55    pub fn emit_string(&mut self, string: &str) {
56        let start = self.len();
57
58        // I don't know of any 128-bit targets so this cast should be OK. Also, it isn't physically
59        // possible to store a string as large as 2^64 bytes.
60        let len = string.len() as u64;
61        let len_bytes: [u8; 8] = len.to_le_bytes();
62        self.bytes.extend_from_slice(&len_bytes);
63        self.bytes.extend(string.as_bytes());
64        let padded_len = (string.len() + 3) & !3;
65        let padding = padded_len - string.len();
66        for _ in 0..padding {
67            self.bytes.push(0);
68        }
69
70        let end = self.len();
71        for _ in (0..end - start).step_by(4) {
72            self.locations.push(self.codegen_location);
73        }
74    }
75
76    /// Patches the instruction at the given position.
77    pub fn patch(&mut self, position: usize, instruction: impl EncodeInstruction) {
78        let bytes = instruction.encode_instruction();
79        self.bytes[position..position + Opcode::INSTRUCTION_SIZE].copy_from_slice(&bytes);
80    }
81
82    /// Reads an instruction.
83    ///
84    /// # Safety
85    /// Assumes that `pc` is within the chunk's bounds and that the opcode at `pc` is valid.
86    pub unsafe fn read_instruction(&self, pc: &mut usize) -> (Opcode, Opr24) {
87        let bytes = &self.bytes[*pc..*pc + Opcode::INSTRUCTION_SIZE];
88        let mut bytes = <[u8; Opcode::INSTRUCTION_SIZE]>::try_from(bytes).unwrap();
89        let opcode: Opcode = std::mem::transmute(bytes[0]);
90        bytes[0] = 0;
91        let operand = Opr24 { bytes: [bytes[1], bytes[2], bytes[3]] };
92        *pc += Opcode::INSTRUCTION_SIZE;
93        (opcode, operand)
94    }
95
96    /// Reads a number.
97    ///
98    /// # Safety
99    /// Assumes that `pc` is within the chunk's bounds, skipping any checks.
100    pub unsafe fn read_number(&self, pc: &mut usize) -> f64 {
101        const SIZE: usize = std::mem::size_of::<f64>();
102        let bytes = &self.bytes[*pc..*pc + SIZE];
103        let bytes: [u8; SIZE] = bytes.try_into().unwrap();
104        let number = f64::from_le_bytes(bytes);
105        *pc += SIZE;
106        number
107    }
108
109    /// Reads a string.
110    ///
111    /// # Safety
112    /// This assumes the original string was encoded as proper UTF-8 (which it should
113    /// have been considering the only way to write a string is to use a `&str` in the first place).
114    pub unsafe fn read_string(&self, pc: &mut usize) -> &str {
115        let len_bytes: [u8; 8] = self.bytes[*pc..*pc + size_of::<u64>()].try_into().unwrap();
116        let len = u64::from_le_bytes(len_bytes);
117        *pc += size_of::<u64>();
118        let string = &self.bytes[*pc..*pc + len as usize];
119        let padded_len = (len + 3) & !3;
120        *pc += padded_len as usize;
121        std::str::from_utf8_unchecked(string)
122    }
123
124    /// Returns the length of the chunk (in bytes).
125    pub fn len(&self) -> usize {
126        self.bytes.len()
127    }
128
129    /// Returns whether the chunk is empty.
130    pub fn is_empty(&self) -> bool {
131        self.len() == 0
132    }
133
134    /// Returns the location (in file) of the program counter.
135    pub fn location(&self, pc: usize) -> Location {
136        let index = pc >> 2;
137        self.locations.get(index).copied().unwrap_or(Location::UNINIT)
138    }
139
140    /// Returns whether the given program counter is at the end of the chunk.
141    pub fn at_end(&self, pc: usize) -> bool {
142        pc >= self.bytes.len()
143    }
144}
145
146impl fmt::Debug for Chunk {
147    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148        f.debug_struct("Chunk")
149            .field("module_name", &self.module_name)
150            .field("preallocate_stack_slots", &self.preallocate_stack_slots)
151            .finish()?;
152        writeln!(f)?;
153
154        let mut pc = 0;
155        while !self.at_end(pc) {
156            let location = self.location(pc);
157            let show_pc = pc;
158            let (opcode, operand) = unsafe { self.read_instruction(&mut pc) };
159            write!(f, "{show_pc:06x} {location} {opcode:?}({operand:?}) ")?;
160
161            #[allow(clippy::single_match)]
162            match opcode {
163                Opcode::PushNumber => write!(f, "{}", unsafe { self.read_number(&mut pc) })?,
164                Opcode::PushString | Opcode::CreateType => {
165                    write!(f, "{:?}", unsafe { self.read_string(&mut pc) })?
166                }
167                Opcode::JumpForward | Opcode::JumpForwardIfFalsy | Opcode::JumpForwardIfTruthy => {
168                    write!(
169                        f,
170                        "-> {:06x}",
171                        pc + u32::from(operand) as usize + Opcode::INSTRUCTION_SIZE
172                    )?;
173                }
174                Opcode::JumpBackward => {
175                    write!(
176                        f,
177                        "-> {:06x}",
178                        pc - u32::from(operand) as usize + Opcode::INSTRUCTION_SIZE
179                    )?;
180                }
181                Opcode::CallMethod => {
182                    let (method_index, argument_count) = operand.unpack();
183                    let operand = u32::from(operand);
184                    write!(f, "{operand:06x}:[mi={method_index}, ac={argument_count}]")?;
185                }
186                _ => (),
187            }
188
189            writeln!(f)?;
190        }
191        Ok(())
192    }
193}