compiler/
compiler.rs

1use object::builtins::BuiltIns;
2use std::rc::Rc;
3
4use object::Object;
5use parser::ast::{BlockStatement, Expression, Literal, Node, Statement};
6use parser::lexer::token::TokenKind;
7
8use crate::op_code::Opcode::*;
9use crate::op_code::{cast_u8_to_opcode, make_instructions, Instructions, Opcode};
10use crate::symbol_table::{Symbol, SymbolScope, SymbolTable};
11
12struct CompilationScope {
13    instructions: Instructions,
14    last_instruction: EmittedInstruction,
15    previous_instruction: EmittedInstruction,
16}
17
18pub struct Compiler {
19    pub constants: Vec<Rc<Object>>,
20    pub symbol_table: SymbolTable,
21    scopes: Vec<CompilationScope>,
22    scope_index: usize,
23}
24
25pub struct Bytecode {
26    pub instructions: Instructions,
27    pub constants: Vec<Rc<Object>>,
28}
29
30#[derive(Clone)]
31pub struct EmittedInstruction {
32    pub opcode: Opcode,
33    pub position: usize,
34}
35
36type CompileError = String;
37
38impl Compiler {
39    pub fn new() -> Compiler {
40        let main_scope = CompilationScope {
41            instructions: Instructions { data: vec![] },
42            last_instruction: EmittedInstruction { opcode: OpNull, position: 0 },
43            previous_instruction: EmittedInstruction { opcode: OpNull, position: 0 },
44        };
45
46        let mut symbol_table = SymbolTable::new();
47        for (key, value) in BuiltIns.iter().enumerate() {
48            symbol_table.define_builtin(key, value.0.to_string());
49        }
50
51        return Compiler {
52            constants: vec![],
53            symbol_table,
54            scopes: vec![main_scope],
55            scope_index: 0,
56        };
57    }
58
59    pub fn new_with_state(symbol_table: SymbolTable, constants: Vec<Rc<Object>>) -> Compiler {
60        let mut compiler = Compiler::new();
61        compiler.constants = constants;
62        compiler.symbol_table = symbol_table;
63        return compiler;
64    }
65
66    pub fn compile(&mut self, node: &Node) -> Result<Bytecode, CompileError> {
67        match node {
68            Node::Program(p) => {
69                for stmt in &p.body {
70                    self.compile_stmt(stmt)?;
71                }
72            }
73            Node::Statement(s) => {
74                self.compile_stmt(s)?;
75            }
76            Node::Expression(e) => {
77                self.compile_expr(e)?;
78            }
79        }
80
81        return Ok(self.bytecode());
82    }
83
84    fn compile_stmt(&mut self, s: &Statement) -> Result<(), CompileError> {
85        match s {
86            Statement::Let(let_statement) => {
87                let symbol = self
88                    .symbol_table
89                    .define(let_statement.identifier.kind.to_string());
90                self.compile_expr(&let_statement.expr)?;
91                if symbol.scope == SymbolScope::Global {
92                    self.emit(Opcode::OpSetGlobal, &vec![symbol.index]);
93                } else {
94                    self.emit(Opcode::OpSetLocal, &vec![symbol.index]);
95                }
96                return Ok(());
97            }
98            Statement::Return(r) => {
99                self.compile_expr(&r.argument)?;
100                self.emit(Opcode::OpReturnValue, &vec![]);
101                return Ok(());
102            }
103            Statement::Expr(e) => {
104                self.compile_expr(e)?;
105                self.emit(OpPop, &vec![]);
106                return Ok(());
107            }
108        }
109    }
110
111    fn compile_expr(&mut self, e: &Expression) -> Result<(), CompileError> {
112        match e {
113            Expression::IDENTIFIER(identifier) => {
114                let symbol = self.symbol_table.resolve(identifier.name.clone());
115                match symbol {
116                    Some(symbol) => {
117                        self.load_symbol(&symbol);
118                    }
119                    None => {
120                        return Err(format!("Undefined variable '{}'", identifier.name));
121                    }
122                }
123            }
124            Expression::LITERAL(l) => match l {
125                Literal::Integer(i) => {
126                    let int = Object::Integer(i.raw);
127                    let operands = vec![self.add_constant(int)];
128                    self.emit(OpConst, &operands);
129                }
130                Literal::Boolean(i) => {
131                    if i.raw {
132                        self.emit(OpTrue, &vec![]);
133                    } else {
134                        self.emit(OpFalse, &vec![]);
135                    }
136                }
137                Literal::String(s) => {
138                    let string_object = Object::String(s.raw.clone());
139                    let operands = vec![self.add_constant(string_object)];
140                    self.emit(OpConst, &operands);
141                }
142                Literal::Array(array) => {
143                    for element in array.elements.iter() {
144                        self.compile_expr(element)?;
145                    }
146                    self.emit(OpArray, &vec![array.elements.len()]);
147                }
148                Literal::Hash(hash) => {
149                    for (key, value) in hash.elements.iter() {
150                        self.compile_expr(&key)?;
151                        self.compile_expr(&value)?;
152                    }
153                    self.emit(OpHash, &vec![hash.elements.len() * 2]);
154                }
155            },
156            Expression::PREFIX(prefix) => {
157                self.compile_expr(&prefix.operand).unwrap();
158                match prefix.op.kind {
159                    TokenKind::MINUS => {
160                        self.emit(OpMinus, &vec![]);
161                    }
162                    TokenKind::BANG => {
163                        self.emit(OpBang, &vec![]);
164                    }
165                    _ => {
166                        return Err(format!("unexpected prefix op: {}", prefix.op));
167                    }
168                }
169            }
170            Expression::INFIX(infix) => {
171                if infix.op.kind == TokenKind::LT {
172                    self.compile_expr(&infix.right).unwrap();
173                    self.compile_expr(&infix.left).unwrap();
174                    self.emit(Opcode::OpGreaterThan, &vec![]);
175                    return Ok(());
176                }
177                self.compile_expr(&infix.left).unwrap();
178                self.compile_expr(&infix.right).unwrap();
179                match infix.op.kind {
180                    TokenKind::PLUS => {
181                        self.emit(OpAdd, &vec![]);
182                    }
183                    TokenKind::MINUS => {
184                        self.emit(OpSub, &vec![]);
185                    }
186                    TokenKind::ASTERISK => {
187                        self.emit(OpMul, &vec![]);
188                    }
189                    TokenKind::SLASH => {
190                        self.emit(OpDiv, &vec![]);
191                    }
192                    TokenKind::GT => {
193                        self.emit(Opcode::OpGreaterThan, &vec![]);
194                    }
195                    TokenKind::EQ => {
196                        self.emit(Opcode::OpEqual, &vec![]);
197                    }
198                    TokenKind::NotEq => {
199                        self.emit(Opcode::OpNotEqual, &vec![]);
200                    }
201                    _ => {
202                        return Err(format!("unexpected infix op: {}", infix.op));
203                    }
204                }
205            }
206            Expression::IF(if_node) => {
207                self.compile_expr(&if_node.condition)?;
208                let jump_not_truthy = self.emit(OpJumpNotTruthy, &vec![9527]);
209                self.compile_block_statement(&if_node.consequent)?;
210                if self.last_instruction_is(OpPop) {
211                    self.remove_last_pop();
212                }
213
214                let jump_pos = self.emit(OpJump, &vec![9527]);
215
216                let after_consequence_location = self.current_instruction().data.len();
217                self.change_operand(jump_not_truthy, after_consequence_location);
218
219                if if_node.alternate.is_none() {
220                    self.emit(OpNull, &vec![]);
221                } else {
222                    self.compile_block_statement(&if_node.clone().alternate.unwrap())?;
223                    if self.last_instruction_is(OpPop) {
224                        self.remove_last_pop();
225                    }
226                }
227                let after_alternative_location = self.current_instruction().data.len();
228                self.change_operand(jump_pos, after_alternative_location);
229            }
230            Expression::Index(index) => {
231                self.compile_expr(&index.object)?;
232                self.compile_expr(&index.index)?;
233                self.emit(OpIndex, &vec![]);
234            }
235            Expression::FUNCTION(f) => {
236                self.enter_scope();
237                // f.name
238                for param in f.params.iter() {
239                    self.symbol_table.define(param.name.clone());
240                }
241                self.compile_block_statement(&f.body)?;
242                if self.last_instruction_is(OpPop) {
243                    self.replace_last_pop_with_return();
244                }
245                if !(self.last_instruction_is(OpReturnValue)) {
246                    self.emit(OpReturn, &vec![]);
247                }
248                let num_locals = self.symbol_table.num_definitions;
249                let free_symbols = self.symbol_table.free_symbols.clone();
250                let instructions = self.leave_scope();
251                for x in free_symbols.clone() {
252                    self.load_symbol(&x);
253                }
254
255                let compiled_function = Rc::from(object::CompiledFunction {
256                    instructions: instructions.data,
257                    num_locals,
258                    num_parameters: f.params.len(),
259                });
260
261                let operands = vec![self.add_constant(Object::CompiledFunction(compiled_function)), free_symbols.len()];
262                self.emit(OpClosure, &operands);
263            }
264            Expression::FunctionCall(fc) => {
265                self.compile_expr(&fc.callee)?;
266                for arg in fc.arguments.iter() {
267                    self.compile_expr(arg)?;
268                }
269                self.emit(OpCall, &vec![fc.arguments.len()]);
270            }
271        }
272
273        return Ok(());
274    }
275
276    fn load_symbol(&mut self, symbol: &Rc<Symbol>) {
277        match symbol.scope {
278            SymbolScope::Global => {
279                self.emit(OpGetGlobal, &vec![symbol.index]);
280            }
281            SymbolScope::LOCAL => {
282                self.emit(OpGetLocal, &vec![symbol.index]);
283            }
284            SymbolScope::Builtin => {
285                self.emit(OpGetBuiltin, &vec![symbol.index]);
286            }
287            SymbolScope::Free => {
288                self.emit(OpGetFree, &vec![symbol.index]);
289            }
290            SymbolScope::Function => {
291                self.emit(OpCurrentClosure, &vec![]);
292            }
293        }
294    }
295
296    pub fn bytecode(&self) -> Bytecode {
297        return Bytecode {
298            instructions: self.current_instruction().clone(),
299            constants: self.constants.clone(),
300        };
301    }
302
303    pub fn add_constant(&mut self, obj: Object) -> usize {
304        self.constants.push(Rc::new(obj));
305        return self.constants.len() - 1;
306    }
307
308    pub fn emit(&mut self, op: Opcode, operands: &Vec<usize>) -> usize {
309        let ins = make_instructions(op, operands);
310        let pos = self.add_instructions(&ins);
311        self.set_last_instruction(op, pos);
312
313        return pos;
314    }
315
316    fn compile_block_statement(
317        &mut self,
318        block_statement: &BlockStatement,
319    ) -> Result<(), CompileError> {
320        for stmt in &block_statement.body {
321            self.compile_stmt(stmt)?;
322        }
323        Ok(())
324    }
325
326    pub fn add_instructions(&mut self, ins: &Instructions) -> usize {
327        let pos = self.current_instruction().data.len();
328        let updated_ins = self.scopes[self.scope_index]
329            .instructions
330            .merge_instructions(ins);
331        self.scopes[self.scope_index].instructions = updated_ins;
332        return pos;
333    }
334
335    fn set_last_instruction(&mut self, op: Opcode, pos: usize) {
336        let previous_instruction = self.scopes[self.scope_index].last_instruction.clone();
337        let last_instruction = EmittedInstruction { opcode: op, position: pos };
338        self.scopes[self.scope_index].last_instruction = last_instruction;
339        self.scopes[self.scope_index].previous_instruction = previous_instruction;
340    }
341
342    fn last_instruction_is(&self, op: Opcode) -> bool {
343        if self.current_instruction().data.len() == 0 {
344            return false;
345        }
346        return self.scopes[self.scope_index].last_instruction.opcode == op;
347    }
348
349    fn remove_last_pop(&mut self) {
350        let last = self.scopes[self.scope_index].last_instruction.clone();
351        let previous = self.scopes[self.scope_index].previous_instruction.clone();
352
353        let old = self.current_instruction().data.clone();
354        let new = old[..last.position].to_vec();
355
356        self.scopes[self.scope_index].instructions.data = new;
357        self.scopes[self.scope_index].last_instruction = previous;
358    }
359
360    fn replace_instruction(&mut self, pos: usize, new_instruction: &Instructions) {
361        let ins = &mut self.scopes[self.scope_index].instructions;
362        for i in 0..new_instruction.data.len() {
363            ins.data[pos + i] = new_instruction.data[i];
364        }
365    }
366
367    fn replace_last_pop_with_return(&mut self) {
368        let last_pos = self.scopes[self.scope_index].last_instruction.position;
369        self.replace_instruction(last_pos, &make_instructions(OpReturnValue, &vec![]));
370        self.scopes[self.scope_index].last_instruction.opcode = OpReturnValue;
371    }
372
373    fn change_operand(&mut self, pos: usize, operand: usize) {
374        let op = cast_u8_to_opcode(self.current_instruction().data[pos]);
375        let ins = make_instructions(op, &vec![operand]);
376        self.replace_instruction(pos, &ins);
377    }
378
379    fn current_instruction(&self) -> &Instructions {
380        return &self.scopes[self.scope_index].instructions;
381    }
382
383    fn enter_scope(&mut self) {
384        let scope = CompilationScope {
385            instructions: Instructions { data: vec![] },
386            last_instruction: EmittedInstruction { opcode: OpNull, position: 0 },
387            previous_instruction: EmittedInstruction { opcode: OpNull, position: 0 },
388        };
389        self.scopes.push(scope);
390        self.scope_index += 1;
391        self.symbol_table = SymbolTable::new_enclosed_symbol_table(self.symbol_table.clone());
392    }
393
394    fn leave_scope(&mut self) -> Instructions {
395        let instructions = self.current_instruction().clone();
396        self.scopes.pop();
397        self.scope_index -= 1;
398        let s = self.symbol_table.outer.as_ref().unwrap().as_ref().clone();
399        self.symbol_table = s;
400        return instructions;
401    }
402}