1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
use crate::compiler::ast::AST; use crate::common::opcode::Opcode; use crate::common::data::Data; use crate::common::span::Spanned; use crate::common::number::split_number; use crate::vm::local::Local; // TODO: annotations in bytecode // TODO: separate AST compiler from Chunk itself /// The bytecode generator (emitter) walks the AST and produces (unoptimized) Bytecode /// There are plans to add a bytecode optimizer in the future. pub fn gen(ast: Spanned<AST>) -> Chunk { let mut generator = Chunk::empty(); generator.walk(&ast); generator } /// Represents a single interpretable chunk of bytecode, /// Think a function. #[derive(Debug, Clone, Eq, PartialEq)] pub struct Chunk { pub code: Vec<u8>, // each byte is an opcode or a number-stream pub offsets: Vec<usize>, // each usize indexes the bytecode op that begins each line pub constants: Vec<Data>, // number-stream indexed, used to load constants pub locals: Vec<Local>, // '' variables } impl Chunk { /// Creates a new empty chunk to be filled. pub fn empty() -> Chunk { Chunk { code: vec![], offsets: vec![], constants: vec![], locals: vec![], } } // TODO: bytecode chunk dissambler /// Walks an AST to generate bytecode. /// At this stage, the AST should've been verified, pruned, typechecked, etc. /// A malformed AST will cause a panic, as ASTs should be correct at this stage, /// and for them to be incorrect is an error in the compiler itself. fn walk(&mut self, ast: &Spanned<AST>) { // push left, push right, push center match ast.item.clone() { AST::Data(data) => self.data(data), AST::Symbol(symbol) => self.symbol(symbol), AST::Block(block) => self.block(block), AST::Assign { pattern, expression } => self.assign(*pattern, *expression), AST::Lambda { pattern, expression } => self.lambda(*pattern, *expression), AST::Call { fun, arg } => self.call(*fun, *arg), } } /// Given some data, this function adds it to the constants table, /// and returns the data's index. /// The constants table is push only, so constants are identified by their index. /// The resulting usize can be split up into a number byte stream, /// and be inserted into the bytecode. pub fn index_data(&mut self, data: Data) -> usize { match self.constants.iter().position(|d| d == &data) { Some(d) => d, None => { self.constants.push(data); self.constants.len() - 1 }, } } /// Takes a `Data` leaf and and produces some code to load the constant fn data(&mut self, data: Data) { self.code.push(Opcode::Con as u8); let mut split = split_number(self.index_data(data)); self.code.append(&mut split); } /// Similar to index constant, but indexes variables instead. fn index_symbol(&mut self, symbol: Local) -> usize { match self.locals.iter().position(|l| l == &symbol) { Some(l) => l, None => { self.locals.push(symbol); self.locals.len() - 1 }, } } /// Takes a symbol leaf, and produces some code to load the local. fn symbol(&mut self, symbol: Local) { self.code.push(Opcode::Load as u8); let index = self.index_symbol(symbol); self.code.append(&mut split_number(index)); } /// A block is a series of expressions where the last is returned. /// Each sup-expression is walked, the last value is left on the stack. /// (In the future, block should create a new scope.) fn block(&mut self, children: Vec<Spanned<AST>>) { for child in children { self.walk(&child); // TODO: Should `Opcode::Clear` not be a thing? self.code.push(Opcode::Clear as u8); } // remove the last clear instruction self.code.pop(); } /// Binds a variable to a value in the current scope. /// Note that values are immutable, but variables aren't. /// Passerine uses a special form of reference counting, /// Where each object can only have one reference. /// This allows for lifetime optimizations later on. /// /// When a variable is reassigned, the value it holds is dropped. /// When a variable is loaded, the value it points to is copied, /// Unless it's the last occurance of that variable in it's lifetime. /// This makes passerine strictly pass by value. /// Though mutable objects can be simulated with macros. /// For example: /// ```plain /// --- Increments a variable by 1, returns new value. /// increment = var ~> { var = var + 1; var } /// /// counter = 7 /// counter.increment () /// -- desugars to /// increment counter /// -- desugars to /// { counter = counter + 1; counter } /// ``` /// To demonstrate what I mean, let's annotate the vars. /// ```plain /// increment = var<`a> ~> { /// var<`b> = var<`a> + 1 /// var<`b> /// } /// ``` /// `<\`_>` means that the value held by var is the same. /// Because /// ```plain /// var<`b> = var<`a> + 1 /// ^^^^ /// ``` /// is the last use of the value of var<`a>, instead of copying the value, /// the value var points to is used, and var<`a`> is removed from the scope. /// /// There are still many optimizations that can be made, /// but needless to say, Passerine uses dynamically inferred lifetimes /// in lieu of garbage collecting. /// One issue with this strategy is having multiple copies of the same data, /// So for larger datastructures, some sort of persistent ARC implementation might be used. fn assign(&mut self, symbol: Spanned<AST>, expression: Spanned<AST>) { // eval the expression self.walk(&expression); // load the following symbol ... self.code.push(Opcode::Save as u8); // ... the symbol to be loaded let index = match symbol.item { AST::Symbol(l) => self.index_symbol(l), _ => unreachable!(), }; self.code.append(&mut split_number(index)); // TODO: load Unit } /// Walks a function, creates a chunk, then pushes the resulting chunk onto the stack. /// All functions take and return one value. /// This allows for parital application, /// but is slow if you just want to run a function, /// because a function of three arguments is essentially three function calls. /// In the future, repeated calls should be optimized out. /// TODO: closures /// The issue with closures is that they allow the data to escape /// which makes vaporization less useful as a result. /// There are a few potential solutions: /// - The easiest solution is to disallow closures. This is lame. /// - The second easiest solution is to simply copy the data /// when creating a closure. /// While easy to implement, captured values would not represent /// the same object: /// ```plain /// counter = start -> { /// value = start /// increment = () -> { value = value + 1 } /// decrement = () -> { value = value - 1 } /// (increment, decrement) /// } /// ``` /// If a counter was constructed using the above code, /// increment and decrement would refer to different values. /// - As closures are a poor man's object, /// an alternative would be adding support for pseudo-objects via macros. /// this wouldn't clash with naïeve closure implementations; /// here's what I'm getting at: /// ```plain /// counter = start -> { /// Counter start -- wrap value in Label, creating a type /// } /// /// increment = value: Counter _ ~> { value = value + 1 } /// decrement = value: Counter _ ~> { value = value - 1 } /// tally = Counter value -> value /// /// my_counter = counter 5 /// increment counter; increment counter /// decrement counter /// print (tally counter) /// ``` /// I like this solution, but I think it should be its own thing /// rather than boot actual closures from the language. /// Passerine's an impure functional language, /// so no closures would be a little silly. /// - Another solution would be to store the values on the heap, arc'd. /// Nah. /// - Ok, I think I've got it. /// At compile time, each function contains a list of variables /// of the environment it's created in, ad infinitum. /// When a function is defined within a function (read closure), /// during definition, it marks all variables used by that function. /// At the end of the original function, all unmarked (read uncaptured) /// variables are removed from the environment, /// And all functions return containing an ARC to the base function's environment /// - I noticed an issue. Take this example: /// ```plain /// escape = huge tiny -> { /// forget = () -> huge, /// remember = () -> tiny /// remember /// } /// ``` /// In this case, `huge` is captured by the `forget closure` /// But only remember is returned. /// However, everything is stored in the same struct /// So huge isn't deallocated until remember is. /// - Here's my Final Solution. We introduce a new type of data, /// `ReferenceCoutned`. /// When data is captured, it's converted to reference-counted data /// if it hasn't been already, and the reference count is increased. /// The only downside is that this is a bit slower, /// but it's a small price to pay for salvation. fn lambda(&mut self, symbol: Spanned<AST>, expression: Spanned<AST>) { let mut fun = Chunk::empty(); // inside the function // save the argument into the given variable fun.code.push(Opcode::Save as u8); let index = fun.index_symbol(match symbol.item { AST::Symbol(l) => l, _ => unreachable!(), }); fun.code.append(&mut split_number(index)); fun.code.push(Opcode::Clear as u8); // clear the stack fun.walk(&expression); // run the function fun.code.push(Opcode::Return as u8); // return the result // push the lambda object onto the callee's stack. self.data(Data::Lambda(fun)); } /// When a function is called, the top two items are taken off the stack, /// The topmost item is expected to be a function. fn call(&mut self, fun: Spanned<AST>, arg: Spanned<AST>) { self.walk(&arg); self.walk(&fun); self.code.push(Opcode::Call as u8); } } #[cfg(test)] mod test { use super::*; use crate::compiler::lex::lex; use crate::compiler::parse::parse; use crate::common::source::Source; #[test] fn constants() { // TODO: flesh out as more datatypes are added let source = Source::source("heck = true; lol = 0.0; lmao = false; eyy = \"GOod MoRNiNg, SiR\""); let ast = parse( lex(source).unwrap() ).unwrap(); let chunk = gen(ast); let result = vec![ Data::Boolean(true), Data::Real(0.0), Data::Boolean(false), Data::String("GOod MoRNiNg, SiR".to_string()), ]; assert_eq!(chunk.constants, result); } #[test] fn bytecode() { let source = Source::source("heck = true; lol = heck; lmao = false"); let ast = parse(lex(source).unwrap()).unwrap(); let chunk = gen(ast); let result = vec![ // con true, save to heck, clear (Opcode::Con as u8), 128, (Opcode::Save as u8), 128, (Opcode::Clear as u8), // load heck, save to lol, clear (Opcode::Load as u8), 128, (Opcode::Save as u8), 129, (Opcode::Clear as u8), // con false, save to lmao (Opcode::Con as u8), 129, (Opcode::Save as u8), 130, ]; assert_eq!(result, chunk.code); } }