1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
use std::{
    mem,
    rc::Rc,
};

use crate::common::{
    number::split_number,
    span::{Span, Spanned},
    lambda::{Captured, Lambda},
    opcode::Opcode,
    data::Data,
};

// TODO: do a pass where we hoist and resolve variables?
// may work well for types too.

use crate::compiler::{
    sst::{UniqueSymbol, Scope, SST, SSTPattern},
    // TODO: pattern for where?
    syntax::Syntax,
};

use crate::core::{
    ffi_core,
    ffi::FFI,
};

// TODO: namespaces for FFIs?

/// Generates unoptimized bytecode from a hoisted `SST` and its `Scope`,
/// binding foreign functions against the core FFI (`ffi_core()`).
/// This is the simplest entry point to the `Compiler`;
/// use `gen_with_ffi` to supply a custom FFI instead.
///
/// Returns the compiled `Lambda`, or the `Syntax` error raised during generation.
pub fn gen(sst: (Spanned<SST>, Scope)) -> Result<Rc<Lambda>, Syntax> {
    let core = ffi_core();
    gen_with_ffi(sst, core)
}

/// Generates unoptimized bytecode from an `SST` using the given FFI.
/// Nothing is assumed about the FFI, not even the core functions:
/// to include them, build the core FFI with `core::ffi_core()`
/// and merge it with your own via `FFI::combine(...)` before calling this.
///
/// Returns the compiled `Lambda`, or the `Syntax` error raised while walking the tree.
pub fn gen_with_ffi(sst: (Spanned<SST>, Scope), ffi: FFI) -> Result<Rc<Lambda>, Syntax> {
    let (tree, scope) = sst;

    // a root compiler has no enclosing compiler; everything is emitted into its lambda
    let mut root = Compiler::base(ffi, scope);
    root.walk(&tree)?;

    Ok(Rc::new(root.lambda))
}

/// Compiler is a bytecode generator that walks an SST and produces (unoptimized) Bytecode.
/// There are plans to add a bytecode optimizer in the future.
/// Note that this struct should not be controlled manually,
/// use the `gen` function instead.
pub struct Compiler {
    /// The previous compiler (when compiling nested scopes).
    enclosing: Option<Box<Compiler>>,
    /// The current bytecode emission target.
    lambda: Lambda,
    // Names of symbols (field currently disabled):
    // symbol_table: Vec<String>,
    /// The foreign functional interface used to bind values
    ffi: FFI,
    /// The FFI functions that have been bound in this scope.
    ffi_names: Vec<String>,
    /// The scope being compiled (its locals and nonlocals), determined in hoisting.
    scope: Scope,
}

impl Compiler {
    /// Construct a new root `Compiler`:
    /// no enclosing compiler, an empty `Lambda` to emit into,
    /// and no FFI functions bound yet.
    pub fn base(ffi: FFI, scope: Scope) -> Compiler {
        Compiler {
            enclosing: None,
            lambda:    Lambda::empty(),
            ffi,
            ffi_names: vec![],
            scope,
        }
    }

    /// Replace the current compiler with a fresh one,
    /// keeping a reference to the old one in `self.enclosing`,
    /// and moving the FFI into the current compiler.
    pub fn enter_scope(&mut self, scope: Scope) {
        // the enclosing compiler is left holding an empty placeholder FFI
        // until `exit_scope` hands the real one back.
        let ffi        = mem::replace(&mut self.ffi, FFI::new());
        let nested     = Compiler::base(ffi, scope);
        let enclosing  = mem::replace(self, nested);
        self.enclosing = Some(Box::new(enclosing));
    }

    /// Restore the enclosing compiler,
    /// returning the nested one for data (Lambda) extraction,
    /// and moving the FFI mappings back into the enclosing compiler.
    ///
    /// # Panics
    /// Panics if there is no enclosing compiler,
    /// i.e. if this is called without a matching `enter_scope`.
    pub fn exit_scope(&mut self) -> Compiler {
        let ffi = mem::replace(&mut self.ffi, FFI::new());
        let nested = match self.enclosing.take() {
            Some(compiler) => mem::replace(self, *compiler),
            None => unreachable!("Can not go back past root compiler"),
        };
        self.ffi = ffi;
        nested
    }

    /// Walks an SST to generate bytecode.
    /// At this stage, the SST should've been verified, pruned, typechecked, etc.
    /// A malformed SST will cause a panic, as SSTs should be correct at this stage,
    /// and for them to be incorrect is an error in the compiler itself.
    ///
    /// # Errors
    /// Returns the `Syntax` error produced while compiling this node
    /// or any of its children.
    pub fn walk(&mut self, sst: &Spanned<SST>) -> Result<(), Syntax> {
        // TODO: move this to a better spot
        self.lambda.decls = self.scope.locals.len();

        // the entire span of the current node
        self.lambda.emit_span(&sst.span);

        // push left, push right, push center
        // NOTE: the node is cloned here because the per-node methods take ownership;
        // avoiding the clone would require changing their signatures.
        match sst.item.clone() {
            SST::Data(data) => {
                self.data(data);
                Ok(())
            },
            SST::Symbol(unique) => {
                self.symbol(unique);
                Ok(())
            },
            SST::Block(block) => self.block(block),
            SST::Label(name, expression) => self.label(name, *expression),
            SST::Tuple(tuple) => self.tuple(tuple),
            SST::FFI    { name,    expression } => self.ffi(name, *expression, sst.span.clone()),
            SST::Assign { pattern, expression } => self.assign(*pattern, *expression),
            SST::Lambda { pattern, expression, scope } => self.lambda(*pattern, *expression, scope),
            SST::Call   { fun,     arg        } => self.call(*fun, *arg),
        }
    }

    // TODO: closures are just lambdas + records
    // refactor as such?

    /// Resolves a symbol lookup, e.g. something like `x`.
    /// Emits `Load` for a local of the current scope, or `LoadCap` for a nonlocal,
    /// followed by the variable's index.
    ///
    /// # Panics
    /// Panics if the symbol is neither a local nor a nonlocal of the current scope.
    pub fn symbol(&mut self, unique_symbol: UniqueSymbol) {
        let index = if let Some(i) = self.scope.local_index(unique_symbol) {
            self.lambda.emit(Opcode::Load);
            i
        } else if let Some(i) = self.scope.nonlocal_index(unique_symbol) {
            self.lambda.emit(Opcode::LoadCap);
            i
        } else {
            // an unresolved symbol at this stage is a bug in the compiler itself
            unreachable!("symbol is neither a local nor a nonlocal of the current scope");
        };

        self.lambda.emit_bytes(&mut split_number(index));
    }

    /// Takes a `Data` leaf and produces some code to load the constant:
    /// a `Con` instruction followed by the constant's index in the lambda.
    pub fn data(&mut self, data: Data) {
        self.lambda.emit(Opcode::Con);
        let mut split = split_number(self.lambda.index_data(data));
        self.lambda.emit_bytes(&mut split);
    }

    /// A block is a series of expressions where the last is returned.
    /// Each sub-expression is walked, the last value is left on the stack.
    /// An empty block evaluates to `Data::Unit`.
    pub fn block(&mut self, children: Vec<Spanned<SST>>) -> Result<(), Syntax> {
        if children.is_empty() {
            self.data(Data::Unit);
            return Ok(());
        }

        for child in &children {
            self.walk(child)?;
            self.lambda.emit(Opcode::Del);
        }

        // remove the last delete instruction
        self.lambda.demit();
        Ok(())
    }

    /// Generates a print expression:
    /// walks `expression`, then emits `Opcode::Print`.
    // NOTE(review): `walk` has no arm that dispatches here;
    // presumably a leftover from before printing moved to the FFI — confirm.
    pub fn print(&mut self, expression: Spanned<SST>) -> Result<(), Syntax> {
        self.walk(&expression)?;
        self.lambda.emit(Opcode::Print);
        Ok(())
    }

    /// Generates a Label construction
    /// that evaluates the wrapped expression, loads the variant (`Data::Kind`),
    /// then emits `Opcode::Label`.
    pub fn label(&mut self, name: String, expression: Spanned<SST>) -> Result<(), Syntax> {
        self.walk(&expression)?;
        self.data(Data::Kind(name));
        self.lambda.emit(Opcode::Label);
        Ok(())
    }

    /// Generates a Tuple construction
    /// that loads all fields in the tuple
    /// then rips them off the stack into a vec.
    /// The `Tuple` instruction is followed by the number of fields.
    pub fn tuple(&mut self, tuple: Vec<Spanned<SST>>) -> Result<(), Syntax> {
        for item in &tuple {
            self.walk(item)?;
        }

        self.lambda.emit(Opcode::Tuple);
        self.lambda.emit_bytes(&mut split_number(tuple.len()));
        Ok(())
    }

    // TODO: make a macro to map Passerine's data model to Rust's
    /// Makes a Rust function callable from Passerine,
    /// by keeping a reference to that function.
    /// The argument `expression` is walked first, then an `FFICall`
    /// with the function's index is emitted.
    ///
    /// # Errors
    /// Returns a `Syntax` error at `span` if `name` can not be found in the FFI,
    /// or any error raised while walking `expression`.
    pub fn ffi(&mut self, name: String, expression: Spanned<SST>, span: Span) -> Result<(), Syntax> {
        self.walk(&expression)?;

        let function = self.ffi.get(&name)
            .map_err(|s| Syntax::error(&s, &span))?;

        // NOTE(review): a repeated name reuses its position in `ffi_names`,
        // so this presumably relies on `add_ffi` returning that same position — confirm.
        let index = match self.ffi_names.iter().position(|n| n == &name) {
            Some(p) => p,
            None => {
                // TODO: keeping track of state
                // in two different places is a code smell imo
                // Reason: don't want to include strings in lambda
                // optimal solutions:
                // have an earlier step that normalizes AST,
                // determines scope of all names/symbols,
                // and replaces all names/symbols with indices
                // before codegen.
                self.ffi_names.push(name);
                self.lambda.add_ffi(function)
            },
        };

        self.lambda.emit_span(&span);
        self.lambda.emit(Opcode::FFICall);
        self.lambda.emit_bytes(&mut split_number(index));
        Ok(())
    }

    /// Resolves the assignment of a variable.
    /// Emits `Save` for a local of the current scope, or `SaveCap` for a nonlocal,
    /// followed by the variable's index.
    ///
    /// # Panics
    /// Panics if the symbol is neither a local nor a nonlocal of the current scope.
    pub fn resolve_assign(&mut self, unique_symbol: UniqueSymbol) {
        let index = if let Some(i) = self.scope.local_index(unique_symbol) {
            self.lambda.emit(Opcode::Save);
            i
        } else if let Some(i) = self.scope.nonlocal_index(unique_symbol) {
            self.lambda.emit(Opcode::SaveCap);
            i
        } else {
            // an unresolved symbol at this stage is a bug in the compiler itself
            unreachable!("symbol is neither a local nor a nonlocal of the current scope");
        };

        self.lambda.emit_bytes(&mut split_number(index));
    }

    /// Destructures a pattern into
    /// a series of unpack and assign instructions.
    /// Instructions match against the topmost stack item.
    /// Does delete the data that is matched against.
    ///
    /// `redeclare` is only threaded through to nested patterns;
    /// nothing in this method currently reads it.
    pub fn destructure(&mut self, pattern: Spanned<SSTPattern>, redeclare: bool) {
        self.lambda.emit_span(&pattern.span);

        match pattern.item {
            SSTPattern::Symbol(unique_symbol) => {
                self.resolve_assign(unique_symbol);
            },
            SSTPattern::Data(expected) => {
                self.data(expected);
                self.lambda.emit(Opcode::UnData);
            },
            SSTPattern::Label(name, pattern) => {
                self.data(Data::Kind(name));
                self.lambda.emit(Opcode::UnLabel);
                self.destructure(*pattern, redeclare);
            },
            SSTPattern::Tuple(tuple) => {
                for (index, sub_pattern) in tuple.into_iter().enumerate() {
                    self.lambda.emit(Opcode::UnTuple);
                    self.lambda.emit_bytes(&mut split_number(index));
                    self.destructure(sub_pattern, redeclare);
                }
                // Delete the tuple moved to the top of the stack.
                self.lambda.emit(Opcode::Del);
            },
        }
    }

    /// Assign a value to a variable.
    /// The expression is evaluated and destructured against the pattern;
    /// the assignment itself evaluates to `Data::Unit`.
    pub fn assign(
        &mut self,
        pattern: Spanned<SSTPattern>,
        expression: Spanned<SST>
    ) -> Result<(), Syntax> {
        // eval the expression
        self.walk(&expression)?;
        self.destructure(pattern, false);
        self.data(Data::Unit);
        Ok(())
    }

    /// Recursively compiles a lambda declaration in a new scope.
    /// The enclosing compiler is always restored before returning,
    /// including when compiling the body fails.
    ///
    /// # Errors
    /// Returns any `Syntax` error raised while walking the body.
    ///
    /// # Panics
    /// Panics if a nonlocal of `scope` can not be resolved in the current scope.
    pub fn lambda(
        &mut self,
        pattern: Spanned<SSTPattern>,
        expression: Spanned<SST>,
        scope: Scope,
    ) -> Result<(), Syntax> {
        // build a list of captures at the boundary
        let mut captures = vec![];
        for nonlocal in scope.nonlocals.iter() {
            let captured = if self.scope.is_local(*nonlocal) {
                let index = self.scope.local_index(*nonlocal)
                    .expect("a local of the current scope must have a local index");
                self.lambda.emit(Opcode::Capture);
                self.lambda.emit_bytes(&mut split_number(index));
                Captured::Local(index)
            } else {
                let index = self.scope.nonlocal_index(*nonlocal)
                    .expect("a captured non-local variable must be a nonlocal of the current scope");
                Captured::Nonlocal(index)
            };
            captures.push(captured);
        }

        // just so the parallel is visually apparent
        self.enter_scope(scope);
        let body = {
            // push locals and captures into lambda
            self.lambda.captures = captures;

            // match the argument against the pattern, binding variables
            self.destructure(pattern, true);

            // walk the function body inside the new scope
            let walked = self.walk(&expression);

            if walked.is_ok() {
                // return the result
                self.lambda.emit(Opcode::Return);
                self.lambda.emit_bytes(&mut split_number(self.scope.locals.len()));
            }

            walked
        };
        // leave the nested scope before propagating any error,
        // so that `self` never stays stuck inside the nested compiler.
        let lambda = self.exit_scope().lambda;
        body?;

        // store the compiled lambda as a constant and emit a `Closure` referencing it.
        let lambda_index = self.lambda.index_data(Data::Lambda(Rc::new(lambda)));
        self.lambda.emit(Opcode::Closure);
        self.lambda.emit_bytes(&mut split_number(lambda_index));

        Ok(())
    }

    /// When a function is called, the top two items are taken off the stack,
    /// The topmost item is expected to be a function.
    /// To that end the argument is walked first, then the function.
    pub fn call(&mut self, fun: Spanned<SST>, arg: Spanned<SST>) -> Result<(), Syntax> {
        self.walk(&arg)?;
        self.walk(&fun)?;

        self.lambda.emit_span(&Span::combine(&fun.span, &arg.span));
        self.lambda.emit(Opcode::Call);
        Ok(())
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::compiler::{
        lex::lex,
        parse::parse,
        desugar::desugar,
        hoist::hoist,
    };
    use crate::common::source::Source;

    /// Runs the whole pipeline (lex, parse, desugar, hoist, gen) on `code`,
    /// panicking if any stage fails.
    fn compile(code: &str) -> Rc<Lambda> {
        let tokens    = lex(Source::source(code)).unwrap();
        let ast       = parse(tokens).unwrap();
        let desugared = desugar(ast).unwrap();
        let hoisted   = hoist(desugared).unwrap();
        gen(hoisted).unwrap()
    }

    /// Constants are stored in the order they are first needed.
    #[test]
    fn constants() {
        let lambda = compile("heck = true; lol = 0.0; lmao = false; eyy = \"GOod MoRNiNg, SiR\"");

        let result = vec![
            Data::Boolean(true),
            Data::Unit, // from assignment
            Data::Real(0.0),
            Data::Boolean(false),
            Data::String("GOod MoRNiNg, SiR".to_string()),
        ];

        assert_eq!(lambda.constants, result);
    }

    /// Checks the exact instruction stream emitted for a few assignments.
    #[test]
    fn bytecode() {
        let lambda = compile("heck = true; lol = heck; lmao = false");

        let con  = Opcode::Con  as u8;
        let save = Opcode::Save as u8;
        let load = Opcode::Load as u8;
        let del  = Opcode::Del  as u8;

        let result = vec![
            con, 128, save, 128,  // con true, save to heck,
                con, 129, del,    // load unit, delete
            load, 128, save, 129, // load heck, save to lol,
                con, 129, del,    // load unit, delete
            con, 130, save, 130,  // con false, save to lmao
                con, 129,         // load unit
        ];

        assert_eq!(result, lambda.code);
    }

    // NOTE: instead of verifying bytecode output,
    // write a test in vm::vm::test
    // and check behaviour that way
}