1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
//! Entry point for the Inc compiler
/// State for the code generator
pub mod state {
use crate::core::Code;
use crate::x86::{Reference, ASM, WORDSIZE};
use std::collections::HashMap;
/// State for the code generator; easier to bundle it all into a struct than
/// pass several arguments in.
///
/// Stack index points to the current available empty slot. Use and then
/// decrement the index to add a new variable. Default to `-word size`
///
/// `li` is label index, a counter used to generate unique labels. See
/// `gen_label`
///
/// `symbols` is a list of all strings known at compile time, so that they
/// can be allocated in the binary instead of heap.
///
/// `functions` are all user defined functions
///
/// State should also implement some form of register allocation.
pub struct State {
pub si: i64,
pub asm: ASM,
li: u64,
pub strings: HashMap<String, usize>,
pub symbols: HashMap<String, usize>,
pub functions: HashMap<String, Code>,
env: Env,
}
impl Default for State {
fn default() -> Self {
State {
si: -WORDSIZE,
asm: Default::default(),
li: 0,
strings: HashMap::new(),
symbols: HashMap::new(),
functions: HashMap::new(),
env: Default::default(),
}
}
}
impl State {
pub fn enter(&mut self) {
self.env.enter();
}
pub fn leave(&mut self) {
let unwind = self.env.0.first().expect("unexpected empty env").len() as i64 * WORDSIZE;
self.si += unwind;
self.env.leave()
}
pub fn get(&self, i: &str) -> Option<&Reference> {
self.env.get(i)
}
// Set a new binding in the current local environment
pub fn set(&mut self, i: &str, r: Reference) {
self.env.set(i, r);
self.alloc();
}
/// Allocate a word on the stack & return reference to existing empty slot
///
/// Since stack index points to existing free memory, it is useful to be
/// able to use it and increment in one go.
///
/// # Example:
///
/// ```ignore
/// Save { r: RAX, si: s.alloc() }
/// ```
pub fn alloc(&mut self) -> i64 {
let current = self.si;
self.si -= WORDSIZE;
current
}
/// Explicitly free `n` words of memory from stack
pub fn dealloc(&mut self, count: i64) {
self.si += count * WORDSIZE;
}
/// Generate a unique label for jump targets.
pub fn gen_label(&mut self, prefix: &str) -> String {
self.li += 1;
format!("{}_{}", prefix, self.li)
}
}
// Environment is an *ordered* list of bindings.
struct Env(Vec<HashMap<String, Reference>>);
impl Default for Env {
fn default() -> Self {
Env(vec![HashMap::new()])
}
}
impl Env {
pub fn enter(&mut self) {
self.0.insert(0, HashMap::new());
}
pub fn leave(&mut self) {
self.0.remove(0);
}
pub fn set(&mut self, i: &str, r: Reference) {
self.0.first_mut().map(|binding| binding.insert(i.to_string(), r));
}
pub fn get(&self, i: &str) -> Option<&Reference> {
for bindings in &self.0 {
if let Some(t) = bindings.get(i) {
return Some(t);
}
}
None
}
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn t() {
let mut e: Env = Default::default();
assert_eq!(e.0.len(), 1);
// default global scope
e.set("x", Reference::from(-8));
assert_eq!(e.get("x"), Some(&Reference::from(-8)));
// overwrite in current scope
e.set("x", Reference::from(Reference::from(-16)));
assert_eq!(e.get("x"), Some(&Reference::from(-16)));
e.enter();
assert_eq!(e.0.len(), 2);
// read variables from parent scope
assert_eq!(e.get("x"), Some(&Reference::from(-16)));
e.set("y", Reference::from(-24));
// local variable shadows global
e.set("x", Reference::from(-32));
assert_eq!(e.get("x"), Some(&Reference::from(-32)));
e.leave();
assert_eq!(e.0.len(), 1);
assert_eq!(e.get("y"), None);
assert_eq!(e.get("x"), Some(&Reference::from(-16)));
}
}
}
/// Emit machine code for inc AST.
///
/// This module implements bulk of the compiler and is a good place to start
/// reading code. Platform specific code is annotated with `cfg(target_os)` for
/// both linux and mac. This module implements code gen specific to inc and
/// anything generic goes into `x86` module.
pub mod emit {
use crate::{
compiler::state::State,
core::Expr::{self, *},
x86::{self, Ins, Reference, Register::*, Relative, ASM},
*,
};
/// Clear (mask) all except the least significant 3 tag bits
pub fn mask() -> Ins {
x86::and(RAX.into(), immediate::MASK.into())
}
/// Emit code for a let expression
///
/// A new environment is created to hold the bindings, which map the name to
/// a stack index. All the space allocated by the let expression for local
/// variables can be freed at the end of the body. This implies the `si`
/// stays the same before and after a let expression. There is no need to
/// keep track of the amount of space allocated inside the let expression
/// and free it afterwards.
pub fn vars(s: &mut State, vars: &[(String, Expr)], body: &[Expr]) -> ASM {
let mut asm = ASM(vec![]);
s.enter();
for (name, expr) in vars {
match immediate::to(expr) {
Some(c) => asm += x86::save(Reference::Const(c), s.si),
None => asm += eval(s, expr) + x86::save(RAX.into(), s.si),
}
let r = Relative { register: RBP, offset: s.si };
s.set(name, r.into());
}
for b in body {
asm += eval(s, &b);
}
s.leave();
asm
}
/// Emit code for a conditional expression
pub fn cond(s: &mut State, p: &Expr, then: &Expr, alt: &Option<Box<Expr>>) -> ASM {
let exit_label = s.gen_label("exit");
let alt_label = s.gen_label("else");
// A conditional without an explicit alternate should evaluate to '()
let t = match alt {
None => &Expr::Nil,
Some(t) => t,
};
eval(s, p)
+ x86::cmp(RAX.into(), immediate::FALSE.into())
+ x86::je(&alt_label)
+ eval(s, then)
+ x86::jmp(&exit_label)
+ x86::label(&alt_label)
+ eval(s, t)
+ x86::label(&exit_label)
}
/// Evaluate an expression into RAX
///
/// If the expression fits in a machine word, immediately return with the
/// immediate repr, recurse for anything else till the base case.
///
// TODO: eval should dispatch based on first atom alone, not necessarily
// care about arity here. `let` and other variadic syntax forms won't fit
// into any specific branch here.
#[allow(clippy::redundant_pattern)]
pub fn eval(s: &mut State, prog: &Expr) -> ASM {
match prog {
Identifier(i) => match s.get(i) {
Some(i) => x86::mov(RAX.into(), i.clone()).into(),
None => panic!("Undefined variable {}", i),
},
// Find the symbol index and return and reference in RAX
Str(data) => strings::eval(&s, &data),
Symbol(data) => symbols::eval(&s, &data),
Let { bindings, body } => vars(s, bindings, body),
Cond { pred, then, alt } => cond(s, pred, then, alt),
List(list) => match list.as_slice() {
[Identifier(f), args @ ..] => {
if s.functions.contains_key(f) {
lambda::call(s, f, &args)
} else if let Some(x) = primitives::call(s, f, args) {
x
} else if rt::defined(&f) {
ffi::call(s, f, &args)
} else {
panic!("Unknown function {} called with args: {:?}", f, &args)
}
}
_ => panic!("Unknown expression: `{}`", prog),
},
_ => match immediate::to(&prog) {
Some(c) => x86::mov(RAX.into(), c.into()).into(),
None => panic!("Unknown expression: `{}`", prog),
},
}
}
/// Top level interface to the emit module
pub fn program(prog: &[Expr]) -> String {
let mut s: State = Default::default();
let prog = lang::lift(&mut s, &prog);
let prog = lang::rename(&prog);
let mut gen = x86::prelude() + x86::func(&x86::init()) + x86::enter() + x86::init_heap();
for b in &prog {
gen += eval(&mut s, &b);
}
gen += x86::leave();
gen += strings::inline(&s);
gen += symbols::inline(&s);
gen += lambda::code(&mut s);
gen.to_string()
}
}