Skip to main content

ttk91/
compiler.rs

1//! Compilation from assembly source to bytecode.
2
3use crate::symbol_table::{Label, Location, SymbolId, SymbolInfo, SymbolTable, Value};
4use crate::symbolic;
5use crate::symbolic::program::Instruction as SymbolicInstruction;
6
7use crate::bytecode::{Program, Segment};
8use crate::instruction::Instruction as BytecodeInstruction;
9
10use crate::parsing::Span;
11use std::collections::HashMap;
12
13use slog::{o, trace, Discard, Logger};
14
/// Kind of memory segment a compiled word is placed into.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum SegmentType {
    /// Program instructions.
    /// Words pushed into this segment are guaranteed to occupy consecutive addresses.
    Text,

    /// Program data.
    /// Nothing is guaranteed about where words pushed into this segment end up.
    Data,
}
26
/// Defines an interface for a data structure into which bytecode can be compiled.
///
/// The compiler creates a target from a symbol table, pushes instruction and data words
/// into it, attaches symbols to the returned locations, patches already pushed words once
/// symbol addresses are known and finally calls [finish](CompileTarget::finish).
pub trait CompileTarget: Sized {
    /// Represents a position in the data structure.
    /// This should not change even if new instructions or data is pushed to the data structure.
    /// The actual address of this location in the produced memory dump can change.
    type Location: Clone + std::fmt::Debug;

    /// Create an empty instance of itself that takes ownership of the symbol table `st`.
    fn create(st: SymbolTable) -> Self;

    /// Create an empty instance of itself with reserved capacity for `size` words of data.
    /// This is just a request or a hint, so this doesn't need to be actually implemented if
    /// the data structure doesn't support this.
    ///
    /// The provided default implementation is a call to [CompileTarget::create].
    fn with_capacity(symbol_table: SymbolTable, _size: u16) -> Self {
        Self::create(symbol_table)
    }

    /// Finalize the compilation.
    /// The compiler will not modify the data structure after this.
    ///
    /// The provided default implementation returns `self` unchanged.
    fn finish(self) -> Self {
        self
    }

    /// Returns a mutable reference to the symbol table of the compilation target.
    fn symbol_table_mut(&mut self) -> &mut SymbolTable;

    /// Sets the word in the location `addr` to value `word`.
    fn set_word(&mut self, addr: &Self::Location, word: i32);

    /// Pushes a new word to the data structure.
    /// The only requirement for the location of the word is that words pushed with segment type
    /// [SegmentType::Text] need to be in consecutive addresses.
    ///
    /// # Parameters
    /// - `span`: Span of the originating entry in the symbolic assembly source, if there is one.
    /// - `word`: Value for the word.
    /// - `segment`: Type of the data. Affects the words location in the memory dump.
    ///
    /// Returns the location of the pushed word.
    fn push_word(&mut self, span: Option<Span>, word: i32, segment: SegmentType) -> Self::Location;

    /// Declares a new symbol with label `label` in location `address`.
    fn set_symbol(&mut self, label: SymbolId, address: &Self::Location);

    /// Returns a mutable reference to the information stored for the symbol `label`.
    fn get_symbol_mut(&mut self, label: SymbolId) -> &mut SymbolInfo;

    /// Translates the location `loc` into a word offset in the memory dump.
    /// The location can change after calls to [push_word](CompileTarget::push_word).
    fn to_address(&self, loc: &Self::Location) -> u16;
}
77
78impl Program {
79    fn move_symbols_after(&mut self, addr: u16) {
80        for symbol in self.symbol_table.iter_mut() {
81            let value = match symbol.get_mut::<Value>() {
82                Some(v) => v,
83                None => continue,
84            };
85
86            if *value >= addr as i32 {
87                *value += 1;
88            }
89        }
90    }
91}
92
93impl CompileTarget for Program {
94    type Location = (SegmentType, usize);
95
96    fn create(symbol_table: SymbolTable) -> Program {
97        Program {
98            code: Segment {
99                content: Vec::new(),
100                start: 0,
101            },
102            data: Segment {
103                content: Vec::new(),
104                start: 0,
105            },
106            symbol_table,
107        }
108    }
109
110    fn push_word(
111        &mut self,
112        _span: Option<Span>,
113        word: i32,
114        segment: SegmentType,
115    ) -> Self::Location {
116        let (abs_addr, rel_addr) = match segment {
117            SegmentType::Text => {
118                self.code.content.push(word);
119                self.data.start += 1;
120                (self.code.content.len() - 1, self.code.content.len() - 1)
121            }
122            SegmentType::Data => {
123                self.data.content.push(word);
124                (
125                    self.code.content.len() + self.data.content.len() - 1,
126                    self.data.content.len() - 1,
127                )
128            }
129        };
130
131        self.move_symbols_after(abs_addr as u16);
132
133        (segment, rel_addr)
134    }
135
136    fn symbol_table_mut(&mut self) -> &mut SymbolTable {
137        &mut self.symbol_table
138    }
139
140    fn set_word(&mut self, (segment, addr): &Self::Location, value: i32) {
141        match segment {
142            SegmentType::Text => self.code.content[*addr] = value,
143            SegmentType::Data => self.data.content[*addr] = value,
144        }
145    }
146
147    fn set_symbol(&mut self, id: SymbolId, (segment, mut addr): &Self::Location) {
148        if *segment == SegmentType::Data {
149            addr += self.code.content.len();
150        }
151
152        let sym = self.symbol_table.symbol_mut(id);
153
154        println!(
155            "Set location: {:?} ({:?}) = {}",
156            sym.get::<Label>().as_str(),
157            sym.get::<SymbolId>(),
158            addr
159        );
160
161        sym.set::<Value>(Some(addr as i32));
162    }
163
164    fn get_symbol_mut(&mut self, label: SymbolId) -> &mut SymbolInfo {
165        self.symbol_table.symbol_mut(label)
166    }
167
168    fn to_address(&self, loc: &Self::Location) -> u16 {
169        match loc {
170            (SegmentType::Text, addr) => *addr as u16,
171            (SegmentType::Data, addr) => (addr + self.data.start) as u16,
172        }
173    }
174}
175
/// Captures source span information produced during the compilation process
/// and produces a mapping from memory addresses into spans of the source assembly.
///
/// Wraps another [CompileTarget] and forwards the compilation to it.
pub struct SourceMap<T: CompileTarget> {
    /// The actual artifact of the compilation.
    pub compiled: T,

    /// Spans recorded while words are pushed, keyed by the location of the pushed word.
    /// Translated into `source_map` when the compilation is finished.
    tmp: HashMap<T::Location, Span>,

    /// Map from memory addresses into spans of the source assembly.
    /// Populated when the compilation is finished; empty before that.
    pub source_map: HashMap<u16, Span>,
}
189
190impl<T> CompileTarget for SourceMap<T>
191where
192    T: CompileTarget,
193    T::Location: std::hash::Hash + std::cmp::Eq + Clone,
194{
195    type Location = T::Location;
196
197    fn create(symbol_table: SymbolTable) -> Self {
198        SourceMap {
199            compiled: T::create(symbol_table),
200            tmp: HashMap::new(),
201            source_map: HashMap::new(),
202        }
203    }
204
205    fn symbol_table_mut(&mut self) -> &mut SymbolTable {
206        self.compiled.symbol_table_mut()
207    }
208
209    fn set_word(&mut self, loc: &Self::Location, word: i32) {
210        self.compiled.set_word(loc, word);
211    }
212
213    fn push_word(&mut self, span: Option<Span>, word: i32, segment: SegmentType) -> Self::Location {
214        let loc = self.compiled.push_word(span.clone(), word, segment);
215
216        if let Some(span) = span {
217            self.tmp.insert(loc.clone(), span);
218        }
219
220        loc
221    }
222
223    fn set_symbol(&mut self, id: SymbolId, loc: &Self::Location) {
224        self.compiled.set_symbol(id, loc);
225    }
226
227    fn get_symbol_mut(&mut self, label: SymbolId) -> &mut SymbolInfo {
228        self.compiled.get_symbol_mut(label)
229    }
230
231    fn to_address(&self, loc: &Self::Location) -> u16 {
232        self.compiled.to_address(loc)
233    }
234
235    fn finish(mut self) -> Self {
236        let mut map = HashMap::new();
237
238        for (loc, source) in self.tmp.drain() {
239            let addr = self.compiled.to_address(&loc);
240            map.insert(addr, source);
241        }
242
243        self.source_map = map;
244
245        self
246    }
247}
248
249/// Compiles the given assembly program into bytecode.
250/// Supports compilation into multiple data structures, but most often the compilation target is
251/// [crate::bytecode::Program] possibly in combination with [SourceMap].
252pub fn compile<T>(symprog: symbolic::Program) -> T
253where
254    T: CompileTarget + 'static,
255    T::Location: std::hash::Hash,
256{
257    compile_with_logger(symprog, None)
258}
259
/// Formats the hash of `t`, as computed by the standard library's default hasher,
/// as a lowercase hexadecimal string.
fn print_hash<T: std::hash::Hash>(t: &T) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::default();
    Hash::hash(t, &mut state);
    let digest = state.finish();

    format!("{:x}", digest)
}
267
268pub fn compile_with_logger<T, L>(symprog: symbolic::Program, logger: L) -> T
269where
270    T: CompileTarget + 'static,
271    L: Into<Option<Logger>>,
272    T::Location: std::hash::Hash,
273{
274    let logger = logger
275        .into()
276        .unwrap_or(Logger::root(Discard, o!()))
277        .new(o!("stage" => "compilation"));
278
279    let mut target = T::create(symprog.symbol_table);
280
281    let symbol_table = target.symbol_table_mut();
282
283    symbol_table.get_or_create("CRT".into())
284        .set::<Value>(Some(0));
285
286    symbol_table.get_or_create("HALT".into())
287        .set::<Value>(Some(11));
288
289    let mut relocation_table = HashMap::<SymbolId, Vec<(T::Location, BytecodeInstruction)>>::new();
290
291    for entry in symprog.instructions {
292        match entry.instruction {
293            SymbolicInstruction::Real(sym_ins) => {
294                let ins: BytecodeInstruction = sym_ins.clone().into();
295                let word: u32 = ins.clone().into();
296
297                let loc = target.push_word(entry.span, word as i32, SegmentType::Text);
298
299                let loc_log = logger.new(o!("location" => print_hash(&loc)));
300
301                trace!(loc_log, "append instruction");
302
303                if let Some(reloc) = sym_ins.relocation_symbol() {
304                    trace!(loc_log, "add a location to relocation table"; "symbol" => ?reloc.symbol);
305                    relocation_table
306                        .entry(reloc.symbol)
307                        .or_default()
308                        .push((loc.clone(), ins));
309                }
310
311                for symbol in entry.labels {
312                    trace!(loc_log, "add a label to the symbol table"; "symbol" => ?symbol);
313                    target
314                        .symbol_table_mut()
315                        .symbol_mut(symbol)
316                        .set::<Location<T>>(Some(loc.clone()));
317                    target.set_symbol(symbol, &loc);
318                }
319            }
320            SymbolicInstruction::Pseudo(ins) => {
321                let loc = target.push_word(entry.span.clone(), ins.value, SegmentType::Data);
322
323                let loc_log = logger.new(o!("location" => print_hash(&loc)));
324
325                trace!(loc_log, "append data"; "size" => ins.size, "value" => ins.value);
326
327                for symbol in entry.labels {
328                    trace!(loc_log, "add a label to the symbol table"; "symbol" => ?symbol);
329                    target
330                        .symbol_table_mut()
331                        .symbol_mut(symbol)
332                        .set::<Location<T>>(Some(loc.clone()));
333                    target.set_symbol(symbol, &loc);
334                }
335
336                for _ in 0..ins.size - 1 {
337                    target.push_word(entry.span.clone(), ins.value, SegmentType::Data);
338                }
339            }
340        }
341    }
342
343    for (sym, locs) in relocation_table {
344        let sym = target.get_symbol_mut(sym);
345
346        let addr = match sym.get::<Label>().as_str() {
347            "CRT" => 0,
348            "KBD" => 0,
349            "HALT" => 11,
350            label => {
351                println!("{:?}", sym);
352                let location = sym
353                    .get::<Location<T>>()
354                    .as_ref()
355                    .as_ref()
356                    .expect(
357                        format!(
358                            "Symbol {:?} ({:?}) has no location",
359                            sym.get::<SymbolId>(),
360                            label
361                        )
362                        .as_ref(),
363                    )
364                    .clone();
365                target.to_address(&location)
366            }
367        };
368
369        for (ins_loc, mut ins) in locs {
370            ins.immediate = addr;
371            let word: u32 = ins.into();
372
373            trace!(logger, "replace address part"; "address" => addr);
374
375            target.set_word(&ins_loc, word as i32);
376        }
377    }
378
379    target.finish()
380}
381
/// Compiles a small program and checks the size of the code segment, that every code
/// word decodes into an instruction and the contents of the data segment.
#[test]
fn test_compile() {
    use std::convert::TryFrom;

    let source = r#"
X 	DC 	13
Y 	DC 	15

;;;;;;;;;;;;;;;;;;;;;;;;;;;
; hello.k91 - print 28
;
MAIN 	LOAD 	R1, X
        ADD 	R1, Y
	    OUT 	R1, =CRT
	    SVC 	SP, =HALT
    "#;

    let program = crate::symbolic::Program::parse(source).unwrap();
    println!("{:?}", program.instructions);

    let prog: crate::bytecode::Program = compile(program);

    for word in &prog.code.content {
        println!("W> {:b}", word);
    }

    // Every word of the code segment has to decode back into an instruction.
    let ins = prog
        .code
        .content
        .iter()
        .map(|word| BytecodeInstruction::try_from(*word as u32).unwrap())
        .collect::<Vec<_>>();

    println!("{:?}", ins);

    println!("{:?}", prog);

    // Four instructions in the text segment, the two constants in the data segment.
    assert_eq!(prog.code.content.len(), 4);
    assert_eq!(ins.len(), 4);
    assert_eq!(prog.data.content, vec![13, 15]);
}
419
/// Compiles a small program into a [SourceMap] and checks that every memory address maps
/// back to the source line it was produced from.
#[test]
fn test_compile_sourcemap() {
    use crate::{bytecode, symbolic};

    let source = r#"
X 	DC 	13
Y 	DC 	15

;;;;;;;;;;;;;;;;;;;;;;;;;;;
; hello.k91 - print 28
;
MAIN 	LOAD 	R1, X
        ADD 	R1, Y
	    OUT 	R1, =CRT
	    SVC 	SP, =HALT
    "#;

    let parsed = symbolic::Program::parse(source).unwrap();
    let prog: SourceMap<bytecode::Program> = compile(parsed);

    // 1-based number of the line that contains byte offset `offset` of `source`.
    let line_of = |offset: usize| source[..offset].matches('\n').count() + 1;

    let mut source_lines = HashMap::new();
    let mut lines = HashMap::new();

    for (addr, span) in &prog.source_map {
        source_lines.insert(*addr, line_of(span.start));
        lines.insert(*addr, &source[span.clone()]);
    }

    println!("{:?}", prog.source_map);
    println!("{:?}", source_lines);
    println!("{:?}", lines);

    // (address, line): the four instructions come first, followed by the two data words.
    let expected: [(u16, usize); 6] = [(0, 8), (1, 9), (2, 10), (3, 11), (4, 2), (5, 3)];

    for (addr, line) in expected.iter() {
        assert_eq!(source_lines.get(addr), Some(line));
    }

    println!("{:?}", prog.compiled);
}