lc3_ensemble/ast/
asm.rs

//! This module holds the AST for statements from assembly source code.
//!
//! For instructions that map to bytecode instructions
//! (i.e., the hex representation of assembly instructions), see [`sim::SimInstr`].
//!
//! Useful structs in this module include:
//! - [`AsmInstr`]: An enum of all possible assembly source code instructions
//! - [`Directive`]: An enum of all possible assembly source code directives
//! - [`Stmt`]: The format for a single "statement" in assembly source code
//!
//! [`sim::SimInstr`]: crate::ast::sim::SimInstr
12use std::fmt::Write as _;
13
14use super::{CondCode, IOffset, ImmOrReg, Label, Offset, PCOffset, Reg, TrapVect8};
15
/// Shorthand for `PCOffset<i16, 9>`: the operand written as `PCOffset9`
/// in the instruction syntax documented in this module.
type PCOffset9 = PCOffset<i16, 9>;
/// Shorthand for `PCOffset<i16, 11>`: the operand written as `PCOffset11`
/// in the instruction syntax documented in this module.
type PCOffset11 = PCOffset<i16, 11>;
18
19/// An enum representing all of the possible instructions in LC-3 assembly code.
20/// 
21/// The variants in this enum represent instructions before assembly passes.
22/// 
23/// For instructions that map to bytecode
24/// (i.e., the hex representation of assembly instructions), refer to [`sim::SimInstr`].
25/// 
26/// [`sim::SimInstr`]: crate::ast::sim::SimInstr`
27#[derive(Debug, PartialEq, Eq, Hash, Clone)]
28pub enum AsmInstr {
29    /// An ADD instruction.
30    /// 
31    /// # Operation
32    /// 
33    /// Evaluates the two operands, adds them, and stores the result to the destination register (`DR`).
34    /// This also sets the condition code for the LC-3 machine.
35    /// 
36    /// # Syntax
37    /// - `ADD DR, SR1, SR2`
38    /// - `ADD DR, SR1, imm5`
39    ADD(Reg, Reg, ImmOrReg<5>),
40
41    /// An AND instruction.
42    /// 
43    /// # Operation
44    /// 
45    /// Evaluates the two operands, bitwise ANDs them, and stores the result to the destination register (`DR`).
46    /// This also sets the condition code for the LC-3 machine.
47    /// 
48    /// # Syntax
49    /// - `AND DR, SR1, SR2`
50    /// - `AND DR, SR1, imm5`
51    AND(Reg, Reg, ImmOrReg<5>),
52
53    /// A BR instruction.
54    /// 
55    /// # Operation
56    /// 
57    /// Checks the current condition code and branches to the given `PCOffset9` 
58    /// if the condition code matches one of the provided condition codes of the instruction.
59    /// 
60    /// # Syntax
61    /// - `BR PCOffset9` (equivalent to `BRnzp`),
62    /// - `BRn PCOffset9`
63    /// - `BRz PCOffset9`
64    /// - `BRnz PCOffset9`
65    /// - `BRp PCOffset9`
66    /// - `BRnp PCOffset9`
67    /// - `BRzp PCOffset9`
68    /// - `BRnzp PCOffset9`
69    BR(CondCode, PCOffset9),
70    
71    /// A JMP instruction.
72    /// 
73    /// # Operation
74    /// 
75    /// Unconditionally jumps to the location stored in the given register (`BR`).
76    /// 
77    /// # Syntax
78    /// - `JMP BR`
79    JMP(Reg),
80    
81    /// A JSR instruction.
82    /// 
83    /// # Operation
84    /// 
85    /// Jumps to a given subroutine. This is done by storing the current PC into R7,
86    /// and then unconditionally jumping to the location of the given `PCOffset11`.
87    /// 
88    /// # Syntax
89    /// - `JSR PCOffset11`
90    JSR(PCOffset11),
91    
92    /// A JSRR instruction.
93    /// 
94    /// # Operation
95    /// 
96    /// Jumps to a given subroutine. This is done by storing the current PC into R7,
97    /// and then unconditionally jumping to the location stored in the given register (`BR`).
98    /// 
99    /// # Syntax
100    /// - `JSRR BR`
101    JSRR(Reg),
102    
103    /// A LD instruction.
104    /// 
105    /// # Operation
106    /// 
107    /// Computes an effective address (`PC + PCOffset9`), accesses the memory at that address,
108    /// and stores it to the destination register (`DR`).
109    /// This also sets the condition code for the LC-3 machine.
110    /// 
111    /// # Syntax
112    /// - `LD DR, PCOffset9`
113    LD(Reg, PCOffset9),
114    
115    /// A LDI instruction.
116    /// 
117    /// # Operation
118    /// 
119    /// Computes an effective address (`mem[PC + PCOffset9]`), accesses the memory at that address,
120    /// and stores it to the destination register (`DR`).
121    /// This also sets the condition code for the LC-3 machine.
122    /// 
123    /// # Syntax
124    /// - `LDI DR, PCOffset9`
125    LDI(Reg, PCOffset9),
126    
127    /// A LDR instruction.
128    /// 
129    /// # Operation
130    /// 
131    /// Computes an effective address (`mem[BR + offset6]`), accesses the memory at that address,
132    /// and stores it to the destination register (`DR`).
133    /// This also sets the condition code for the LC-3 machine.
134    /// 
135    /// # Syntax
136    /// - `LDR DR, BR, offset6`
137    LDR(Reg, Reg, IOffset<6>),
138    
139    /// A LEA instruction.
140    /// 
141    /// # Operation
142    /// 
143    /// Computes an effective address (`PC + PCOffset9`) and stores it to the destination register (`DR`).
144    /// 
145    /// # Syntax
146    /// - `LEA DR, PCOffset9`
147    LEA(Reg, PCOffset9),
148
149    /// A NOT instruction.
150    /// 
151    /// # Operation
152    /// 
153    /// Evaluates the operand, bitwise NOTs them, and stores the result to the destination register (`DR`).
154    /// This also sets the condition code for the LC-3 machine.
155    /// 
156    /// # Syntax
157    /// - `NOT DR, SR`
158    NOT(Reg, Reg),
159    
160    /// A RET instruction.
161    /// 
162    /// # Operation
163    /// 
164    /// Returns from a subroutine. This is an alias for `JMP R7`.
165    /// 
166    /// # Syntax
167    /// - `RET`
168    RET,
169    
170    /// A RTI instruction.
171    /// 
172    /// # Operation
173    /// 
174    /// Returns from a trap or interrupt.
175    /// 
176    /// # Syntax
177    /// - `RTI`
178    RTI,
179    
180    /// A ST instruction.
181    /// 
182    /// # Operation
183    /// 
184    /// Computes an effective address (`PC + PCOffset9`), and writes the value from the source register (`SR`)
185    /// into the memory at that address,
186    /// 
187    /// # Syntax
188    /// - `ST SR, PCOffset9`
189    ST(Reg, PCOffset9),
190
191    /// A STI instruction.
192    /// 
193    /// # Operation
194    /// 
195    /// Computes an effective address (`mem[PC + PCOffset9]`), and writes the value from the source register (`SR`)
196    /// into the memory at that address,
197    /// 
198    /// # Syntax
199    /// - `STI SR, PCOffset9`
200    STI(Reg, PCOffset9),
201
202    /// A STR instruction.
203    /// 
204    /// # Operation
205    /// 
206    /// Computes an effective address (`mem[BR + offset6]`), and writes the value from the source register (`SR`)
207    /// into the memory at that address,
208    /// 
209    /// # Syntax
210    /// - `STR SR, BR, offset6`
211    STR(Reg, Reg, IOffset<6>),
212
213    /// A TRAP instruction.
214    /// 
215    /// # Operation
216    /// 
217    /// Executes the trap with the given trap vector `TrapVect8`.
218    /// 
219    /// # Syntax
220    /// - `TRAP TrapVect8`
221    TRAP(TrapVect8),
222
223    /* ALIASES AND TRAPS */
224
225    /// A NOP instruction.
226    /// 
227    /// # Operation
228    /// 
229    /// Does nothing.
230    /// 
231    /// # Syntax
232    /// - `NOP`
233    /// - `NOP LABEL` (label is computed, but not used)
234    /// - `NOP #99`
235    NOP(PCOffset9),
236
237    /// A GETC instruction.
238    /// 
239    /// # Operation
240    /// 
241    /// Gets a character from the keyboard, and store it into R0 (with the high 8 bits cleared). 
242    /// This is an alias for `TRAP x20`.
243    /// 
244    /// # Syntax
245    /// - `GETC`
246    GETC,
247
248    /// An OUT instruction.
249    /// 
250    /// # Operation
251    /// 
252    /// Writes a character from `R0[7:0]` to the display. This is an alias for `TRAP x21`.
253    /// 
254    /// # Syntax
255    /// - `OUT`
256    OUT,
257
258    /// A PUTC instruction.
259    /// 
260    /// # Operation
261    /// 
262    /// Writes a character from `R0[7:0]` to the display. This is an alias for `TRAP x21`.
263    /// 
264    /// # Syntax
265    /// - `PUTC`
266    PUTC,
267
268    /// A PUTS instruction.
269    /// 
270    /// # Operation
271    /// 
272    /// Prints characters in consecutive memory locations until a x00 character is read.
273    /// This starts with the memory location pointed to by the address in `R0`.
274    /// 
275    /// This is an alias for `TRAP x22`.
276    /// 
277    /// # Syntax
278    /// - `PUTS`
279    PUTS,
280
281    /// An IN instruction.
282    /// 
283    /// # Operation
284    /// 
285    /// Prompts the user for a character, stores the character into `R0` (with the high 8 bits cleared).
286    /// Additionally, this prints the obtained character onto the display.
287    /// 
288    /// This is an alias for `TRAP x23`.
289    /// 
290    /// # Syntax
291    /// - `IN`
292    IN,
293
294    /// A PUTSP instruction.
295    /// 
296    /// # Operation
297    /// 
298    /// Prints characters (two characters per memory location) until a x00 character is read.
299    /// This starts with the memory location pointed to by the address in `R0`.
300    /// This first prints the character in the low 8 bits, and then the character in the high 8 bits.
301    /// 
302    /// This is an alias for `TRAP x24`.
303    /// 
304    /// # Syntax
305    /// - `PUTSP`
306    PUTSP,
307
308    /// A HALT instruction.
309    /// 
310    /// # Operation
311    /// 
312    /// Stops execution of the program. This is an alias for `TRAP x25`.
313    /// 
314    /// # Syntax
315    /// - `HALT`
316    HALT
317}
318impl std::fmt::Display for AsmInstr {
319    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
320        match self {
321            Self::ADD(dr, sr1, sr2) => write!(f, "ADD {dr}, {sr1}, {sr2}"),
322            Self::AND(dr, sr1, sr2) => write!(f, "AND {dr}, {sr1}, {sr2}"),
323            Self::BR(cc, off) => {
324                if cc != &0 {
325                    write!(f, "BR")?;
326                    if cc & 0b100 != 0 { f.write_char('n')?; };
327                    if cc & 0b010 != 0 { f.write_char('z')?; };
328                    if cc & 0b001 != 0 { f.write_char('p')?; };
329                } else {
330                    write!(f, "NOP")?;
331                }
332                write!(f, " {off}")
333            },
334            Self::JMP(br) => write!(f, "JMP {br}"),
335            Self::JSR(off) => write!(f, "JSR {off}"),
336            Self::JSRR(br) => write!(f, "JSRR {br}"),
337            Self::LD(dr, off) => write!(f, "LD {dr}, {off}"),
338            Self::LDI(dr, off) => write!(f, "LDI {dr}, {off}"),
339            Self::LDR(dr, br, off) => write!(f, "LDR {dr}, {br}, {off}"),
340            Self::LEA(dr, off) => write!(f, "LEA {dr}, {off}"),
341            Self::NOT(dr, sr) => write!(f, "NOT {dr}, {sr}"),
342            Self::RET   => f.write_str("RET"),
343            Self::RTI   => f.write_str("RTI"),
344            Self::ST(sr, off) => write!(f, "ST {sr}, {off}"),
345            Self::STI(sr, off) => write!(f, "STI {sr}, {off}"),
346            Self::STR(sr, br, off) => write!(f, "STR {sr}, {br}, {off}"),
347            Self::TRAP(vect) => write!(f, "TRAP {vect:02X}"),
348            Self::NOP(off) => write!(f, "NOP {off}"),
349            Self::GETC  => f.write_str("GETC"),
350            Self::OUT   => f.write_str("OUT"),
351            Self::PUTC  => f.write_str("PUTC"),
352            Self::PUTS  => f.write_str("PUTS"),
353            Self::IN    => f.write_str("IN"),
354            Self::PUTSP => f.write_str("PUTSP"),
355            Self::HALT  => f.write_str("HALT"),
356        }
357    }
358}
359
360/// An enum representing all the possible directives in LC-3 assembly code.
361#[derive(Debug, PartialEq, Eq, Hash, Clone)]
362pub enum Directive {
363    /// An `.orig` directive.
364    /// 
365    /// # Operation
366    /// 
367    /// Starts a block of assembly.
368    /// 
369    /// # Syntax
370    /// 
371    /// `.orig ADDR`
372    Orig(Offset<u16, 16>),
373
374    /// A `.fill` directive.
375    /// 
376    /// # Operation
377    /// 
378    /// Writes some data into the given memory location.
379    /// 
380    /// # Syntax
381    /// 
382    /// `.fill DATA`
383    /// `.fill LABEL`
384    Fill(PCOffset<u16, 16>),
385    
386    
387    /// A `.blkw` directive.
388    /// 
389    /// # Operation
390    /// 
391    /// Saves a provided number of memory locations for writing into.
392    /// 
393    /// # Syntax
394    /// 
395    /// `.blkw N`
396    Blkw(Offset<u16, 16>),
397
398    /// A `.stringz` directive.
399    /// 
400    /// # Operation
401    /// 
402    /// Writes a null-terminated string into the provided location.
403    /// 
404    /// # Syntax
405    /// 
406    /// `.stringz "A literal"`
407    Stringz(String),
408
409    /// A `.end` directive.
410    /// 
411    /// # Operation
412    /// 
413    /// Closes a block started by an `.orig`.
414    /// 
415    /// # Syntax
416    /// 
417    /// `.end`
418    End,
419
420    /// A `.external` directive.
421    /// 
422    /// # Operation
423    /// 
424    /// Designates that a label is external, 
425    /// meaning it is not defined within the file and must be linked in.
426    /// 
427    /// # Syntax
428    /// 
429    /// `.external LABEL`
430    External(Label),
431}
432impl std::fmt::Display for Directive {
433    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
434        match self {
435            Self::Orig(addr)   => write!(f, ".orig {addr:04X}"),
436            Self::Fill(val)    => write!(f, ".fill {val}"),
437            Self::Blkw(n)      => write!(f, ".blkw {n}"),
438            Self::Stringz(val) => write!(f, ".stringz {val:?}"),
439            Self::End          => write!(f, ".end"),
440            Self::External(lb) => write!(f, ".external {lb}"),
441        }
442    }
443}
444
445/// Either an instruction or a directive.
446#[derive(Debug, PartialEq, Eq, Hash, Clone)]
447pub enum StmtKind {
448    #[allow(missing_docs)]
449    Instr(AsmInstr),
450    #[allow(missing_docs)]
451    Directive(Directive)
452}
453impl std::fmt::Display for StmtKind {
454    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
455        match self {
456            StmtKind::Instr(i) => i.fmt(f),
457            StmtKind::Directive(d) => d.fmt(f),
458        }
459    }
460}
461
462/// A "statement" in LC-3 assembly.
463/// 
464/// While not a defined term in LC-3 assembly, 
465/// a statement here refers to either an instruction or a directive,
466/// and the labels that are associated with it.
467#[derive(Debug, PartialEq, Eq, Hash, Clone)]
468pub struct Stmt {
469    /// The labels.
470    pub labels: Vec<Label>,
471    /// The instruction or directive.
472    pub nucleus: StmtKind,
473    /// The span of the nucleus.
474    pub span: std::ops::Range<usize>
475}
476impl std::fmt::Display for Stmt {
477    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
478        for label in &self.labels {
479            label.fmt(f)?;
480            f.write_char(' ')?;
481        }
482        self.nucleus.fmt(f)
483    }
484}
485
486/// Attempts to disassemble a line of bytecode back into an assembly instruction,
487/// returning `None` if it cannot be disassembled.
488pub fn try_disassemble_line(word: u16) -> Option<Stmt> {
489    // All words before 0x0200 are NOPs with offsets.
490    let si = match word >= 0x0200 {
491        true  => super::sim::SimInstr::decode(word).ok(),
492        false => None,
493    }?;
494    
495    let ai = match si {
496        super::sim::SimInstr::BR(cc, off) => AsmInstr::BR(cc, PCOffset::Offset(off)),
497        super::sim::SimInstr::ADD(dr, sr1, sr2) => AsmInstr::ADD(dr, sr1, sr2),
498        super::sim::SimInstr::LD(dr, off) => AsmInstr::LD(dr, PCOffset::Offset(off)),
499        super::sim::SimInstr::ST(sr, off) => AsmInstr::ST(sr, PCOffset::Offset(off)),
500        super::sim::SimInstr::JSR(off) => match off {
501            ImmOrReg::Imm(imm) => AsmInstr::JSR(PCOffset::Offset(imm)),
502            ImmOrReg::Reg(reg) => AsmInstr::JSRR(reg),
503        },
504        super::sim::SimInstr::AND(dr, sr1, sr2) => AsmInstr::AND(dr, sr1, sr2),
505        super::sim::SimInstr::LDR(dr, br, off) => AsmInstr::LDR(dr, br, off),
506        super::sim::SimInstr::STR(sr, br, off) => AsmInstr::STR(sr, br, off),
507        super::sim::SimInstr::RTI   => AsmInstr::RTI,
508        super::sim::SimInstr::NOT(dr, sr) => AsmInstr::NOT(dr, sr),
509        super::sim::SimInstr::LDI(dr, off) => AsmInstr::LDI(dr, PCOffset::Offset(off)),
510        super::sim::SimInstr::STI(sr, off) => AsmInstr::STI(sr, PCOffset::Offset(off)),
511        super::sim::SimInstr::JMP(Reg::R7) => AsmInstr::RET,
512        super::sim::SimInstr::JMP(br) => AsmInstr::JMP(br),
513        super::sim::SimInstr::LEA(dr, off) => AsmInstr::LEA(dr, PCOffset::Offset(off)),
514        super::sim::SimInstr::TRAP(vect) if vect.get() == 0x20 => AsmInstr::GETC,
515        super::sim::SimInstr::TRAP(vect) if vect.get() == 0x21 => AsmInstr::PUTC,
516        super::sim::SimInstr::TRAP(vect) if vect.get() == 0x22 => AsmInstr::PUTS,
517        super::sim::SimInstr::TRAP(vect) if vect.get() == 0x23 => AsmInstr::IN,
518        super::sim::SimInstr::TRAP(vect) if vect.get() == 0x24 => AsmInstr::PUTSP,
519        super::sim::SimInstr::TRAP(vect) if vect.get() == 0x25 => AsmInstr::HALT,
520        super::sim::SimInstr::TRAP(vect) => AsmInstr::TRAP(vect),
521    };
522
523    Some(Stmt {
524        labels: vec![],
525        nucleus: StmtKind::Instr(ai),
526        span: 0..0
527    })
528}
529
530/// Attempts to disassemble a line of bytecode back into an assembly instruction,
531/// returning a `.fill` directive if not possible.
532pub fn disassemble_line(word: u16) -> Stmt {
533    try_disassemble_line(word)
534        .unwrap_or_else(|| {
535            let fill = Directive::Fill(PCOffset::Offset(super::Offset::new_trunc(word)));
536
537            Stmt {
538                labels: vec![],
539                nucleus: StmtKind::Directive(fill),
540                span: 0..0
541            }
542        })
543}
544
545/// Attempts to disassemble bytecode back into assembly instructions.
546pub fn disassemble(data: &[u16]) -> Vec<Stmt> {
547    data.iter()
548        .copied()
549        .map(disassemble_line)
550        .collect()
551}