lc3_ensemble/ast/asm.rs
//! This module holds the AST for statements from assembly source code.
//!
//! For instructions that map to bytecode instructions
//! (i.e., the hex representation of assembly instructions), see [`sim::SimInstr`].
//!
//! Useful types in this module include:
//! - [`AsmInstr`]: An enum of all possible assembly source code instructions
//! - [`Directive`]: An enum of all possible assembly source code directives
//! - [`Stmt`]: The format for a single "statement" in assembly source code
//!
//! [`sim::SimInstr`]: crate::ast::sim::SimInstr
12use std::fmt::Write as _;
13
14use super::{CondCode, IOffset, ImmOrReg, Label, Offset, PCOffset, Reg, TrapVect8};
15
// Operand type of the instructions documented below with a `PCOffset9` operand
// (`BR`, `LD`, `LDI`, `LEA`, `ST`, `STI`, `NOP`).
// NOTE(review): presumably a signed 9-bit PC-relative offset or a label — confirm against `PCOffset`.
type PCOffset9 = PCOffset<i16, 9>;
// Operand type of `JSR`.
// NOTE(review): presumably the 11-bit counterpart of `PCOffset9` — confirm against `PCOffset`.
type PCOffset11 = PCOffset<i16, 11>;
18
19/// An enum representing all of the possible instructions in LC-3 assembly code.
20///
21/// The variants in this enum represent instructions before assembly passes.
22///
23/// For instructions that map to bytecode
24/// (i.e., the hex representation of assembly instructions), refer to [`sim::SimInstr`].
25///
26/// [`sim::SimInstr`]: crate::ast::sim::SimInstr`
27#[derive(Debug, PartialEq, Eq, Hash, Clone)]
28pub enum AsmInstr {
29 /// An ADD instruction.
30 ///
31 /// # Operation
32 ///
33 /// Evaluates the two operands, adds them, and stores the result to the destination register (`DR`).
34 /// This also sets the condition code for the LC-3 machine.
35 ///
36 /// # Syntax
37 /// - `ADD DR, SR1, SR2`
38 /// - `ADD DR, SR1, imm5`
39 ADD(Reg, Reg, ImmOrReg<5>),
40
41 /// An AND instruction.
42 ///
43 /// # Operation
44 ///
45 /// Evaluates the two operands, bitwise ANDs them, and stores the result to the destination register (`DR`).
46 /// This also sets the condition code for the LC-3 machine.
47 ///
48 /// # Syntax
49 /// - `AND DR, SR1, SR2`
50 /// - `AND DR, SR1, imm5`
51 AND(Reg, Reg, ImmOrReg<5>),
52
53 /// A BR instruction.
54 ///
55 /// # Operation
56 ///
57 /// Checks the current condition code and branches to the given `PCOffset9`
58 /// if the condition code matches one of the provided condition codes of the instruction.
59 ///
60 /// # Syntax
61 /// - `BR PCOffset9` (equivalent to `BRnzp`),
62 /// - `BRn PCOffset9`
63 /// - `BRz PCOffset9`
64 /// - `BRnz PCOffset9`
65 /// - `BRp PCOffset9`
66 /// - `BRnp PCOffset9`
67 /// - `BRzp PCOffset9`
68 /// - `BRnzp PCOffset9`
69 BR(CondCode, PCOffset9),
70
71 /// A JMP instruction.
72 ///
73 /// # Operation
74 ///
75 /// Unconditionally jumps to the location stored in the given register (`BR`).
76 ///
77 /// # Syntax
78 /// - `JMP BR`
79 JMP(Reg),
80
81 /// A JSR instruction.
82 ///
83 /// # Operation
84 ///
85 /// Jumps to a given subroutine. This is done by storing the current PC into R7,
86 /// and then unconditionally jumping to the location of the given `PCOffset11`.
87 ///
88 /// # Syntax
89 /// - `JSR PCOffset11`
90 JSR(PCOffset11),
91
92 /// A JSRR instruction.
93 ///
94 /// # Operation
95 ///
96 /// Jumps to a given subroutine. This is done by storing the current PC into R7,
97 /// and then unconditionally jumping to the location stored in the given register (`BR`).
98 ///
99 /// # Syntax
100 /// - `JSRR BR`
101 JSRR(Reg),
102
103 /// A LD instruction.
104 ///
105 /// # Operation
106 ///
107 /// Computes an effective address (`PC + PCOffset9`), accesses the memory at that address,
108 /// and stores it to the destination register (`DR`).
109 /// This also sets the condition code for the LC-3 machine.
110 ///
111 /// # Syntax
112 /// - `LD DR, PCOffset9`
113 LD(Reg, PCOffset9),
114
115 /// A LDI instruction.
116 ///
117 /// # Operation
118 ///
119 /// Computes an effective address (`mem[PC + PCOffset9]`), accesses the memory at that address,
120 /// and stores it to the destination register (`DR`).
121 /// This also sets the condition code for the LC-3 machine.
122 ///
123 /// # Syntax
124 /// - `LDI DR, PCOffset9`
125 LDI(Reg, PCOffset9),
126
127 /// A LDR instruction.
128 ///
129 /// # Operation
130 ///
131 /// Computes an effective address (`mem[BR + offset6]`), accesses the memory at that address,
132 /// and stores it to the destination register (`DR`).
133 /// This also sets the condition code for the LC-3 machine.
134 ///
135 /// # Syntax
136 /// - `LDR DR, BR, offset6`
137 LDR(Reg, Reg, IOffset<6>),
138
139 /// A LEA instruction.
140 ///
141 /// # Operation
142 ///
143 /// Computes an effective address (`PC + PCOffset9`) and stores it to the destination register (`DR`).
144 ///
145 /// # Syntax
146 /// - `LEA DR, PCOffset9`
147 LEA(Reg, PCOffset9),
148
149 /// A NOT instruction.
150 ///
151 /// # Operation
152 ///
153 /// Evaluates the operand, bitwise NOTs them, and stores the result to the destination register (`DR`).
154 /// This also sets the condition code for the LC-3 machine.
155 ///
156 /// # Syntax
157 /// - `NOT DR, SR`
158 NOT(Reg, Reg),
159
160 /// A RET instruction.
161 ///
162 /// # Operation
163 ///
164 /// Returns from a subroutine. This is an alias for `JMP R7`.
165 ///
166 /// # Syntax
167 /// - `RET`
168 RET,
169
170 /// A RTI instruction.
171 ///
172 /// # Operation
173 ///
174 /// Returns from a trap or interrupt.
175 ///
176 /// # Syntax
177 /// - `RTI`
178 RTI,
179
180 /// A ST instruction.
181 ///
182 /// # Operation
183 ///
184 /// Computes an effective address (`PC + PCOffset9`), and writes the value from the source register (`SR`)
185 /// into the memory at that address,
186 ///
187 /// # Syntax
188 /// - `ST SR, PCOffset9`
189 ST(Reg, PCOffset9),
190
191 /// A STI instruction.
192 ///
193 /// # Operation
194 ///
195 /// Computes an effective address (`mem[PC + PCOffset9]`), and writes the value from the source register (`SR`)
196 /// into the memory at that address,
197 ///
198 /// # Syntax
199 /// - `STI SR, PCOffset9`
200 STI(Reg, PCOffset9),
201
202 /// A STR instruction.
203 ///
204 /// # Operation
205 ///
206 /// Computes an effective address (`mem[BR + offset6]`), and writes the value from the source register (`SR`)
207 /// into the memory at that address,
208 ///
209 /// # Syntax
210 /// - `STR SR, BR, offset6`
211 STR(Reg, Reg, IOffset<6>),
212
213 /// A TRAP instruction.
214 ///
215 /// # Operation
216 ///
217 /// Executes the trap with the given trap vector `TrapVect8`.
218 ///
219 /// # Syntax
220 /// - `TRAP TrapVect8`
221 TRAP(TrapVect8),
222
223 /* ALIASES AND TRAPS */
224
225 /// A NOP instruction.
226 ///
227 /// # Operation
228 ///
229 /// Does nothing.
230 ///
231 /// # Syntax
232 /// - `NOP`
233 /// - `NOP LABEL` (label is computed, but not used)
234 /// - `NOP #99`
235 NOP(PCOffset9),
236
237 /// A GETC instruction.
238 ///
239 /// # Operation
240 ///
241 /// Gets a character from the keyboard, and store it into R0 (with the high 8 bits cleared).
242 /// This is an alias for `TRAP x20`.
243 ///
244 /// # Syntax
245 /// - `GETC`
246 GETC,
247
248 /// An OUT instruction.
249 ///
250 /// # Operation
251 ///
252 /// Writes a character from `R0[7:0]` to the display. This is an alias for `TRAP x21`.
253 ///
254 /// # Syntax
255 /// - `OUT`
256 OUT,
257
258 /// A PUTC instruction.
259 ///
260 /// # Operation
261 ///
262 /// Writes a character from `R0[7:0]` to the display. This is an alias for `TRAP x21`.
263 ///
264 /// # Syntax
265 /// - `PUTC`
266 PUTC,
267
268 /// A PUTS instruction.
269 ///
270 /// # Operation
271 ///
272 /// Prints characters in consecutive memory locations until a x00 character is read.
273 /// This starts with the memory location pointed to by the address in `R0`.
274 ///
275 /// This is an alias for `TRAP x22`.
276 ///
277 /// # Syntax
278 /// - `PUTS`
279 PUTS,
280
281 /// An IN instruction.
282 ///
283 /// # Operation
284 ///
285 /// Prompts the user for a character, stores the character into `R0` (with the high 8 bits cleared).
286 /// Additionally, this prints the obtained character onto the display.
287 ///
288 /// This is an alias for `TRAP x23`.
289 ///
290 /// # Syntax
291 /// - `IN`
292 IN,
293
294 /// A PUTSP instruction.
295 ///
296 /// # Operation
297 ///
298 /// Prints characters (two characters per memory location) until a x00 character is read.
299 /// This starts with the memory location pointed to by the address in `R0`.
300 /// This first prints the character in the low 8 bits, and then the character in the high 8 bits.
301 ///
302 /// This is an alias for `TRAP x24`.
303 ///
304 /// # Syntax
305 /// - `PUTSP`
306 PUTSP,
307
308 /// A HALT instruction.
309 ///
310 /// # Operation
311 ///
312 /// Stops execution of the program. This is an alias for `TRAP x25`.
313 ///
314 /// # Syntax
315 /// - `HALT`
316 HALT
317}
318impl std::fmt::Display for AsmInstr {
319 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
320 match self {
321 Self::ADD(dr, sr1, sr2) => write!(f, "ADD {dr}, {sr1}, {sr2}"),
322 Self::AND(dr, sr1, sr2) => write!(f, "AND {dr}, {sr1}, {sr2}"),
323 Self::BR(cc, off) => {
324 if cc != &0 {
325 write!(f, "BR")?;
326 if cc & 0b100 != 0 { f.write_char('n')?; };
327 if cc & 0b010 != 0 { f.write_char('z')?; };
328 if cc & 0b001 != 0 { f.write_char('p')?; };
329 } else {
330 write!(f, "NOP")?;
331 }
332 write!(f, " {off}")
333 },
334 Self::JMP(br) => write!(f, "JMP {br}"),
335 Self::JSR(off) => write!(f, "JSR {off}"),
336 Self::JSRR(br) => write!(f, "JSRR {br}"),
337 Self::LD(dr, off) => write!(f, "LD {dr}, {off}"),
338 Self::LDI(dr, off) => write!(f, "LDI {dr}, {off}"),
339 Self::LDR(dr, br, off) => write!(f, "LDR {dr}, {br}, {off}"),
340 Self::LEA(dr, off) => write!(f, "LEA {dr}, {off}"),
341 Self::NOT(dr, sr) => write!(f, "NOT {dr}, {sr}"),
342 Self::RET => f.write_str("RET"),
343 Self::RTI => f.write_str("RTI"),
344 Self::ST(sr, off) => write!(f, "ST {sr}, {off}"),
345 Self::STI(sr, off) => write!(f, "STI {sr}, {off}"),
346 Self::STR(sr, br, off) => write!(f, "STR {sr}, {br}, {off}"),
347 Self::TRAP(vect) => write!(f, "TRAP {vect:02X}"),
348 Self::NOP(off) => write!(f, "NOP {off}"),
349 Self::GETC => f.write_str("GETC"),
350 Self::OUT => f.write_str("OUT"),
351 Self::PUTC => f.write_str("PUTC"),
352 Self::PUTS => f.write_str("PUTS"),
353 Self::IN => f.write_str("IN"),
354 Self::PUTSP => f.write_str("PUTSP"),
355 Self::HALT => f.write_str("HALT"),
356 }
357 }
358}
359
360/// An enum representing all the possible directives in LC-3 assembly code.
361#[derive(Debug, PartialEq, Eq, Hash, Clone)]
362pub enum Directive {
363 /// An `.orig` directive.
364 ///
365 /// # Operation
366 ///
367 /// Starts a block of assembly.
368 ///
369 /// # Syntax
370 ///
371 /// `.orig ADDR`
372 Orig(Offset<u16, 16>),
373
374 /// A `.fill` directive.
375 ///
376 /// # Operation
377 ///
378 /// Writes some data into the given memory location.
379 ///
380 /// # Syntax
381 ///
382 /// `.fill DATA`
383 /// `.fill LABEL`
384 Fill(PCOffset<u16, 16>),
385
386
387 /// A `.blkw` directive.
388 ///
389 /// # Operation
390 ///
391 /// Saves a provided number of memory locations for writing into.
392 ///
393 /// # Syntax
394 ///
395 /// `.blkw N`
396 Blkw(Offset<u16, 16>),
397
398 /// A `.stringz` directive.
399 ///
400 /// # Operation
401 ///
402 /// Writes a null-terminated string into the provided location.
403 ///
404 /// # Syntax
405 ///
406 /// `.stringz "A literal"`
407 Stringz(String),
408
409 /// A `.end` directive.
410 ///
411 /// # Operation
412 ///
413 /// Closes a block started by an `.orig`.
414 ///
415 /// # Syntax
416 ///
417 /// `.end`
418 End,
419
420 /// A `.external` directive.
421 ///
422 /// # Operation
423 ///
424 /// Designates that a label is external,
425 /// meaning it is not defined within the file and must be linked in.
426 ///
427 /// # Syntax
428 ///
429 /// `.external LABEL`
430 External(Label),
431}
432impl std::fmt::Display for Directive {
433 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
434 match self {
435 Self::Orig(addr) => write!(f, ".orig {addr:04X}"),
436 Self::Fill(val) => write!(f, ".fill {val}"),
437 Self::Blkw(n) => write!(f, ".blkw {n}"),
438 Self::Stringz(val) => write!(f, ".stringz {val:?}"),
439 Self::End => write!(f, ".end"),
440 Self::External(lb) => write!(f, ".external {lb}"),
441 }
442 }
443}
444
445/// Either an instruction or a directive.
446#[derive(Debug, PartialEq, Eq, Hash, Clone)]
447pub enum StmtKind {
448 #[allow(missing_docs)]
449 Instr(AsmInstr),
450 #[allow(missing_docs)]
451 Directive(Directive)
452}
453impl std::fmt::Display for StmtKind {
454 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
455 match self {
456 StmtKind::Instr(i) => i.fmt(f),
457 StmtKind::Directive(d) => d.fmt(f),
458 }
459 }
460}
461
462/// A "statement" in LC-3 assembly.
463///
464/// While not a defined term in LC-3 assembly,
465/// a statement here refers to either an instruction or a directive,
466/// and the labels that are associated with it.
467#[derive(Debug, PartialEq, Eq, Hash, Clone)]
468pub struct Stmt {
469 /// The labels.
470 pub labels: Vec<Label>,
471 /// The instruction or directive.
472 pub nucleus: StmtKind,
473 /// The span of the nucleus.
474 pub span: std::ops::Range<usize>
475}
476impl std::fmt::Display for Stmt {
477 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
478 for label in &self.labels {
479 label.fmt(f)?;
480 f.write_char(' ')?;
481 }
482 self.nucleus.fmt(f)
483 }
484}
485
486/// Attempts to disassemble a line of bytecode back into an assembly instruction,
487/// returning `None` if it cannot be disassembled.
488pub fn try_disassemble_line(word: u16) -> Option<Stmt> {
489 // All words before 0x0200 are NOPs with offsets.
490 let si = match word >= 0x0200 {
491 true => super::sim::SimInstr::decode(word).ok(),
492 false => None,
493 }?;
494
495 let ai = match si {
496 super::sim::SimInstr::BR(cc, off) => AsmInstr::BR(cc, PCOffset::Offset(off)),
497 super::sim::SimInstr::ADD(dr, sr1, sr2) => AsmInstr::ADD(dr, sr1, sr2),
498 super::sim::SimInstr::LD(dr, off) => AsmInstr::LD(dr, PCOffset::Offset(off)),
499 super::sim::SimInstr::ST(sr, off) => AsmInstr::ST(sr, PCOffset::Offset(off)),
500 super::sim::SimInstr::JSR(off) => match off {
501 ImmOrReg::Imm(imm) => AsmInstr::JSR(PCOffset::Offset(imm)),
502 ImmOrReg::Reg(reg) => AsmInstr::JSRR(reg),
503 },
504 super::sim::SimInstr::AND(dr, sr1, sr2) => AsmInstr::AND(dr, sr1, sr2),
505 super::sim::SimInstr::LDR(dr, br, off) => AsmInstr::LDR(dr, br, off),
506 super::sim::SimInstr::STR(sr, br, off) => AsmInstr::STR(sr, br, off),
507 super::sim::SimInstr::RTI => AsmInstr::RTI,
508 super::sim::SimInstr::NOT(dr, sr) => AsmInstr::NOT(dr, sr),
509 super::sim::SimInstr::LDI(dr, off) => AsmInstr::LDI(dr, PCOffset::Offset(off)),
510 super::sim::SimInstr::STI(sr, off) => AsmInstr::STI(sr, PCOffset::Offset(off)),
511 super::sim::SimInstr::JMP(Reg::R7) => AsmInstr::RET,
512 super::sim::SimInstr::JMP(br) => AsmInstr::JMP(br),
513 super::sim::SimInstr::LEA(dr, off) => AsmInstr::LEA(dr, PCOffset::Offset(off)),
514 super::sim::SimInstr::TRAP(vect) if vect.get() == 0x20 => AsmInstr::GETC,
515 super::sim::SimInstr::TRAP(vect) if vect.get() == 0x21 => AsmInstr::PUTC,
516 super::sim::SimInstr::TRAP(vect) if vect.get() == 0x22 => AsmInstr::PUTS,
517 super::sim::SimInstr::TRAP(vect) if vect.get() == 0x23 => AsmInstr::IN,
518 super::sim::SimInstr::TRAP(vect) if vect.get() == 0x24 => AsmInstr::PUTSP,
519 super::sim::SimInstr::TRAP(vect) if vect.get() == 0x25 => AsmInstr::HALT,
520 super::sim::SimInstr::TRAP(vect) => AsmInstr::TRAP(vect),
521 };
522
523 Some(Stmt {
524 labels: vec![],
525 nucleus: StmtKind::Instr(ai),
526 span: 0..0
527 })
528}
529
530/// Attempts to disassemble a line of bytecode back into an assembly instruction,
531/// returning a `.fill` directive if not possible.
532pub fn disassemble_line(word: u16) -> Stmt {
533 try_disassemble_line(word)
534 .unwrap_or_else(|| {
535 let fill = Directive::Fill(PCOffset::Offset(super::Offset::new_trunc(word)));
536
537 Stmt {
538 labels: vec![],
539 nucleus: StmtKind::Directive(fill),
540 span: 0..0
541 }
542 })
543}
544
545/// Attempts to disassemble bytecode back into assembly instructions.
546pub fn disassemble(data: &[u16]) -> Vec<Stmt> {
547 data.iter()
548 .copied()
549 .map(disassemble_line)
550 .collect()
551}