lc3_ensemble/ast.rs
1//! Components relating to the abstract syntax trees (ASTs)
2//! used in representing assembly instructions.
3//!
4//! These components together are used to construct...
5//! - [`asm::AsmInstr`] (a data structure holding an assembly source code instruction),
6//! - [`asm::Directive`] (a data structure holding an assembly source code directive),
7//! - and [`sim::SimInstr`] (a data structure holding a bytecode instruction).
8
9pub mod asm;
10pub mod sim;
11
12use std::fmt::Write as _;
13use std::num::TryFromIntError;
14use offset_base::OffsetBacking;
15
/// A register. Must be between 0 and 7.
///
/// This `Reg` enum can either be constructed by naming a variant directly
/// (e.g., `Reg::R3`), or by converting a register number with [`Reg::try_from`].
///
/// Each variant's discriminant is its register number.
///
/// ## Examples
///
/// The marked operands are registers:
///
/// ```text
/// AND R0, R0, #0
///     ~~  ~~
/// ADD R1, R1, R0
///     ~~  ~~  ~~
/// LD R2, VALUE
///    ~~
/// NOT R1, R2
///     ~~  ~~
/// ```
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Reg {
    /// The 0th register in the register file.
    R0 = 0,
    /// The 1st register in the register file.
    R1 = 1,
    /// The 2nd register in the register file.
    R2 = 2,
    /// The 3rd register in the register file.
    R3 = 3,
    /// The 4th register in the register file.
    R4 = 4,
    /// The 5th register in the register file.
    R5 = 5,
    /// The 6th register in the register file.
    R6 = 6,
    /// The 7th register in the register file.
    R7 = 7
}
impl Reg {
    /// The number of registers defined by the LC-3 ISA.
    ///
    /// This equals the number of [`Reg`] variants (`R0` through `R7`).
    pub(crate) const REG_SIZE: usize = 8;

    /// Gets the register number of this [`Reg`]. This is always between 0 and 7.
    ///
    /// The number is the variant's discriminant (`Reg::R3.reg_no() == 3`).
    pub fn reg_no(self) -> u8 {
        // Every variant declares an explicit discriminant in 0..=7, so this cast is lossless.
        self as u8
    }
}
61impl std::fmt::Display for Reg {
62 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63 // formatting parameters should have no effect here
64 write!(f, "R{}", self.reg_no())
65 }
66}
67impl From<Reg> for usize {
68 // Used for indexing the reg file in [`ast::Sim`].
69 fn from(value: Reg) -> Self {
70 usize::from(value.reg_no())
71 }
72}
73impl TryFrom<u8> for Reg {
74 type Error = TryFromIntError;
75
76 fn try_from(value: u8) -> Result<Self, Self::Error> {
77 match value {
78 0 => Ok(Reg::R0),
79 1 => Ok(Reg::R1),
80 2 => Ok(Reg::R2),
81 3 => Ok(Reg::R3),
82 4 => Ok(Reg::R4),
83 5 => Ok(Reg::R5),
84 6 => Ok(Reg::R6),
85 7 => Ok(Reg::R7),
86 // HACKy, but there's no other way to create this error
87 _ => u8::try_from(256).map(|_| unreachable!("should've been TryFromIntError")),
88 }
89 }
90}
91
/// A condition code (used for `BR`), must be between 0 and 7.
///
/// Because this is a plain alias for `u8`, the type itself does not enforce that range.
///
/// The condition codes are listed below:
///
/// | instruction   | code (bin) |
/// |---------------|------------|
/// | `NOP`         | `000`      |
/// | `BRn`         | `100`      |
/// | `BRz`         | `010`      |
/// | `BRnz`        | `110`      |
/// | `BRp`         | `001`      |
/// | `BRnp`        | `101`      |
/// | `BRzp`        | `011`      |
/// | `BR`, `BRnzp` | `111`      |
///
pub type CondCode = u8;
108
/// A value representing a signed offset or a signed immediate value.
///
/// This is an alias for [`Offset`] with an `i16` backing.
///
/// `N` indicates the maximum bit size of this offset/immediate value.
///
/// ## Examples
///
/// `IOffset<5>` is used to represent `ADD`/`AND`'s imm5 operand:
///
/// ```text
/// AND R0, R0, #0
///             ~~
/// ADD R1, R1, #1
///             ~~
/// ```
///
/// They are also used for numeric register or PC offset values:
/// ```text
/// BR x-F
///    ~~~
/// JSR #99
///     ~~~
/// LD R0, #10
///        ~~~
/// LDR R0, R0, #9
///             ~~
/// ```
pub type IOffset<const N: u32> = Offset<i16, N>;
/// An unsigned 8-bit trap vector (used for `TRAP`).
///
/// This is an alias for [`Offset`] with a `u16` backing and a bit size of 8.
///
/// ## Examples
///
/// ```text
/// TRAP x25
///      ~~~
/// ```
pub type TrapVect8 = Offset<u16, 8>;
145
/// A value representing either an immediate value or a register.
///
/// This is used to handle cases where an operand can be either
/// an immediate value or a register (e.g., in `AND` or `ADD`).
///
/// `N` is the bit size of the immediate value, should the operand be one.
///
/// ## Examples
///
/// The last operand of each instruction below is an `ImmOrReg`:
///
/// ```text
/// AND R0, R0, #0
/// AND R1, R1, R1
/// ADD R2, R2, #2
/// ADD R3, R3, R3
///             ^^
/// ```
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum ImmOrReg<const N: u32> {
    /// The operand is a signed immediate value of at most `N` bits.
    #[allow(missing_docs)]
    Imm(IOffset<N>),
    /// The operand is a register.
    #[allow(missing_docs)]
    Reg(Reg)
}
166impl<const N: u32> std::fmt::Display for ImmOrReg<N> {
167 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
168 match self {
169 ImmOrReg::Imm(imm) => imm.fmt(f),
170 ImmOrReg::Reg(reg) => reg.fmt(f),
171 }
172 }
173}
174
/// A value representing an offset or an immediate value.
///
/// The `OFF` type represents the backing type of this offset.
/// The signedness of this offset type is dependent on the signedness of the `OFF` type:
/// - `Offset<i16, _>`: signed offset (also aliased as [`IOffset`])
/// - `Offset<u16, _>`: unsigned offset
///
/// `N` indicates the maximum bit size of this offset/immediate value.
///
/// The inner value is private; offsets are created with [`Offset::new`]
/// or [`Offset::new_trunc`] and read back with [`Offset::get`].
///
/// ## Examples
///
/// - `Offset<i16, 5>` is used to represent `ADD`/`AND`'s imm5 operand.
///   See [`IOffset`] for more examples of its use.
/// - `Offset<u16, 8>` is used to represent the `trapvect8` operand of the `TRAP` instruction.
///   See [`TrapVect8`] for more examples of its use.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub struct Offset<OFF, const N: u32>(OFF);
192
193impl<OFF: std::fmt::Display, const N: u32> std::fmt::Display for Offset<OFF, N> {
194 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
195 f.write_char('#')?;
196 self.0.fmt(f)
197 }
198}
199impl<OFF: std::fmt::Binary, const N: u32> std::fmt::Binary for Offset<OFF, N> {
200 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
201 f.write_char('b')?;
202 self.0.fmt(f)
203 }
204}
205impl<OFF: std::fmt::LowerHex, const N: u32> std::fmt::LowerHex for Offset<OFF, N> {
206 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
207 f.write_char('x')?;
208 self.0.fmt(f)
209 }
210}
211impl<OFF: std::fmt::UpperHex, const N: u32> std::fmt::UpperHex for Offset<OFF, N> {
212 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
213 f.write_char('x')?;
214 self.0.fmt(f)
215 }
216}
217
/// The errors that can result from calling [`Offset::new`].
///
/// Each variant carries the bit size (`N`) that the rejected value had to fit in.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum OffsetNewErr {
    /// The provided offset cannot fit an unsigned integer of the given bitsize.
    CannotFitUnsigned(u32),
    /// The provided offset cannot fit a signed integer of the given bitsize.
    CannotFitSigned(u32)
}
226
227impl std::fmt::Display for OffsetNewErr {
228 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
229 match self {
230 OffsetNewErr::CannotFitUnsigned(n) => write!(f, "value is too big for unsigned {n}-bit integer"),
231 OffsetNewErr::CannotFitSigned(n) => write!(f, "value is too big for signed {n}-bit integer"),
232 }
233 }
234}
235impl std::error::Error for OffsetNewErr {}
236impl crate::err::Error for OffsetNewErr {
237 fn help(&self) -> Option<std::borrow::Cow<str>> {
238 use std::borrow::Cow;
239
240 let error = match self {
241 OffsetNewErr::CannotFitUnsigned(n) => Cow::from(format!("the range for an unsigned {n}-bit integer is [0, {}]", (1 << n) - 1)),
242 OffsetNewErr::CannotFitSigned(n) => Cow::from(format!("the range for a signed {n}-bit integer is [{}, {}]", (-1) << (n - 1), (1 << (n - 1)) - 1)),
243 };
244
245 Some(error)
246 }
247}
248
249mod offset_base {
250 use super::OffsetNewErr;
251
252 /// Any type that could store a value for [`Offset`].
253 ///
254 /// [`Offset`]: super::Offset
255 pub trait OffsetBacking: Copy + Eq {
256 /// How many bits are contained within this backing.
257 ///
258 /// For example, `u16` has 16 bits and thus BITS == 16.
259 const BITS: u32;
260
261 /// Truncates the given value to the provided `bit_size`.
262 ///
263 /// This bit size is always known to be less than BITS.
264 fn truncate(self, bit_size: u32) -> Self;
265
266 /// The error to raise if a given value doesn't match
267 /// its provided value when truncated to a given `bit_size`.
268 fn does_not_fit_error(bit_size: u32) -> OffsetNewErr;
269 }
270
271 macro_rules! impl_offset_backing_for_ints {
272 ($($Int:ty: $Err:ident),*) => {
273 $(
274 impl OffsetBacking for $Int {
275 const BITS: u32 = Self::BITS;
276
277 fn truncate(self, bit_size: u32) -> Self {
278 (self << (Self::BITS - bit_size)) >> (Self::BITS - bit_size)
279 }
280
281 fn does_not_fit_error(bit_size: u32) -> OffsetNewErr {
282 OffsetNewErr::$Err(bit_size)
283 }
284 }
285 )*
286 }
287 }
288 impl_offset_backing_for_ints! {
289 u16: CannotFitUnsigned,
290 i16: CannotFitSigned
291 }
292}
293
294impl<OFF: OffsetBacking, const N: u32> Offset<OFF, N> {
295 /// Creates a new offset value.
296 /// This must fit within `N` bits of the representation, otherwise an error is raised.
297 ///
298 /// # Examples
299 ///
300 /// ```
301 /// # use lc3_ensemble::ast::Offset;
302 /// #
303 /// // Signed:
304 /// let neg5 = Offset::<i16, 5>::new(-5);
305 /// let pos15 = Offset::<i16, 5>::new(15);
306 /// let pos16 = Offset::<i16, 5>::new(16);
307 /// assert!(neg5.is_ok());
308 /// assert!(pos15.is_ok());
309 /// assert!(pos16.is_err());
310 ///
311 /// // Unsigned:
312 /// let pos15 = Offset::<u16, 5>::new(15);
313 /// let pos16 = Offset::<u16, 5>::new(16);
314 /// let pos32 = Offset::<u16, 5>::new(32);
315 /// assert!(pos15.is_ok());
316 /// assert!(pos16.is_ok());
317 /// assert!(pos32.is_err());
318 /// ```
319 ///
320 /// # Panics
321 ///
322 /// This will panic if `N` is larger than the offset backing (e.g., for backing `u16`, larger than 16).
323 ///
324 /// ```should_panic
325 /// # use lc3_ensemble::ast::Offset;
326 /// #
327 /// let oh_no = Offset::<i16, 17>::new(18);
328 /// ```
329 pub fn new(n: OFF) -> Result<Self, OffsetNewErr> {
330 assert!(N <= OFF::BITS, "bit size {N} exceeds size of backing ({})", OFF::BITS);
331 match n == n.truncate(N) {
332 true => Ok(Offset(n)),
333 false => Err(OFF::does_not_fit_error(N)),
334 }
335 }
336
337 /// Creates a new offset by extending the first N bits of the integer,
338 /// and discarding the rest.
339 ///
340 /// The extension is considered sign-extended if the offset's backing is signed,
341 /// and zero-extended if the offset's backing is unsigned.
342 ///
343 /// # Examples
344 ///
345 /// ```
346 /// # use lc3_ensemble::ast::Offset;
347 /// #
348 /// // Signed:
349 /// let neg5 = Offset::<i16, 5>::new_trunc(-5); // 0b11111111111_11011
350 /// let pos15 = Offset::<i16, 5>::new_trunc(15); // 0b00000000000_01111
351 /// let pos16 = Offset::<i16, 5>::new_trunc(16); // 0b00000000000_10000
352 /// assert_eq!(neg5.get(), -5); // 0b11011
353 /// assert_eq!(pos15.get(), 15); // 0b01111
354 /// assert_eq!(pos16.get(), -16); // 0b10000
355 ///
356 /// // Unsigned:
357 /// let pos15 = Offset::<u16, 5>::new_trunc(15); // 0b00000000000_01111
358 /// let pos16 = Offset::<u16, 5>::new_trunc(16); // 0b00000000000_10000
359 /// let pos32 = Offset::<u16, 5>::new_trunc(32); // 0b00000000001_00000
360 /// assert_eq!(pos15.get(), 15); // 01111
361 /// assert_eq!(pos16.get(), 16); // 10000
362 /// assert_eq!(pos32.get(), 0); // 00000
363 /// ```
364 ///
365 /// # Panics
366 ///
367 /// This will panic if `N` is larger than the offset backing (e.g., for backing `u16`, larger than 16).
368 ///
369 /// ```should_panic
370 /// # use lc3_ensemble::ast::Offset;
371 /// #
372 /// let oh_no = Offset::<i16, 17>::new_trunc(18);
373 /// ```
374 pub fn new_trunc(n: OFF) -> Self {
375 assert!(N <= OFF::BITS, "bit size {N} exceeds size of backing ({})", OFF::BITS);
376 Self(n.truncate(N))
377 }
378
379 /// Gets the value of the offset.
380 pub fn get(&self) -> OFF {
381 self.0
382 }
383}
384
/// An offset or a label.
///
/// This is used to represent PC-relative offset operands
/// (such as the `PCOffset9` operand in `LD` and `ST`
/// and the `PCOffset11` operand in `JSR`).
///
/// During the first assembly pass, the label is resolved and
/// replaced with a regular [`Offset`] value.
///
/// ## Examples
/// ```text
/// LD R2, VALUE
///        ~~~~~
/// BRz END
///     ~~~
/// BR #-99
///    ~~~~
/// JSR SUBROUTINE
///     ~~~~~~~~~~
/// ```
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum PCOffset<OFF, const N: u32> {
    /// The operand is a numeric offset of at most `N` bits.
    #[allow(missing_docs)]
    Offset(Offset<OFF, N>),
    /// The operand is a label.
    #[allow(missing_docs)]
    Label(Label)
}
412impl<OFF, const N: u32> std::fmt::Display for PCOffset<OFF, N>
413 where Offset<OFF, N>: std::fmt::Display
414{
415 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
416 match self {
417 PCOffset::Offset(off) => off.fmt(f),
418 PCOffset::Label(label) => label.fmt(f),
419 }
420 }
421}
422
/// A label.
///
/// This struct stores the name of the label (accessible by the `name` field)
/// and the position where the label starts in assembly source code.
/// The full source code span is available through [`Label::span`].
///
/// # Examples
/// ```text
/// .orig x3000
/// AND R0, R0, #0
/// LD R2, VALUE
///        ~~~~~
/// LOOP:
/// ~~~~
///     NOT R1, R2
///     ADD R1, R1, #1
///     ADD R1, R1, R0
///     BRz END
///         ~~~
///     ADD R0, R0, #1
///     BR LOOP
///        ~~~~
/// END: HALT
/// ~~~
/// VALUE: .fill #8464
/// ~~~~~
/// .end
/// ```
#[derive(Clone, PartialEq, Eq, Hash, Debug, Default)]
pub struct Label {
    /// The label's identifier
    pub name: String,

    /// The start of the label in assembly source code.
    ///
    /// Since `name` stores the length of the string,
    /// we don't need to store the whole span:
    /// its end is `start + name.len()`.
    ///
    /// Storing one `usize` instead of a `Range<usize>` saves
    /// 8 bytes of space on a 64-bit machine.
    start: usize
}
463impl Label {
464 /// Creates a new label.
465 pub fn new(name: String, span: std::ops::Range<usize>) -> Self {
466 debug_assert_eq!(span.start + name.len(), span.end, "span should have the same length as name");
467 Label { name, start: span.start }
468 }
469 /// Returns the span of the label in assembly source code.
470 pub fn span(&self) -> std::ops::Range<usize> {
471 self.start .. (self.start + self.name.len())
472 }
473}
474impl std::fmt::Display for Label {
475 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
476 self.name.fmt(f)
477 }
478}