lc3_ensemble/
ast.rs

1//! Components relating to the abstract syntax trees (ASTs) 
2//! used in representing assembly instructions.
3//! 
4//! These components together are used to construct... 
5//! - [`asm::AsmInstr`] (a data structure holding an assembly source code instruction),
6//! - [`asm::Directive`] (a data structure holding an assembly source code directive),
7//! - and [`sim::SimInstr`] (a data structure holding a bytecode instruction).
8
9pub mod asm;
10pub mod sim;
11
12use std::fmt::Write as _;
13use std::num::TryFromIntError;
14use offset_base::OffsetBacking;
15
/// One of the eight registers of the register file (`R0` through `R7`).
/// 
/// A `Reg` is obtained either by naming one of the enum variants directly,
/// or by converting a register number with [`Reg::try_from`].
/// 
/// ## Examples
/// 
/// ```text
/// AND R0, R0, #0
///     ~~  ~~    
/// ADD R1, R1, R0
///     ~~  ~~  ~~
/// LD R2, VALUE
///    ~~         
/// NOT R1, R2
///     ~~  ~~    
/// ```
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Reg {
    /// Register number 0.
    R0 = 0,
    /// Register number 1.
    R1 = 1,
    /// Register number 2.
    R2 = 2,
    /// Register number 3.
    R3 = 3,
    /// Register number 4.
    R4 = 4,
    /// Register number 5.
    R5 = 5,
    /// Register number 6.
    R6 = 6,
    /// Register number 7.
    R7 = 7
}
impl Reg {
    /// How many registers the LC-3 ISA defines.
    pub(crate) const REG_SIZE: usize = 8;

    /// Returns the number of this [`Reg`], which is always in the range `0..=7`.
    pub fn reg_no(self) -> u8 {
        // The discriminants are declared to be the register numbers.
        self as u8
    }
}
impl std::fmt::Display for Reg {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The text is produced from fresh format arguments, so any
        // width/fill/precision requested by the caller is not applied.
        let number = self.reg_no();
        f.write_fmt(format_args!("R{}", number))
    }
}
impl From<Reg> for usize {
    // Lets a register be used directly as an index (e.g., into a register file).
    fn from(value: Reg) -> Self {
        let number: u8 = value.reg_no();
        number.into()
    }
}
impl TryFrom<u8> for Reg {
    type Error = TryFromIntError;

    /// Converts a register number into a [`Reg`], failing for any number above 7.
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        // Every register, positioned at the index equal to its number.
        const BY_NUMBER: [Reg; Reg::REG_SIZE] = [
            Reg::R0, Reg::R1, Reg::R2, Reg::R3,
            Reg::R4, Reg::R5, Reg::R6, Reg::R7,
        ];

        match BY_NUMBER.get(usize::from(value)) {
            Some(&reg) => Ok(reg),
            // `TryFromIntError` has no public constructor, so one is obtained
            // from an integer conversion that cannot succeed.
            None => Err(u8::try_from(u16::MAX).expect_err("should've been TryFromIntError")),
        }
    }
}
91
/// The condition code operand of `BR`: a 3-bit value, so it must lie between 0 and 7.
/// 
/// Each `BR` variant corresponds to one code, as listed below:
/// 
/// | instruction   | code (bin) |
/// |---------------|------------|
/// | `NOP`         | `000`      |
/// | `BRn`         | `100`      |
/// | `BRz`         | `010`      |
/// | `BRnz`        | `110`      |
/// | `BRp`         | `001`      |
/// | `BRnp`        | `101`      |
/// | `BRzp`        | `011`      |
/// | `BR`, `BRnzp` | `111`      |
/// 
pub type CondCode = u8;
108
109/// A value representing a signed offset or a signed immediate value.
110/// 
111/// `N` indicates the maximum bit size of this offset/immediate value.
112/// 
113/// ## Examples
114/// 
115/// `IOffset<5>` is used to represent `ADD`/`AND`'s imm5 operand:
116/// 
117/// ```text
118/// AND R0, R0, #0
119///             ~~
120/// ADD R1, R1, #1
121///             ~~
122/// ```
123/// 
124/// They are also used for numeric register or PC offset values:
125/// ```text
126/// BR x-F
127///    ~~~
128/// JSR #99
129///     ~~~
130/// LD R0, #10
131///        ~~~
132/// LDR R0, R0, #9
133///             ~~
134/// ```
135pub type IOffset<const N: u32> = Offset<i16, N>;
136/// An unsigned 8-bit trap vector (used for `TRAP`).
137/// 
138/// ## Examples
139/// 
140/// ```text
141/// TRAP x25
142///      ~~~
143/// ```
144pub type TrapVect8 = Offset<u16, 8>;
145
146/// A value representing either an immediate value or a register.
147/// 
148/// This is used to handle cases where an operand can be either 
149/// an immediate value or a register (e.g., in `AND` or `ADD`).
150/// 
151/// ## Examples
152/// ```text
153/// AND R0, R0, #0
154/// AND R1, R1, R1
155/// ADD R2, R2, #2
156/// ADD R3, R3, R3
157///             ^^
158/// ```
159#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
160pub enum ImmOrReg<const N: u32> {
161    #[allow(missing_docs)]
162    Imm(IOffset<N>),
163    #[allow(missing_docs)]
164    Reg(Reg)
165}
166impl<const N: u32> std::fmt::Display for ImmOrReg<N> {
167    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
168        match self {
169            ImmOrReg::Imm(imm) => imm.fmt(f),
170            ImmOrReg::Reg(reg) => reg.fmt(f),
171        }
172    }
173}
174
/// An offset or immediate value that is restricted to a fixed number of bits.
/// 
/// `OFF` is the backing type that stores the value.
/// Whether the offset is signed follows from the signedness of `OFF`:
/// - `Offset<i16, _>`: signed offset (also aliased as [`IOffset`])
/// - `Offset<u16, _>`: unsigned offset
/// 
/// `N` is the maximum number of bits the offset/immediate value may occupy.
/// 
/// ## Examples
/// 
/// - `Offset<i16, 5>` represents the imm5 operand of `ADD`/`AND`. 
///     [`IOffset`] lists further uses.
/// - `Offset<u16, 8>` represents the `trapvect8` operand of the `TRAP` instruction.
///     [`TrapVect8`] lists further uses.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub struct Offset<OFF, const N: u32>(OFF);

// Each formatting impl below writes a one-character prefix marking the radix
// and then lets the backing value format itself with the caller's formatter.

impl<OFF: std::fmt::Display, const N: u32> std::fmt::Display for Offset<OFF, N> {
    /// Decimal form: `#` followed by the value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Offset(value) = self;
        f.write_str("#")?;
        <OFF as std::fmt::Display>::fmt(value, f)
    }
}
impl<OFF: std::fmt::Binary, const N: u32> std::fmt::Binary for Offset<OFF, N> {
    /// Binary form: `b` followed by the value in base 2.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Offset(value) = self;
        f.write_str("b")?;
        <OFF as std::fmt::Binary>::fmt(value, f)
    }
}
impl<OFF: std::fmt::LowerHex, const N: u32> std::fmt::LowerHex for Offset<OFF, N> {
    /// Hexadecimal form (lowercase digits): `x` followed by the value in base 16.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Offset(value) = self;
        f.write_str("x")?;
        <OFF as std::fmt::LowerHex>::fmt(value, f)
    }
}
impl<OFF: std::fmt::UpperHex, const N: u32> std::fmt::UpperHex for Offset<OFF, N> {
    /// Hexadecimal form (uppercase digits): `x` followed by the value in base 16.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Offset(value) = self;
        f.write_str("x")?;
        <OFF as std::fmt::UpperHex>::fmt(value, f)
    }
}
217
/// The ways in which [`Offset::new`] can fail.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum OffsetNewErr {
    /// The value does not fit in an unsigned integer of the given bit size.
    CannotFitUnsigned(u32),
    /// The value does not fit in a signed integer of the given bit size.
    CannotFitSigned(u32)
}

impl std::fmt::Display for OffsetNewErr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Both variants carry the bit size that the value failed to fit in.
        match *self {
            Self::CannotFitUnsigned(n) => {
                f.write_fmt(format_args!("value is too big for unsigned {n}-bit integer"))
            }
            Self::CannotFitSigned(n) => {
                f.write_fmt(format_args!("value is too big for signed {n}-bit integer"))
            }
        }
    }
}
impl std::error::Error for OffsetNewErr {}
236impl crate::err::Error for OffsetNewErr {
237    fn help(&self) -> Option<std::borrow::Cow<str>> {
238        use std::borrow::Cow;
239
240        let error = match self {
241            OffsetNewErr::CannotFitUnsigned(n) => Cow::from(format!("the range for an unsigned {n}-bit integer is [0, {}]", (1 << n) - 1)),
242            OffsetNewErr::CannotFitSigned(n) => Cow::from(format!("the range for a signed {n}-bit integer is [{}, {}]", (-1) << (n - 1), (1 << (n - 1)) - 1)),
243        };
244
245        Some(error)
246    }
247}
248
249mod offset_base {
250    use super::OffsetNewErr;
251
252    /// Any type that could store a value for [`Offset`].
253    /// 
254    /// [`Offset`]: super::Offset
255    pub trait OffsetBacking: Copy + Eq {
256        /// How many bits are contained within this backing.
257        /// 
258        /// For example, `u16` has 16 bits and thus BITS == 16.
259        const BITS: u32;
260
261        /// Truncates the given value to the provided `bit_size`.
262        /// 
263        /// This bit size is always known to be less than BITS.
264        fn truncate(self, bit_size: u32) -> Self;
265
266        /// The error to raise if a given value doesn't match
267        /// its provided value when truncated to a given `bit_size`.
268        fn does_not_fit_error(bit_size: u32) -> OffsetNewErr;
269    }
270    
271    macro_rules! impl_offset_backing_for_ints {
272        ($($Int:ty: $Err:ident),*) => {
273            $(
274                impl OffsetBacking for $Int {
275                    const BITS: u32 = Self::BITS;
276                
277                    fn truncate(self, bit_size: u32) -> Self {
278                        (self << (Self::BITS - bit_size)) >> (Self::BITS - bit_size)
279                    }
280
281                    fn does_not_fit_error(bit_size: u32) -> OffsetNewErr {
282                        OffsetNewErr::$Err(bit_size)
283                    }
284                }
285            )*
286        }
287    }
288    impl_offset_backing_for_ints! {
289        u16: CannotFitUnsigned,
290        i16: CannotFitSigned
291    }
292}
293
294impl<OFF: OffsetBacking, const N: u32> Offset<OFF, N> {
295    /// Creates a new offset value.
296    /// This must fit within `N` bits of the representation, otherwise an error is raised.
297    /// 
298    /// # Examples
299    /// 
300    /// ```
301    /// # use lc3_ensemble::ast::Offset;
302    /// #
303    /// // Signed:
304    /// let neg5 = Offset::<i16, 5>::new(-5);
305    /// let pos15 = Offset::<i16, 5>::new(15);
306    /// let pos16 = Offset::<i16, 5>::new(16);
307    /// assert!(neg5.is_ok());
308    /// assert!(pos15.is_ok());
309    /// assert!(pos16.is_err());
310    /// 
311    /// // Unsigned:
312    /// let pos15 = Offset::<u16, 5>::new(15);
313    /// let pos16 = Offset::<u16, 5>::new(16);
314    /// let pos32 = Offset::<u16, 5>::new(32);
315    /// assert!(pos15.is_ok());
316    /// assert!(pos16.is_ok());
317    /// assert!(pos32.is_err());
318    /// ```
319    /// 
320    /// # Panics
321    /// 
322    /// This will panic if `N` is larger than the offset backing (e.g., for backing `u16`, larger than 16).
323    /// 
324    /// ```should_panic
325    /// # use lc3_ensemble::ast::Offset;
326    /// #
327    /// let oh_no = Offset::<i16, 17>::new(18);
328    /// ```
329    pub fn new(n: OFF) -> Result<Self, OffsetNewErr> {
330        assert!(N <= OFF::BITS, "bit size {N} exceeds size of backing ({})", OFF::BITS);
331        match n == n.truncate(N) {
332            true  => Ok(Offset(n)),
333            false => Err(OFF::does_not_fit_error(N)),
334        }
335    }
336
337    /// Creates a new offset by extending the first N bits of the integer,
338    /// and discarding the rest.
339    /// 
340    /// The extension is considered sign-extended if the offset's backing is signed,
341    /// and zero-extended if the offset's backing is unsigned.
342    /// 
343    /// # Examples
344    /// 
345    /// ```
346    /// # use lc3_ensemble::ast::Offset;
347    /// #
348    /// // Signed:
349    /// let neg5 = Offset::<i16, 5>::new_trunc(-5);  // 0b11111111111_11011
350    /// let pos15 = Offset::<i16, 5>::new_trunc(15); // 0b00000000000_01111
351    /// let pos16 = Offset::<i16, 5>::new_trunc(16); // 0b00000000000_10000
352    /// assert_eq!(neg5.get(),   -5); // 0b11011
353    /// assert_eq!(pos15.get(),  15); // 0b01111
354    /// assert_eq!(pos16.get(), -16); // 0b10000
355    /// 
356    /// // Unsigned:
357    /// let pos15 = Offset::<u16, 5>::new_trunc(15); // 0b00000000000_01111
358    /// let pos16 = Offset::<u16, 5>::new_trunc(16); // 0b00000000000_10000
359    /// let pos32 = Offset::<u16, 5>::new_trunc(32); // 0b00000000001_00000
360    /// assert_eq!(pos15.get(), 15); // 01111
361    /// assert_eq!(pos16.get(), 16); // 10000
362    /// assert_eq!(pos32.get(),  0); // 00000
363    /// ```
364    /// 
365    /// # Panics
366    /// 
367    /// This will panic if `N` is larger than the offset backing (e.g., for backing `u16`, larger than 16).
368    /// 
369    /// ```should_panic
370    /// # use lc3_ensemble::ast::Offset;
371    /// #
372    /// let oh_no = Offset::<i16, 17>::new_trunc(18);
373    /// ```
374    pub fn new_trunc(n: OFF) -> Self {
375        assert!(N <= OFF::BITS, "bit size {N} exceeds size of backing ({})", OFF::BITS);
376        Self(n.truncate(N))
377    }
378
379    /// Gets the value of the offset.
380    pub fn get(&self) -> OFF {
381        self.0
382    }
383}
384
385/// An offset or a label.
386/// 
387/// This is used to represent [`PCOffset`] operands 
388/// (such as the `PCOffset9` operand in `LD` and `ST` 
389/// and the `PCOffset11` operand in `JSR`).
390/// 
391/// During the first assembly pass, the label is resolved and
392/// replaced with a regular [`Offset`] value.
393/// 
394/// ## Examples
395/// ```text
396/// LD R2, VALUE
397///        ~~~~~
398/// BRz END
399///     ~~~
400/// BR #-99
401///    ~~~~
402/// JSR SUBROUTINE
403///     ~~~~~~~~~~
404/// ```
405#[derive(Debug, PartialEq, Eq, Hash, Clone)]
406pub enum PCOffset<OFF, const N: u32> {
407    #[allow(missing_docs)]
408    Offset(Offset<OFF, N>),
409    #[allow(missing_docs)]
410    Label(Label)
411}
412impl<OFF, const N: u32> std::fmt::Display for PCOffset<OFF, N> 
413    where Offset<OFF, N>: std::fmt::Display
414{
415    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
416        match self {
417            PCOffset::Offset(off)  => off.fmt(f),
418            PCOffset::Label(label) => label.fmt(f),
419        }
420    }
421}
422
/// A label.
/// 
/// A `Label` holds the label's name (available through the `name` field)
/// together with the location of the label in the assembly source code
/// (available through [`Label::span`]).
/// 
/// # Examples
/// ```text
/// .orig x3000
/// AND R0, R0, #0
/// LD R2, VALUE
///        ~~~~~
/// LOOP:
/// ~~~~
///     NOT R1, R2
///     ADD R1, R1, #1
///     ADD R1, R1, R0
///     BRz END
///         ~~~
///     ADD R0, R0, #1
///     BR LOOP
///        ~~~~
/// END: HALT
/// ~~~
/// VALUE: .fill #8464
/// ~~~~~
/// .end
/// ```
#[derive(Clone, PartialEq, Eq, Hash, Debug, Default)]
pub struct Label {
    /// The label's identifier
    pub name: String,

    /// Where the label begins in the assembly source code.
    /// 
    /// Only the start of the span is kept: the end can be recomputed
    /// from the start and the length of `name`, so storing it as well
    /// would only make the struct one `usize` larger.
    start: usize
}
impl Label {
    /// Creates a new label from its name and its span in the source code.
    /// 
    /// The span is expected to be exactly as long as `name`
    /// (this is checked only in builds with debug assertions enabled).
    pub fn new(name: String, span: std::ops::Range<usize>) -> Self {
        let std::ops::Range { start, end } = span;
        debug_assert_eq!(start + name.len(), end, "span should have the same length as name");
        Self { name, start }
    }
    /// Returns the span of the label in assembly source code.
    pub fn span(&self) -> std::ops::Range<usize> {
        // The end is not stored; rebuild it from the start and the name's length.
        let end = self.start + self.name.len();
        self.start..end
    }
}
impl std::fmt::Display for Label {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A label displays as its name, honoring the caller's format options.
        std::fmt::Display::fmt(self.name.as_str(), f)
    }
}