lc3_ensemble/
ast.rs

1//! Components relating to the abstract syntax trees (ASTs) 
2//! used in representing assembly instructions.
3//! 
4//! These components together are used to construct... 
5//! - [`asm::AsmInstr`] (a data structure holding an assembly source code instruction),
6//! - [`asm::Directive`] (a data structure holding an assembly source code directive),
7//! - and [`sim::SimInstr`] (a data structure holding a bytecode instruction).
8
9pub mod asm;
10pub mod sim;
11
12use std::fmt::Write as _;
13use std::num::TryFromIntError;
14use offset_base::OffsetBacking;
15
/// A register of the register file, identified by a number between 0 and 7.
///
/// A `Reg` is obtained either by naming one of the enum variants directly
/// or by converting a register number with [`Reg::try_from`].
///
/// ## Examples
///
/// ```text
/// AND R0, R0, #0
///     ~~  ~~
/// ADD R1, R1, R0
///     ~~  ~~  ~~
/// LD R2, VALUE
///    ~~
/// NOT R1, R2
///     ~~  ~~
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Reg {
    /// Register number 0 of the register file.
    R0 = 0,
    /// Register number 1 of the register file.
    R1 = 1,
    /// Register number 2 of the register file.
    R2 = 2,
    /// Register number 3 of the register file.
    R3 = 3,
    /// Register number 4 of the register file.
    R4 = 4,
    /// Register number 5 of the register file.
    R5 = 5,
    /// Register number 6 of the register file.
    R6 = 6,
    /// Register number 7 of the register file.
    R7 = 7
}
impl Reg {
    /// Returns the number of this [`Reg`], which always lies between 0 and 7.
    pub fn reg_no(self) -> u8 {
        // Every variant's discriminant is its register number.
        self as u8
    }
}
impl std::fmt::Display for Reg {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Formatter options (width, fill, ...) are deliberately not applied here.
        let number = self.reg_no();
        f.write_fmt(format_args!("R{number}"))
    }
}
impl From<Reg> for usize {
    // Lets a register be used as an index (e.g., into a register file).
    fn from(value: Reg) -> Self {
        value.reg_no().into()
    }
}
impl TryFrom<u8> for Reg {
    type Error = TryFromIntError;

    /// Converts a register number into a [`Reg`].
    ///
    /// Numbers 0 through 7 map to `R0` through `R7`; anything else is an error.
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        // Indexed by register number.
        const REGS: [Reg; 8] = [
            Reg::R0, Reg::R1, Reg::R2, Reg::R3,
            Reg::R4, Reg::R5, Reg::R6, Reg::R7,
        ];

        match REGS.get(usize::from(value)) {
            Some(&reg) => Ok(reg),
            // `TryFromIntError` cannot be constructed directly, so it is
            // borrowed from an integer conversion that is known to fail.
            None => Err(u8::try_from(256_u16).expect_err("256 does not fit in a u8")),
        }
    }
}
88
/// A condition code (used for `BR`), must be between 0 and 7.
///
/// The code is a 3-bit mask whose bits are, from most to least significant,
/// `n`, `z`, and `p` (as can be read off the table below).
/// Since this is a plain alias of `u8`, the 0–7 range is not enforced by the type.
///
/// The condition codes are listed below:
///
/// | instruction   | code (bin) |
/// |---------------|------------|
/// | `NOP`         | `000`      |
/// | `BRn`         | `100`      |
/// | `BRz`         | `010`      |
/// | `BRnz`        | `110`      |
/// | `BRp`         | `001`      |
/// | `BRnp`        | `101`      |
/// | `BRzp`        | `011`      |
/// | `BR`, `BRnzp` | `111`      |
///
pub type CondCode = u8;
105
/// A value representing a signed offset or a signed immediate value.
///
/// This is an alias for an [`Offset`] backed by an `i16`.
/// `N` indicates the maximum bit size of this offset/immediate value.
///
/// ## Examples
///
/// `IOffset<5>` is used to represent `ADD`/`AND`'s imm5 operand:
///
/// ```text
/// AND R0, R0, #0
///             ~~
/// ADD R1, R1, #1
///             ~~
/// ```
///
/// They are also used for numeric register or PC offset values:
/// ```text
/// BR x-F
///    ~~~
/// JSR #99
///     ~~~
/// LD R0, #10
///        ~~~
/// LDR R0, R0, #9
///             ~~
/// ```
pub type IOffset<const N: u32> = Offset<i16, N>;
/// An unsigned 8-bit trap vector (used for `TRAP`).
///
/// This is an alias for an [`Offset`] backed by a `u16` and limited to 8 bits.
///
/// ## Examples
///
/// ```text
/// TRAP x25
///      ~~~
/// ```
pub type TrapVect8 = Offset<u16, 8>;
142
143/// A value representing either an immediate value or a register.
144/// 
145/// This is used to handle cases where an operand can be either 
146/// an immediate value or a register (e.g., in `AND` or `ADD`).
147/// 
148/// ## Examples
149/// ```text
150/// AND R0, R0, #0
151/// AND R1, R1, R1
152/// ADD R2, R2, #2
153/// ADD R3, R3, R3
154///             ^^
155/// ```
156#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
157pub enum ImmOrReg<const N: u32> {
158    #[allow(missing_docs)]
159    Imm(IOffset<N>),
160    #[allow(missing_docs)]
161    Reg(Reg)
162}
163impl<const N: u32> std::fmt::Display for ImmOrReg<N> {
164    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
165        match self {
166            ImmOrReg::Imm(imm) => imm.fmt(f),
167            ImmOrReg::Reg(reg) => reg.fmt(f),
168        }
169    }
170}
171
172/// A value representing an offset or an immediate value.
173/// 
174/// The `OFF` type represents the backing type of this offset. 
175/// The signedness of this offset type is dependent on the signedness of the `OFF` type:
176/// - `Offset<i16, _>`: signed offset (also aliased as [`IOffset`])
177/// - `Offset<u16, _>`: unsigned offset
178/// 
179/// `N` indicates the maximum bit size of this offset/immediate value.
180/// 
181/// ## Examples
182/// 
183/// - `Offset<i16, 5>`  is used to represent `ADD`/`AND`'s imm5 operand. 
184///     See [`IOffset`] for more examples of its use.
185/// - `Offset<u16, 8>` is used to represent the `trapvect8` operand of the `TRAP` instruction.
186///     See [`TrapVect8`] for more examples of its use.
187#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
188pub struct Offset<OFF, const N: u32>(OFF);
189
190impl<OFF: std::fmt::Display, const N: u32> std::fmt::Display for Offset<OFF, N> {
191    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
192        f.write_char('#')?;
193        self.0.fmt(f)
194    }
195}
196impl<OFF: std::fmt::Binary, const N: u32> std::fmt::Binary for Offset<OFF, N> {
197    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
198        f.write_char('b')?;
199        self.0.fmt(f)
200    }
201}
202impl<OFF: std::fmt::LowerHex, const N: u32> std::fmt::LowerHex for Offset<OFF, N> {
203    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
204        f.write_char('x')?;
205        self.0.fmt(f)
206    }
207}
208impl<OFF: std::fmt::UpperHex, const N: u32> std::fmt::UpperHex for Offset<OFF, N> {
209    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
210        f.write_char('x')?;
211        self.0.fmt(f)
212    }
213}
214
215/// The errors that can result from calling [`Offset::new`].
216#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
217pub enum OffsetNewErr {
218    /// The provided offset cannot fit an unsigned integer of the given bitsize.
219    CannotFitUnsigned(u32),
220    /// The provided offset cannot fit a signed integer of the given bitsize.
221    CannotFitSigned(u32)
222}
223
224impl std::fmt::Display for OffsetNewErr {
225    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
226        match self {
227            OffsetNewErr::CannotFitUnsigned(n) => write!(f, "value is too big for unsigned {n}-bit integer"),
228            OffsetNewErr::CannotFitSigned(n) => write!(f, "value is too big for signed {n}-bit integer"),
229        }
230    }
231}
232impl std::error::Error for OffsetNewErr {}
233impl crate::err::Error for OffsetNewErr {
234    fn help(&self) -> Option<std::borrow::Cow<str>> {
235        use std::borrow::Cow;
236
237        let error = match self {
238            OffsetNewErr::CannotFitUnsigned(n) => Cow::from(format!("the range for an unsigned {n}-bit integer is [0, {}]", (1 << n) - 1)),
239            OffsetNewErr::CannotFitSigned(n) => Cow::from(format!("the range for a signed {n}-bit integer is [{}, {}]", (-1) << (n - 1), (1 << (n - 1)) - 1)),
240        };
241
242        Some(error)
243    }
244}
245
246mod offset_base {
247    use super::OffsetNewErr;
248
249    /// Any type that could store a value for [`Offset`].
250    /// 
251    /// [`Offset`]: super::Offset
252    pub trait OffsetBacking: Copy + Eq {
253        /// How many bits are contained within this backing.
254        /// 
255        /// For example, `u16` has 16 bits and thus BITS == 16.
256        const BITS: u32;
257
258        /// Truncates the given value to the provided `bit_size`.
259        /// 
260        /// This bit size is always known to be less than BITS.
261        fn truncate(self, bit_size: u32) -> Self;
262
263        /// The error to raise if a given value doesn't match
264        /// its provided value when truncated to a given `bit_size`.
265        fn does_not_fit_error(bit_size: u32) -> OffsetNewErr;
266    }
267    
268    macro_rules! impl_offset_backing_for_ints {
269        ($($Int:ty: $Err:ident),*) => {
270            $(
271                impl OffsetBacking for $Int {
272                    const BITS: u32 = Self::BITS;
273                
274                    fn truncate(self, bit_size: u32) -> Self {
275                        (self << (Self::BITS - bit_size)) >> (Self::BITS - bit_size)
276                    }
277
278                    fn does_not_fit_error(bit_size: u32) -> OffsetNewErr {
279                        OffsetNewErr::$Err(bit_size)
280                    }
281                }
282            )*
283        }
284    }
285    impl_offset_backing_for_ints! {
286        u16: CannotFitUnsigned,
287        i16: CannotFitSigned
288    }
289}
290
291impl<OFF: OffsetBacking, const N: u32> Offset<OFF, N> {
292    /// Creates a new offset value.
293    /// This must fit within `N` bits of the representation, otherwise an error is raised.
294    /// 
295    /// # Examples
296    /// 
297    /// ```
298    /// # use lc3_ensemble::ast::Offset;
299    /// #
300    /// // Signed:
301    /// let neg5 = Offset::<i16, 5>::new(-5);
302    /// let pos15 = Offset::<i16, 5>::new(15);
303    /// let pos16 = Offset::<i16, 5>::new(16);
304    /// assert!(neg5.is_ok());
305    /// assert!(pos15.is_ok());
306    /// assert!(pos16.is_err());
307    /// 
308    /// // Unsigned:
309    /// let pos15 = Offset::<u16, 5>::new(15);
310    /// let pos16 = Offset::<u16, 5>::new(16);
311    /// let pos32 = Offset::<u16, 5>::new(32);
312    /// assert!(pos15.is_ok());
313    /// assert!(pos16.is_ok());
314    /// assert!(pos32.is_err());
315    /// ```
316    /// 
317    /// # Panics
318    /// 
319    /// This will panic if `N` is larger than the offset backing (e.g., for backing `u16`, larger than 16).
320    /// 
321    /// ```should_panic
322    /// # use lc3_ensemble::ast::Offset;
323    /// #
324    /// let oh_no = Offset::<i16, 17>::new(18);
325    /// ```
326    pub fn new(n: OFF) -> Result<Self, OffsetNewErr> {
327        assert!(N <= OFF::BITS, "bit size {N} exceeds size of backing ({})", OFF::BITS);
328        match n == n.truncate(N) {
329            true  => Ok(Offset(n)),
330            false => Err(OFF::does_not_fit_error(N)),
331        }
332    }
333
334    /// Creates a new offset by extending the first N bits of the integer,
335    /// and discarding the rest.
336    /// 
337    /// The extension is considered sign-extended if the offset's backing is signed,
338    /// and zero-extended if the offset's backing is unsigned.
339    /// 
340    /// # Examples
341    /// 
342    /// ```
343    /// # use lc3_ensemble::ast::Offset;
344    /// #
345    /// // Signed:
346    /// let neg5 = Offset::<i16, 5>::new_trunc(-5);  // 0b11111111111_11011
347    /// let pos15 = Offset::<i16, 5>::new_trunc(15); // 0b00000000000_01111
348    /// let pos16 = Offset::<i16, 5>::new_trunc(16); // 0b00000000000_10000
349    /// assert_eq!(neg5.get(),   -5); // 0b11011
350    /// assert_eq!(pos15.get(),  15); // 0b01111
351    /// assert_eq!(pos16.get(), -16); // 0b10000
352    /// 
353    /// // Unsigned:
354    /// let pos15 = Offset::<u16, 5>::new_trunc(15); // 0b00000000000_01111
355    /// let pos16 = Offset::<u16, 5>::new_trunc(16); // 0b00000000000_10000
356    /// let pos32 = Offset::<u16, 5>::new_trunc(32); // 0b00000000001_00000
357    /// assert_eq!(pos15.get(), 15); // 01111
358    /// assert_eq!(pos16.get(), 16); // 10000
359    /// assert_eq!(pos32.get(),  0); // 00000
360    /// ```
361    /// 
362    /// # Panics
363    /// 
364    /// This will panic if `N` is larger than the offset backing (e.g., for backing `u16`, larger than 16).
365    /// 
366    /// ```should_panic
367    /// # use lc3_ensemble::ast::Offset;
368    /// #
369    /// let oh_no = Offset::<i16, 17>::new_trunc(18);
370    /// ```
371    pub fn new_trunc(n: OFF) -> Self {
372        assert!(N <= OFF::BITS, "bit size {N} exceeds size of backing ({})", OFF::BITS);
373        Self(n.truncate(N))
374    }
375
376    /// Gets the value of the offset.
377    pub fn get(&self) -> OFF {
378        self.0
379    }
380}
381
382/// An offset or a label.
383/// 
384/// This is used to represent [`PCOffset`] operands 
385/// (such as the `PCOffset9` operand in `LD` and `ST` 
386/// and the `PCOffset11` operand in `JSR`).
387/// 
388/// During the first assembly pass, the label is resolved and
389/// replaced with a regular [`Offset`] value.
390/// 
391/// ## Examples
392/// ```text
393/// LD R2, VALUE
394///        ~~~~~
395/// BRz END
396///     ~~~
397/// BR #-99
398///    ~~~~
399/// JSR SUBROUTINE
400///     ~~~~~~~~~~
401/// ```
402#[derive(Debug, PartialEq, Eq, Hash, Clone)]
403pub enum PCOffset<OFF, const N: u32> {
404    #[allow(missing_docs)]
405    Offset(Offset<OFF, N>),
406    #[allow(missing_docs)]
407    Label(Label)
408}
409impl<OFF, const N: u32> std::fmt::Display for PCOffset<OFF, N> 
410    where Offset<OFF, N>: std::fmt::Display
411{
412    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
413        match self {
414            PCOffset::Offset(off)  => off.fmt(f),
415            PCOffset::Label(label) => label.fmt(f),
416        }
417    }
418}
419
/// A label.
///
/// A `Label` holds the label's name (the public `name` field) together with
/// the location of the label in assembly source code (see [`Label::span`]).
///
/// # Examples
/// ```text
/// .orig x3000
/// AND R0, R0, #0
/// LD R2, VALUE
///        ~~~~~
/// LOOP:
/// ~~~~
///     NOT R1, R2
///     ADD R1, R1, #1
///     ADD R1, R1, R0
///     BRz END
///         ~~~
///     ADD R0, R0, #1
///     BR LOOP
///        ~~~~
/// END: HALT
/// ~~~
/// VALUE: .fill #8464
/// ~~~~~
/// .end
/// ```
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
pub struct Label {
    /// The label's identifier
    pub name: String,

    /// Where the label begins in assembly source code.
    ///
    /// Only the start is stored: the end of the span can be recomputed
    /// from it and the length of `name`, which keeps the struct smaller.
    start: usize
}
impl Label {
    /// Creates a new label from its name and its span in assembly source code.
    ///
    /// The span is expected to be exactly as long as `name`
    /// (this is only checked when debug assertions are enabled).
    pub fn new(name: String, span: std::ops::Range<usize>) -> Self {
        let std::ops::Range { start, end } = span;
        debug_assert_eq!(start + name.len(), end, "span should have the same length as name");
        Self { name, start }
    }
    /// Returns the span of the label in assembly source code.
    pub fn span(&self) -> std::ops::Range<usize> {
        // The end is reconstructed from the start and the name's length.
        let end = self.start + self.name.len();
        self.start..end
    }
}
impl std::fmt::Display for Label {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A label is displayed as its name, honoring any formatter options.
        std::fmt::Display::fmt(&self.name, f)
    }
}