lc3_ensemble/ast.rs
1//! Components relating to the abstract syntax trees (ASTs)
2//! used in representing assembly instructions.
3//!
4//! These components together are used to construct...
5//! - [`asm::AsmInstr`] (a data structure holding an assembly source code instruction),
6//! - [`asm::Directive`] (a data structure holding an assembly source code directive),
7//! - and [`sim::SimInstr`] (a data structure holding a bytecode instruction).
8
9pub mod asm;
10pub mod sim;
11
12use std::fmt::Write as _;
13use std::num::TryFromIntError;
14use offset_base::OffsetBacking;
15
/// A register in the register file, numbered from 0 to 7.
///
/// A `Reg` is obtained either by naming one of the variants directly
/// (e.g., `Reg::R3`) or by converting a register number with [`Reg::try_from`].
/// The discriminant of each variant is its register number.
///
/// ## Examples
///
/// The underlined operands below are registers:
///
/// ```text
/// AND R0, R0, #0
///     ~~  ~~
/// ADD R1, R1, R0
///     ~~  ~~  ~~
/// LD R2, VALUE
///    ~~
/// NOT R1, R2
///     ~~  ~~
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Reg {
    /// Register number 0 of the register file.
    R0 = 0,
    /// Register number 1 of the register file.
    R1 = 1,
    /// Register number 2 of the register file.
    R2 = 2,
    /// Register number 3 of the register file.
    R3 = 3,
    /// Register number 4 of the register file.
    R4 = 4,
    /// Register number 5 of the register file.
    R5 = 5,
    /// Register number 6 of the register file.
    R6 = 6,
    /// Register number 7 of the register file.
    R7 = 7,
}
52impl Reg {
53 /// Gets the register number of this [`Reg`]. This is always between 0 and 7.
54 pub fn reg_no(self) -> u8 {
55 self as u8
56 }
57}
58impl std::fmt::Display for Reg {
59 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60 // formatting parameters should have no effect here
61 write!(f, "R{}", self.reg_no())
62 }
63}
64impl From<Reg> for usize {
65 // Used for indexing the reg file in [`ast::Sim`].
66 fn from(value: Reg) -> Self {
67 usize::from(value.reg_no())
68 }
69}
70impl TryFrom<u8> for Reg {
71 type Error = TryFromIntError;
72
73 fn try_from(value: u8) -> Result<Self, Self::Error> {
74 match value {
75 0 => Ok(Reg::R0),
76 1 => Ok(Reg::R1),
77 2 => Ok(Reg::R2),
78 3 => Ok(Reg::R3),
79 4 => Ok(Reg::R4),
80 5 => Ok(Reg::R5),
81 6 => Ok(Reg::R6),
82 7 => Ok(Reg::R7),
83 // HACKy, but there's no other way to create this error
84 _ => u8::try_from(256).map(|_| unreachable!("should've been TryFromIntError")),
85 }
86 }
87}
88
/// A condition code (used for `BR`), must be between 0 and 7.
///
/// Reading the 3-bit binary code from left to right, the bits select
/// the `n`, `z`, and `p` conditions respectively (see the table below).
///
/// The condition codes are listed below:
///
/// | instruction   | code (bin) |
/// |---------------|------------|
/// | `NOP`         | `000`      |
/// | `BRn`         | `100`      |
/// | `BRz`         | `010`      |
/// | `BRnz`        | `110`      |
/// | `BRp`         | `001`      |
/// | `BRnp`        | `101`      |
/// | `BRzp`        | `011`      |
/// | `BR`, `BRnzp` | `111`      |
///
/// Note that this is a plain alias of `u8`, so the 0 to 7 range
/// is not enforced by the type itself.
pub type CondCode = u8;
105
/// A value representing a signed offset or a signed immediate value.
///
/// This is an alias of [`Offset`] with an `i16` backing.
///
/// `N` indicates the maximum bit size of this offset/immediate value.
///
/// ## Examples
///
/// `IOffset<5>` is used to represent `ADD`/`AND`'s imm5 operand:
///
/// ```text
/// AND R0, R0, #0
///             ~~
/// ADD R1, R1, #1
///             ~~
/// ```
///
/// They are also used for numeric register or PC offset values:
///
/// ```text
/// BR x-F
///    ~~~
/// JSR #99
///     ~~~
/// LD R0, #10
///        ~~~
/// LDR R0, R0, #9
///             ~~
/// ```
pub type IOffset<const N: u32> = Offset<i16, N>;
/// An unsigned 8-bit trap vector (used for `TRAP`).
///
/// This is an alias of [`Offset`] with a `u16` backing and a bit size of 8.
///
/// ## Examples
///
/// ```text
/// TRAP x25
///      ~~~
/// ```
pub type TrapVect8 = Offset<u16, 8>;
142
143/// A value representing either an immediate value or a register.
144///
145/// This is used to handle cases where an operand can be either
146/// an immediate value or a register (e.g., in `AND` or `ADD`).
147///
148/// ## Examples
149/// ```text
150/// AND R0, R0, #0
151/// AND R1, R1, R1
152/// ADD R2, R2, #2
153/// ADD R3, R3, R3
154/// ^^
155/// ```
156#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
157pub enum ImmOrReg<const N: u32> {
158 #[allow(missing_docs)]
159 Imm(IOffset<N>),
160 #[allow(missing_docs)]
161 Reg(Reg)
162}
163impl<const N: u32> std::fmt::Display for ImmOrReg<N> {
164 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
165 match self {
166 ImmOrReg::Imm(imm) => imm.fmt(f),
167 ImmOrReg::Reg(reg) => reg.fmt(f),
168 }
169 }
170}
171
/// An offset or an immediate value that is limited to `N` bits.
///
/// `OFF` is the backing type that stores the value, and it also decides
/// whether the offset is signed or unsigned:
/// - `Offset<i16, _>`: signed offset (also aliased as [`IOffset`])
/// - `Offset<u16, _>`: unsigned offset
///
/// `N` indicates the maximum bit size of this offset/immediate value.
///
/// The wrapped value is a private field; it is read back with [`Offset::get`].
///
/// ## Examples
///
/// - `Offset<i16, 5>` is used to represent `ADD`/`AND`'s imm5 operand.
///   See [`IOffset`] for more examples of its use.
/// - `Offset<u16, 8>` is used to represent the `trapvect8` operand of the `TRAP` instruction.
///   See [`TrapVect8`] for more examples of its use.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Offset<OFF, const N: u32>(OFF);
189
190impl<OFF: std::fmt::Display, const N: u32> std::fmt::Display for Offset<OFF, N> {
191 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
192 f.write_char('#')?;
193 self.0.fmt(f)
194 }
195}
196impl<OFF: std::fmt::Binary, const N: u32> std::fmt::Binary for Offset<OFF, N> {
197 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
198 f.write_char('b')?;
199 self.0.fmt(f)
200 }
201}
202impl<OFF: std::fmt::LowerHex, const N: u32> std::fmt::LowerHex for Offset<OFF, N> {
203 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
204 f.write_char('x')?;
205 self.0.fmt(f)
206 }
207}
208impl<OFF: std::fmt::UpperHex, const N: u32> std::fmt::UpperHex for Offset<OFF, N> {
209 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
210 f.write_char('x')?;
211 self.0.fmt(f)
212 }
213}
214
/// The ways in which [`Offset::new`] can fail.
///
/// Each variant carries the bit size that the rejected value was checked against.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum OffsetNewErr {
    /// The value does not fit in an unsigned integer of the given bit size.
    CannotFitUnsigned(u32),
    /// The value does not fit in a signed integer of the given bit size.
    CannotFitSigned(u32),
}
223
224impl std::fmt::Display for OffsetNewErr {
225 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
226 match self {
227 OffsetNewErr::CannotFitUnsigned(n) => write!(f, "value is too big for unsigned {n}-bit integer"),
228 OffsetNewErr::CannotFitSigned(n) => write!(f, "value is too big for signed {n}-bit integer"),
229 }
230 }
231}
232impl std::error::Error for OffsetNewErr {}
233impl crate::err::Error for OffsetNewErr {
234 fn help(&self) -> Option<std::borrow::Cow<str>> {
235 use std::borrow::Cow;
236
237 let error = match self {
238 OffsetNewErr::CannotFitUnsigned(n) => Cow::from(format!("the range for an unsigned {n}-bit integer is [0, {}]", (1 << n) - 1)),
239 OffsetNewErr::CannotFitSigned(n) => Cow::from(format!("the range for a signed {n}-bit integer is [{}, {}]", (-1) << (n - 1), (1 << (n - 1)) - 1)),
240 };
241
242 Some(error)
243 }
244}
245
246mod offset_base {
247 use super::OffsetNewErr;
248
249 /// Any type that could store a value for [`Offset`].
250 ///
251 /// [`Offset`]: super::Offset
252 pub trait OffsetBacking: Copy + Eq {
253 /// How many bits are contained within this backing.
254 ///
255 /// For example, `u16` has 16 bits and thus BITS == 16.
256 const BITS: u32;
257
258 /// Truncates the given value to the provided `bit_size`.
259 ///
260 /// This bit size is always known to be less than BITS.
261 fn truncate(self, bit_size: u32) -> Self;
262
263 /// The error to raise if a given value doesn't match
264 /// its provided value when truncated to a given `bit_size`.
265 fn does_not_fit_error(bit_size: u32) -> OffsetNewErr;
266 }
267
268 macro_rules! impl_offset_backing_for_ints {
269 ($($Int:ty: $Err:ident),*) => {
270 $(
271 impl OffsetBacking for $Int {
272 const BITS: u32 = Self::BITS;
273
274 fn truncate(self, bit_size: u32) -> Self {
275 (self << (Self::BITS - bit_size)) >> (Self::BITS - bit_size)
276 }
277
278 fn does_not_fit_error(bit_size: u32) -> OffsetNewErr {
279 OffsetNewErr::$Err(bit_size)
280 }
281 }
282 )*
283 }
284 }
285 impl_offset_backing_for_ints! {
286 u16: CannotFitUnsigned,
287 i16: CannotFitSigned
288 }
289}
290
291impl<OFF: OffsetBacking, const N: u32> Offset<OFF, N> {
292 /// Creates a new offset value.
293 /// This must fit within `N` bits of the representation, otherwise an error is raised.
294 ///
295 /// # Examples
296 ///
297 /// ```
298 /// # use lc3_ensemble::ast::Offset;
299 /// #
300 /// // Signed:
301 /// let neg5 = Offset::<i16, 5>::new(-5);
302 /// let pos15 = Offset::<i16, 5>::new(15);
303 /// let pos16 = Offset::<i16, 5>::new(16);
304 /// assert!(neg5.is_ok());
305 /// assert!(pos15.is_ok());
306 /// assert!(pos16.is_err());
307 ///
308 /// // Unsigned:
309 /// let pos15 = Offset::<u16, 5>::new(15);
310 /// let pos16 = Offset::<u16, 5>::new(16);
311 /// let pos32 = Offset::<u16, 5>::new(32);
312 /// assert!(pos15.is_ok());
313 /// assert!(pos16.is_ok());
314 /// assert!(pos32.is_err());
315 /// ```
316 ///
317 /// # Panics
318 ///
319 /// This will panic if `N` is larger than the offset backing (e.g., for backing `u16`, larger than 16).
320 ///
321 /// ```should_panic
322 /// # use lc3_ensemble::ast::Offset;
323 /// #
324 /// let oh_no = Offset::<i16, 17>::new(18);
325 /// ```
326 pub fn new(n: OFF) -> Result<Self, OffsetNewErr> {
327 assert!(N <= OFF::BITS, "bit size {N} exceeds size of backing ({})", OFF::BITS);
328 match n == n.truncate(N) {
329 true => Ok(Offset(n)),
330 false => Err(OFF::does_not_fit_error(N)),
331 }
332 }
333
334 /// Creates a new offset by extending the first N bits of the integer,
335 /// and discarding the rest.
336 ///
337 /// The extension is considered sign-extended if the offset's backing is signed,
338 /// and zero-extended if the offset's backing is unsigned.
339 ///
340 /// # Examples
341 ///
342 /// ```
343 /// # use lc3_ensemble::ast::Offset;
344 /// #
345 /// // Signed:
346 /// let neg5 = Offset::<i16, 5>::new_trunc(-5); // 0b11111111111_11011
347 /// let pos15 = Offset::<i16, 5>::new_trunc(15); // 0b00000000000_01111
348 /// let pos16 = Offset::<i16, 5>::new_trunc(16); // 0b00000000000_10000
349 /// assert_eq!(neg5.get(), -5); // 0b11011
350 /// assert_eq!(pos15.get(), 15); // 0b01111
351 /// assert_eq!(pos16.get(), -16); // 0b10000
352 ///
353 /// // Unsigned:
354 /// let pos15 = Offset::<u16, 5>::new_trunc(15); // 0b00000000000_01111
355 /// let pos16 = Offset::<u16, 5>::new_trunc(16); // 0b00000000000_10000
356 /// let pos32 = Offset::<u16, 5>::new_trunc(32); // 0b00000000001_00000
357 /// assert_eq!(pos15.get(), 15); // 01111
358 /// assert_eq!(pos16.get(), 16); // 10000
359 /// assert_eq!(pos32.get(), 0); // 00000
360 /// ```
361 ///
362 /// # Panics
363 ///
364 /// This will panic if `N` is larger than the offset backing (e.g., for backing `u16`, larger than 16).
365 ///
366 /// ```should_panic
367 /// # use lc3_ensemble::ast::Offset;
368 /// #
369 /// let oh_no = Offset::<i16, 17>::new_trunc(18);
370 /// ```
371 pub fn new_trunc(n: OFF) -> Self {
372 assert!(N <= OFF::BITS, "bit size {N} exceeds size of backing ({})", OFF::BITS);
373 Self(n.truncate(N))
374 }
375
376 /// Gets the value of the offset.
377 pub fn get(&self) -> OFF {
378 self.0
379 }
380}
381
382/// An offset or a label.
383///
384/// This is used to represent [`PCOffset`] operands
385/// (such as the `PCOffset9` operand in `LD` and `ST`
386/// and the `PCOffset11` operand in `JSR`).
387///
388/// During the first assembly pass, the label is resolved and
389/// replaced with a regular [`Offset`] value.
390///
391/// ## Examples
392/// ```text
393/// LD R2, VALUE
394/// ~~~~~
395/// BRz END
396/// ~~~
397/// BR #-99
398/// ~~~~
399/// JSR SUBROUTINE
400/// ~~~~~~~~~~
401/// ```
402#[derive(Debug, PartialEq, Eq, Hash, Clone)]
403pub enum PCOffset<OFF, const N: u32> {
404 #[allow(missing_docs)]
405 Offset(Offset<OFF, N>),
406 #[allow(missing_docs)]
407 Label(Label)
408}
409impl<OFF, const N: u32> std::fmt::Display for PCOffset<OFF, N>
410 where Offset<OFF, N>: std::fmt::Display
411{
412 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
413 match self {
414 PCOffset::Offset(off) => off.fmt(f),
415 PCOffset::Label(label) => label.fmt(f),
416 }
417 }
418}
419
/// A label in assembly source code.
///
/// This struct stores the name of the label (accessible by the `name` field)
/// and enough information to recover the span of source code that the label occupies.
///
/// # Examples
///
/// The underlined pieces of text below are labels:
///
/// ```text
/// .orig x3000
/// AND R0, R0, #0
/// LD R2, VALUE
///        ~~~~~
/// LOOP:
/// ~~~~
/// NOT R1, R2
/// ADD R1, R1, #1
/// ADD R1, R1, R0
/// BRz END
///     ~~~
/// ADD R0, R0, #1
/// BR LOOP
///    ~~~~
/// END: HALT
/// ~~~
/// VALUE: .fill #8464
/// ~~~~~
/// .end
/// ```
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
pub struct Label {
    /// The label's identifier.
    pub name: String,

    /// Where the label starts in assembly source code.
    ///
    /// The end of the span is not stored: `name` already knows its own length,
    /// so the end can be recomputed from `start`.
    /// Compared to storing the whole span, this saves one `usize`
    /// (8 bytes on a 64-bit machine).
    start: usize,
}
460impl Label {
461 /// Creates a new label.
462 pub fn new(name: String, span: std::ops::Range<usize>) -> Self {
463 debug_assert_eq!(span.start + name.len(), span.end, "span should have the same length as name");
464 Label { name, start: span.start }
465 }
466 /// Returns the span of the label in assembly source code.
467 pub fn span(&self) -> std::ops::Range<usize> {
468 self.start .. (self.start + self.name.len())
469 }
470}
471impl std::fmt::Display for Label {
472 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
473 self.name.fmt(f)
474 }
475}