lc3_ensemble/
asm.rs

1//! Assembling assembly source ASTs into object files.
2//! 
3//! This module is used to convert source ASTs (`Vec<`[`Stmt`]`>`) into object files 
4//! that can be executed by the simulator.
5//! 
6//! The assembler module notably consists of:
7//! - [`assemble`] and [`assemble_debug`]: The main functions which assemble the statements into an object file.
8//! - [`SymbolTable`]: a struct holding the symbol table, which stores location information for labels after the first assembler pass
9//! - [`ObjectFile`]: a struct holding the object file, which can be loaded into the simulator and executed
10//! 
11//! [`Stmt`]: crate::ast::asm::Stmt
12
13pub mod encoding;
14
15use std::collections::hash_map::Entry;
16use std::collections::{BTreeMap, HashMap};
17use std::ops::Range;
18
19use logos::Span;
20
21use crate::ast::asm::{AsmInstr, Directive, Stmt, StmtKind};
22use crate::ast::sim::SimInstr;
23use crate::ast::{IOffset, ImmOrReg, Offset, OffsetNewErr, PCOffset, Reg};
24use crate::err::ErrSpan;
25
26/// Assembles a assembly source code AST into an object file.
27/// 
28/// This function assembles the source AST *without* including debug symbols
29/// in the object file.
30/// See [`SymbolTable`] for more details about debug symbols.
31/// 
32/// # Example
33/// ```
34/// use lc3_ensemble::parse::parse_ast;
35/// use lc3_ensemble::asm::assemble;
36/// 
37/// let src = "
38///     .orig x3000
39///     LABEL: HALT
40///     .end
41/// ";
42/// let ast = parse_ast(src).unwrap();
43/// 
44/// let obj_file = assemble(ast);
45/// assert!(obj_file.is_ok());
46/// 
47/// // Symbol table doesn't exist in object file:
48/// let obj_file = obj_file.unwrap();
49/// assert!(obj_file.symbol_table().is_none());
50/// ```
51pub fn assemble(ast: Vec<Stmt>) -> Result<ObjectFile, AsmErr> {
52    let sym = SymbolTable::new(&ast, None)?;
53    ObjectFile::new(ast, sym, false)
54}
55/// Assembles a assembly source code AST into an object file.
56/// 
57/// This function assembles the source AST *and* includes debug symbols
58/// in the object file.
59/// See [`SymbolTable`] for more details about debug symbols.
60/// 
61/// # Example
62/// ```
63/// use lc3_ensemble::parse::parse_ast;
64/// use lc3_ensemble::asm::assemble_debug;
65/// 
66/// let src = "
67///     .orig x3000
68///     LABEL: HALT
69///     .end
70/// ";
71/// let ast = parse_ast(src).unwrap();
72/// 
73/// let obj_file = assemble_debug(ast, src);
74/// assert!(obj_file.is_ok());
75/// 
76/// // Symbol table does exist in object file:
77/// let obj_file = obj_file.unwrap();
78/// assert!(obj_file.symbol_table().is_some());
79/// ```
80pub fn assemble_debug(ast: Vec<Stmt>, src: &str) -> Result<ObjectFile, AsmErr> {
81    let sym = SymbolTable::new(&ast, Some(src))?;
82    ObjectFile::new(ast, sym, true)
83}
84
85/// Kinds of errors that can occur from assembling given assembly code.
86/// 
87/// See [`AsmErr`] for this error type with span information included.
88#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
89pub enum AsmErrKind {
90    /// Cannot determine address of label (pass 1).
91    UndetAddrLabel,
92    /// Cannot determine address of instruction (pass 2).
93    UndetAddrStmt,
94    /// There was an `.orig` but no corresponding `.end` (pass 1).
95    UnclosedOrig,
96    /// There was an `.end` but no corresonding `.orig` (pass 1).
97    UnopenedOrig,
98    /// There was an `.orig` opened after another `.orig` (pass 1).
99    OverlappingOrig,
100    /// There were multiple labels of the same name (pass 1).
101    OverlappingLabels,
102    /// Block wraps memory (pass 2).
103    WrappingBlock,
104    /// Block writes to IO memory region (pass 2).
105    BlockInIO,
106    /// There are blocks that overlap ranges of memory (pass 2).
107    OverlappingBlocks,
108    /// Creating the offset to replace a label caused overflow (pass 2).
109    OffsetNewErr(OffsetNewErr),
110    /// Cannot find the offset with an external label (pass 2).
111    OffsetExternal,
112    /// Label did not have an assigned address (pass 2).
113    CouldNotFindLabel,
114}
115impl std::fmt::Display for AsmErrKind {
116    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117        match self {
118            Self::UndetAddrLabel    => f.write_str("cannot determine address of label"),
119            Self::UndetAddrStmt     => f.write_str("cannot determine address of statement"),
120            Self::UnclosedOrig      => f.write_str(".orig directive was never closed"),
121            Self::UnopenedOrig      => f.write_str(".end does not have associated .orig"),
122            Self::OverlappingOrig   => f.write_str("cannot have an .orig inside another region"),
123            Self::OverlappingLabels => f.write_str("label was defined multiple times"),
124            Self::WrappingBlock     => f.write_str("block wraps around in memory"),
125            Self::BlockInIO         => f.write_str("cannot write code into memory-mapped IO region"),
126            Self::OverlappingBlocks => f.write_str("regions overlap in memory"),
127            Self::OffsetNewErr(e)   => e.fmt(f),
128            Self::OffsetExternal    => f.write_str("cannot use external label here"),
129            Self::CouldNotFindLabel => f.write_str("label could not be found"),
130        }
131    }
132}
133
134/// Error from assembling given assembly code.
135#[derive(Debug)]
136pub struct AsmErr {
137    /// The value with a span.
138    pub kind: AsmErrKind,
139    /// The span in the source associated with this value.
140    pub span: ErrSpan
141}
142impl AsmErr {
143    /// Creates a new [`AsmErr`].
144    pub fn new<E: Into<ErrSpan>>(kind: AsmErrKind, span: E) -> Self {
145        AsmErr { kind, span: span.into() }
146    }
147}
148impl std::fmt::Display for AsmErr {
149    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
150        self.kind.fmt(f)
151    }
152}
153impl std::error::Error for AsmErr {
154    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
155        match &self.kind {
156            AsmErrKind::OffsetNewErr(e) => Some(e),
157            _ => None
158        }
159    }
160}
161impl crate::err::Error for AsmErr {
162    fn span(&self) -> Option<crate::err::ErrSpan> {
163        Some(self.span.clone())
164    }
165
166    fn help(&self) -> Option<std::borrow::Cow<str>> {
167        match &self.kind {
168            AsmErrKind::UndetAddrLabel    => Some("try moving this label inside of an .orig/.end block".into()),
169            AsmErrKind::UndetAddrStmt     => Some("try moving this statement inside of an .orig/.end block".into()),
170            AsmErrKind::UnclosedOrig      => Some("try adding an .end directive at the end of this block".into()),
171            AsmErrKind::UnopenedOrig      => Some("try adding an .orig directive at the beginning of this block".into()),
172            AsmErrKind::OverlappingOrig   => Some("try adding an .end directive at the end of the outer .orig block".into()),
173            AsmErrKind::OverlappingLabels => Some("labels must be unique within a file, try renaming one of the labels".into()),
174            AsmErrKind::OverlappingBlocks => Some("try moving the starting address of one of these regions".into()),
175            AsmErrKind::WrappingBlock     => Some("user code typically starts at x3000 and is short enough to not wrap memory".into()),
176            AsmErrKind::BlockInIO         => Some("try not doing that".into()),
177            AsmErrKind::OffsetNewErr(e)   => e.help(),
178            AsmErrKind::OffsetExternal    => Some("external labels cannot be an offset operand; try creating a .fill LABEL directive".into()),
179            AsmErrKind::CouldNotFindLabel => Some("try adding this label before an instruction or directive".into()),
180        }
181    }
182}
183
184const IO_START: u16 = 0xFE00;
185
186/// A mapping from line numbers to memory addresses (and vice-versa).
187///
188/// This is implemented as a sorted list of contiguous blocks, consisting of:
189/// - The first source line number of the block, and
190/// - The memory addresses of the block
191/// 
192/// For example,
193/// ```text
194/// 0 | .orig x3000
195/// 1 |     AND R0, R0, #0
196/// 2 |     ADD R0, R0, #5
197/// 3 |     HALT
198/// 4 | .end
199/// 5 | 
200/// 6 | .orig x4000
201/// 7 |     .blkw 5
202/// 8 |     .fill x9F9F
203/// 9 | .end
204/// ```
205/// maps to `LineSymbolMap({1: [0x3000, 0x3001, 0x3002], 7: [0x4000, 0x4005]})`.
206/// 
207/// This data structure holds several invariants:
208/// - Line numbers should never overlap.
209/// - In a given block, the addresses should be in ascending order 
210///     (this has to occur in a well-formed program because regions constitute contiguous, non-overlapping parts of memory).
211/// 
212/// If these invariants are not held, invalid behavior can occur.
213#[derive(PartialEq, Eq, Clone)]
214struct LineSymbolMap(BTreeMap<usize, Vec<u16>>);
215
216impl LineSymbolMap {
217    /// Creates a new line symbol table.
218    /// 
219    /// This takes an expanded list of line-memory address mappings and condenses it into 
220    /// the internal [`LineSymbolMap`] format.
221    /// 
222    /// For example,
223    /// 
224    /// `[None, Some(0x3000), Some(0x3001), Some(0x3002), None, None, None, Some(0x4000), Some(0x4005)]` 
225    /// condenses to `{1: [0x3000, 0x3001, 0x3002], 7: [0x4000, 0x4005]}`.
226    /// 
227    /// For a given block of contiguous `Some`s, the memory addresses should be sorted and accesses
228    /// through `LineSymbolMap`'s methods assume the values are sorted.
229    /// 
230    /// If they are not sorted, incorrect behaviors may occur. Skill issue.
231    fn new(lines: Vec<Option<u16>>) -> Option<Self> {
232        let mut blocks = BTreeMap::new();
233        let mut current = None;
234        for (i, line) in lines.into_iter().enumerate() {
235            match line {
236                Some(addr) => current.get_or_insert_with(Vec::new).push(addr),
237                None => if let Some(bl) = current.take() {
238                    blocks.insert(i - bl.len(), bl);
239                },
240            }
241        }
242
243        Self::from_blocks(blocks)
244    }
245
246    fn from_blocks(blocks: impl IntoIterator<Item=(usize, Vec<u16>)>) -> Option<Self> {
247        let mut bl: Vec<_> = blocks.into_iter().collect();
248
249        bl.sort_by_key(|&(l, _)| l);
250        
251        // Check not overlapping:
252        let not_overlapping = bl.windows(2).all(|win| {
253            let [(ls, lb), (rs, _)] = win else { unreachable!() };
254            ls + lb.len() <= *rs
255        });
256
257        match not_overlapping {
258            true => {
259                // Check every individual block is sorted:
260                let sorted = bl.iter().all(|(_, lb)| {
261                    lb.windows(2).all(|win| win[0] <= win[1])
262                });
263
264                sorted.then(|| Self(bl.into_iter().collect()))
265            }
266            false => None,
267        }
268    }
269
270    /// Gets the memory address associated with this line, if it is present in the line symbol mapping.
271    fn get(&self, line: usize) -> Option<u16> {
272        // Find the block such that `line` falls within the source line number range of the block.
273        let (start, block) = self.0.range(..=line).next_back()?;
274
275        // Access the memory address.
276        block.get(line - *start).copied()
277    }
278
279    /// Gets the source line number associated with this memory address, if it is present in the symbol table.
280    fn find(&self, addr: u16) -> Option<usize> {
281        self.0.iter()
282            .find_map(|(start, words)| {
283                // Find the block that contains the given address,
284                // and then find the line index once it's found.
285                words.binary_search(&addr)
286                    .ok()
287                    .map(|o| start + o)
288            })
289    }
290
291    /// Gets an iterable representing the block mappings.
292    fn block_iter(&self) -> impl Iterator<Item=(usize, &[u16])> + '_ {
293        self.0.iter()
294            .map(|(&i, words)| (i, words.as_slice()))
295    }
296
297    /// Gets an iterable representing the mapping of line numbers to addresses.
298    fn iter(&self) -> impl Iterator<Item=(usize, u16)> + '_ {
299        self.block_iter()
300            .flat_map(|(i, words)| {
301                words.iter()
302                    .enumerate()
303                    .map(move |(off, &addr)| (i + off, addr))
304            })
305    }
306}
307impl std::fmt::Debug for LineSymbolMap {
308    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
309        f.debug_map()
310        .entries(self.iter().map(|(i, v)| (i, Addr(v))))
311        .finish()
312    }
313}
314/// Struct holding the source string and contains helpers 
315/// to index lines and to query position information from a source string.
316#[derive(PartialEq, Eq, Clone)]
317pub struct SourceInfo {
318    /// The source code.
319    src: String,
320    /// The index of each new line in source code.
321    nl_indices: Vec<usize>
322}
323impl std::fmt::Debug for SourceInfo {
324    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
325        f.debug_struct("SourceInfo")
326            .field("nl_indices", &self.nl_indices)
327            .finish_non_exhaustive()
328    }
329}
330impl SourceInfo {
331    /// Computes the source info from a given string.
332    pub fn new(src: &str) -> Self {
333        Self::from_string(src.to_string())
334    }
335    fn from_string(src: String) -> Self {
336        // Index where each new line appears.
337        let nl_indices: Vec<_> = src
338            .match_indices('\n')
339            .map(|(i, _)| i)
340            .chain([src.len()])
341            .collect();
342
343        Self { src, nl_indices }
344    }
345
346    /// Returns the entire source.
347    pub fn source(&self) -> &str {
348        &self.src
349    }
350
351    /// Counts the number of lines in the source string.
352    pub fn count_lines(&self) -> usize {
353        // The first line, plus every line after (delimited by a new line)
354        self.nl_indices.len()
355    }
356
357    /// Gets the character range for the provided line, including any whitespace
358    /// and the newline character.
359    /// 
360    /// This returns None if line is not in the interval `[0, number of lines)`.
361    fn raw_line_span(&self, line: usize) -> Option<Range<usize>> {
362        // Implementation detail:
363        // number of lines = self.nl_indices.len() + 1
364        if !(0..self.count_lines()).contains(&line) {
365            return None;
366        };
367
368        let start = match line {
369            0 => 0,
370            _ => self.nl_indices[line - 1] + 1
371        };
372
373        let eof = self.src.len();
374        let end = match self.nl_indices.get(line) {
375            Some(i) => (i + 1).min(eof), // incl NL, but don't go over EOF
376            None => eof,
377        };
378        
379        Some(start..end)
380    }
381
382    /// Gets the character range for the provided line, excluding any whitespace.
383    /// 
384    /// This returns None if line is not in the interval `[0, number of lines)`.
385    pub fn line_span(&self, line: usize) -> Option<Range<usize>> {
386        let Range { mut start, mut end } = self.raw_line_span(line)?;
387        
388        // shift line span by trim
389        let line = &self.src[start..end];
390        let end_trimmed = line.trim_end();
391        end -= line.len() - end_trimmed.len();
392        
393        let line = end_trimmed;
394        start += line.len() - line.trim_start().len();
395
396        Some(start..end)
397    }
398
399    /// Reads a line from source.
400    /// 
401    /// This returns None if line is not in the interval `[0, number of lines)`.
402    pub fn read_line(&self, line: usize) -> Option<&str> {
403        self.line_span(line).map(|r| &self.src[r])
404    }
405
406    /// Gets the line number of the current position.
407    fn get_line(&self, index: usize) -> usize {
408        self.nl_indices.partition_point(|&start| start < index)
409    }
410
411    /// Calculates the line and character number for a given character index.
412    /// 
413    /// If the index exceeds the length of the string,
414    /// the line number is given as the last line and the character number
415    /// is given as the number of characters after the start of the line.
416    pub fn get_pos_pair(&self, index: usize) -> (usize, usize) {
417        let lno = self.get_line(index);
418
419        let Range { start: lstart, .. } = self.raw_line_span(lno)
420            .or_else(|| self.raw_line_span(self.nl_indices.len()))
421            .unwrap_or(0..0);
422        let cno = index - lstart;
423        (lno, cno)
424    }
425}
426impl From<&'_ str> for SourceInfo {
427    fn from(value: &'_ str) -> Self {
428        Self::new(value)
429    }
430}
431impl From<String> for SourceInfo {
432    fn from(value: String) -> Self {
433        Self::from_string(value)
434    }
435}
436
437#[derive(PartialEq, Eq, Clone, Copy, Default, Debug)]
438struct SymbolData {
439    addr: u16,
440    src_start: usize,
441    external: bool
442}
443impl SymbolData {
444    /// Calculates the source range of this symbol, given the name of the label.
445    fn span(&self, label: &str) -> Range<usize> {
446        self.src_start .. (self.src_start + label.len())
447    }
448}
449
450/// Debug symbols.
451#[derive(PartialEq, Eq, Debug, Clone)]
452struct DebugSymbols {
453    /// A mapping from each line with a statement in the source to an address.
454    line_map: LineSymbolMap,
455
456    /// Information about the source.
457    src_info: SourceInfo
458}
459impl DebugSymbols {
460    pub fn lookup_line(&self, line: usize) -> Option<u16> {
461        self.line_map.get(line)
462    }
463
464    pub fn rev_lookup_line(&self, addr: u16) -> Option<usize> {
465        self.line_map.find(addr)
466    }
467
468    /// Links debug symbols.
469    pub fn link(mut a: Self, b: Self) -> Result<Self, AsmErr> {
470        // TODO: This kinda just tacks files together into a single source.
471        // Once object files have support for multiple ASM references, this should be cleaner.
472
473        let lines = a.src_info.count_lines();
474
475        // B doesn't overlap with A because ObjectFile check
476        a.line_map.0.extend({
477            b.line_map.0.into_iter()
478                .map(|(k, v)| (k + lines, v))
479        });
480
481        a.src_info = SourceInfo::from_string(a.src_info.src + "\n" + &b.src_info.src);
482
483        Ok(a)
484    }
485}
486/// The symbol table created in the first assembler pass
487/// that encodes source code mappings to memory addresses in the object file.
488/// 
489/// The symbol table consists of: 
490/// - A mapping from source code labels to memory addresses.
491/// - A mapping from source code line numbers to memory addresses (if debug symbols are enabled).
492/// - The source text (if debug symbols are enabled).
493/// 
494/// Here is a table of the mappings that the symbol table provides:
495/// 
496/// | from ↓, to → | label                              | memory address                | source line/span                  |
497/// |----------------|------------------------------------|-------------------------------|-----------------------------------|
498/// | label          | -                                  | [`SymbolTable::lookup_label`] | [`SymbolTable::get_label_source`] |
499/// | memory address | [`SymbolTable::rev_lookup_label`]  | -                             | [`SymbolTable::rev_lookup_line`]  |
500/// | source line    | none                               | [`SymbolTable::lookup_line`]  | -                                 |
501/// 
502/// # Debug symbols
503/// 
504/// Debug symbols are optional data added to this symbol table which can help users debug their code.
505/// 
506/// Without debug symbols, the symbol table only consists of mappings from labels to spans (and vice-versa).
507/// These are used to translate labels in source code to addresses during the assembly process.
508/// After the completion of this assembly process, the [`SymbolTable`] is dropped and is not part of the resultant
509/// [`ObjectFile`].
510/// 
511/// However, with debug symbols, this information persists in the resultant [`ObjectFile`], allowing
512/// the label mappings to be accessed during simulation time. Additionally, more information from the source
513/// text is available during simulation time:
514/// - Mappings from source code line numbers to memory addresses
515/// - Source code text (which grants access to line contents from a given line number; see [`SourceInfo`] for more details)
516#[derive(PartialEq, Eq, Clone)]
517pub struct SymbolTable {
518    /// A mapping from label to address and span of the label.
519    label_map: HashMap<String, SymbolData>,
520
521    /// Relocation table.
522    rel_map: HashMap<u16, String>,
523
524    /// Debug symbols. If None, there were no debug symbols provided.
525    debug_symbols: Option<DebugSymbols>,
526}
527
528impl SymbolTable {
529    /// Creates a new symbol table.
530    /// 
531    /// This performs the first assembler pass, calculating the memory address of
532    /// labels at each provided statement.
533    /// 
534    /// If a `src` argument is provided, debug symbols are also computed for the symbol table.
535    /// 
536    /// ## Example
537    /// ```
538    /// use lc3_ensemble::parse::parse_ast;
539    /// use lc3_ensemble::asm::SymbolTable;
540    /// 
541    /// let src = "
542    ///     .orig x3000
543    ///     LABEL: HALT
544    ///     .end
545    /// ";
546    /// let ast = parse_ast(src).unwrap();
547    /// 
548    /// // without debug symbols
549    /// let sym = SymbolTable::new(&ast, None).unwrap();
550    /// assert_eq!(sym.lookup_label("LABEL"), Some(0x3000));
551    /// assert_eq!(sym.lookup_line(2), None);
552    /// 
553    /// // with debug symbols
554    /// let sym = SymbolTable::new(&ast, Some(src)).unwrap();
555    /// assert_eq!(sym.lookup_label("LABEL"), Some(0x3000));
556    /// assert_eq!(sym.lookup_line(2), Some(0x3000));
557    /// ```
558    pub fn new(stmts: &[Stmt], src: Option<&str>) -> Result<Self, AsmErr> {
559        struct Cursor {
560            // The current location counter.
561            lc: u16,
562            // True if 0x10000.
563            overflowed: bool,
564            // The span of the .orig directive.
565            block_orig: Span,
566        }
567        impl Cursor {
568            fn new(lc: u16, block_orig: Span) -> Self {
569                Self { lc, overflowed: false, block_orig }
570            }
571            /// Attempts to shift the LC forward by n word locations,
572            /// failing if that would cause the LC to pass the IO region or
573            /// overflow memory.
574            fn shift(&mut self, n: u16) -> Result<(), AsmErrKind> {
575                if n == 0 { return Ok(()); }
576
577                match (self.overflowed, self.lc.checked_add(n)) {
578                    (true, _) => Err(AsmErrKind::WrappingBlock),
579                    (false, Some(new_lc)) if new_lc > IO_START => Err(AsmErrKind::BlockInIO),
580                    (false, Some(new_lc)) => {
581                        self.lc = new_lc;
582                        Ok(())
583                    },
584                    (false, None) => {
585                        let lc = std::mem::take(&mut self.lc);
586                        self.overflowed = true;
587                        // If aligns exactly, it can't be considered wrapping over
588                        Err(match lc == n.wrapping_neg() {
589                            true => AsmErrKind::BlockInIO,
590                            false => AsmErrKind::WrappingBlock,
591                        })
592                    }
593                }
594            }
595        }
596
597        fn add_label(
598            labels: &mut HashMap<String, SymbolData>, 
599            label: &crate::ast::Label, 
600            addr: u16,
601            external: bool
602        ) -> Result<(), AsmErr> {
603            match labels.entry(label.name.to_uppercase()) {
604                // Two labels with different addresses. Conflict.
605                Entry::Occupied(e) if e.get().addr != addr => {
606                    let span1 = e.get().span(e.key());
607                    let span2 = label.span();
608                    Err(AsmErr::new(AsmErrKind::OverlappingLabels, [span1, span2]))
609                },
610                // Two labels with same address. No conflict.
611                Entry::Occupied(_) => Ok(()),
612                // New label.
613                Entry::Vacant(e) => {
614                    e.insert(SymbolData { addr, src_start: label.span().start, external });
615                    Ok(())
616                }
617            }
618        }
619
620        let mut cursor: Option<Cursor> = None;
621        let mut label_map: HashMap<String, SymbolData> = HashMap::new();
622        let mut rel_map = HashMap::new();
623        let mut debug_sym = src.map(|s| {
624            let src_info = SourceInfo::new(s);
625            (vec![None; src_info.count_lines()], src_info)
626        });
627
628        for stmt in stmts {
629            // Add labels if they exist
630            if !stmt.labels.is_empty() {
631                // If cursor does not exist, that means we're not in an .orig block,
632                // so these labels don't have a known location
633                let Some(cur) = cursor.as_ref() else {
634                    let spans = stmt.labels.iter()
635                        .map(|label| label.span())
636                        .collect::<Vec<_>>();
637                    
638                    return Err(AsmErr::new(AsmErrKind::UndetAddrLabel, spans));
639                };
640
641                // Add labels
642                for label in &stmt.labels {
643                    add_label(&mut label_map, label, cur.lc, false)?;
644                }
645            }
646
647            // Handle special directives:
648            match &stmt.nucleus {
649                StmtKind::Directive(Directive::Orig(addr)) => match cursor {
650                    Some(cur) => return Err(AsmErr::new(AsmErrKind::OverlappingOrig, [cur.block_orig, stmt.span.clone()])),
651                    None      => { cursor.replace(Cursor::new(addr.get(), stmt.span.clone())); },
652                },
653                StmtKind::Directive(Directive::End) => match cursor {
654                    Some(_) => { cursor.take(); },
655                    None    => return Err(AsmErr::new(AsmErrKind::UnopenedOrig, stmt.span.clone())),
656                },
657                StmtKind::Directive(Directive::External(label)) => {
658                    // Arbitrarily chose 0 as a placeholder for externals
659                    add_label(&mut label_map, label, 0, true)?;
660                }
661                StmtKind::Directive(Directive::Fill(PCOffset::Label(label))) => {
662                    let label_text = label.name.to_uppercase();
663                    if let Some(SymbolData { external: true, .. }) = label_map.get(&label_text) {
664                        let Some(cur) = cursor.as_ref() else {
665                            return Err(AsmErr::new(AsmErrKind::UndetAddrStmt, stmt.span.clone()));
666                        };
667
668                        rel_map.insert(cur.lc, label_text);
669                    }
670                },
671                _ => {}
672            };
673
674            // If we're keeping track of the line counter currently (i.e., are inside of a .orig block):
675            if let Some(cur) = &mut cursor {
676                // Debug symbol:
677                // Calculate which source code line is associated with the instruction the LC is currently pointing to
678                // and add the mapping from line to instruction address.
679                if let Some((lines, s)) = &mut debug_sym {
680                    if !matches!(stmt.nucleus, StmtKind::Directive(Directive::Orig(_) | Directive::End)) {
681                        let line_index = s.get_line(stmt.span.start);
682                        lines[line_index].replace(cur.lc);
683                    }
684                }
685
686                // Shift the LC forward
687                match &stmt.nucleus {
688                    StmtKind::Instr(_)     => cur.shift(1),
689                    StmtKind::Directive(d) => cur.shift(d.word_len()),
690                }.map_err(|e| AsmErr::new(e, stmt.span.clone()))?
691            }
692        }
693
694        if let Some(cur) = cursor {
695            return Err(AsmErr::new(AsmErrKind::UnclosedOrig, cur.block_orig));
696        }
697        
698        let debug_symbols = debug_sym.map(|(lines, src_info)| DebugSymbols {
699            line_map: LineSymbolMap::new(lines)
700            .unwrap_or_else(|| {
701                unreachable!("line symbol map's invariants should have been upheld during symbol table pass")
702            }),
703            src_info,
704        });
705
706        Ok(SymbolTable { label_map, rel_map, debug_symbols })
707    }
708
709    /// Gets the memory address of a given label (if it exists).
710    /// 
711    /// ## Example
712    /// ```
713    /// use lc3_ensemble::parse::parse_ast;
714    /// use lc3_ensemble::asm::SymbolTable;
715    /// 
716    /// let src = "
717    ///     .orig x3000
718    ///     LOOP:
719    ///         ADD R0, R0, #1
720    ///         BR LOOP
721    ///     LOOP2:
722    ///         ADD R0, R0, #2
723    ///         BR LOOP2
724    ///     LOOP3:
725    ///         ADD R0, R0, #3
726    ///         BR LOOP3
727    ///     .end
728    /// ";
729    /// let ast = parse_ast(src).unwrap();
730    /// 
731    /// let sym = SymbolTable::new(&ast, None).unwrap();
732    /// assert_eq!(sym.lookup_label("LOOP"), Some(0x3000));
733    /// assert_eq!(sym.lookup_label("LOOP2"), Some(0x3002));
734    /// assert_eq!(sym.lookup_label("LOOP3"), Some(0x3004));
735    /// assert_eq!(sym.lookup_label("LOOP_DE_LOOP"), None);
736    /// ```
737    pub fn lookup_label(&self, label: &str) -> Option<u16> {
738        self.label_map.get(&label.to_uppercase()).map(|sym_data| sym_data.addr)
739    }
740    
741    /// Gets the label at a given memory address (if it exists).
742    /// 
743    /// ## Example
744    /// ```
745    /// use lc3_ensemble::parse::parse_ast;
746    /// use lc3_ensemble::asm::SymbolTable;
747    /// 
748    /// let src = "
749    ///     .orig x3000
750    ///     LOOP:
751    ///         ADD R0, R0, #1
752    ///         BR LOOP
753    ///     LOOP2:
754    ///         ADD R0, R0, #2
755    ///         BR LOOP2
756    ///     LOOP3:
757    ///         ADD R0, R0, #3
758    ///         BR LOOP3
759    ///     .end
760    /// ";
761    /// let ast = parse_ast(src).unwrap();
762    /// 
763    /// let sym = SymbolTable::new(&ast, None).unwrap();
764    /// assert_eq!(sym.rev_lookup_label(0x3000), Some("LOOP"));
765    /// assert_eq!(sym.rev_lookup_label(0x3002), Some("LOOP2"));
766    /// assert_eq!(sym.rev_lookup_label(0x3004), Some("LOOP3"));
767    /// assert_eq!(sym.rev_lookup_label(0x2110), None);
768    /// ```
769    pub fn rev_lookup_label(&self, addr: u16) -> Option<&str> {
770        let (label, _) = self.label_map.iter()
771            .find(|&(_, sym_data)| sym_data.addr == addr)?;
772
773        Some(label)
774    }
775
776    /// Gets the source span of a given label (if it exists).
777    /// 
778    /// ## Example
779    /// ```
780    /// use lc3_ensemble::parse::parse_ast;
781    /// use lc3_ensemble::asm::SymbolTable;
782    /// 
783    /// let src = "
784    ///     .orig x3000
785    ///     LOOPY:
786    ///         ADD R0, R0, #1
787    ///         BR LOOPY
788    ///     .end
789    /// ";
790    /// let ast = parse_ast(src).unwrap();
791    /// 
792    /// let sym = SymbolTable::new(&ast, None).unwrap();
793    /// assert_eq!(sym.get_label_source("LOOPY"), Some(21..26));
794    /// assert_eq!(sym.get_label_source("LOOP_DE_LOOP"), None);
795    /// ```
796    pub fn get_label_source(&self, label: &str) -> Option<Range<usize>> {
797        self.label_map.get(label)
798            .map(|data| data.span(label))
799    }
800
801    /// Gets the address of a given source line.
802    /// 
803    /// If debug symbols are not enabled, this unconditionally returns `None`.
804    /// Note that each address is mapped to at most one source code line.
805    /// 
806    /// ## Example
807    /// ```
808    /// use lc3_ensemble::parse::parse_ast;
809    /// use lc3_ensemble::asm::SymbolTable;
810    /// 
811    /// let src = "              ;;  0
812    ///     .orig x3000          ;;  1
813    ///     LOOP:                ;;  2
814    ///         ADD R0, R0, #1   ;;  3
815    ///         BR LOOP          ;;  4
816    ///     .fill x9999          ;;  5
817    ///     .blkw 10             ;;  6
818    ///     LOOP2:               ;;  7
819    ///         ADD R0, R0, #3   ;;  8
820    ///         BR LOOP3         ;;  9
821    ///     .end                 ;; 10
822    /// ";
823    /// let ast = parse_ast(src).unwrap();
824    /// 
825    /// // Debug symbols required:
826    /// let sym = SymbolTable::new(&ast, Some(src)).unwrap();
827    /// assert_eq!(sym.lookup_line(0),  None);
828    /// assert_eq!(sym.lookup_line(1),  None);
829    /// assert_eq!(sym.lookup_line(2),  None);
830    /// assert_eq!(sym.lookup_line(3),  Some(0x3000));
831    /// assert_eq!(sym.lookup_line(4),  Some(0x3001));
832    /// assert_eq!(sym.lookup_line(5),  Some(0x3002));
833    /// assert_eq!(sym.lookup_line(6),  Some(0x3003));
834    /// assert_eq!(sym.lookup_line(7),  None);
835    /// assert_eq!(sym.lookup_line(8),  Some(0x300D));
836    /// assert_eq!(sym.lookup_line(9),  Some(0x300E));
837    /// assert_eq!(sym.lookup_line(10), None);
838    /// ```
839    pub fn lookup_line(&self, line: usize) -> Option<u16> {
840        self.debug_symbols.as_ref()?.lookup_line(line)
841    }
842
843    /// Gets the source line of a given memory address (if it exists.)
844    /// 
845    /// The result can be converted into a source span (range of characters encompassed by the instruction)
846    /// using [`SymbolTable::source_info`] and [`SourceInfo::line_span`].
847    /// 
848    /// If debug symbols are not enabled, this unconditionally returns `None`.
849    /// Note that each source code line is mapped to at most one address.
850    /// 
851    /// ## Example
852    /// ```
853    /// use lc3_ensemble::parse::parse_ast;
854    /// use lc3_ensemble::asm::SymbolTable;
855    /// 
856    /// let src = "              ;;  0
857    ///     .orig x3000          ;;  1
858    ///     LOOP:                ;;  2
859    ///         ADD R0, R0, #1   ;;  3
860    ///         BR LOOP          ;;  4
861    ///     .fill x9999          ;;  5
862    ///     .blkw 10             ;;  6
863    ///     LOOP2:               ;;  7
864    ///         ADD R0, R0, #3   ;;  8
865    ///         BR LOOP3         ;;  9
866    ///     .end                 ;; 10
867    /// ";
868    /// let ast = parse_ast(src).unwrap();
869    /// 
870    /// // Debug symbols required:
871    /// let sym = SymbolTable::new(&ast, Some(src)).unwrap();
872    /// assert_eq!(sym.rev_lookup_line(0x3000),  Some(3));
873    /// assert_eq!(sym.rev_lookup_line(0x3001),  Some(4));
874    /// assert_eq!(sym.rev_lookup_line(0x3002),  Some(5));
875    /// assert_eq!(sym.rev_lookup_line(0x3003),  Some(6));
876    /// assert_eq!(sym.rev_lookup_line(0x3004),  None);
877    /// assert_eq!(sym.rev_lookup_line(0x3005),  None);
878    /// assert_eq!(sym.rev_lookup_line(0x3006),  None);
879    /// assert_eq!(sym.rev_lookup_line(0x3007),  None);
880    /// assert_eq!(sym.rev_lookup_line(0x3008),  None);
881    /// assert_eq!(sym.rev_lookup_line(0x3009),  None);
882    /// assert_eq!(sym.rev_lookup_line(0x300A),  None);
883    /// assert_eq!(sym.rev_lookup_line(0x300B),  None);
884    /// assert_eq!(sym.rev_lookup_line(0x300C),  None);
885    /// assert_eq!(sym.rev_lookup_line(0x300D),  Some(8));
886    /// assert_eq!(sym.rev_lookup_line(0x300E),  Some(9));
887    /// assert_eq!(sym.rev_lookup_line(0x300F),  None);
888    /// ```
889    pub fn rev_lookup_line(&self, addr: u16) -> Option<usize> {
890        self.debug_symbols.as_ref()?.rev_lookup_line(addr)
891    }
892
893    /// Reads the source info from this symbol table (if debug symbols are enabled).
894    pub fn source_info(&self) -> Option<&SourceInfo> {
895        self.debug_symbols.as_ref().map(|ds| &ds.src_info)
896    }
897    
898    /// Gets an iterable of the mapping from labels to addresses.
899    pub fn label_iter(&self) -> impl Iterator<Item=(&str, u16, bool)> + '_ {
900        self.label_map.iter()
901            .map(|(label, sym_data)| (&**label, sym_data.addr, sym_data.external))
902    }
903
904    /// Gets an iterable of the mapping from lines to addresses.
905    /// 
906    /// This iterator will be empty if debug symbols were not enabled.
907    pub fn line_iter(&self) -> impl Iterator<Item=(usize, u16)> + '_ {
908        self.debug_symbols.iter()
909            .flat_map(|s| s.line_map.iter())
910    }
911}
912impl std::fmt::Debug for SymbolTable {
913    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
914        struct ClosureMap<R, F: Fn() -> R>(F);
915        impl<K, V, R, F> std::fmt::Debug for ClosureMap<R, F> 
916            where K: std::fmt::Debug,
917                  V: std::fmt::Debug,
918                  R: IntoIterator<Item=(K, V)>,
919                  F: Fn() -> R
920        {
921            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
922                f.debug_map()
923                    .entries((self.0)())
924                    .finish()
925            }
926        }
927
928        f.debug_struct("SymbolTable")
929            .field("label_map", &ClosureMap(|| {
930                self.label_map.iter()
931                    .map(|(k, data @ SymbolData { addr, .. })| {
932                        (k, (Addr(*addr), data.span(k)))
933                    })
934            }))
935            .field("debug_symbols", &self.debug_symbols)
936            .finish()
937    }
938}
939
940/// Replaces a [`PCOffset`] value with an [`Offset`] value by calculating the offset from a given label
941/// (if this `PCOffset` represents a label).
942fn replace_pc_offset<const N: u32>(off: PCOffset<i16, N>, pc: u16, sym: &SymbolTable) -> Result<IOffset<N>, AsmErr> {
943    match off {
944        PCOffset::Offset(off) => Ok(off),
945        PCOffset::Label(label) => {
946            // TODO: use sym.lookup_label
947            match sym.label_map.get(&label.name.to_uppercase()) {
948                Some(SymbolData { external: true, .. }) => Err(AsmErr::new(AsmErrKind::OffsetExternal, label.span())),
949                Some(SymbolData { addr, .. }) => {
950                    IOffset::new(addr.wrapping_sub(pc) as i16)
951                        .map_err(|e| AsmErr::new(AsmErrKind::OffsetNewErr(e), label.span()))
952                }
953                None => Err(AsmErr::new(AsmErrKind::CouldNotFindLabel, label.span())),
954            }
955        },
956    }
957}
958
959/// Checks if two ranges overlap.
960/// 
961/// This assumes (start <= end) for both ranges.
962fn ranges_overlap<T: Ord>(a: Range<T>, b: Range<T>) -> bool {
963    let Range { start: a_start, end: a_end } = a;
964    let Range { start: b_start, end: b_end } = b;
965
966    // Range not overlapping: a_start >= b_end || b_start >= a_end
967    // This is just the inverse.
968    a_start < b_end && b_start < a_end
969}
970
971impl AsmInstr {
972    /// Converts an ASM instruction into a simulator instruction ([`SimInstr`])
973    /// by resolving offsets and erasing aliases.
974    /// 
975    /// Parameters:
976    /// - `pc`: PC increment
977    /// - `sym`: The symbol table
978    pub fn into_sim_instr(self, pc: u16, sym: &SymbolTable) -> Result<SimInstr, AsmErr> {
979        match self {
980            AsmInstr::ADD(dr, sr1, sr2) => Ok(SimInstr::ADD(dr, sr1, sr2)),
981            AsmInstr::AND(dr, sr1, sr2) => Ok(SimInstr::AND(dr, sr1, sr2)),
982            AsmInstr::BR(cc, off)       => Ok(SimInstr::BR(cc, replace_pc_offset(off, pc, sym)?)),
983            AsmInstr::JMP(br)           => Ok(SimInstr::JMP(br)),
984            AsmInstr::JSR(off)          => Ok(SimInstr::JSR(ImmOrReg::Imm(replace_pc_offset(off, pc, sym)?))),
985            AsmInstr::JSRR(br)          => Ok(SimInstr::JSR(ImmOrReg::Reg(br))),
986            AsmInstr::LD(dr, off)       => Ok(SimInstr::LD(dr, replace_pc_offset(off, pc, sym)?)),
987            AsmInstr::LDI(dr, off)      => Ok(SimInstr::LDI(dr, replace_pc_offset(off, pc, sym)?)),
988            AsmInstr::LDR(dr, br, off)  => Ok(SimInstr::LDR(dr, br, off)),
989            AsmInstr::LEA(dr, off)      => Ok(SimInstr::LEA(dr, replace_pc_offset(off, pc, sym)?)),
990            AsmInstr::NOT(dr, sr)       => Ok(SimInstr::NOT(dr, sr)),
991            AsmInstr::RET               => Ok(SimInstr::JMP(Reg::R7)),
992            AsmInstr::RTI               => Ok(SimInstr::RTI),
993            AsmInstr::ST(sr, off)       => Ok(SimInstr::ST(sr, replace_pc_offset(off, pc, sym)?)),
994            AsmInstr::STI(sr, off)      => Ok(SimInstr::STI(sr, replace_pc_offset(off, pc, sym)?)),
995            AsmInstr::STR(sr, br, off)  => Ok(SimInstr::STR(sr, br, off)),
996            AsmInstr::TRAP(vect)        => Ok(SimInstr::TRAP(vect)),
997            AsmInstr::NOP(off)          => Ok(SimInstr::BR(0b000, replace_pc_offset(off, pc, sym)?)),
998            AsmInstr::GETC              => Ok(SimInstr::TRAP(Offset::new_trunc(0x20))),
999            AsmInstr::OUT               => Ok(SimInstr::TRAP(Offset::new_trunc(0x21))),
1000            AsmInstr::PUTC              => Ok(SimInstr::TRAP(Offset::new_trunc(0x21))),
1001            AsmInstr::PUTS              => Ok(SimInstr::TRAP(Offset::new_trunc(0x22))),
1002            AsmInstr::IN                => Ok(SimInstr::TRAP(Offset::new_trunc(0x23))),
1003            AsmInstr::PUTSP             => Ok(SimInstr::TRAP(Offset::new_trunc(0x24))),
1004            AsmInstr::HALT              => Ok(SimInstr::TRAP(Offset::new_trunc(0x25))),
1005        }
1006    }
1007}
1008impl Directive {
1009    /// How many words this directive takes up in memory.
1010    fn word_len(&self) -> u16 {
1011        match self {
1012            Directive::Orig(_)     => 0,
1013            Directive::Fill(_)     => 1,
1014            Directive::Blkw(n)     => n.get(),
1015            Directive::Stringz(s)  => s.len() as u16 + 1, // lex should assure that s + 1 <= 65535
1016            Directive::End         => 0,
1017            Directive::External(_) => 0,
1018        }
1019    }
1020}
1021
1022/// An object file.
1023/// 
1024/// This is the final product after assembly source code is fully assembled.
1025/// This can be loaded in the simulator to run the assembled code.
1026#[derive(Debug, PartialEq, Eq, Clone)]
1027pub struct ObjectFile {
1028    /// A mapping of each block's address to its corresponding data.
1029    /// 
1030    /// Invariants:
1031    /// - The blocks are sorted in order.
1032    /// - Blocks cannot wrap around in memory.
1033    /// - Blocks cannot write into xFE00-xFFFF.
1034    /// - As a corollary, block's length must fit in a `u16`.
1035    block_map: BTreeMap<u16, Vec<Option<u16>>>,
1036
1037    /// Debug symbols.
1038    sym: Option<SymbolTable>
1039}
1040impl ObjectFile {
1041    /// Creates an empty object file.
1042    pub fn empty() -> Self {
1043        ObjectFile { block_map: BTreeMap::new(), sym: None }
1044    }
1045
1046    /// Creates a new object file from an assembly AST and a symbol table.
1047    fn new(ast: Vec<Stmt>, sym: SymbolTable, debug: bool) -> Result<Self, AsmErr> {
1048        /// A singular block which represents a singular region in an object file.
1049        struct ObjBlock {
1050            /// Starting address of the block.
1051            start: u16,
1052            /// The words in the block.
1053            words: Vec<Option<u16>>,
1054            /// Span of the orig statement.
1055            /// 
1056            /// Used for error diagnostics in this function.
1057            orig_span: Range<usize>
1058        }
1059
1060        impl ObjBlock {
1061            fn range(&self) -> Range<u16> {
1062                // Assumes no overflow and there cannot be more than u16::MAX words
1063                // Both of these invariants are asserted by `push` and `try_extend`.
1064                self.start .. (self.start + self.words.len() as u16)
1065            }
1066
1067            fn push(&mut self, data: u16) {
1068                self.words.push(Some(data));
1069            }
1070            fn shift(&mut self, n: u16) {
1071                self.words.extend({
1072                    std::iter::repeat(None)
1073                        .take(usize::from(n))
1074                });
1075            }
1076            /// Writes the assembly for the given directive into the provided object block.
1077            fn write_directive(&mut self, directive: Directive, labels: &SymbolTable) -> Result<(), AsmErr> {
1078                match directive {
1079                    Directive::Orig(_) => {},
1080                    Directive::Fill(pc_offset) => {
1081                        let off = match pc_offset {
1082                            PCOffset::Offset(o) => o.get(),
1083                            PCOffset::Label(l)  => {
1084                                labels.lookup_label(&l.name)
1085                                    .ok_or_else(|| AsmErr::new(AsmErrKind::CouldNotFindLabel, l.span()))?
1086                            },
1087                        };
1088
1089                        self.push(off);
1090                    },
1091                    Directive::Blkw(n) => self.shift(n.get()),
1092                    Directive::Stringz(n) => {
1093                        self.extend(n.bytes().map(u16::from));
1094                        self.push(0);
1095                    },
1096                    Directive::End => {},
1097                    Directive::External(_) => {},
1098                }
1099
1100                Ok(())
1101            }
1102        }
1103
1104        impl Extend<u16> for ObjBlock {
1105            fn extend<T: IntoIterator<Item = u16>>(&mut self, iter: T) {
1106                self.words.extend(iter.into_iter().map(Some));
1107            }
1108        }
1109
1110        let mut block_map: BTreeMap<u16, ObjBlock> = BTreeMap::new();
1111
1112        // PASS 2
1113        // Holding both the LC and currently writing block
1114        let mut current: Option<(u16, ObjBlock)> = None;
1115
1116        for stmt in ast {
1117            match stmt.nucleus {
1118                StmtKind::Directive(Directive::Orig(off)) => {
1119                    debug_assert!(current.is_none());
1120                    
1121                    // Add new working block.
1122                    let addr = off.get();
1123                    current.replace((addr, ObjBlock { start: addr, orig_span: stmt.span, words: vec![] }));
1124                },
1125                StmtKind::Directive(Directive::End) => {
1126                    // The current block is complete, so take it out and append it to the block map.
1127                    let Some((_, block)) = current.take() else {
1128                        // unreachable (because pass 1 should've found it)
1129                        return Err(AsmErr::new(AsmErrKind::UnopenedOrig, stmt.span));
1130                    };
1131
1132                    // only append if it's not empty:
1133                    if block.words.is_empty() { continue; }
1134
1135                    // Check for overlap. Note this is probably overengineering:
1136                    let m_overlapping = [
1137                        block_map.range(..=block.start).next_back(), // previous block
1138                        block_map.range(block.start..).next(), // next block
1139                    ]
1140                        .into_iter()
1141                        .flatten()
1142                        .find(|(_, b)| ranges_overlap(block.range(), b.range()));
1143
1144                    // If found overlapping block, raise error:
1145                    if let Some((_, overlapping_block)) = m_overlapping {
1146                        let span0 = block.orig_span;
1147                        let span1 = overlapping_block.orig_span.clone();
1148
1149                        let order = match span0.start <= span1.start {
1150                            true  => [span0, span1],
1151                            false => [span1, span0],
1152                        };
1153
1154                        return Err(AsmErr::new(AsmErrKind::OverlappingBlocks, order));
1155                    }
1156
1157                    block_map.insert(block.start, block);
1158                },
1159                StmtKind::Directive(Directive::External(_)) => {},
1160                StmtKind::Directive(directive) => {
1161                    let Some((lc, block)) = &mut current else {
1162                        return Err(AsmErr::new(AsmErrKind::UndetAddrStmt, stmt.span));
1163                    };
1164
1165                    let wl = directive.word_len();
1166                    block.write_directive(directive, &sym)?;
1167                    *lc = lc.wrapping_add(wl);
1168                },
1169                StmtKind::Instr(instr) => {
1170                    let Some((lc, block)) = &mut current else {
1171                        return Err(AsmErr::new(AsmErrKind::UndetAddrStmt, stmt.span));
1172                    };
1173                    let sim = instr.into_sim_instr(lc.wrapping_add(1), &sym)?;
1174                    block.push(sim.encode());
1175                    *lc = lc.wrapping_add(1);
1176                },
1177            }
1178        }
1179
1180        let block_map = block_map.into_iter()
1181            .map(|(start, ObjBlock { words, .. })| (start, words))
1182            .collect();
1183        Ok(Self {
1184            block_map,
1185            sym: debug.then_some(sym),
1186        })
1187    }
1188
1189    /// Gets a mutable reference to the value at the given address if defined in the object file.
1190    /// 
1191    /// If the data is uninitialized, this returns `Some(None)`.
1192    fn get_mut(&mut self, addr: u16) -> Option<&mut Option<u16>> {
1193        let (&start, block) = self.block_map.range_mut(..=addr).next_back()?;
1194        block.get_mut(addr.wrapping_sub(start) as usize)
1195    }
1196
1197    /// Links two object files, combining them into one.
1198    /// 
1199    /// The linking algorithm is as follows:
1200    /// - The list of regions in both object files are merged into one.
1201    /// - Overlaps between regions are checked. If any are found, error.
1202    /// - For every symbol in the symbol table, this is added to the new symbol table.
1203    ///     - If any symbols appear more than once in different locations (and neither are external), error (duplicate labels).
1204    ///     - If any symbols appear more than once in different locations (and one is external), pull out any relocation entries (from `.LINKER_INFO`) for the external and match them.
1205    /// - Merge the remaining relocation table entries.
1206    pub fn link(mut a_obj: Self, b_obj: Self) -> Result<Self, AsmErr> {
1207        let Self { block_map: b_block_map, sym: b_sym } = b_obj;
1208
1209        for (addr, block) in b_block_map {
1210            if a_obj.block_map.insert(addr, block).is_some() {
1211                return Err(AsmErr::new(AsmErrKind::OverlappingBlocks, []));
1212            }
1213        }
1214
1215        let first = a_obj.block_map.iter();
1216        let mut second = a_obj.block_map.iter();
1217        second.next();
1218        if std::iter::zip(first, second).any(|((&a_st, a_bl), (&b_st, b_bl))| {
1219            let ar = a_st .. (a_st + a_bl.len() as u16);
1220            let br = b_st .. (b_st + b_bl.len() as u16);
1221            ranges_overlap(ar, br)
1222        }) {
1223            return Err(AsmErr::new(AsmErrKind::OverlappingBlocks, []));
1224        }
1225
1226        // Merge symbol tables:
1227        let mut relocations = vec![];
1228        a_obj.sym = match (a_obj.sym, b_sym) {
1229            // If we have both symbol tables:
1230            (Some(mut a_sym), Some(b_sym)) => {
1231                let SymbolTable { label_map, rel_map, debug_symbols: b_debug_symbols } = b_sym;
1232                a_sym.debug_symbols = match (a_sym.debug_symbols, b_debug_symbols) {
1233                    (Some(ads), Some(bds)) => Some(DebugSymbols::link(ads, bds)?),
1234                    (m_ads, b_ads) => m_ads.or(b_ads)
1235                };
1236
1237                // Cannot overlap due to the above overlapping blocks invariant.
1238                a_sym.rel_map.extend(rel_map);
1239
1240                // For every label in symbol table B:
1241                for (label, b_sym_data) in label_map {
1242                    match a_sym.label_map.entry(label) {
1243                        Entry::Occupied(mut e) => {
1244                            let &a_sym_data = e.get();
1245                            match (a_sym_data.external, b_sym_data.external) {
1246                                // Two external labels
1247                                // Rel map entries are preserved, nothing changes.
1248                                (true, true) => {},
1249
1250                                // One external label
1251                                // Bind all of the relocation entries corresponding to the external symbol 
1252                                // to the linked symbol
1253                                (true, false) | (false, true) => {
1254                                    // The address to link to.
1255                                    let linked_sym = match a_sym_data.external {
1256                                        true  => b_sym_data,
1257                                        false => a_sym_data
1258                                    };
1259                                    e.insert(linked_sym);
1260
1261                                    // Split the relocation map to unmatching & matching labels.
1262                                    let (rel_addrs, new_rel_map) = a_sym.rel_map.into_iter()
1263                                        .partition(|(_, v)| v == e.key());
1264                                    a_sym.rel_map = new_rel_map;
1265
1266                                    // Add matching labels to the "to relocate later" Vec.
1267                                    relocations.extend({
1268                                        rel_addrs.into_keys().map(|addr| (addr, linked_sym.addr))
1269                                    });
1270                                },
1271
1272                                // No external labels
1273                                // If they point to the same addresses, nothing changes.
1274                                // If they point to different addresses, raise conflict.
1275                                (false, false) => if a_sym_data.addr != b_sym_data.addr {
1276                                    let a_span = a_sym_data.span(e.key());
1277                                    let b_span = b_sym_data.span(e.key());
1278                
1279                                    // TODO: this error does not have correct spans (since the files are different).
1280                                    return Err(AsmErr::new(AsmErrKind::OverlappingLabels, [a_span, b_span]));
1281                                }
1282                            }
1283                        },
1284                        Entry::Vacant(e) => { e.insert(b_sym_data); },
1285                    };
1286                }
1287                Some(a_sym)
1288            },
1289            (ma, mb) => ma.or(mb)
1290        };
1291        for (addr, linked_addr) in relocations {
1292            // TODO: handle case where the address needed is not found in block map
1293            // should really only occur from invalid manipulation of obj file
1294            a_obj.get_mut(addr)
1295                .unwrap_or_else(|| unreachable!("object file should have had address x{addr:04X} bound"))
1296                .replace(linked_addr);
1297        }
1298
1299        Ok(a_obj)
1300    }
1301
1302    /// Get an iterator over all of the blocks of the object file.
1303    pub(crate) fn block_iter(&self) -> impl Iterator<Item=(u16, &[Option<u16>])> {
1304        self.block_map.iter()
1305            .map(|(&addr, block)| (addr, block.as_slice()))
1306    }
1307    /// Checks if object file has any external symbols.
1308    pub(crate) fn get_external_symbol(&self) -> Option<&str> {
1309        self.symbol_table()
1310            .and_then(|s| s.label_map.iter().find(|(_, s)| s.external))
1311            .map(|(k, _)| k.as_str())
1312    }
1313    /// Gets an iterator over all of the memory locations defined in the object file.
1314    pub fn addr_iter(&self) -> impl Iterator<Item=(u16, Option<u16>)> + '_ {
1315        self.block_iter()
1316            .flat_map(|(addr, block)| {
1317                block.iter()
1318                    .enumerate()
1319                    .map(move |(i, &v)| (addr.wrapping_add(i as u16), v))
1320            })
1321    }
1322    /// Gets the symbol table if it is present in the object file.
1323    pub fn symbol_table(&self) -> Option<&SymbolTable> {
1324        self.sym.as_ref()
1325    }
1326}
1327
1328/// Used for [`std::fmt::Debug`] purposes.
1329#[repr(transparent)]
1330struct Addr(u16);
1331impl std::fmt::Debug for Addr {
1332    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1333        write!(f, "x{:04X}", self.0)
1334    }
1335}
1336
1337#[cfg(test)]
1338mod tests {
1339    use std::fmt::Write;
1340
1341    use crate::asm::encoding::TextFormat;
1342    use crate::asm::AsmErrKind;
1343    use crate::parse::parse_ast;
1344
1345    use super::encoding::{BinaryFormat, ObjFileFormat};
1346    use super::{assemble_debug, AsmErr, ObjectFile};
1347
1348    fn assemble_src(src: &str) -> Result<ObjectFile, AsmErr> {
1349        let ast = parse_ast(src).unwrap();
1350        assemble_debug(ast, src)
1351    }
1352    fn assert_asm_fail<T: std::fmt::Debug>(r: Result<T, AsmErr>, kind: AsmErrKind) {
1353        assert_eq!(r.unwrap_err().kind, kind);
1354    }
1355
1356    #[test]
1357    fn test_sym_basic() {
1358        let src = "
1359        .orig x3000
1360            A ADD R0, R0, #0
1361            AND R0, R0, #1
1362            C ADD R0, R0, #0
1363            D LD R0, #-1
1364            HALT
1365            HALT
1366            HALT
1367            HALT
1368            E BR C
1369            HALT
1370            HALT
1371            HALT
1372            B JSR A
1373        .end
1374        ";
1375
1376        let obj = assemble_src(src).unwrap();
1377        let sym = obj.symbol_table().unwrap();
1378        assert_eq!(sym.lookup_label("A"), Some(0x3000));
1379        assert_eq!(sym.lookup_label("C"), Some(0x3002));
1380        assert_eq!(sym.lookup_label("D"), Some(0x3003));
1381        assert_eq!(sym.lookup_label("E"), Some(0x3008));
1382        assert_eq!(sym.lookup_label("B"), Some(0x300C));
1383    }
1384
1385    #[test]
1386    fn test_region_overlap() {
1387        // Two orig blocks, one after another
1388        let src = "
1389        .orig x3000
1390            HALT
1391            HALT
1392            HALT
1393            HALT
1394        .end
1395
1396        .orig x3002
1397            HALT
1398        .end
1399        ";
1400
1401        let obj = assemble_src(src);
1402        assert_asm_fail(obj, AsmErrKind::OverlappingBlocks);
1403
1404        // Two orig blocks, one before another
1405        let src = "
1406        .orig x3002
1407            HALT
1408        .end
1409
1410        .orig x3000
1411            HALT
1412            HALT
1413            HALT
1414            HALT
1415        .end
1416        ";
1417
1418        let obj = assemble_src(src);
1419        assert_asm_fail(obj, AsmErrKind::OverlappingBlocks);
1420
1421        // Two orig blocks, one empty
1422        let src = "
1423        .orig x3000
1424            HALT
1425            HALT
1426            HALT
1427            HALT
1428        .end
1429
1430        .orig x3002
1431        .end
1432        ";
1433        assemble_src(src).unwrap();
1434
1435        // Two orig blocks, one empty
1436        let src = "
1437        .orig x3002
1438        .end
1439
1440        .orig x3000
1441            HALT
1442            HALT
1443            HALT
1444            HALT
1445        .end
1446        ";
1447
1448        assemble_src(src).unwrap();
1449    }
1450
1451    #[test]
1452    fn test_writing_into_io() {
1453        // write empty blocks
1454        let src = "
1455            .orig xFE00
1456            .end
1457        ";
1458        assemble_src(src).unwrap();
1459
1460        let src = "
1461            .orig xFE02
1462            .end
1463        ";
1464        assemble_src(src).unwrap();
1465
1466        // write actual block
1467        let src = "
1468            .orig xFE00
1469                AND R0, R0, #0
1470            .end
1471        ";
1472        let obj = assemble_src(src);
1473        assert_asm_fail(obj, AsmErrKind::BlockInIO);
1474    }
1475
1476    #[test]
1477    fn test_big_blocks() {
1478        // big BLKW
1479        let src = "
1480            .orig x3000
1481            .blkw xFFFF
1482            .end
1483        ";
1484
1485        let obj = assemble_src(src);
1486        assert_asm_fail(obj, AsmErrKind::WrappingBlock);
1487
1488        // Bunch of .fill:
1489        let mut src = String::from(".orig x0000\n");
1490        for i in 0x0000..=0xFFFF {
1491            writeln!(src, ".fill x{i:04X}").unwrap();
1492        }
1493        writeln!(src, ".end").unwrap();
1494
1495        let obj = assemble_src(&src);
1496        assert_asm_fail(obj, AsmErrKind::BlockInIO);
1497
1498        // perfectly aligns
1499        let src = "
1500            .orig xFFFF
1501            .blkw 1
1502            .end
1503        ";
1504        let obj = assemble_src(src);
1505        assert_asm_fail(obj, AsmErrKind::BlockInIO);
1506
1507        // perfectly aligns 2
1508        let src = "
1509            .orig x3000
1510            .blkw xD000
1511            .end
1512        ";
1513        let obj = assemble_src(src);
1514        assert_asm_fail(obj, AsmErrKind::BlockInIO);
1515
1516        // big BLKW
1517        let src = "
1518            .orig x3000
1519            .blkw xFFFF
1520            .blkw xFFFF
1521            .blkw xFFFF
1522            .end
1523        ";
1524        let obj = assemble_src(src);
1525        assert_asm_fail(obj, AsmErrKind::WrappingBlock);
1526
1527        // perfectly aligns and then does schenanigans
1528        let src = "
1529            .orig x3000
1530            LABEL1 .blkw xD000
1531            .fill x0000
1532            .fill x0001
1533            LABEL2 .fill x0002
1534            .fill x0003
1535            .end
1536        ";
1537        // Should error. Don't really care which error.
1538        assemble_src(src).unwrap_err();
1539    }
1540
1541    fn assert_obj_equal(deser: &mut ObjectFile, expected: &ObjectFile, m: &str) {
1542        let deser_src = deser.sym.as_mut()
1543            .and_then(|s| s.debug_symbols.as_mut())
1544            .map(|s| &mut s.src_info.src)
1545            .expect("deserialized object file has no source");
1546
1547        let expected_src = expected.sym.as_ref()
1548            .and_then(|s| s.debug_symbols.as_ref())
1549            .map(|s| &s.src_info.src)
1550            .expect("expected object file has no source");
1551
1552        let deser_lines = deser_src.trim().lines().map(str::trim);
1553        let expected_lines = expected_src.trim().lines().map(str::trim);
1554        
1555        assert!(deser_lines.eq(expected_lines), "lines should have matched");
1556        
1557        let mut buf = expected_src.to_string();
1558        std::mem::swap(deser_src, &mut buf);
1559        assert_eq!(deser, expected, "{m}");
1560
1561        // Revert change
1562        let deser_src = deser.sym.as_mut()
1563            .and_then(|s| s.debug_symbols.as_mut())
1564            .map(|s| &mut s.src_info.src)
1565            .expect("deserialized object file has no source");
1566        std::mem::swap(deser_src, &mut buf);
1567    }
1568
1569    #[test]
1570    fn test_ser_deser() {
1571        let src = "
1572            .orig x3000
1573                AND R0, R0, #0
1574                ADD R0, R0, #15
1575                MINUS_R0 NOT R1, R0
1576                ADD R1, R1, #1
1577                HALT
1578            .end
1579        ";
1580
1581        let obj = assemble_src(src).unwrap();
1582        
1583        // Binary format
1584        let ser = BinaryFormat::serialize(&obj);
1585        let mut de = BinaryFormat::deserialize(&ser).expect("binary encoding should've been parseable");
1586        assert_obj_equal(&mut de, &obj, "binary encoding could not be roundtripped");
1587
1588        // Text format
1589        let ser = TextFormat::serialize(&obj);
1590        let mut de = TextFormat::deserialize(&ser).expect("text encoding should've been parseable");
1591        assert_obj_equal(&mut de, &obj, "text encoding could not be roundtripped");
1592    }
1593    
1594    #[test]
1595    fn test_ser_deser_crlf() {
1596        // With CRLF:
1597        let src = "\r
1598            .orig x3000\r
1599                AND R0, R0, #0\r
1600                ADD R0, R0, #15\r
1601                MINUS_R0 NOT R1, R0\r
1602                ADD R1, R1, #1\r
1603                HALT\r
1604            .end\r
1605        ";
1606
1607        let obj = assemble_src(src).unwrap();
1608        
1609        // Text format
1610        let ser = TextFormat::serialize(&obj);
1611        let mut de = TextFormat::deserialize(&ser).expect("text encoding should've been parseable");
1612        assert_obj_equal(&mut de, &obj, "text encoding could not be roundtripped");
1613    }
1614
1615    #[test]
1616    fn test_basic_link() {
1617        let library = "
1618            .orig x5000
1619                ADDER ADD R2, R0, R1
1620                RET
1621            .end
1622        ";
1623
1624        let program = "
1625            .external ADDER
1626
1627            .orig x4000
1628                LD R0, A
1629                LD R1, B
1630
1631                LD R3, ADDER_ADDR
1632                JSRR R3
1633
1634                HALT
1635
1636                A .fill 10
1637                B .fill 20
1638                ADDER_ADDR .fill ADDER
1639            .end
1640        ";
1641
1642        let lib_obj = assemble_src(library).unwrap();
1643        let prog_obj = assemble_src(program).unwrap();
1644        ObjectFile::link(lib_obj, prog_obj).unwrap();
1645    }
1646    #[test]
1647    fn test_link_overlapping_labels() {
1648        let library = "
1649            .orig x5000
1650                LOOP BR LOOP
1651                RET
1652            .end
1653        ";
1654
1655        let program = "
1656            .orig x4000
1657                LOOP BR LOOP
1658                RET
1659            .end
1660        ";
1661
1662        let lib_obj = assemble_src(library).unwrap();
1663        let prog_obj = assemble_src(program).unwrap();
1664        let link_obj = ObjectFile::link(lib_obj, prog_obj);
1665        assert_asm_fail(link_obj, AsmErrKind::OverlappingLabels);
1666    }
1667
1668    #[test]
1669    fn test_link_overlapping_blocks() {
1670        // Overlapping blocks, but not identically overlapping
1671        let library = "
1672            .orig x3000
1673                ADD R0, R0, #0
1674                ADD R0, R0, #1
1675                ADD R0, R0, #2
1676                ADD R0, R0, #3
1677            .end
1678        ";
1679
1680        let program = "
1681            .orig x3001
1682                ADD R0, R0, #0
1683                ADD R0, R0, #1
1684                ADD R0, R0, #2
1685                ADD R0, R0, #3
1686            .end
1687        ";
1688
1689        let lib_obj = assemble_src(library).unwrap();
1690        let prog_obj = assemble_src(program).unwrap();
1691        let link_obj = ObjectFile::link(lib_obj, prog_obj);
1692        assert_asm_fail(link_obj, AsmErrKind::OverlappingBlocks);
1693        
1694        // Overlapping blocks and identically overlapping
1695        let library = "
1696            .orig x3000
1697                ADD R0, R0, #0
1698                ADD R0, R0, #1
1699                ADD R0, R0, #2
1700                ADD R0, R0, #3
1701            .end
1702        ";
1703
1704        let program = "
1705            .orig x3000
1706                ADD R0, R0, #0
1707                ADD R0, R0, #1
1708                ADD R0, R0, #2
1709                ADD R0, R0, #3
1710            .end
1711        ";
1712
1713        let lib_obj = assemble_src(library).unwrap();
1714        let prog_obj = assemble_src(program).unwrap();
1715        let link_obj = ObjectFile::link(lib_obj, prog_obj);
1716        assert_asm_fail(link_obj, AsmErrKind::OverlappingBlocks);
1717        
1718        // Contiguous blocks
1719        let library = "
1720            .orig x3000
1721                ADD R0, R0, #0
1722                ADD R0, R0, #1
1723                ADD R0, R0, #2
1724                ADD R0, R0, #3
1725            .end
1726        ";
1727
1728        let program = "
1729            .orig x3004
1730                ADD R0, R0, #4
1731                ADD R0, R0, #5
1732                ADD R0, R0, #6
1733                ADD R0, R0, #7
1734            .end
1735        ";
1736
1737        let lib_obj = assemble_src(library).unwrap();
1738        let prog_obj = assemble_src(program).unwrap();
1739        ObjectFile::link(lib_obj, prog_obj).unwrap();
1740    }
1741
1742    #[test]
1743    fn test_link_order_agnostic() {
1744        let library = "
1745            .orig x5000
1746                ;; very functional MULTIPLY subroutine
1747                MULTIPLY RET
1748            .end
1749        ";
1750        let program1 = "
1751            .external MULTIPLY
1752
1753            .orig x3000
1754                LD R1, MADDR1
1755                JSRR R1
1756                HALT
1757
1758                MADDR1 .fill MULTIPLY
1759            .end
1760        ";
1761        let program2 = "
1762            .external MULTIPLY
1763
1764            .orig x4000
1765                LD R2, MADDR2
1766                JSRR R2
1767                HALT
1768
1769                MADDR2 .fill MULTIPLY
1770            .end
1771        ";
1772
1773        let lib_obj = assemble_src(library).unwrap();
1774        let prog1_obj = assemble_src(program1).unwrap();
1775        let prog2_obj = assemble_src(program2).unwrap();
1776        
1777        // (lib + prog1) + prog2:
1778        let link_obj = ObjectFile::link(lib_obj.clone(), prog1_obj.clone()).unwrap();
1779        ObjectFile::link(link_obj.clone(), prog2_obj.clone())
1780            .expect("(lib + prog1) + prog2 should've succeeded");
1781        // prog2 + (lib + prog1):
1782        ObjectFile::link(prog2_obj.clone(), link_obj.clone())
1783            .expect("prog2 + (lib + prog1) should've succeeded");
1784        
1785        // (prog1 + lib) + prog2:
1786        let link_obj = ObjectFile::link(prog1_obj.clone(), lib_obj.clone()).unwrap();
1787        ObjectFile::link(link_obj.clone(), prog2_obj.clone())
1788            .expect("(prog1 + lib) + prog2 should've succeeded");
1789        // prog2 + (prog1 + lib):
1790        ObjectFile::link(prog2_obj.clone(), link_obj.clone())
1791            .expect("prog2 + (prog1 + lib) should've succeeded");
1792        
1793        // (prog1 + prog2) + lib:
1794        let link_obj = ObjectFile::link(prog1_obj.clone(), prog2_obj.clone()).unwrap();
1795        ObjectFile::link(link_obj.clone(), lib_obj.clone())
1796            .expect("(prog1 + prog2) + lib should've succeeded");
1797        // lib + (prog1 + prog2):
1798        ObjectFile::link(lib_obj.clone(), link_obj.clone())
1799            .expect("lib + (prog1 + prog2) should've succeeded");
1800    }
1801
1802    #[test]
1803    fn test_link_external_place() {
1804        let program = "
1805            .external X
1806            .orig x3000
1807                .fill X
1808            .end
1809        ";
1810        assemble_src(program).unwrap();
1811
1812        let program = "
1813            .external X
1814            .orig x3000
1815                LD R0, X
1816            .end
1817        ";
1818        assert_asm_fail(assemble_src(program), AsmErrKind::OffsetExternal);
1819
1820        let program = "
1821            .external X
1822            .orig x3000
1823                JSR X
1824            .end
1825        ";
1826        assert_asm_fail(assemble_src(program), AsmErrKind::OffsetExternal);
1827
1828        let program = "
1829            .external X
1830            .orig x3000
1831                BR X
1832            .end
1833        ";
1834        assert_asm_fail(assemble_src(program), AsmErrKind::OffsetExternal);
1835    }
1836}