lc3_ensemble/asm.rs
1//! Assembling assembly source ASTs into object files.
2//!
3//! This module is used to convert source ASTs (`Vec<`[`Stmt`]`>`) into object files
4//! that can be executed by the simulator.
5//!
6//! The assembler module notably consists of:
7//! - [`assemble`] and [`assemble_debug`]: The main functions which assemble the statements into an object file.
8//! - [`SymbolTable`]: a struct holding the symbol table, which stores location information for labels after the first assembler pass
9//! - [`ObjectFile`]: a struct holding the object file, which can be loaded into the simulator and executed
10//!
11//! [`Stmt`]: crate::ast::asm::Stmt
12
13pub mod encoding;
14
15use std::collections::hash_map::Entry;
16use std::collections::{BTreeMap, HashMap};
17use std::ops::Range;
18
19use logos::Span;
20
21use crate::ast::asm::{AsmInstr, Directive, Stmt, StmtKind};
22use crate::ast::sim::SimInstr;
23use crate::ast::{IOffset, ImmOrReg, Offset, OffsetNewErr, PCOffset, Reg};
24use crate::err::ErrSpan;
25
26/// Assembles a assembly source code AST into an object file.
27///
28/// This function assembles the source AST *without* including debug symbols
29/// in the object file.
30/// See [`SymbolTable`] for more details about debug symbols.
31///
32/// # Example
33/// ```
34/// use lc3_ensemble::parse::parse_ast;
35/// use lc3_ensemble::asm::assemble;
36///
37/// let src = "
38/// .orig x3000
39/// LABEL: HALT
40/// .end
41/// ";
42/// let ast = parse_ast(src).unwrap();
43///
44/// let obj_file = assemble(ast);
45/// assert!(obj_file.is_ok());
46///
47/// // Symbol table doesn't exist in object file:
48/// let obj_file = obj_file.unwrap();
49/// assert!(obj_file.symbol_table().is_none());
50/// ```
51pub fn assemble(ast: Vec<Stmt>) -> Result<ObjectFile, AsmErr> {
52 let sym = SymbolTable::new(&ast, None)?;
53 ObjectFile::new(ast, sym, false)
54}
55/// Assembles a assembly source code AST into an object file.
56///
57/// This function assembles the source AST *and* includes debug symbols
58/// in the object file.
59/// See [`SymbolTable`] for more details about debug symbols.
60///
61/// # Example
62/// ```
63/// use lc3_ensemble::parse::parse_ast;
64/// use lc3_ensemble::asm::assemble_debug;
65///
66/// let src = "
67/// .orig x3000
68/// LABEL: HALT
69/// .end
70/// ";
71/// let ast = parse_ast(src).unwrap();
72///
73/// let obj_file = assemble_debug(ast, src);
74/// assert!(obj_file.is_ok());
75///
76/// // Symbol table does exist in object file:
77/// let obj_file = obj_file.unwrap();
78/// assert!(obj_file.symbol_table().is_some());
79/// ```
80pub fn assemble_debug(ast: Vec<Stmt>, src: &str) -> Result<ObjectFile, AsmErr> {
81 let sym = SymbolTable::new(&ast, Some(src))?;
82 ObjectFile::new(ast, sym, true)
83}
84
85/// Kinds of errors that can occur from assembling given assembly code.
86///
87/// See [`AsmErr`] for this error type with span information included.
88#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
89pub enum AsmErrKind {
90 /// Cannot determine address of label (pass 1).
91 UndetAddrLabel,
92 /// Cannot determine address of instruction (pass 2).
93 UndetAddrStmt,
94 /// There was an `.orig` but no corresponding `.end` (pass 1).
95 UnclosedOrig,
96 /// There was an `.end` but no corresonding `.orig` (pass 1).
97 UnopenedOrig,
98 /// There was an `.orig` opened after another `.orig` (pass 1).
99 OverlappingOrig,
100 /// There were multiple labels of the same name (pass 1).
101 OverlappingLabels,
102 /// Block wraps memory (pass 2).
103 WrappingBlock,
104 /// Block writes to IO memory region (pass 2).
105 BlockInIO,
106 /// There are blocks that overlap ranges of memory (pass 2).
107 OverlappingBlocks,
108 /// Creating the offset to replace a label caused overflow (pass 2).
109 OffsetNewErr(OffsetNewErr),
110 /// Cannot find the offset with an external label (pass 2).
111 OffsetExternal,
112 /// Label did not have an assigned address (pass 2).
113 CouldNotFindLabel,
114}
115impl std::fmt::Display for AsmErrKind {
116 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117 match self {
118 Self::UndetAddrLabel => f.write_str("cannot determine address of label"),
119 Self::UndetAddrStmt => f.write_str("cannot determine address of statement"),
120 Self::UnclosedOrig => f.write_str(".orig directive was never closed"),
121 Self::UnopenedOrig => f.write_str(".end does not have associated .orig"),
122 Self::OverlappingOrig => f.write_str("cannot have an .orig inside another region"),
123 Self::OverlappingLabels => f.write_str("label was defined multiple times"),
124 Self::WrappingBlock => f.write_str("block wraps around in memory"),
125 Self::BlockInIO => f.write_str("cannot write code into memory-mapped IO region"),
126 Self::OverlappingBlocks => f.write_str("regions overlap in memory"),
127 Self::OffsetNewErr(e) => e.fmt(f),
128 Self::OffsetExternal => f.write_str("cannot use external label here"),
129 Self::CouldNotFindLabel => f.write_str("label could not be found"),
130 }
131 }
132}
133
134/// Error from assembling given assembly code.
135#[derive(Debug)]
136pub struct AsmErr {
137 /// The value with a span.
138 pub kind: AsmErrKind,
139 /// The span in the source associated with this value.
140 pub span: ErrSpan
141}
142impl AsmErr {
143 /// Creates a new [`AsmErr`].
144 pub fn new<E: Into<ErrSpan>>(kind: AsmErrKind, span: E) -> Self {
145 AsmErr { kind, span: span.into() }
146 }
147}
148impl std::fmt::Display for AsmErr {
149 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
150 self.kind.fmt(f)
151 }
152}
153impl std::error::Error for AsmErr {
154 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
155 match &self.kind {
156 AsmErrKind::OffsetNewErr(e) => Some(e),
157 _ => None
158 }
159 }
160}
161impl crate::err::Error for AsmErr {
162 fn span(&self) -> Option<crate::err::ErrSpan> {
163 Some(self.span.clone())
164 }
165
166 fn help(&self) -> Option<std::borrow::Cow<str>> {
167 match &self.kind {
168 AsmErrKind::UndetAddrLabel => Some("try moving this label inside of an .orig/.end block".into()),
169 AsmErrKind::UndetAddrStmt => Some("try moving this statement inside of an .orig/.end block".into()),
170 AsmErrKind::UnclosedOrig => Some("try adding an .end directive at the end of this block".into()),
171 AsmErrKind::UnopenedOrig => Some("try adding an .orig directive at the beginning of this block".into()),
172 AsmErrKind::OverlappingOrig => Some("try adding an .end directive at the end of the outer .orig block".into()),
173 AsmErrKind::OverlappingLabels => Some("labels must be unique within a file, try renaming one of the labels".into()),
174 AsmErrKind::OverlappingBlocks => Some("try moving the starting address of one of these regions".into()),
175 AsmErrKind::WrappingBlock => Some("user code typically starts at x3000 and is short enough to not wrap memory".into()),
176 AsmErrKind::BlockInIO => Some("try not doing that".into()),
177 AsmErrKind::OffsetNewErr(e) => e.help(),
178 AsmErrKind::OffsetExternal => Some("external labels cannot be an offset operand; try creating a .fill LABEL directive".into()),
179 AsmErrKind::CouldNotFindLabel => Some("try adding this label before an instruction or directive".into()),
180 }
181 }
182}
183
184const IO_START: u16 = 0xFE00;
185
186/// A mapping from line numbers to memory addresses (and vice-versa).
187///
188/// This is implemented as a sorted list of contiguous blocks, consisting of:
189/// - The first source line number of the block, and
190/// - The memory addresses of the block
191///
192/// For example,
193/// ```text
194/// 0 | .orig x3000
195/// 1 | AND R0, R0, #0
196/// 2 | ADD R0, R0, #5
197/// 3 | HALT
198/// 4 | .end
199/// 5 |
200/// 6 | .orig x4000
201/// 7 | .blkw 5
202/// 8 | .fill x9F9F
203/// 9 | .end
204/// ```
205/// maps to `LineSymbolMap({1: [0x3000, 0x3001, 0x3002], 7: [0x4000, 0x4005]})`.
206///
207/// This data structure holds several invariants:
208/// - Line numbers should never overlap.
209/// - In a given block, the addresses should be in ascending order
210/// (this has to occur in a well-formed program because regions constitute contiguous, non-overlapping parts of memory).
211///
212/// If these invariants are not held, invalid behavior can occur.
213#[derive(PartialEq, Eq, Clone)]
214struct LineSymbolMap(BTreeMap<usize, Vec<u16>>);
215
216impl LineSymbolMap {
217 /// Creates a new line symbol table.
218 ///
219 /// This takes an expanded list of line-memory address mappings and condenses it into
220 /// the internal [`LineSymbolMap`] format.
221 ///
222 /// For example,
223 ///
224 /// `[None, Some(0x3000), Some(0x3001), Some(0x3002), None, None, None, Some(0x4000), Some(0x4005)]`
225 /// condenses to `{1: [0x3000, 0x3001, 0x3002], 7: [0x4000, 0x4005]}`.
226 ///
227 /// For a given block of contiguous `Some`s, the memory addresses should be sorted and accesses
228 /// through `LineSymbolMap`'s methods assume the values are sorted.
229 ///
230 /// If they are not sorted, incorrect behaviors may occur. Skill issue.
231 fn new(lines: Vec<Option<u16>>) -> Option<Self> {
232 let mut blocks = BTreeMap::new();
233 let mut current = None;
234 for (i, line) in lines.into_iter().enumerate() {
235 match line {
236 Some(addr) => current.get_or_insert_with(Vec::new).push(addr),
237 None => if let Some(bl) = current.take() {
238 blocks.insert(i - bl.len(), bl);
239 },
240 }
241 }
242
243 Self::from_blocks(blocks)
244 }
245
246 fn from_blocks(blocks: impl IntoIterator<Item=(usize, Vec<u16>)>) -> Option<Self> {
247 let mut bl: Vec<_> = blocks.into_iter().collect();
248
249 bl.sort_by_key(|&(l, _)| l);
250
251 // Check not overlapping:
252 let not_overlapping = bl.windows(2).all(|win| {
253 let [(ls, lb), (rs, _)] = win else { unreachable!() };
254 ls + lb.len() <= *rs
255 });
256
257 match not_overlapping {
258 true => {
259 // Check every individual block is sorted:
260 let sorted = bl.iter().all(|(_, lb)| {
261 lb.windows(2).all(|win| win[0] <= win[1])
262 });
263
264 sorted.then(|| Self(bl.into_iter().collect()))
265 }
266 false => None,
267 }
268 }
269
270 /// Gets the memory address associated with this line, if it is present in the line symbol mapping.
271 fn get(&self, line: usize) -> Option<u16> {
272 // Find the block such that `line` falls within the source line number range of the block.
273 let (start, block) = self.0.range(..=line).next_back()?;
274
275 // Access the memory address.
276 block.get(line - *start).copied()
277 }
278
279 /// Gets the source line number associated with this memory address, if it is present in the symbol table.
280 fn find(&self, addr: u16) -> Option<usize> {
281 self.0.iter()
282 .find_map(|(start, words)| {
283 // Find the block that contains the given address,
284 // and then find the line index once it's found.
285 words.binary_search(&addr)
286 .ok()
287 .map(|o| start + o)
288 })
289 }
290
291 /// Gets an iterable representing the block mappings.
292 fn block_iter(&self) -> impl Iterator<Item=(usize, &[u16])> + '_ {
293 self.0.iter()
294 .map(|(&i, words)| (i, words.as_slice()))
295 }
296
297 /// Gets an iterable representing the mapping of line numbers to addresses.
298 fn iter(&self) -> impl Iterator<Item=(usize, u16)> + '_ {
299 self.block_iter()
300 .flat_map(|(i, words)| {
301 words.iter()
302 .enumerate()
303 .map(move |(off, &addr)| (i + off, addr))
304 })
305 }
306}
307impl std::fmt::Debug for LineSymbolMap {
308 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
309 f.debug_map()
310 .entries(self.iter().map(|(i, v)| (i, Addr(v))))
311 .finish()
312 }
313}
314/// Struct holding the source string and contains helpers
315/// to index lines and to query position information from a source string.
316#[derive(PartialEq, Eq, Clone)]
317pub struct SourceInfo {
318 /// The source code.
319 src: String,
320 /// The index of each new line in source code.
321 nl_indices: Vec<usize>
322}
323impl std::fmt::Debug for SourceInfo {
324 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
325 f.debug_struct("SourceInfo")
326 .field("nl_indices", &self.nl_indices)
327 .finish_non_exhaustive()
328 }
329}
330impl SourceInfo {
331 /// Computes the source info from a given string.
332 pub fn new(src: &str) -> Self {
333 Self::from_string(src.to_string())
334 }
335 fn from_string(src: String) -> Self {
336 // Index where each new line appears.
337 let nl_indices: Vec<_> = src
338 .match_indices('\n')
339 .map(|(i, _)| i)
340 .chain([src.len()])
341 .collect();
342
343 Self { src, nl_indices }
344 }
345
346 /// Returns the entire source.
347 pub fn source(&self) -> &str {
348 &self.src
349 }
350
351 /// Counts the number of lines in the source string.
352 pub fn count_lines(&self) -> usize {
353 // The first line, plus every line after (delimited by a new line)
354 self.nl_indices.len()
355 }
356
357 /// Gets the character range for the provided line, including any whitespace
358 /// and the newline character.
359 ///
360 /// This returns None if line is not in the interval `[0, number of lines)`.
361 fn raw_line_span(&self, line: usize) -> Option<Range<usize>> {
362 // Implementation detail:
363 // number of lines = self.nl_indices.len() + 1
364 if !(0..self.count_lines()).contains(&line) {
365 return None;
366 };
367
368 let start = match line {
369 0 => 0,
370 _ => self.nl_indices[line - 1] + 1
371 };
372
373 let eof = self.src.len();
374 let end = match self.nl_indices.get(line) {
375 Some(i) => (i + 1).min(eof), // incl NL, but don't go over EOF
376 None => eof,
377 };
378
379 Some(start..end)
380 }
381
382 /// Gets the character range for the provided line, excluding any whitespace.
383 ///
384 /// This returns None if line is not in the interval `[0, number of lines)`.
385 pub fn line_span(&self, line: usize) -> Option<Range<usize>> {
386 let Range { mut start, mut end } = self.raw_line_span(line)?;
387
388 // shift line span by trim
389 let line = &self.src[start..end];
390 let end_trimmed = line.trim_end();
391 end -= line.len() - end_trimmed.len();
392
393 let line = end_trimmed;
394 start += line.len() - line.trim_start().len();
395
396 Some(start..end)
397 }
398
399 /// Reads a line from source.
400 ///
401 /// This returns None if line is not in the interval `[0, number of lines)`.
402 pub fn read_line(&self, line: usize) -> Option<&str> {
403 self.line_span(line).map(|r| &self.src[r])
404 }
405
406 /// Gets the line number of the current position.
407 fn get_line(&self, index: usize) -> usize {
408 self.nl_indices.partition_point(|&start| start < index)
409 }
410
411 /// Calculates the line and character number for a given character index.
412 ///
413 /// If the index exceeds the length of the string,
414 /// the line number is given as the last line and the character number
415 /// is given as the number of characters after the start of the line.
416 pub fn get_pos_pair(&self, index: usize) -> (usize, usize) {
417 let lno = self.get_line(index);
418
419 let Range { start: lstart, .. } = self.raw_line_span(lno)
420 .or_else(|| self.raw_line_span(self.nl_indices.len()))
421 .unwrap_or(0..0);
422 let cno = index - lstart;
423 (lno, cno)
424 }
425}
426impl From<&'_ str> for SourceInfo {
427 fn from(value: &'_ str) -> Self {
428 Self::new(value)
429 }
430}
431impl From<String> for SourceInfo {
432 fn from(value: String) -> Self {
433 Self::from_string(value)
434 }
435}
436
437#[derive(PartialEq, Eq, Clone, Copy, Default, Debug)]
438struct SymbolData {
439 addr: u16,
440 src_start: usize,
441 external: bool
442}
443impl SymbolData {
444 /// Calculates the source range of this symbol, given the name of the label.
445 fn span(&self, label: &str) -> Range<usize> {
446 self.src_start .. (self.src_start + label.len())
447 }
448}
449
450/// Debug symbols.
451#[derive(PartialEq, Eq, Debug, Clone)]
452struct DebugSymbols {
453 /// A mapping from each line with a statement in the source to an address.
454 line_map: LineSymbolMap,
455
456 /// Information about the source.
457 src_info: SourceInfo
458}
459impl DebugSymbols {
460 pub fn lookup_line(&self, line: usize) -> Option<u16> {
461 self.line_map.get(line)
462 }
463
464 pub fn rev_lookup_line(&self, addr: u16) -> Option<usize> {
465 self.line_map.find(addr)
466 }
467
468 /// Links debug symbols.
469 pub fn link(mut a: Self, b: Self) -> Result<Self, AsmErr> {
470 // TODO: This kinda just tacks files together into a single source.
471 // Once object files have support for multiple ASM references, this should be cleaner.
472
473 let lines = a.src_info.count_lines();
474
475 // B doesn't overlap with A because ObjectFile check
476 a.line_map.0.extend({
477 b.line_map.0.into_iter()
478 .map(|(k, v)| (k + lines, v))
479 });
480
481 a.src_info = SourceInfo::from_string(a.src_info.src + "\n" + &b.src_info.src);
482
483 Ok(a)
484 }
485}
486/// The symbol table created in the first assembler pass
487/// that encodes source code mappings to memory addresses in the object file.
488///
489/// The symbol table consists of:
490/// - A mapping from source code labels to memory addresses.
491/// - A mapping from source code line numbers to memory addresses (if debug symbols are enabled).
492/// - The source text (if debug symbols are enabled).
493///
494/// Here is a table of the mappings that the symbol table provides:
495///
496/// | from ↓, to → | label | memory address | source line/span |
497/// |----------------|------------------------------------|-------------------------------|-----------------------------------|
498/// | label | - | [`SymbolTable::lookup_label`] | [`SymbolTable::get_label_source`] |
499/// | memory address | [`SymbolTable::rev_lookup_label`] | - | [`SymbolTable::rev_lookup_line`] |
500/// | source line | none | [`SymbolTable::lookup_line`] | - |
501///
502/// # Debug symbols
503///
504/// Debug symbols are optional data added to this symbol table which can help users debug their code.
505///
506/// Without debug symbols, the symbol table only consists of mappings from labels to spans (and vice-versa).
507/// These are used to translate labels in source code to addresses during the assembly process.
508/// After the completion of this assembly process, the [`SymbolTable`] is dropped and is not part of the resultant
509/// [`ObjectFile`].
510///
511/// However, with debug symbols, this information persists in the resultant [`ObjectFile`], allowing
512/// the label mappings to be accessed during simulation time. Additionally, more information from the source
513/// text is available during simulation time:
514/// - Mappings from source code line numbers to memory addresses
515/// - Source code text (which grants access to line contents from a given line number; see [`SourceInfo`] for more details)
516#[derive(PartialEq, Eq, Clone)]
517pub struct SymbolTable {
518 /// A mapping from label to address and span of the label.
519 label_map: HashMap<String, SymbolData>,
520
521 /// Relocation table.
522 rel_map: HashMap<u16, String>,
523
524 /// Debug symbols. If None, there were no debug symbols provided.
525 debug_symbols: Option<DebugSymbols>,
526}
527
528impl SymbolTable {
529 /// Creates a new symbol table.
530 ///
531 /// This performs the first assembler pass, calculating the memory address of
532 /// labels at each provided statement.
533 ///
534 /// If a `src` argument is provided, debug symbols are also computed for the symbol table.
535 ///
536 /// ## Example
537 /// ```
538 /// use lc3_ensemble::parse::parse_ast;
539 /// use lc3_ensemble::asm::SymbolTable;
540 ///
541 /// let src = "
542 /// .orig x3000
543 /// LABEL: HALT
544 /// .end
545 /// ";
546 /// let ast = parse_ast(src).unwrap();
547 ///
548 /// // without debug symbols
549 /// let sym = SymbolTable::new(&ast, None).unwrap();
550 /// assert_eq!(sym.lookup_label("LABEL"), Some(0x3000));
551 /// assert_eq!(sym.lookup_line(2), None);
552 ///
553 /// // with debug symbols
554 /// let sym = SymbolTable::new(&ast, Some(src)).unwrap();
555 /// assert_eq!(sym.lookup_label("LABEL"), Some(0x3000));
556 /// assert_eq!(sym.lookup_line(2), Some(0x3000));
557 /// ```
558 pub fn new(stmts: &[Stmt], src: Option<&str>) -> Result<Self, AsmErr> {
559 struct Cursor {
560 // The current location counter.
561 lc: u16,
562 // True if 0x10000.
563 overflowed: bool,
564 // The span of the .orig directive.
565 block_orig: Span,
566 }
567 impl Cursor {
568 fn new(lc: u16, block_orig: Span) -> Self {
569 Self { lc, overflowed: false, block_orig }
570 }
571 /// Attempts to shift the LC forward by n word locations,
572 /// failing if that would cause the LC to pass the IO region or
573 /// overflow memory.
574 fn shift(&mut self, n: u16) -> Result<(), AsmErrKind> {
575 if n == 0 { return Ok(()); }
576
577 match (self.overflowed, self.lc.checked_add(n)) {
578 (true, _) => Err(AsmErrKind::WrappingBlock),
579 (false, Some(new_lc)) if new_lc > IO_START => Err(AsmErrKind::BlockInIO),
580 (false, Some(new_lc)) => {
581 self.lc = new_lc;
582 Ok(())
583 },
584 (false, None) => {
585 let lc = std::mem::take(&mut self.lc);
586 self.overflowed = true;
587 // If aligns exactly, it can't be considered wrapping over
588 Err(match lc == n.wrapping_neg() {
589 true => AsmErrKind::BlockInIO,
590 false => AsmErrKind::WrappingBlock,
591 })
592 }
593 }
594 }
595 }
596
597 fn add_label(
598 labels: &mut HashMap<String, SymbolData>,
599 label: &crate::ast::Label,
600 addr: u16,
601 external: bool
602 ) -> Result<(), AsmErr> {
603 match labels.entry(label.name.to_uppercase()) {
604 // Two labels with different addresses. Conflict.
605 Entry::Occupied(e) if e.get().addr != addr => {
606 let span1 = e.get().span(e.key());
607 let span2 = label.span();
608 Err(AsmErr::new(AsmErrKind::OverlappingLabels, [span1, span2]))
609 },
610 // Two labels with same address. No conflict.
611 Entry::Occupied(_) => Ok(()),
612 // New label.
613 Entry::Vacant(e) => {
614 e.insert(SymbolData { addr, src_start: label.span().start, external });
615 Ok(())
616 }
617 }
618 }
619
620 let mut cursor: Option<Cursor> = None;
621 let mut label_map: HashMap<String, SymbolData> = HashMap::new();
622 let mut rel_map = HashMap::new();
623 let mut debug_sym = src.map(|s| {
624 let src_info = SourceInfo::new(s);
625 (vec![None; src_info.count_lines()], src_info)
626 });
627
628 for stmt in stmts {
629 // Add labels if they exist
630 if !stmt.labels.is_empty() {
631 // If cursor does not exist, that means we're not in an .orig block,
632 // so these labels don't have a known location
633 let Some(cur) = cursor.as_ref() else {
634 let spans = stmt.labels.iter()
635 .map(|label| label.span())
636 .collect::<Vec<_>>();
637
638 return Err(AsmErr::new(AsmErrKind::UndetAddrLabel, spans));
639 };
640
641 // Add labels
642 for label in &stmt.labels {
643 add_label(&mut label_map, label, cur.lc, false)?;
644 }
645 }
646
647 // Handle special directives:
648 match &stmt.nucleus {
649 StmtKind::Directive(Directive::Orig(addr)) => match cursor {
650 Some(cur) => return Err(AsmErr::new(AsmErrKind::OverlappingOrig, [cur.block_orig, stmt.span.clone()])),
651 None => { cursor.replace(Cursor::new(addr.get(), stmt.span.clone())); },
652 },
653 StmtKind::Directive(Directive::End) => match cursor {
654 Some(_) => { cursor.take(); },
655 None => return Err(AsmErr::new(AsmErrKind::UnopenedOrig, stmt.span.clone())),
656 },
657 StmtKind::Directive(Directive::External(label)) => {
658 // Arbitrarily chose 0 as a placeholder for externals
659 add_label(&mut label_map, label, 0, true)?;
660 }
661 StmtKind::Directive(Directive::Fill(PCOffset::Label(label))) => {
662 let label_text = label.name.to_uppercase();
663 if let Some(SymbolData { external: true, .. }) = label_map.get(&label_text) {
664 let Some(cur) = cursor.as_ref() else {
665 return Err(AsmErr::new(AsmErrKind::UndetAddrStmt, stmt.span.clone()));
666 };
667
668 rel_map.insert(cur.lc, label_text);
669 }
670 },
671 _ => {}
672 };
673
674 // If we're keeping track of the line counter currently (i.e., are inside of a .orig block):
675 if let Some(cur) = &mut cursor {
676 // Debug symbol:
677 // Calculate which source code line is associated with the instruction the LC is currently pointing to
678 // and add the mapping from line to instruction address.
679 if let Some((lines, s)) = &mut debug_sym {
680 if !matches!(stmt.nucleus, StmtKind::Directive(Directive::Orig(_) | Directive::End)) {
681 let line_index = s.get_line(stmt.span.start);
682 lines[line_index].replace(cur.lc);
683 }
684 }
685
686 // Shift the LC forward
687 match &stmt.nucleus {
688 StmtKind::Instr(_) => cur.shift(1),
689 StmtKind::Directive(d) => cur.shift(d.word_len()),
690 }.map_err(|e| AsmErr::new(e, stmt.span.clone()))?
691 }
692 }
693
694 if let Some(cur) = cursor {
695 return Err(AsmErr::new(AsmErrKind::UnclosedOrig, cur.block_orig));
696 }
697
698 let debug_symbols = debug_sym.map(|(lines, src_info)| DebugSymbols {
699 line_map: LineSymbolMap::new(lines)
700 .unwrap_or_else(|| {
701 unreachable!("line symbol map's invariants should have been upheld during symbol table pass")
702 }),
703 src_info,
704 });
705
706 Ok(SymbolTable { label_map, rel_map, debug_symbols })
707 }
708
709 /// Gets the memory address of a given label (if it exists).
710 ///
711 /// ## Example
712 /// ```
713 /// use lc3_ensemble::parse::parse_ast;
714 /// use lc3_ensemble::asm::SymbolTable;
715 ///
716 /// let src = "
717 /// .orig x3000
718 /// LOOP:
719 /// ADD R0, R0, #1
720 /// BR LOOP
721 /// LOOP2:
722 /// ADD R0, R0, #2
723 /// BR LOOP2
724 /// LOOP3:
725 /// ADD R0, R0, #3
726 /// BR LOOP3
727 /// .end
728 /// ";
729 /// let ast = parse_ast(src).unwrap();
730 ///
731 /// let sym = SymbolTable::new(&ast, None).unwrap();
732 /// assert_eq!(sym.lookup_label("LOOP"), Some(0x3000));
733 /// assert_eq!(sym.lookup_label("LOOP2"), Some(0x3002));
734 /// assert_eq!(sym.lookup_label("LOOP3"), Some(0x3004));
735 /// assert_eq!(sym.lookup_label("LOOP_DE_LOOP"), None);
736 /// ```
737 pub fn lookup_label(&self, label: &str) -> Option<u16> {
738 self.label_map.get(&label.to_uppercase()).map(|sym_data| sym_data.addr)
739 }
740
741 /// Gets the label at a given memory address (if it exists).
742 ///
743 /// ## Example
744 /// ```
745 /// use lc3_ensemble::parse::parse_ast;
746 /// use lc3_ensemble::asm::SymbolTable;
747 ///
748 /// let src = "
749 /// .orig x3000
750 /// LOOP:
751 /// ADD R0, R0, #1
752 /// BR LOOP
753 /// LOOP2:
754 /// ADD R0, R0, #2
755 /// BR LOOP2
756 /// LOOP3:
757 /// ADD R0, R0, #3
758 /// BR LOOP3
759 /// .end
760 /// ";
761 /// let ast = parse_ast(src).unwrap();
762 ///
763 /// let sym = SymbolTable::new(&ast, None).unwrap();
764 /// assert_eq!(sym.rev_lookup_label(0x3000), Some("LOOP"));
765 /// assert_eq!(sym.rev_lookup_label(0x3002), Some("LOOP2"));
766 /// assert_eq!(sym.rev_lookup_label(0x3004), Some("LOOP3"));
767 /// assert_eq!(sym.rev_lookup_label(0x2110), None);
768 /// ```
769 pub fn rev_lookup_label(&self, addr: u16) -> Option<&str> {
770 let (label, _) = self.label_map.iter()
771 .find(|&(_, sym_data)| sym_data.addr == addr)?;
772
773 Some(label)
774 }
775
776 /// Gets the source span of a given label (if it exists).
777 ///
778 /// ## Example
779 /// ```
780 /// use lc3_ensemble::parse::parse_ast;
781 /// use lc3_ensemble::asm::SymbolTable;
782 ///
783 /// let src = "
784 /// .orig x3000
785 /// LOOPY:
786 /// ADD R0, R0, #1
787 /// BR LOOPY
788 /// .end
789 /// ";
790 /// let ast = parse_ast(src).unwrap();
791 ///
792 /// let sym = SymbolTable::new(&ast, None).unwrap();
793 /// assert_eq!(sym.get_label_source("LOOPY"), Some(21..26));
794 /// assert_eq!(sym.get_label_source("LOOP_DE_LOOP"), None);
795 /// ```
796 pub fn get_label_source(&self, label: &str) -> Option<Range<usize>> {
797 self.label_map.get(label)
798 .map(|data| data.span(label))
799 }
800
801 /// Gets the address of a given source line.
802 ///
803 /// If debug symbols are not enabled, this unconditionally returns `None`.
804 /// Note that each address is mapped to at most one source code line.
805 ///
806 /// ## Example
807 /// ```
808 /// use lc3_ensemble::parse::parse_ast;
809 /// use lc3_ensemble::asm::SymbolTable;
810 ///
811 /// let src = " ;; 0
812 /// .orig x3000 ;; 1
813 /// LOOP: ;; 2
814 /// ADD R0, R0, #1 ;; 3
815 /// BR LOOP ;; 4
816 /// .fill x9999 ;; 5
817 /// .blkw 10 ;; 6
818 /// LOOP2: ;; 7
819 /// ADD R0, R0, #3 ;; 8
820 /// BR LOOP3 ;; 9
821 /// .end ;; 10
822 /// ";
823 /// let ast = parse_ast(src).unwrap();
824 ///
825 /// // Debug symbols required:
826 /// let sym = SymbolTable::new(&ast, Some(src)).unwrap();
827 /// assert_eq!(sym.lookup_line(0), None);
828 /// assert_eq!(sym.lookup_line(1), None);
829 /// assert_eq!(sym.lookup_line(2), None);
830 /// assert_eq!(sym.lookup_line(3), Some(0x3000));
831 /// assert_eq!(sym.lookup_line(4), Some(0x3001));
832 /// assert_eq!(sym.lookup_line(5), Some(0x3002));
833 /// assert_eq!(sym.lookup_line(6), Some(0x3003));
834 /// assert_eq!(sym.lookup_line(7), None);
835 /// assert_eq!(sym.lookup_line(8), Some(0x300D));
836 /// assert_eq!(sym.lookup_line(9), Some(0x300E));
837 /// assert_eq!(sym.lookup_line(10), None);
838 /// ```
839 pub fn lookup_line(&self, line: usize) -> Option<u16> {
840 self.debug_symbols.as_ref()?.lookup_line(line)
841 }
842
843 /// Gets the source line of a given memory address (if it exists.)
844 ///
845 /// The result can be converted into a source span (range of characters encompassed by the instruction)
846 /// using [`SymbolTable::source_info`] and [`SourceInfo::line_span`].
847 ///
848 /// If debug symbols are not enabled, this unconditionally returns `None`.
849 /// Note that each source code line is mapped to at most one address.
850 ///
851 /// ## Example
852 /// ```
853 /// use lc3_ensemble::parse::parse_ast;
854 /// use lc3_ensemble::asm::SymbolTable;
855 ///
856 /// let src = " ;; 0
857 /// .orig x3000 ;; 1
858 /// LOOP: ;; 2
859 /// ADD R0, R0, #1 ;; 3
860 /// BR LOOP ;; 4
861 /// .fill x9999 ;; 5
862 /// .blkw 10 ;; 6
863 /// LOOP2: ;; 7
864 /// ADD R0, R0, #3 ;; 8
865 /// BR LOOP3 ;; 9
866 /// .end ;; 10
867 /// ";
868 /// let ast = parse_ast(src).unwrap();
869 ///
870 /// // Debug symbols required:
871 /// let sym = SymbolTable::new(&ast, Some(src)).unwrap();
872 /// assert_eq!(sym.rev_lookup_line(0x3000), Some(3));
873 /// assert_eq!(sym.rev_lookup_line(0x3001), Some(4));
874 /// assert_eq!(sym.rev_lookup_line(0x3002), Some(5));
875 /// assert_eq!(sym.rev_lookup_line(0x3003), Some(6));
876 /// assert_eq!(sym.rev_lookup_line(0x3004), None);
877 /// assert_eq!(sym.rev_lookup_line(0x3005), None);
878 /// assert_eq!(sym.rev_lookup_line(0x3006), None);
879 /// assert_eq!(sym.rev_lookup_line(0x3007), None);
880 /// assert_eq!(sym.rev_lookup_line(0x3008), None);
881 /// assert_eq!(sym.rev_lookup_line(0x3009), None);
882 /// assert_eq!(sym.rev_lookup_line(0x300A), None);
883 /// assert_eq!(sym.rev_lookup_line(0x300B), None);
884 /// assert_eq!(sym.rev_lookup_line(0x300C), None);
885 /// assert_eq!(sym.rev_lookup_line(0x300D), Some(8));
886 /// assert_eq!(sym.rev_lookup_line(0x300E), Some(9));
887 /// assert_eq!(sym.rev_lookup_line(0x300F), None);
888 /// ```
889 pub fn rev_lookup_line(&self, addr: u16) -> Option<usize> {
890 self.debug_symbols.as_ref()?.rev_lookup_line(addr)
891 }
892
893 /// Reads the source info from this symbol table (if debug symbols are enabled).
894 pub fn source_info(&self) -> Option<&SourceInfo> {
895 self.debug_symbols.as_ref().map(|ds| &ds.src_info)
896 }
897
898 /// Gets an iterable of the mapping from labels to addresses.
899 pub fn label_iter(&self) -> impl Iterator<Item=(&str, u16, bool)> + '_ {
900 self.label_map.iter()
901 .map(|(label, sym_data)| (&**label, sym_data.addr, sym_data.external))
902 }
903
904 /// Gets an iterable of the mapping from lines to addresses.
905 ///
906 /// This iterator will be empty if debug symbols were not enabled.
907 pub fn line_iter(&self) -> impl Iterator<Item=(usize, u16)> + '_ {
908 self.debug_symbols.iter()
909 .flat_map(|s| s.line_map.iter())
910 }
911}
912impl std::fmt::Debug for SymbolTable {
913 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
914 struct ClosureMap<R, F: Fn() -> R>(F);
915 impl<K, V, R, F> std::fmt::Debug for ClosureMap<R, F>
916 where K: std::fmt::Debug,
917 V: std::fmt::Debug,
918 R: IntoIterator<Item=(K, V)>,
919 F: Fn() -> R
920 {
921 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
922 f.debug_map()
923 .entries((self.0)())
924 .finish()
925 }
926 }
927
928 f.debug_struct("SymbolTable")
929 .field("label_map", &ClosureMap(|| {
930 self.label_map.iter()
931 .map(|(k, data @ SymbolData { addr, .. })| {
932 (k, (Addr(*addr), data.span(k)))
933 })
934 }))
935 .field("debug_symbols", &self.debug_symbols)
936 .finish()
937 }
938}
939
940/// Replaces a [`PCOffset`] value with an [`Offset`] value by calculating the offset from a given label
941/// (if this `PCOffset` represents a label).
942fn replace_pc_offset<const N: u32>(off: PCOffset<i16, N>, pc: u16, sym: &SymbolTable) -> Result<IOffset<N>, AsmErr> {
943 match off {
944 PCOffset::Offset(off) => Ok(off),
945 PCOffset::Label(label) => {
946 // TODO: use sym.lookup_label
947 match sym.label_map.get(&label.name.to_uppercase()) {
948 Some(SymbolData { external: true, .. }) => Err(AsmErr::new(AsmErrKind::OffsetExternal, label.span())),
949 Some(SymbolData { addr, .. }) => {
950 IOffset::new(addr.wrapping_sub(pc) as i16)
951 .map_err(|e| AsmErr::new(AsmErrKind::OffsetNewErr(e), label.span()))
952 }
953 None => Err(AsmErr::new(AsmErrKind::CouldNotFindLabel, label.span())),
954 }
955 },
956 }
957}
958
959/// Checks if two ranges overlap.
960///
961/// This assumes (start <= end) for both ranges.
962fn ranges_overlap<T: Ord>(a: Range<T>, b: Range<T>) -> bool {
963 let Range { start: a_start, end: a_end } = a;
964 let Range { start: b_start, end: b_end } = b;
965
966 // Range not overlapping: a_start >= b_end || b_start >= a_end
967 // This is just the inverse.
968 a_start < b_end && b_start < a_end
969}
970
971impl AsmInstr {
972 /// Converts an ASM instruction into a simulator instruction ([`SimInstr`])
973 /// by resolving offsets and erasing aliases.
974 ///
975 /// Parameters:
976 /// - `pc`: PC increment
977 /// - `sym`: The symbol table
978 pub fn into_sim_instr(self, pc: u16, sym: &SymbolTable) -> Result<SimInstr, AsmErr> {
979 match self {
980 AsmInstr::ADD(dr, sr1, sr2) => Ok(SimInstr::ADD(dr, sr1, sr2)),
981 AsmInstr::AND(dr, sr1, sr2) => Ok(SimInstr::AND(dr, sr1, sr2)),
982 AsmInstr::BR(cc, off) => Ok(SimInstr::BR(cc, replace_pc_offset(off, pc, sym)?)),
983 AsmInstr::JMP(br) => Ok(SimInstr::JMP(br)),
984 AsmInstr::JSR(off) => Ok(SimInstr::JSR(ImmOrReg::Imm(replace_pc_offset(off, pc, sym)?))),
985 AsmInstr::JSRR(br) => Ok(SimInstr::JSR(ImmOrReg::Reg(br))),
986 AsmInstr::LD(dr, off) => Ok(SimInstr::LD(dr, replace_pc_offset(off, pc, sym)?)),
987 AsmInstr::LDI(dr, off) => Ok(SimInstr::LDI(dr, replace_pc_offset(off, pc, sym)?)),
988 AsmInstr::LDR(dr, br, off) => Ok(SimInstr::LDR(dr, br, off)),
989 AsmInstr::LEA(dr, off) => Ok(SimInstr::LEA(dr, replace_pc_offset(off, pc, sym)?)),
990 AsmInstr::NOT(dr, sr) => Ok(SimInstr::NOT(dr, sr)),
991 AsmInstr::RET => Ok(SimInstr::JMP(Reg::R7)),
992 AsmInstr::RTI => Ok(SimInstr::RTI),
993 AsmInstr::ST(sr, off) => Ok(SimInstr::ST(sr, replace_pc_offset(off, pc, sym)?)),
994 AsmInstr::STI(sr, off) => Ok(SimInstr::STI(sr, replace_pc_offset(off, pc, sym)?)),
995 AsmInstr::STR(sr, br, off) => Ok(SimInstr::STR(sr, br, off)),
996 AsmInstr::TRAP(vect) => Ok(SimInstr::TRAP(vect)),
997 AsmInstr::NOP(off) => Ok(SimInstr::BR(0b000, replace_pc_offset(off, pc, sym)?)),
998 AsmInstr::GETC => Ok(SimInstr::TRAP(Offset::new_trunc(0x20))),
999 AsmInstr::OUT => Ok(SimInstr::TRAP(Offset::new_trunc(0x21))),
1000 AsmInstr::PUTC => Ok(SimInstr::TRAP(Offset::new_trunc(0x21))),
1001 AsmInstr::PUTS => Ok(SimInstr::TRAP(Offset::new_trunc(0x22))),
1002 AsmInstr::IN => Ok(SimInstr::TRAP(Offset::new_trunc(0x23))),
1003 AsmInstr::PUTSP => Ok(SimInstr::TRAP(Offset::new_trunc(0x24))),
1004 AsmInstr::HALT => Ok(SimInstr::TRAP(Offset::new_trunc(0x25))),
1005 }
1006 }
1007}
1008impl Directive {
1009 /// How many words this directive takes up in memory.
1010 fn word_len(&self) -> u16 {
1011 match self {
1012 Directive::Orig(_) => 0,
1013 Directive::Fill(_) => 1,
1014 Directive::Blkw(n) => n.get(),
1015 Directive::Stringz(s) => s.len() as u16 + 1, // lex should assure that s + 1 <= 65535
1016 Directive::End => 0,
1017 Directive::External(_) => 0,
1018 }
1019 }
1020}
1021
1022/// An object file.
1023///
1024/// This is the final product after assembly source code is fully assembled.
1025/// This can be loaded in the simulator to run the assembled code.
1026#[derive(Debug, PartialEq, Eq, Clone)]
1027pub struct ObjectFile {
1028 /// A mapping of each block's address to its corresponding data.
1029 ///
1030 /// Invariants:
1031 /// - The blocks are sorted in order.
1032 /// - Blocks cannot wrap around in memory.
1033 /// - Blocks cannot write into xFE00-xFFFF.
1034 /// - As a corollary, block's length must fit in a `u16`.
1035 block_map: BTreeMap<u16, Vec<Option<u16>>>,
1036
1037 /// Debug symbols.
1038 sym: Option<SymbolTable>
1039}
1040impl ObjectFile {
1041 /// Creates an empty object file.
1042 pub fn empty() -> Self {
1043 ObjectFile { block_map: BTreeMap::new(), sym: None }
1044 }
1045
1046 /// Creates a new object file from an assembly AST and a symbol table.
1047 fn new(ast: Vec<Stmt>, sym: SymbolTable, debug: bool) -> Result<Self, AsmErr> {
1048 /// A singular block which represents a singular region in an object file.
1049 struct ObjBlock {
1050 /// Starting address of the block.
1051 start: u16,
1052 /// The words in the block.
1053 words: Vec<Option<u16>>,
1054 /// Span of the orig statement.
1055 ///
1056 /// Used for error diagnostics in this function.
1057 orig_span: Range<usize>
1058 }
1059
1060 impl ObjBlock {
1061 fn range(&self) -> Range<u16> {
1062 // Assumes no overflow and there cannot be more than u16::MAX words
1063 // Both of these invariants are asserted by `push` and `try_extend`.
1064 self.start .. (self.start + self.words.len() as u16)
1065 }
1066
1067 fn push(&mut self, data: u16) {
1068 self.words.push(Some(data));
1069 }
1070 fn shift(&mut self, n: u16) {
1071 self.words.extend({
1072 std::iter::repeat(None)
1073 .take(usize::from(n))
1074 });
1075 }
1076 /// Writes the assembly for the given directive into the provided object block.
1077 fn write_directive(&mut self, directive: Directive, labels: &SymbolTable) -> Result<(), AsmErr> {
1078 match directive {
1079 Directive::Orig(_) => {},
1080 Directive::Fill(pc_offset) => {
1081 let off = match pc_offset {
1082 PCOffset::Offset(o) => o.get(),
1083 PCOffset::Label(l) => {
1084 labels.lookup_label(&l.name)
1085 .ok_or_else(|| AsmErr::new(AsmErrKind::CouldNotFindLabel, l.span()))?
1086 },
1087 };
1088
1089 self.push(off);
1090 },
1091 Directive::Blkw(n) => self.shift(n.get()),
1092 Directive::Stringz(n) => {
1093 self.extend(n.bytes().map(u16::from));
1094 self.push(0);
1095 },
1096 Directive::End => {},
1097 Directive::External(_) => {},
1098 }
1099
1100 Ok(())
1101 }
1102 }
1103
1104 impl Extend<u16> for ObjBlock {
1105 fn extend<T: IntoIterator<Item = u16>>(&mut self, iter: T) {
1106 self.words.extend(iter.into_iter().map(Some));
1107 }
1108 }
1109
1110 let mut block_map: BTreeMap<u16, ObjBlock> = BTreeMap::new();
1111
1112 // PASS 2
1113 // Holding both the LC and currently writing block
1114 let mut current: Option<(u16, ObjBlock)> = None;
1115
1116 for stmt in ast {
1117 match stmt.nucleus {
1118 StmtKind::Directive(Directive::Orig(off)) => {
1119 debug_assert!(current.is_none());
1120
1121 // Add new working block.
1122 let addr = off.get();
1123 current.replace((addr, ObjBlock { start: addr, orig_span: stmt.span, words: vec![] }));
1124 },
1125 StmtKind::Directive(Directive::End) => {
1126 // The current block is complete, so take it out and append it to the block map.
1127 let Some((_, block)) = current.take() else {
1128 // unreachable (because pass 1 should've found it)
1129 return Err(AsmErr::new(AsmErrKind::UnopenedOrig, stmt.span));
1130 };
1131
1132 // only append if it's not empty:
1133 if block.words.is_empty() { continue; }
1134
1135 // Check for overlap. Note this is probably overengineering:
1136 let m_overlapping = [
1137 block_map.range(..=block.start).next_back(), // previous block
1138 block_map.range(block.start..).next(), // next block
1139 ]
1140 .into_iter()
1141 .flatten()
1142 .find(|(_, b)| ranges_overlap(block.range(), b.range()));
1143
1144 // If found overlapping block, raise error:
1145 if let Some((_, overlapping_block)) = m_overlapping {
1146 let span0 = block.orig_span;
1147 let span1 = overlapping_block.orig_span.clone();
1148
1149 let order = match span0.start <= span1.start {
1150 true => [span0, span1],
1151 false => [span1, span0],
1152 };
1153
1154 return Err(AsmErr::new(AsmErrKind::OverlappingBlocks, order));
1155 }
1156
1157 block_map.insert(block.start, block);
1158 },
1159 StmtKind::Directive(Directive::External(_)) => {},
1160 StmtKind::Directive(directive) => {
1161 let Some((lc, block)) = &mut current else {
1162 return Err(AsmErr::new(AsmErrKind::UndetAddrStmt, stmt.span));
1163 };
1164
1165 let wl = directive.word_len();
1166 block.write_directive(directive, &sym)?;
1167 *lc = lc.wrapping_add(wl);
1168 },
1169 StmtKind::Instr(instr) => {
1170 let Some((lc, block)) = &mut current else {
1171 return Err(AsmErr::new(AsmErrKind::UndetAddrStmt, stmt.span));
1172 };
1173 let sim = instr.into_sim_instr(lc.wrapping_add(1), &sym)?;
1174 block.push(sim.encode());
1175 *lc = lc.wrapping_add(1);
1176 },
1177 }
1178 }
1179
1180 let block_map = block_map.into_iter()
1181 .map(|(start, ObjBlock { words, .. })| (start, words))
1182 .collect();
1183 Ok(Self {
1184 block_map,
1185 sym: debug.then_some(sym),
1186 })
1187 }
1188
1189 /// Gets a mutable reference to the value at the given address if defined in the object file.
1190 ///
1191 /// If the data is uninitialized, this returns `Some(None)`.
1192 fn get_mut(&mut self, addr: u16) -> Option<&mut Option<u16>> {
1193 let (&start, block) = self.block_map.range_mut(..=addr).next_back()?;
1194 block.get_mut(addr.wrapping_sub(start) as usize)
1195 }
1196
1197 /// Links two object files, combining them into one.
1198 ///
1199 /// The linking algorithm is as follows:
1200 /// - The list of regions in both object files are merged into one.
1201 /// - Overlaps between regions are checked. If any are found, error.
1202 /// - For every symbol in the symbol table, this is added to the new symbol table.
1203 /// - If any symbols appear more than once in different locations (and neither are external), error (duplicate labels).
1204 /// - If any symbols appear more than once in different locations (and one is external), pull out any relocation entries (from `.LINKER_INFO`) for the external and match them.
1205 /// - Merge the remaining relocation table entries.
1206 pub fn link(mut a_obj: Self, b_obj: Self) -> Result<Self, AsmErr> {
1207 let Self { block_map: b_block_map, sym: b_sym } = b_obj;
1208
1209 for (addr, block) in b_block_map {
1210 if a_obj.block_map.insert(addr, block).is_some() {
1211 return Err(AsmErr::new(AsmErrKind::OverlappingBlocks, []));
1212 }
1213 }
1214
1215 let first = a_obj.block_map.iter();
1216 let mut second = a_obj.block_map.iter();
1217 second.next();
1218 if std::iter::zip(first, second).any(|((&a_st, a_bl), (&b_st, b_bl))| {
1219 let ar = a_st .. (a_st + a_bl.len() as u16);
1220 let br = b_st .. (b_st + b_bl.len() as u16);
1221 ranges_overlap(ar, br)
1222 }) {
1223 return Err(AsmErr::new(AsmErrKind::OverlappingBlocks, []));
1224 }
1225
1226 // Merge symbol tables:
1227 let mut relocations = vec![];
1228 a_obj.sym = match (a_obj.sym, b_sym) {
1229 // If we have both symbol tables:
1230 (Some(mut a_sym), Some(b_sym)) => {
1231 let SymbolTable { label_map, rel_map, debug_symbols: b_debug_symbols } = b_sym;
1232 a_sym.debug_symbols = match (a_sym.debug_symbols, b_debug_symbols) {
1233 (Some(ads), Some(bds)) => Some(DebugSymbols::link(ads, bds)?),
1234 (m_ads, b_ads) => m_ads.or(b_ads)
1235 };
1236
1237 // Cannot overlap due to the above overlapping blocks invariant.
1238 a_sym.rel_map.extend(rel_map);
1239
1240 // For every label in symbol table B:
1241 for (label, b_sym_data) in label_map {
1242 match a_sym.label_map.entry(label) {
1243 Entry::Occupied(mut e) => {
1244 let &a_sym_data = e.get();
1245 match (a_sym_data.external, b_sym_data.external) {
1246 // Two external labels
1247 // Rel map entries are preserved, nothing changes.
1248 (true, true) => {},
1249
1250 // One external label
1251 // Bind all of the relocation entries corresponding to the external symbol
1252 // to the linked symbol
1253 (true, false) | (false, true) => {
1254 // The address to link to.
1255 let linked_sym = match a_sym_data.external {
1256 true => b_sym_data,
1257 false => a_sym_data
1258 };
1259 e.insert(linked_sym);
1260
1261 // Split the relocation map to unmatching & matching labels.
1262 let (rel_addrs, new_rel_map) = a_sym.rel_map.into_iter()
1263 .partition(|(_, v)| v == e.key());
1264 a_sym.rel_map = new_rel_map;
1265
1266 // Add matching labels to the "to relocate later" Vec.
1267 relocations.extend({
1268 rel_addrs.into_keys().map(|addr| (addr, linked_sym.addr))
1269 });
1270 },
1271
1272 // No external labels
1273 // If they point to the same addresses, nothing changes.
1274 // If they point to different addresses, raise conflict.
1275 (false, false) => if a_sym_data.addr != b_sym_data.addr {
1276 let a_span = a_sym_data.span(e.key());
1277 let b_span = b_sym_data.span(e.key());
1278
1279 // TODO: this error does not have correct spans (since the files are different).
1280 return Err(AsmErr::new(AsmErrKind::OverlappingLabels, [a_span, b_span]));
1281 }
1282 }
1283 },
1284 Entry::Vacant(e) => { e.insert(b_sym_data); },
1285 };
1286 }
1287 Some(a_sym)
1288 },
1289 (ma, mb) => ma.or(mb)
1290 };
1291 for (addr, linked_addr) in relocations {
1292 // TODO: handle case where the address needed is not found in block map
1293 // should really only occur from invalid manipulation of obj file
1294 a_obj.get_mut(addr)
1295 .unwrap_or_else(|| unreachable!("object file should have had address x{addr:04X} bound"))
1296 .replace(linked_addr);
1297 }
1298
1299 Ok(a_obj)
1300 }
1301
1302 /// Get an iterator over all of the blocks of the object file.
1303 pub(crate) fn block_iter(&self) -> impl Iterator<Item=(u16, &[Option<u16>])> {
1304 self.block_map.iter()
1305 .map(|(&addr, block)| (addr, block.as_slice()))
1306 }
1307 /// Checks if object file has any external symbols.
1308 pub(crate) fn get_external_symbol(&self) -> Option<&str> {
1309 self.symbol_table()
1310 .and_then(|s| s.label_map.iter().find(|(_, s)| s.external))
1311 .map(|(k, _)| k.as_str())
1312 }
1313 /// Gets an iterator over all of the memory locations defined in the object file.
1314 pub fn addr_iter(&self) -> impl Iterator<Item=(u16, Option<u16>)> + '_ {
1315 self.block_iter()
1316 .flat_map(|(addr, block)| {
1317 block.iter()
1318 .enumerate()
1319 .map(move |(i, &v)| (addr.wrapping_add(i as u16), v))
1320 })
1321 }
1322 /// Gets the symbol table if it is present in the object file.
1323 pub fn symbol_table(&self) -> Option<&SymbolTable> {
1324 self.sym.as_ref()
1325 }
1326}
1327
1328/// Used for [`std::fmt::Debug`] purposes.
1329#[repr(transparent)]
1330struct Addr(u16);
1331impl std::fmt::Debug for Addr {
1332 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1333 write!(f, "x{:04X}", self.0)
1334 }
1335}
1336
1337#[cfg(test)]
1338mod tests {
1339 use std::fmt::Write;
1340
1341 use crate::asm::encoding::TextFormat;
1342 use crate::asm::AsmErrKind;
1343 use crate::parse::parse_ast;
1344
1345 use super::encoding::{BinaryFormat, ObjFileFormat};
1346 use super::{assemble_debug, AsmErr, ObjectFile};
1347
1348 fn assemble_src(src: &str) -> Result<ObjectFile, AsmErr> {
1349 let ast = parse_ast(src).unwrap();
1350 assemble_debug(ast, src)
1351 }
1352 fn assert_asm_fail<T: std::fmt::Debug>(r: Result<T, AsmErr>, kind: AsmErrKind) {
1353 assert_eq!(r.unwrap_err().kind, kind);
1354 }
1355
1356 #[test]
1357 fn test_sym_basic() {
1358 let src = "
1359 .orig x3000
1360 A ADD R0, R0, #0
1361 AND R0, R0, #1
1362 C ADD R0, R0, #0
1363 D LD R0, #-1
1364 HALT
1365 HALT
1366 HALT
1367 HALT
1368 E BR C
1369 HALT
1370 HALT
1371 HALT
1372 B JSR A
1373 .end
1374 ";
1375
1376 let obj = assemble_src(src).unwrap();
1377 let sym = obj.symbol_table().unwrap();
1378 assert_eq!(sym.lookup_label("A"), Some(0x3000));
1379 assert_eq!(sym.lookup_label("C"), Some(0x3002));
1380 assert_eq!(sym.lookup_label("D"), Some(0x3003));
1381 assert_eq!(sym.lookup_label("E"), Some(0x3008));
1382 assert_eq!(sym.lookup_label("B"), Some(0x300C));
1383 }
1384
1385 #[test]
1386 fn test_region_overlap() {
1387 // Two orig blocks, one after another
1388 let src = "
1389 .orig x3000
1390 HALT
1391 HALT
1392 HALT
1393 HALT
1394 .end
1395
1396 .orig x3002
1397 HALT
1398 .end
1399 ";
1400
1401 let obj = assemble_src(src);
1402 assert_asm_fail(obj, AsmErrKind::OverlappingBlocks);
1403
1404 // Two orig blocks, one before another
1405 let src = "
1406 .orig x3002
1407 HALT
1408 .end
1409
1410 .orig x3000
1411 HALT
1412 HALT
1413 HALT
1414 HALT
1415 .end
1416 ";
1417
1418 let obj = assemble_src(src);
1419 assert_asm_fail(obj, AsmErrKind::OverlappingBlocks);
1420
1421 // Two orig blocks, one empty
1422 let src = "
1423 .orig x3000
1424 HALT
1425 HALT
1426 HALT
1427 HALT
1428 .end
1429
1430 .orig x3002
1431 .end
1432 ";
1433 assemble_src(src).unwrap();
1434
1435 // Two orig blocks, one empty
1436 let src = "
1437 .orig x3002
1438 .end
1439
1440 .orig x3000
1441 HALT
1442 HALT
1443 HALT
1444 HALT
1445 .end
1446 ";
1447
1448 assemble_src(src).unwrap();
1449 }
1450
1451 #[test]
1452 fn test_writing_into_io() {
1453 // write empty blocks
1454 let src = "
1455 .orig xFE00
1456 .end
1457 ";
1458 assemble_src(src).unwrap();
1459
1460 let src = "
1461 .orig xFE02
1462 .end
1463 ";
1464 assemble_src(src).unwrap();
1465
1466 // write actual block
1467 let src = "
1468 .orig xFE00
1469 AND R0, R0, #0
1470 .end
1471 ";
1472 let obj = assemble_src(src);
1473 assert_asm_fail(obj, AsmErrKind::BlockInIO);
1474 }
1475
1476 #[test]
1477 fn test_big_blocks() {
1478 // big BLKW
1479 let src = "
1480 .orig x3000
1481 .blkw xFFFF
1482 .end
1483 ";
1484
1485 let obj = assemble_src(src);
1486 assert_asm_fail(obj, AsmErrKind::WrappingBlock);
1487
1488 // Bunch of .fill:
1489 let mut src = String::from(".orig x0000\n");
1490 for i in 0x0000..=0xFFFF {
1491 writeln!(src, ".fill x{i:04X}").unwrap();
1492 }
1493 writeln!(src, ".end").unwrap();
1494
1495 let obj = assemble_src(&src);
1496 assert_asm_fail(obj, AsmErrKind::BlockInIO);
1497
1498 // perfectly aligns
1499 let src = "
1500 .orig xFFFF
1501 .blkw 1
1502 .end
1503 ";
1504 let obj = assemble_src(src);
1505 assert_asm_fail(obj, AsmErrKind::BlockInIO);
1506
1507 // perfectly aligns 2
1508 let src = "
1509 .orig x3000
1510 .blkw xD000
1511 .end
1512 ";
1513 let obj = assemble_src(src);
1514 assert_asm_fail(obj, AsmErrKind::BlockInIO);
1515
1516 // big BLKW
1517 let src = "
1518 .orig x3000
1519 .blkw xFFFF
1520 .blkw xFFFF
1521 .blkw xFFFF
1522 .end
1523 ";
1524 let obj = assemble_src(src);
1525 assert_asm_fail(obj, AsmErrKind::WrappingBlock);
1526
1527 // perfectly aligns and then does schenanigans
1528 let src = "
1529 .orig x3000
1530 LABEL1 .blkw xD000
1531 .fill x0000
1532 .fill x0001
1533 LABEL2 .fill x0002
1534 .fill x0003
1535 .end
1536 ";
1537 // Should error. Don't really care which error.
1538 assemble_src(src).unwrap_err();
1539 }
1540
1541 fn assert_obj_equal(deser: &mut ObjectFile, expected: &ObjectFile, m: &str) {
1542 let deser_src = deser.sym.as_mut()
1543 .and_then(|s| s.debug_symbols.as_mut())
1544 .map(|s| &mut s.src_info.src)
1545 .expect("deserialized object file has no source");
1546
1547 let expected_src = expected.sym.as_ref()
1548 .and_then(|s| s.debug_symbols.as_ref())
1549 .map(|s| &s.src_info.src)
1550 .expect("expected object file has no source");
1551
1552 let deser_lines = deser_src.trim().lines().map(str::trim);
1553 let expected_lines = expected_src.trim().lines().map(str::trim);
1554
1555 assert!(deser_lines.eq(expected_lines), "lines should have matched");
1556
1557 let mut buf = expected_src.to_string();
1558 std::mem::swap(deser_src, &mut buf);
1559 assert_eq!(deser, expected, "{m}");
1560
1561 // Revert change
1562 let deser_src = deser.sym.as_mut()
1563 .and_then(|s| s.debug_symbols.as_mut())
1564 .map(|s| &mut s.src_info.src)
1565 .expect("deserialized object file has no source");
1566 std::mem::swap(deser_src, &mut buf);
1567 }
1568
1569 #[test]
1570 fn test_ser_deser() {
1571 let src = "
1572 .orig x3000
1573 AND R0, R0, #0
1574 ADD R0, R0, #15
1575 MINUS_R0 NOT R1, R0
1576 ADD R1, R1, #1
1577 HALT
1578 .end
1579 ";
1580
1581 let obj = assemble_src(src).unwrap();
1582
1583 // Binary format
1584 let ser = BinaryFormat::serialize(&obj);
1585 let mut de = BinaryFormat::deserialize(&ser).expect("binary encoding should've been parseable");
1586 assert_obj_equal(&mut de, &obj, "binary encoding could not be roundtripped");
1587
1588 // Text format
1589 let ser = TextFormat::serialize(&obj);
1590 let mut de = TextFormat::deserialize(&ser).expect("text encoding should've been parseable");
1591 assert_obj_equal(&mut de, &obj, "text encoding could not be roundtripped");
1592 }
1593
1594 #[test]
1595 fn test_ser_deser_crlf() {
1596 // With CRLF:
1597 let src = "\r
1598 .orig x3000\r
1599 AND R0, R0, #0\r
1600 ADD R0, R0, #15\r
1601 MINUS_R0 NOT R1, R0\r
1602 ADD R1, R1, #1\r
1603 HALT\r
1604 .end\r
1605 ";
1606
1607 let obj = assemble_src(src).unwrap();
1608
1609 // Text format
1610 let ser = TextFormat::serialize(&obj);
1611 let mut de = TextFormat::deserialize(&ser).expect("text encoding should've been parseable");
1612 assert_obj_equal(&mut de, &obj, "text encoding could not be roundtripped");
1613 }
1614
1615 #[test]
1616 fn test_basic_link() {
1617 let library = "
1618 .orig x5000
1619 ADDER ADD R2, R0, R1
1620 RET
1621 .end
1622 ";
1623
1624 let program = "
1625 .external ADDER
1626
1627 .orig x4000
1628 LD R0, A
1629 LD R1, B
1630
1631 LD R3, ADDER_ADDR
1632 JSRR R3
1633
1634 HALT
1635
1636 A .fill 10
1637 B .fill 20
1638 ADDER_ADDR .fill ADDER
1639 .end
1640 ";
1641
1642 let lib_obj = assemble_src(library).unwrap();
1643 let prog_obj = assemble_src(program).unwrap();
1644 ObjectFile::link(lib_obj, prog_obj).unwrap();
1645 }
1646 #[test]
1647 fn test_link_overlapping_labels() {
1648 let library = "
1649 .orig x5000
1650 LOOP BR LOOP
1651 RET
1652 .end
1653 ";
1654
1655 let program = "
1656 .orig x4000
1657 LOOP BR LOOP
1658 RET
1659 .end
1660 ";
1661
1662 let lib_obj = assemble_src(library).unwrap();
1663 let prog_obj = assemble_src(program).unwrap();
1664 let link_obj = ObjectFile::link(lib_obj, prog_obj);
1665 assert_asm_fail(link_obj, AsmErrKind::OverlappingLabels);
1666 }
1667
1668 #[test]
1669 fn test_link_overlapping_blocks() {
1670 // Overlapping blocks, but not identically overlapping
1671 let library = "
1672 .orig x3000
1673 ADD R0, R0, #0
1674 ADD R0, R0, #1
1675 ADD R0, R0, #2
1676 ADD R0, R0, #3
1677 .end
1678 ";
1679
1680 let program = "
1681 .orig x3001
1682 ADD R0, R0, #0
1683 ADD R0, R0, #1
1684 ADD R0, R0, #2
1685 ADD R0, R0, #3
1686 .end
1687 ";
1688
1689 let lib_obj = assemble_src(library).unwrap();
1690 let prog_obj = assemble_src(program).unwrap();
1691 let link_obj = ObjectFile::link(lib_obj, prog_obj);
1692 assert_asm_fail(link_obj, AsmErrKind::OverlappingBlocks);
1693
1694 // Overlapping blocks and identically overlapping
1695 let library = "
1696 .orig x3000
1697 ADD R0, R0, #0
1698 ADD R0, R0, #1
1699 ADD R0, R0, #2
1700 ADD R0, R0, #3
1701 .end
1702 ";
1703
1704 let program = "
1705 .orig x3000
1706 ADD R0, R0, #0
1707 ADD R0, R0, #1
1708 ADD R0, R0, #2
1709 ADD R0, R0, #3
1710 .end
1711 ";
1712
1713 let lib_obj = assemble_src(library).unwrap();
1714 let prog_obj = assemble_src(program).unwrap();
1715 let link_obj = ObjectFile::link(lib_obj, prog_obj);
1716 assert_asm_fail(link_obj, AsmErrKind::OverlappingBlocks);
1717
1718 // Contiguous blocks
1719 let library = "
1720 .orig x3000
1721 ADD R0, R0, #0
1722 ADD R0, R0, #1
1723 ADD R0, R0, #2
1724 ADD R0, R0, #3
1725 .end
1726 ";
1727
1728 let program = "
1729 .orig x3004
1730 ADD R0, R0, #4
1731 ADD R0, R0, #5
1732 ADD R0, R0, #6
1733 ADD R0, R0, #7
1734 .end
1735 ";
1736
1737 let lib_obj = assemble_src(library).unwrap();
1738 let prog_obj = assemble_src(program).unwrap();
1739 ObjectFile::link(lib_obj, prog_obj).unwrap();
1740 }
1741
1742 #[test]
1743 fn test_link_order_agnostic() {
1744 let library = "
1745 .orig x5000
1746 ;; very functional MULTIPLY subroutine
1747 MULTIPLY RET
1748 .end
1749 ";
1750 let program1 = "
1751 .external MULTIPLY
1752
1753 .orig x3000
1754 LD R1, MADDR1
1755 JSRR R1
1756 HALT
1757
1758 MADDR1 .fill MULTIPLY
1759 .end
1760 ";
1761 let program2 = "
1762 .external MULTIPLY
1763
1764 .orig x4000
1765 LD R2, MADDR2
1766 JSRR R2
1767 HALT
1768
1769 MADDR2 .fill MULTIPLY
1770 .end
1771 ";
1772
1773 let lib_obj = assemble_src(library).unwrap();
1774 let prog1_obj = assemble_src(program1).unwrap();
1775 let prog2_obj = assemble_src(program2).unwrap();
1776
1777 // (lib + prog1) + prog2:
1778 let link_obj = ObjectFile::link(lib_obj.clone(), prog1_obj.clone()).unwrap();
1779 ObjectFile::link(link_obj.clone(), prog2_obj.clone())
1780 .expect("(lib + prog1) + prog2 should've succeeded");
1781 // prog2 + (lib + prog1):
1782 ObjectFile::link(prog2_obj.clone(), link_obj.clone())
1783 .expect("prog2 + (lib + prog1) should've succeeded");
1784
1785 // (prog1 + lib) + prog2:
1786 let link_obj = ObjectFile::link(prog1_obj.clone(), lib_obj.clone()).unwrap();
1787 ObjectFile::link(link_obj.clone(), prog2_obj.clone())
1788 .expect("(prog1 + lib) + prog2 should've succeeded");
1789 // prog2 + (prog1 + lib):
1790 ObjectFile::link(prog2_obj.clone(), link_obj.clone())
1791 .expect("prog2 + (prog1 + lib) should've succeeded");
1792
1793 // (prog1 + prog2) + lib:
1794 let link_obj = ObjectFile::link(prog1_obj.clone(), prog2_obj.clone()).unwrap();
1795 ObjectFile::link(link_obj.clone(), lib_obj.clone())
1796 .expect("(prog1 + prog2) + lib should've succeeded");
1797 // lib + (prog1 + prog2):
1798 ObjectFile::link(lib_obj.clone(), link_obj.clone())
1799 .expect("lib + (prog1 + prog2) should've succeeded");
1800 }
1801
1802 #[test]
1803 fn test_link_external_place() {
1804 let program = "
1805 .external X
1806 .orig x3000
1807 .fill X
1808 .end
1809 ";
1810 assemble_src(program).unwrap();
1811
1812 let program = "
1813 .external X
1814 .orig x3000
1815 LD R0, X
1816 .end
1817 ";
1818 assert_asm_fail(assemble_src(program), AsmErrKind::OffsetExternal);
1819
1820 let program = "
1821 .external X
1822 .orig x3000
1823 JSR X
1824 .end
1825 ";
1826 assert_asm_fail(assemble_src(program), AsmErrKind::OffsetExternal);
1827
1828 let program = "
1829 .external X
1830 .orig x3000
1831 BR X
1832 .end
1833 ";
1834 assert_asm_fail(assemble_src(program), AsmErrKind::OffsetExternal);
1835 }
1836}