Skip to main content

asm_rs/
linker.rs

1//! Label resolution, branch relaxation, and final layout.
2//!
3//! The linker collects fragments (fixed code/data, alignment padding, and
4//! relaxable branches), resolves labels, performs Szymanski-style branch
5//! relaxation (monotonic growth, guaranteed convergence), applies
6//! relocations, and emits the final machine code.
7
8use alloc::collections::BTreeMap;
9#[allow(unused_imports)]
10use alloc::format;
11use alloc::string::String;
12use alloc::string::ToString;
13#[allow(unused_imports)]
14use alloc::vec;
15use alloc::vec::Vec;
16
17use crate::encoder::{InstrBytes, RelaxInfo, RelocKind, Relocation};
18use crate::error::{AsmError, Span};
19
20// ─── FragmentBytes ─────────────────────────────────────────
21
/// Compact byte storage for fragment payloads.
///
/// Instructions (≤15 bytes) are stored inline as [`InstrBytes`] — zero heap
/// allocations on the hot encoding path.  Data directives that may exceed
/// 15 bytes fall back to a heap-allocated `Vec<u8>`.
///
/// Both variants dereference to `[u8]` (see the `Deref`/`DerefMut` impls),
/// so callers can treat any payload as a plain byte slice.
#[derive(Debug, Clone)]
pub enum FragmentBytes {
    /// Inline storage (≤15 bytes) — no heap allocation.
    Inline(InstrBytes),
    /// Heap-allocated storage for data larger than 15 bytes.
    Heap(Vec<u8>),
}
34
35impl core::ops::Deref for FragmentBytes {
36    type Target = [u8];
37    #[inline]
38    fn deref(&self) -> &[u8] {
39        match self {
40            FragmentBytes::Inline(ib) => ib,
41            FragmentBytes::Heap(v) => v,
42        }
43    }
44}
45
46impl core::ops::DerefMut for FragmentBytes {
47    #[inline]
48    fn deref_mut(&mut self) -> &mut [u8] {
49        match self {
50            FragmentBytes::Inline(ib) => ib,
51            FragmentBytes::Heap(v) => v,
52        }
53    }
54}
55
/// Maximum number of relaxation iterations before giving up.
///
/// `relax()` only ever promotes short branches to long form (never the
/// reverse), so layout normally converges well before this cap; it is a
/// hard stop after which `AsmError::RelaxationLimit` is returned.
const MAX_RELAXATION_ITERS: usize = 100;
58
/// Read a little-endian u32 from `bytes` at `offset`, with bounds checking.
/// Returns a descriptive error instead of panicking on out-of-bounds access.
#[cfg(any(feature = "arm", feature = "aarch64", feature = "riscv"))]
fn read_le32(bytes: &[u8], offset: usize, label: &str, span: Span) -> Result<u32, AsmError> {
    // `saturating_add` avoids the arithmetic overflow that `offset + 4`
    // could hit for offsets near `usize::MAX` (panic in debug builds;
    // wraparound — and therefore a bogus bounds-check pass followed by a
    // slice panic — in release).  `get` then rejects any range that is not
    // fully inside the buffer, and matching a fixed-size slice pattern
    // removes the previously unreachable `try_into` error arm, leaving a
    // single error path.
    match bytes.get(offset..offset.saturating_add(4)) {
        Some(&[b0, b1, b2, b3]) => Ok(u32::from_le_bytes([b0, b1, b2, b3])),
        _ => Err(AsmError::Syntax {
            msg: alloc::format!(
                "relocation offset {offset} out of bounds (buffer len {}) for label '{label}'",
                bytes.len()
            ),
            span,
        }),
    }
}
88
/// Read a little-endian u16 from `bytes` at `offset`, with bounds checking.
/// Returns a descriptive error instead of panicking on out-of-bounds access.
#[cfg(any(feature = "arm", feature = "riscv"))]
fn read_le16(bytes: &[u8], offset: usize, label: &str, span: Span) -> Result<u16, AsmError> {
    // `saturating_add` avoids the arithmetic overflow that `offset + 2`
    // could hit for offsets near `usize::MAX` (panic in debug builds;
    // wraparound — and therefore a bogus bounds-check pass followed by a
    // slice panic — in release).  `get` then rejects any range that is not
    // fully inside the buffer, and matching a fixed-size slice pattern
    // removes the previously unreachable `try_into` error arm, leaving a
    // single error path.
    match bytes.get(offset..offset.saturating_add(2)) {
        Some(&[lo, hi]) => Ok(u16::from_le_bytes([lo, hi])),
        _ => Err(AsmError::Syntax {
            msg: alloc::format!(
                "relocation offset {offset} out of bounds (buffer len {}) for label '{label}'",
                bytes.len()
            ),
            span,
        }),
    }
}
115
/// ARM modified-immediate encoder for the linker: find (imm8, rot) such that
/// `value == imm8.rotate_right(rot * 2)`.
#[cfg(feature = "arm")]
fn encode_arm_imm_for_linker(value: u32) -> Option<(u8, u8)> {
    // Rotating `value` left by the candidate amount undoes the encoding's
    // right-rotation; the first candidate whose result fits in 8 bits wins.
    (0..16u8).find_map(|rot| {
        let undone = value.rotate_left(u32::from(rot) * 2);
        (undone <= 0xFF).then(|| (undone as u8, rot))
    })
}
129
/// The resolved output: (machine code bytes, label→address table, applied relocations, fragment offsets).
type ResolveOutput = (
    // Final machine-code bytes.
    Vec<u8>,
    // (label name, absolute address) pairs for every named label.
    Vec<(String, u64)>,
    // Every relocation the linker patched into the output.
    Vec<AppliedRelocation>,
    // offsets[i] = absolute address of fragment i; the final entry is the
    // end address of the whole output.
    Vec<u64>,
);
137
/// An applied relocation in the final output — describes where a label
/// reference was patched. Useful for tooling, debugging, and re-linking.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct AppliedRelocation {
    /// Offset in the output byte stream where the value was written.
    /// Relative to the start of the emitted buffer, not the base address.
    pub offset: usize,
    /// Size of the relocated value in bytes (1, 2, 4, or 8).
    pub size: u8,
    /// Target label name.
    pub label: String,
    /// How the linker patches the target address into the instruction bytes.
    pub kind: RelocKind,
    /// Addend.
    pub addend: i64,
}
154
155// ─── Fragment ──────────────────────────────────────────────
156
/// A fragment of assembled output.
///
/// The linker operates on an ordered list of fragments.  During branch
/// relaxation the sizes of [`Fragment::Relaxable`] and [`Fragment::Align`]
/// fragments may change, but they only ever *grow* (monotonic), which
/// guarantees convergence.
#[derive(Debug, Clone)]
pub enum Fragment {
    /// Fixed-size bytes with optional relocation.
    Fixed {
        /// The raw assembled bytes (inline for instructions, heap for large data).
        bytes: FragmentBytes,
        /// Optional relocation to apply to these bytes.
        relocation: Option<Relocation>,
        /// Source span of the originating instruction or directive.
        span: Span,
    },
    /// Alignment padding — size depends on preceding layout.
    ///
    /// When `use_nop` is true (x86/x86-64 code alignment with no explicit
    /// fill byte), pad with Intel-recommended multi-byte NOP sequences
    /// instead of repeating a single fill byte.
    Align {
        /// Required byte alignment (must be a power of two).
        alignment: u32,
        /// Byte value used for padding when `use_nop` is false.
        fill: u8,
        /// If set, skip this alignment entirely when the required padding
        /// exceeds this many bytes.  The alignment is skipped outright,
        /// never clamped to `max_skip` bytes.
        max_skip: Option<u32>,
        /// Use multi-byte NOP sequences instead of repeating `fill`.
        use_nop: bool,
        /// Source span of the alignment directive.
        span: Span,
    },
    /// A relaxable branch instruction.
    ///
    /// Starts in short form (rel8).  If the linker determines the target is
    /// beyond ±127 bytes it promotes to the long form (rel32) and re-lays
    /// out.  Promotion is irreversible (Szymanski monotonic growth).
    Relaxable {
        /// Short-form bytes (e.g. `[0xEB, 0x00]` for JMP rel8).
        short_bytes: InstrBytes,
        /// Offset of the rel8 displacement within `short_bytes`.
        short_reloc_offset: usize,
        /// Optional relocation for the short form.  When `Some`, the linker
        /// applies this relocation to `short_bytes` instead of raw byte-patching.
        /// Used for RISC-V B-type branches.
        short_relocation: Option<Relocation>,
        /// Long-form bytes (e.g. `[0xE9, 0,0,0,0]` for JMP rel32).
        long_bytes: InstrBytes,
        /// Relocation for the long form (contains label, offset, etc.).
        long_relocation: Relocation,
        /// Whether this fragment has been promoted to long form.
        is_long: bool,
        /// Source span.
        span: Span,
    },
    /// Advance the location counter to an absolute address, padding with
    /// `fill` bytes.  If the target is behind the current position, an
    /// error is raised during emission.
    Org {
        /// Absolute target address for the location counter.
        target: u64,
        /// Byte value used to fill the gap.
        fill: u8,
        /// Source span of the `.org` directive.
        span: Span,
    },
}
227
228// ─── Linker internals ──────────────────────────────────────
229
/// A label definition tracking which fragment it precedes.
#[derive(Debug, Clone)]
struct LabelDef {
    // Index of the fragment the label binds to — labels point at the start
    // of the next fragment added after them (see `Linker::add_label`).
    fragment_index: usize,
    // Where the label was defined; reported as `first_span` in
    // duplicate-label diagnostics.
    span: Span,
}
236
/// Numeric label tracking (supports forward/backward references like `1:` / `1b` / `1f`).
#[derive(Debug, Clone, Default)]
struct NumericLabels {
    // For each numeric label value, the fragment indices of every definition
    // site in program order — numeric labels may be redefined freely.
    defs: BTreeMap<u32, Vec<usize>>,
}
242
243// ─── Public API ────────────────────────────────────────────
244
/// The linker: collects fragments and labels, resolves everything.
#[derive(Debug)]
pub struct Linker {
    // Ordered list of output fragments.
    fragments: Vec<Fragment>,
    // Named label definitions; duplicates are rejected by `add_label`.
    labels: BTreeMap<String, LabelDef>,
    // External symbols with known absolute addresses.
    externals: BTreeMap<String, u64>,
    // Redefinable numeric labels (`1:` referenced as `1b` / `1f`).
    numeric: NumericLabels,
    // `.equ` / `.set` constant values.
    constants: BTreeMap<String, i128>,
    // Origin address of the first fragment (0 by default).
    base_address: u64,
}
255
256impl Default for Linker {
257    fn default() -> Self {
258        Self::new()
259    }
260}
261
262impl Linker {
263    /// Create a new, empty linker with base address 0.
264    pub fn new() -> Self {
265        Self {
266            fragments: Vec::new(),
267            labels: BTreeMap::new(),
268            externals: BTreeMap::new(),
269            numeric: NumericLabels::default(),
270            constants: BTreeMap::new(),
271            base_address: 0,
272        }
273    }
274
    /// Set the base (origin) address for the assembled output.
    ///
    /// All fragment offsets are computed starting from this address, so it
    /// should be set before calling `resolve()`.
    pub fn set_base_address(&mut self, addr: u64) {
        self.base_address = addr;
    }
279
    /// Get the base address (0 unless changed via `set_base_address`).
    pub fn base_address(&self) -> u64 {
        self.base_address
    }
284
    /// The number of fragments currently added.
    ///
    /// Note: `resolve()` temporarily takes the fragment list, so this count
    /// only reflects fragments added since construction or the last resolve.
    pub fn fragment_count(&self) -> usize {
        self.fragments.len()
    }
289
290    /// Define an external label at a known absolute address.
291    pub fn define_external(&mut self, name: &str, addr: u64) {
292        self.externals.insert(String::from(name), addr);
293    }
294
295    /// Define a constant value (`.equ` / `.set`).
296    pub fn define_constant(&mut self, name: &str, value: i128) {
297        self.constants.insert(String::from(name), value);
298    }
299
    /// Look up a constant value by name.
    ///
    /// Returns `None` if no `.equ` / `.set` constant with that name exists.
    pub fn get_constant(&self, name: &str) -> Option<&i128> {
        self.constants.get(name)
    }
304
305    /// Add a label definition at the current position (before the next fragment).
306    pub fn add_label(&mut self, name: &str, span: Span) -> Result<(), AsmError> {
307        // Numeric labels (e.g. `1:`) can be redefined.
308        if let Ok(n) = name.parse::<u32>() {
309            self.numeric
310                .defs
311                .entry(n)
312                .or_default()
313                .push(self.fragments.len());
314            return Ok(());
315        }
316
317        if let Some(existing) = self.labels.get(name) {
318            return Err(AsmError::DuplicateLabel {
319                label: String::from(name),
320                span,
321                first_span: existing.span,
322            });
323        }
324        self.labels.insert(
325            String::from(name),
326            LabelDef {
327                fragment_index: self.fragments.len(),
328                span,
329            },
330        );
331        Ok(())
332    }
333
    /// Add a pre-built fragment.
    ///
    /// Fragments are laid out in insertion order during `resolve()`.
    pub fn add_fragment(&mut self, fragment: Fragment) {
        self.fragments.push(fragment);
    }
338
339    /// Convenience: add fixed bytes (data, non-branch instructions, etc.).
340    pub fn add_bytes(&mut self, bytes: Vec<u8>, span: Span) {
341        self.fragments.push(Fragment::Fixed {
342            bytes: FragmentBytes::Heap(bytes),
343            relocation: None,
344            span,
345        });
346    }
347
348    /// Add an encoded instruction, automatically choosing `Fixed` or `Relaxable`.
349    pub fn add_encoded(
350        &mut self,
351        bytes: InstrBytes,
352        relocation: Option<Relocation>,
353        relax: Option<RelaxInfo>,
354        span: Span,
355    ) -> Result<(), AsmError> {
356        if let Some(ri) = relax {
357            let long_relocation = relocation.ok_or_else(|| AsmError::Syntax {
358                msg: String::from("internal: relaxable instruction missing relocation"),
359                span,
360            })?;
361            self.fragments.push(Fragment::Relaxable {
362                short_bytes: ri.short_bytes,
363                short_reloc_offset: ri.short_reloc_offset,
364                short_relocation: ri.short_relocation,
365                long_bytes: bytes,
366                long_relocation,
367                is_long: false,
368                span,
369            });
370        } else {
371            self.fragments.push(Fragment::Fixed {
372                bytes: FragmentBytes::Inline(bytes),
373                relocation,
374                span,
375            });
376        }
377        Ok(())
378    }
379
380    /// Add alignment padding.
381    pub fn add_alignment(
382        &mut self,
383        alignment: u32,
384        fill: u8,
385        max_skip: Option<u32>,
386        use_nop: bool,
387        span: Span,
388    ) {
389        self.fragments.push(Fragment::Align {
390            alignment,
391            fill,
392            max_skip,
393            use_nop,
394            span,
395        });
396    }
397
    /// Add an `.org` directive: advance the location counter to `target`,
    /// padding with `fill` bytes.
    ///
    /// If `target` turns out to be behind the current position, the error
    /// is reported later, when `resolve()` emits this fragment.
    pub fn add_org(&mut self, target: u64, fill: u8, span: Span) {
        self.fragments.push(Fragment::Org { target, fill, span });
    }
403
404    // ── resolve ────────────────────────────────────────────
405
    /// Resolve all labels, perform branch relaxation, and return
    /// the final bytes together with a label→address table and applied relocations.
    ///
    /// The returned tuple also carries the per-fragment offset table — see
    /// [`ResolveOutput`] for the element order.
    ///
    /// # Note
    ///
    /// Treat `resolve()` as terminal.  The emit phase takes the fragment
    /// list out of `self` and restores it only on success — with relocation
    /// bytes already patched in place — while an error mid-emit leaves the
    /// linker without its fragments.  To re-link, create a new `Linker`
    /// instance and re-add all fragments.
    pub fn resolve(&mut self) -> Result<ResolveOutput, AsmError> {
        // Phase 1: branch relaxation (Szymanski monotonic growth)
        let offsets = self.relax()?;

        // Phase 2: emit final bytes with patched relocations (reuse offsets)
        self.emit_final(offsets)
    }
422
423    // ── branch relaxation ──────────────────────────────────
424
    /// Iteratively grow short branches that cannot reach their targets.
    /// Returns the final computed offsets on success so callers can reuse them.
    ///
    /// Promotion is one-way (short → long), so fragment sizes grow
    /// monotonically and the fixed-point iteration is guaranteed to
    /// terminate (bounded by [`MAX_RELAXATION_ITERS`]).
    fn relax(&mut self) -> Result<Vec<u64>, AsmError> {
        // `+ 1` accounts for the end-address sentinel that
        // `compute_offsets_into` appends.
        let mut offsets = Vec::with_capacity(self.fragments.len() + 1);
        let mut to_expand: Vec<usize> = Vec::new();

        for _iter in 0..MAX_RELAXATION_ITERS {
            self.compute_offsets_into(&mut offsets);
            to_expand.clear();

            for (i, frag) in self.fragments.iter().enumerate() {
                if let Fragment::Relaxable {
                    short_bytes,
                    short_relocation,
                    long_relocation,
                    is_long,
                    ..
                } = frag
                {
                    if !is_long {
                        // `disp` is measured from the end of the short form;
                        // the per-architecture arms below add the instruction
                        // length back to get an instruction-start-relative PC
                        // offset where the ISA requires it.
                        let frag_end = offsets[i] + short_bytes.len() as u64;
                        match self.resolve_label_with_offsets(&long_relocation.label, i, &offsets) {
                            Ok(target) => {
                                let disp = target as i64 - frag_end as i64 + long_relocation.addend;
                                let in_range = if let Some(ref sr) = short_relocation {
                                    // Architecture-specific short form range check
                                    match sr.kind {
                                        #[cfg(feature = "riscv")]
                                        RelocKind::RvBranch12 => {
                                            // B-type: PC-relative from instruction start
                                            let pc_offset = disp + (short_bytes.len() as i64);
                                            (-(1i64 << 12)..(1i64 << 12)).contains(&pc_offset)
                                        }
                                        #[cfg(feature = "riscv")]
                                        RelocKind::RvCBranch8 => {
                                            // CB-type c.beqz/c.bnez: ±256 B (9-bit signed)
                                            let pc_offset = disp + (short_bytes.len() as i64);
                                            (-(1i64 << 8)..(1i64 << 8)).contains(&pc_offset)
                                        }
                                        #[cfg(feature = "riscv")]
                                        RelocKind::RvCJump11 => {
                                            // CJ-type c.j: ±2 KB (12-bit signed)
                                            let pc_offset = disp + (short_bytes.len() as i64);
                                            (-(1i64 << 11)..(1i64 << 11)).contains(&pc_offset)
                                        }
                                        #[cfg(feature = "aarch64")]
                                        RelocKind::Aarch64Branch19 => {
                                            // B.cond / CBZ / CBNZ: ±1 MB (19-bit signed × 4)
                                            let pc_offset = disp + (short_bytes.len() as i64);
                                            (-(1i64 << 20)..(1i64 << 20)).contains(&pc_offset)
                                        }
                                        #[cfg(feature = "aarch64")]
                                        RelocKind::Aarch64Branch14 => {
                                            // TBZ / TBNZ: ±32 KB (14-bit signed × 4)
                                            let pc_offset = disp + (short_bytes.len() as i64);
                                            (-(1i64 << 15)..(1i64 << 15)).contains(&pc_offset)
                                        }
                                        #[cfg(feature = "aarch64")]
                                        RelocKind::Aarch64Adr21 => {
                                            // ADR: ±1 MB (21-bit signed)
                                            let pc_offset = disp + (short_bytes.len() as i64);
                                            (-(1i64 << 20)..(1i64 << 20)).contains(&pc_offset)
                                        }
                                        #[cfg(feature = "arm")]
                                        RelocKind::ThumbBranch8 => {
                                            // B<cond> narrow: signed 8-bit offset >> 1 → ±256 B
                                            // PC = instr + 4 in Thumb
                                            let pc_offset = disp + (short_bytes.len() as i64);
                                            (-(1i64 << 8)..(1i64 << 8)).contains(&pc_offset)
                                        }
                                        #[cfg(feature = "arm")]
                                        RelocKind::ThumbBranch11 => {
                                            // B narrow: signed 11-bit offset >> 1 → ±2 KB
                                            let pc_offset = disp + (short_bytes.len() as i64);
                                            (-(1i64 << 11)..(1i64 << 11)).contains(&pc_offset)
                                        }
                                        _ => (-128..=127).contains(&disp),
                                    }
                                } else {
                                    // x86 default: rel8 ±127 from frag_end
                                    (-128..=127).contains(&disp)
                                };
                                if !in_range {
                                    to_expand.push(i);
                                }
                            }
                            Err(_) => {
                                // Undefined label — conservatively assume long form.
                                // The real error will surface during emit_final.
                                to_expand.push(i);
                            }
                        }
                    }
                }
            }

            if to_expand.is_empty() {
                // Fixed point: every remaining short branch is in range.
                return Ok(offsets);
            }

            // Promote all out-of-range branches, then re-lay-out.
            for &idx in &to_expand {
                if let Fragment::Relaxable {
                    ref mut is_long, ..
                } = self.fragments[idx]
                {
                    *is_long = true;
                }
            }
        }

        Err(AsmError::RelaxationLimit {
            max: MAX_RELAXATION_ITERS,
        })
    }
539
540    // ── offset computation ─────────────────────────────────
541
    /// Build an offset table: `offsets[i]` is the absolute address of fragment `i`.
    ///
    /// `offsets[fragments.len()]` is a sentinel for the total end address.
    /// Reuses the provided vector to avoid repeated allocation.
    ///
    /// The size decisions here must mirror `emit_final` exactly, otherwise
    /// the emitted bytes would disagree with the resolved addresses.
    fn compute_offsets_into(&self, offsets: &mut Vec<u64>) {
        offsets.clear();
        let mut current = self.base_address;
        for frag in &self.fragments {
            offsets.push(current);
            match frag {
                Fragment::Fixed { bytes, .. } => {
                    current += bytes.len() as u64;
                }
                Fragment::Align {
                    alignment,
                    max_skip,
                    ..
                } => {
                    let a = *alignment as u64;
                    if a > 1 {
                        // Round up to the next multiple of the alignment.
                        let aligned = current.div_ceil(a) * a;
                        let padding = aligned - current;
                        // If padding would exceed max_skip, the alignment is
                        // skipped entirely (not clamped) — same rule as emit.
                        if max_skip.map_or(true, |ms| padding <= ms as u64) {
                            current = aligned;
                        }
                    }
                }
                Fragment::Relaxable {
                    short_bytes,
                    long_bytes,
                    is_long,
                    ..
                } => {
                    if *is_long {
                        current += long_bytes.len() as u64;
                    } else {
                        current += short_bytes.len() as u64;
                    }
                }
                Fragment::Org { target, .. } => {
                    if *target > current {
                        current = *target;
                    }
                    // If target <= current, no advancement (error at emit time)
                }
            }
        }
        offsets.push(current);
    }
591
592    // ── final emit ─────────────────────────────────────────
593
    /// Emit the final byte stream using the offsets computed by `relax`,
    /// patching every relocation in place and recording each one in the
    /// returned [`AppliedRelocation`] list.
    ///
    /// Layout decisions (alignment skipping, short/long branch sizes, org
    /// padding) must match `compute_offsets_into` exactly.
    fn emit_final(&mut self, offsets: Vec<u64>) -> Result<ResolveOutput, AsmError> {
        let total_size = offsets.last().copied().unwrap_or(self.base_address) - self.base_address;
        let mut output = Vec::with_capacity(total_size as usize);
        let mut applied_relocs = Vec::new();

        // Take fragments out so `self` is free for apply_relocation / resolve_label calls.
        // This avoids cloning every fragment's byte buffer on the final emit path.
        // NOTE(review): an early `?` return below leaves `self.fragments`
        // empty — the list is only restored on the success path.
        let mut fragments = core::mem::take(&mut self.fragments);

        for (i, frag) in fragments.iter_mut().enumerate() {
            match frag {
                Fragment::Fixed {
                    bytes,
                    relocation,
                    span,
                } => {
                    if let Some(ref mut reloc) = relocation {
                        let frag_output_offset = output.len();
                        // Patch in-place — no heap clone needed.
                        self.apply_relocation(bytes, reloc, offsets[i], &offsets, i, *span)?;
                        applied_relocs.push(AppliedRelocation {
                            offset: frag_output_offset + reloc.offset,
                            size: reloc.size,
                            // Take ownership — emit_final is terminal, labels won't be read again.
                            label: reloc.label.to_string(),
                            kind: reloc.kind,
                            addend: reloc.addend,
                        });
                        output.extend_from_slice(bytes);
                    } else {
                        output.extend_from_slice(bytes);
                    }
                }

                Fragment::Align {
                    alignment,
                    fill,
                    max_skip,
                    use_nop,
                    ..
                } => {
                    let a = *alignment as u64;
                    if a > 1 {
                        let current = offsets[i];
                        let aligned = current.div_ceil(a) * a;
                        let padding = (aligned - current) as usize;
                        // skip if max_skip is exceeded
                        if max_skip.is_some_and(|ms| padding > ms as usize) {
                            // no padding emitted
                        } else if *use_nop {
                            emit_nop_padding(&mut output, padding);
                        } else {
                            output.extend(core::iter::repeat(*fill).take(padding));
                        }
                    }
                }

                Fragment::Relaxable {
                    short_bytes,
                    short_reloc_offset,
                    short_relocation,
                    long_bytes,
                    long_relocation,
                    is_long,
                    span,
                } => {
                    if *is_long {
                        let frag_output_offset = output.len();
                        // Patch long_bytes in-place — no heap clone needed.
                        self.apply_relocation(
                            long_bytes,
                            long_relocation,
                            offsets[i],
                            &offsets,
                            i,
                            *span,
                        )?;
                        applied_relocs.push(AppliedRelocation {
                            offset: frag_output_offset + long_relocation.offset,
                            size: long_relocation.size,
                            label: (*long_relocation.label).into(),
                            kind: long_relocation.kind,
                            addend: long_relocation.addend,
                        });
                        output.extend_from_slice(long_bytes);
                    } else if let Some(ref mut sr) = short_relocation {
                        // Short form with architecture-specific relocation
                        // (e.g. RISC-V B-type branch).
                        let frag_output_offset = output.len();
                        // Patch short_bytes in-place — no heap clone needed.
                        self.apply_relocation(short_bytes, sr, offsets[i], &offsets, i, *span)?;
                        applied_relocs.push(AppliedRelocation {
                            offset: frag_output_offset + sr.offset,
                            size: sr.size,
                            label: (*sr.label).into(),
                            kind: sr.kind,
                            addend: sr.addend,
                        });
                        output.extend_from_slice(short_bytes);
                    } else {
                        // Short form — patch rel8 displacement (x86)
                        let frag_output_offset = output.len();
                        let target =
                            self.resolve_label_with_offsets(&long_relocation.label, i, &offsets)?;
                        let frag_end = offsets[i] + short_bytes.len() as u64;
                        let disp = target as i64 - frag_end as i64 + long_relocation.addend;
                        // Relaxation should have promoted anything out of
                        // rel8 range; this re-check is a final safety net.
                        if !(-128..=127).contains(&disp) {
                            return Err(AsmError::BranchOutOfRange {
                                label: long_relocation.label.to_string(),
                                disp,
                                max: 127,
                                span: *span,
                            });
                        }
                        // Patch short_bytes in-place — no heap clone needed.
                        short_bytes[*short_reloc_offset] = disp as i8 as u8;
                        applied_relocs.push(AppliedRelocation {
                            offset: frag_output_offset + *short_reloc_offset,
                            size: 1,
                            label: (*long_relocation.label).into(),
                            kind: RelocKind::X86Relative,
                            addend: long_relocation.addend,
                        });
                        output.extend_from_slice(short_bytes);
                    }
                }

                Fragment::Org {
                    target, fill, span, ..
                } => {
                    let current = offsets[i];
                    if *target < current {
                        return Err(AsmError::Syntax {
                            msg: alloc::format!(
                                ".org target 0x{:X} is behind current position 0x{:X}",
                                target,
                                current
                            ),
                            span: *span,
                        });
                    }
                    let padding = (*target - current) as usize;
                    output.extend(core::iter::repeat(*fill).take(padding));
                }
            }
        }

        // Restore fragments (now patched in-place, but structure intact).
        self.fragments = fragments;

        // Collect label addresses
        let label_table: Vec<(String, u64)> = self
            .labels
            .iter()
            .map(|(name, def)| (name.clone(), offsets[def.fragment_index]))
            .collect();

        Ok((output, label_table, applied_relocs, offsets))
    }
753
754    // ── relocation patching ────────────────────────────────
755
756    fn apply_relocation(
757        &self,
758        bytes: &mut [u8],
759        reloc: &Relocation,
760        frag_abs: u64,
761        offsets: &[u64],
762        from_fragment: usize,
763        span: Span,
764    ) -> Result<(), AsmError> {
765        let target_addr = self.resolve_label_with_offsets(&reloc.label, from_fragment, offsets)?;
766        let reloc_abs = frag_abs + reloc.offset as u64;
767
768        match reloc.kind {
769            RelocKind::X86Relative => {
770                // RIP = address past the entire instruction, not just past the reloc field.
771                // trailing_bytes accounts for any immediate bytes following the displacement.
772                let rip = reloc_abs + reloc.size as u64 + reloc.trailing_bytes as u64;
773                let rel = (target_addr as i64)
774                    .wrapping_sub(rip as i64)
775                    .wrapping_add(reloc.addend);
776                match reloc.size {
777                    1 => {
778                        if rel < i8::MIN as i64 || rel > i8::MAX as i64 {
779                            return Err(AsmError::BranchOutOfRange {
780                                label: reloc.label.to_string(),
781                                disp: rel,
782                                max: 127,
783                                span,
784                            });
785                        }
786                        bytes[reloc.offset] = rel as i8 as u8;
787                    }
788                    4 => {
789                        if rel < i32::MIN as i64 || rel > i32::MAX as i64 {
790                            return Err(AsmError::BranchOutOfRange {
791                                label: reloc.label.to_string(),
792                                disp: rel,
793                                max: i32::MAX as i64,
794                                span,
795                            });
796                        }
797                        let b = (rel as i32).to_le_bytes();
798                        bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&b);
799                    }
800                    other => {
801                        return Err(AsmError::Syntax {
802                            msg: alloc::format!(
803                                "unsupported RIP-relative relocation size: {other}"
804                            ),
805                            span,
806                        });
807                    }
808                }
809            }
810            RelocKind::Absolute => {
811                let addr = target_addr.wrapping_add(reloc.addend as u64);
812                match reloc.size {
813                    1 => {
814                        if addr > u8::MAX as u64 {
815                            return Err(AsmError::Syntax {
816                                msg: alloc::format!(
817                                    "absolute address 0x{addr:X} exceeds 8-bit relocation range for '{}'",
818                                    reloc.label
819                                ),
820                                span,
821                            });
822                        }
823                        bytes[reloc.offset] = addr as u8;
824                    }
825                    2 => {
826                        if addr > u16::MAX as u64 {
827                            return Err(AsmError::Syntax {
828                                msg: alloc::format!(
829                                    "absolute address 0x{addr:X} exceeds 16-bit relocation range for '{}'",
830                                    reloc.label
831                                ),
832                                span,
833                            });
834                        }
835                        bytes[reloc.offset..reloc.offset + 2]
836                            .copy_from_slice(&(addr as u16).to_le_bytes());
837                    }
838                    4 => {
839                        if addr > u32::MAX as u64 {
840                            return Err(AsmError::Syntax {
841                                msg: alloc::format!(
842                                    "absolute address 0x{addr:X} exceeds 32-bit relocation range for '{}'",
843                                    reloc.label
844                                ),
845                                span,
846                            });
847                        }
848                        bytes[reloc.offset..reloc.offset + 4]
849                            .copy_from_slice(&(addr as u32).to_le_bytes());
850                    }
851                    8 => {
852                        bytes[reloc.offset..reloc.offset + 8].copy_from_slice(&addr.to_le_bytes());
853                    }
854                    other => {
855                        return Err(AsmError::Syntax {
856                            msg: alloc::format!("unsupported absolute relocation size: {other}"),
857                            span,
858                        });
859                    }
860                }
861            }
862            #[cfg(feature = "arm")]
863            RelocKind::ArmBranch24 => {
864                // ARM32 B/BL: PC = instr_addr + 8, offset = (target - PC) >> 2, packed bits 23:0
865                let pc = reloc_abs + 8;
866                let rel = (target_addr as i64)
867                    .wrapping_sub(pc as i64)
868                    .wrapping_add(reloc.addend);
869                let offset = rel >> 2;
870                if !(-(1 << 23)..(1 << 23)).contains(&offset) {
871                    return Err(AsmError::BranchOutOfRange {
872                        label: reloc.label.to_string(),
873                        disp: rel,
874                        max: (1 << 25) - 4,
875                        span,
876                    });
877                }
878                let imm24 = (offset as u32) & 0x00FF_FFFF;
879                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
880                word = (word & 0xFF00_0000) | imm24;
881                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
882            }
883            #[cfg(feature = "arm")]
884            RelocKind::ArmLdrLit => {
885                // ARM32 LDR Rd, label: PC = instr_addr + 8, 12-bit offset, U-bit (bit 23)
886                let pc = reloc_abs + 8;
887                let rel = (target_addr as i64)
888                    .wrapping_sub(pc as i64)
889                    .wrapping_add(reloc.addend);
890                let abs_rel = rel.unsigned_abs();
891                if abs_rel > 4095 {
892                    return Err(AsmError::BranchOutOfRange {
893                        label: reloc.label.to_string(),
894                        disp: rel,
895                        max: 4095,
896                        span,
897                    });
898                }
899                let u_bit = if rel >= 0 { 1u32 } else { 0u32 };
900                let imm12 = (abs_rel as u32) & 0xFFF;
901                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
902                word = (word & 0xFF7F_F000) | (u_bit << 23) | imm12;
903                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
904            }
905            #[cfg(feature = "arm")]
906            RelocKind::ArmAdr => {
907                // ARM32 ADR Rd, label → ADD/SUB Rd, PC, #rotated_imm
908                // PC = instr_addr + 8 (ARM pipeline)
909                // The data-processing immediate format uses 8-bit imm + 4-bit rotation
910                let pc = reloc_abs + 8;
911                let rel = (target_addr as i64)
912                    .wrapping_sub(pc as i64)
913                    .wrapping_add(reloc.addend);
914                let abs_rel = rel.unsigned_abs() as u32;
915                let (op, imm8, rot) = if rel >= 0 {
916                    // ADD Rd, PC, #imm
917                    let (i, r) = encode_arm_imm_for_linker(abs_rel).ok_or_else(|| {
918                        AsmError::BranchOutOfRange {
919                            label: reloc.label.to_string(),
920                            disp: rel,
921                            max: 255, // max unrotated; actual range depends on pattern
922                            span,
923                        }
924                    })?;
925                    (0x4u32, i, r)
926                } else {
927                    // SUB Rd, PC, #imm
928                    let (i, r) = encode_arm_imm_for_linker(abs_rel).ok_or_else(|| {
929                        AsmError::BranchOutOfRange {
930                            label: reloc.label.to_string(),
931                            disp: rel,
932                            max: 255,
933                            span,
934                        }
935                    })?;
936                    (0x2u32, i, r)
937                };
938                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
939                // Clear opcode (bits 24:21) and immediate field (bits 11:0)
940                word = (word & 0xF1F0_F000) | (op << 21) | ((rot as u32) << 8) | (imm8 as u32);
941                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
942            }
943            #[cfg(feature = "arm")]
944            RelocKind::ThumbBranch8 => {
945                // Thumb conditional branch (16-bit): PC = instr + 4
946                let pc = reloc_abs + 4;
947                let rel = (target_addr as i64)
948                    .wrapping_sub(pc as i64)
949                    .wrapping_add(reloc.addend);
950                let offset = rel >> 1;
951                if !(-(1i64 << 7)..(1i64 << 7)).contains(&offset) {
952                    return Err(AsmError::BranchOutOfRange {
953                        label: reloc.label.to_string(),
954                        disp: rel,
955                        max: 254,
956                        span,
957                    });
958                }
959                let imm8 = (offset as u8) as u16;
960                let mut hw = read_le16(bytes, reloc.offset, &reloc.label, span)?;
961                hw = (hw & 0xFF00) | (imm8 & 0xFF);
962                bytes[reloc.offset..reloc.offset + 2].copy_from_slice(&hw.to_le_bytes());
963            }
964            #[cfg(feature = "arm")]
965            RelocKind::ThumbBranch11 => {
966                // Thumb unconditional branch (16-bit): PC = instr + 4
967                let pc = reloc_abs + 4;
968                let rel = (target_addr as i64)
969                    .wrapping_sub(pc as i64)
970                    .wrapping_add(reloc.addend);
971                let offset = rel >> 1;
972                if !(-(1i64 << 10)..(1i64 << 10)).contains(&offset) {
973                    return Err(AsmError::BranchOutOfRange {
974                        label: reloc.label.to_string(),
975                        disp: rel,
976                        max: 2046,
977                        span,
978                    });
979                }
980                let imm11 = (offset as u16) & 0x7FF;
981                let mut hw = read_le16(bytes, reloc.offset, &reloc.label, span)?;
982                hw = (hw & 0xF800) | imm11;
983                bytes[reloc.offset..reloc.offset + 2].copy_from_slice(&hw.to_le_bytes());
984            }
985            #[cfg(feature = "arm")]
986            RelocKind::ThumbBl => {
987                // Thumb-2 BL (32-bit): PC = instr + 4
988                let pc = reloc_abs + 4;
989                let rel = (target_addr as i64)
990                    .wrapping_sub(pc as i64)
991                    .wrapping_add(reloc.addend);
992                let offset = rel >> 1;
993                if !(-(1i64 << 23)..(1i64 << 23)).contains(&offset) {
994                    return Err(AsmError::BranchOutOfRange {
995                        label: reloc.label.to_string(),
996                        disp: rel,
997                        max: (1 << 24) - 2,
998                        span,
999                    });
1000                }
1001                let s = if offset < 0 { 1_u16 } else { 0 };
1002                let imm = offset as u32;
1003                let imm10 = ((imm >> 11) & 0x3FF) as u16;
1004                let imm11 = (imm & 0x7FF) as u16;
1005                let j1 = (!((imm >> 23) ^ (s as u32)) & 1) as u16;
1006                let j2 = (!((imm >> 22) ^ (s as u32)) & 1) as u16;
1007                let hw1 = 0xF000 | (s << 10) | imm10;
1008                let hw2 = 0xD000 | (j1 << 13) | (j2 << 11) | imm11;
1009                bytes[reloc.offset..reloc.offset + 2].copy_from_slice(&hw1.to_le_bytes());
1010                bytes[reloc.offset + 2..reloc.offset + 4].copy_from_slice(&hw2.to_le_bytes());
1011            }
1012            #[cfg(feature = "arm")]
1013            RelocKind::ThumbBranchW => {
1014                // Thumb-2 B.W (32-bit wide unconditional): PC = instr + 4
1015                let pc = reloc_abs + 4;
1016                let rel = (target_addr as i64)
1017                    .wrapping_sub(pc as i64)
1018                    .wrapping_add(reloc.addend);
1019                let offset = rel >> 1;
1020                if !(-(1i64 << 23)..(1i64 << 23)).contains(&offset) {
1021                    return Err(AsmError::BranchOutOfRange {
1022                        label: reloc.label.to_string(),
1023                        disp: rel,
1024                        max: (1 << 24) - 2,
1025                        span,
1026                    });
1027                }
1028                let s = if offset < 0 { 1_u16 } else { 0 };
1029                let imm = offset as u32;
1030                let imm10 = ((imm >> 11) & 0x3FF) as u16;
1031                let imm11 = (imm & 0x7FF) as u16;
1032                let j1 = (!((imm >> 23) ^ (s as u32)) & 1) as u16;
1033                let j2 = (!((imm >> 22) ^ (s as u32)) & 1) as u16;
1034                let hw1 = 0xF000 | (s << 10) | imm10;
1035                let hw2 = 0x9000 | (j1 << 13) | (j2 << 11) | imm11;
1036                bytes[reloc.offset..reloc.offset + 2].copy_from_slice(&hw1.to_le_bytes());
1037                bytes[reloc.offset + 2..reloc.offset + 4].copy_from_slice(&hw2.to_le_bytes());
1038            }
1039            #[cfg(feature = "arm")]
1040            RelocKind::ThumbCondBranchW => {
1041                // Thumb-2 B<cond>.W (32-bit wide conditional): PC = instr + 4
1042                let pc = reloc_abs + 4;
1043                let rel = (target_addr as i64)
1044                    .wrapping_sub(pc as i64)
1045                    .wrapping_add(reloc.addend);
1046                let offset = rel >> 1;
1047                if !(-(1i64 << 19)..(1i64 << 19)).contains(&offset) {
1048                    return Err(AsmError::BranchOutOfRange {
1049                        label: reloc.label.to_string(),
1050                        disp: rel,
1051                        max: (1 << 20) - 2,
1052                        span,
1053                    });
1054                }
1055                let s = if offset < 0 { 1_u16 } else { 0 };
1056                let imm = offset as u32;
1057                let imm6 = ((imm >> 11) & 0x3F) as u16;
1058                let imm11 = (imm & 0x7FF) as u16;
1059                let j1 = ((imm >> 17) & 1) as u16;
1060                let j2 = ((imm >> 18) & 1) as u16;
1061                // Read existing hw1 to preserve condition code bits
1062                let existing_hw1 = read_le16(bytes, reloc.offset, &reloc.label, span)?;
1063                let cond = (existing_hw1 >> 6) & 0xF;
1064                let hw1 = 0xF000 | (s << 10) | (cond << 6) | imm6;
1065                let hw2 = 0x8000 | (j1 << 13) | (j2 << 11) | imm11;
1066                bytes[reloc.offset..reloc.offset + 2].copy_from_slice(&hw1.to_le_bytes());
1067                bytes[reloc.offset + 2..reloc.offset + 4].copy_from_slice(&hw2.to_le_bytes());
1068            }
1069            #[cfg(feature = "arm")]
1070            RelocKind::ThumbLdrLit8 => {
1071                // Thumb LDR Rt, [PC, #imm8×4]: PC = (instr_addr + 4) & ~3
1072                // Forward only, word-aligned, 0..1020 byte range
1073                let pc = (reloc_abs + 4) & !3;
1074                let rel = (target_addr as i64)
1075                    .wrapping_sub(pc as i64)
1076                    .wrapping_add(reloc.addend);
1077                if !(0..=1020).contains(&rel) || (rel & 3) != 0 {
1078                    return Err(AsmError::BranchOutOfRange {
1079                        label: reloc.label.to_string(),
1080                        disp: rel,
1081                        max: 1020,
1082                        span,
1083                    });
1084                }
1085                let imm8 = (rel >> 2) as u16;
1086                let existing = read_le16(bytes, reloc.offset, &reloc.label, span)?;
1087                let hw = (existing & 0xFF00) | imm8;
1088                bytes[reloc.offset..reloc.offset + 2].copy_from_slice(&hw.to_le_bytes());
1089            }
1090            #[cfg(feature = "aarch64")]
1091            RelocKind::Aarch64Jump26 => {
1092                // AArch64 B/BL: PC-relative offset >> 2 in bits 25:0
1093                let rel = (target_addr as i64)
1094                    .wrapping_sub(reloc_abs as i64)
1095                    .wrapping_add(reloc.addend);
1096                let offset = rel >> 2;
1097                if !(-(1 << 25)..(1 << 25)).contains(&offset) {
1098                    return Err(AsmError::BranchOutOfRange {
1099                        label: reloc.label.to_string(),
1100                        disp: rel,
1101                        max: (1 << 27) - 4,
1102                        span,
1103                    });
1104                }
1105                let imm26 = (offset as u32) & 0x03FF_FFFF;
1106                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
1107                word = (word & 0xFC00_0000) | imm26;
1108                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
1109            }
1110            #[cfg(feature = "aarch64")]
1111            RelocKind::Aarch64Branch19 => {
1112                // AArch64 B.cond / CBZ / CBNZ: PC-relative offset >> 2 in bits 23:5
1113                let rel = (target_addr as i64)
1114                    .wrapping_sub(reloc_abs as i64)
1115                    .wrapping_add(reloc.addend);
1116                let offset = rel >> 2;
1117                if !(-(1 << 18)..(1 << 18)).contains(&offset) {
1118                    return Err(AsmError::BranchOutOfRange {
1119                        label: reloc.label.to_string(),
1120                        disp: rel,
1121                        max: (1 << 20) - 4,
1122                        span,
1123                    });
1124                }
1125                let imm19 = (offset as u32) & 0x7FFFF;
1126                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
1127                word = (word & 0xFF00_001F) | (imm19 << 5);
1128                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
1129            }
1130            #[cfg(feature = "aarch64")]
1131            RelocKind::Aarch64Branch14 => {
1132                // AArch64 TBZ / TBNZ: PC-relative offset >> 2 in bits 18:5
1133                let rel = (target_addr as i64)
1134                    .wrapping_sub(reloc_abs as i64)
1135                    .wrapping_add(reloc.addend);
1136                let offset = rel >> 2;
1137                if !(-(1 << 13)..(1 << 13)).contains(&offset) {
1138                    return Err(AsmError::BranchOutOfRange {
1139                        label: reloc.label.to_string(),
1140                        disp: rel,
1141                        max: (1 << 15) - 4,
1142                        span,
1143                    });
1144                }
1145                let imm14 = (offset as u32) & 0x3FFF;
1146                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
1147                word = (word & 0xFFF8_001F) | (imm14 << 5);
1148                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
1149            }
1150            #[cfg(feature = "aarch64")]
1151            RelocKind::Aarch64LdrLit19 => {
1152                // AArch64 LDR (literal): PC-relative offset >> 2 in bits 23:5
1153                let rel = (target_addr as i64)
1154                    .wrapping_sub(reloc_abs as i64)
1155                    .wrapping_add(reloc.addend);
1156                let offset = rel >> 2;
1157                if !(-(1 << 18)..(1 << 18)).contains(&offset) {
1158                    return Err(AsmError::BranchOutOfRange {
1159                        label: reloc.label.to_string(),
1160                        disp: rel,
1161                        max: (1 << 20) - 4,
1162                        span,
1163                    });
1164                }
1165                let imm19 = (offset as u32) & 0x7FFFF;
1166                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
1167                word = (word & 0xFF00_001F) | (imm19 << 5);
1168                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
1169            }
1170            #[cfg(feature = "aarch64")]
1171            RelocKind::Aarch64Adr21 => {
1172                // AArch64 ADR: PC-relative, immhi (bits 23:5), immlo (bits 30:29)
1173                let rel = (target_addr as i64)
1174                    .wrapping_sub(reloc_abs as i64)
1175                    .wrapping_add(reloc.addend);
1176                if !(-(1 << 20)..(1 << 20)).contains(&rel) {
1177                    return Err(AsmError::BranchOutOfRange {
1178                        label: reloc.label.to_string(),
1179                        disp: rel,
1180                        max: (1 << 20) - 1,
1181                        span,
1182                    });
1183                }
1184                let immhi = ((rel >> 2) as u32) & 0x7FFFF;
1185                let immlo = (rel as u32) & 0x3;
1186                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
1187                word = (word & 0x9F00_001F) | (immlo << 29) | (immhi << 5);
1188                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
1189            }
1190            #[cfg(feature = "aarch64")]
1191            RelocKind::Aarch64Adrp => {
1192                // AArch64 ADRP: page-relative, page = addr & ~0xFFF
1193                let pc_page = reloc_abs & !0xFFF;
1194                let target_page = target_addr.wrapping_add(reloc.addend as u64) & !0xFFF;
1195                let rel = (target_page as i64).wrapping_sub(pc_page as i64);
1196                let page_off = rel >> 12;
1197                if !(-(1 << 20)..(1 << 20)).contains(&page_off) {
1198                    return Err(AsmError::BranchOutOfRange {
1199                        label: reloc.label.to_string(),
1200                        disp: rel,
1201                        max: (1i64 << 32) - 1,
1202                        span,
1203                    });
1204                }
1205                let immhi = ((page_off >> 2) as u32) & 0x7FFFF;
1206                let immlo = (page_off as u32) & 0x3;
1207                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
1208                word = (word & 0x9F00_001F) | (immlo << 29) | (immhi << 5);
1209                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
1210            }
1211            #[cfg(feature = "aarch64")]
1212            RelocKind::Aarch64AdrpAddPair => {
1213                // AArch64 ADRP+ADD pair: first word is ADRP, second is ADD.
1214                // ADRP: page-relative offset in immhi/immlo
1215                let pc_page = reloc_abs & !0xFFF;
1216                let target_with_addend = target_addr.wrapping_add(reloc.addend as u64);
1217                let target_page = target_with_addend & !0xFFF;
1218                let rel = (target_page as i64).wrapping_sub(pc_page as i64);
1219                let page_off = rel >> 12;
1220                if !(-(1 << 20)..(1 << 20)).contains(&page_off) {
1221                    return Err(AsmError::BranchOutOfRange {
1222                        label: reloc.label.to_string(),
1223                        disp: rel,
1224                        max: (1i64 << 32) - 1,
1225                        span,
1226                    });
1227                }
1228                let immhi_p = ((page_off >> 2) as u32) & 0x7FFFF;
1229                let immlo_p = (page_off as u32) & 0x3;
1230                let mut adrp_word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
1231                adrp_word = (adrp_word & 0x9F00_001F) | (immlo_p << 29) | (immhi_p << 5);
1232                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&adrp_word.to_le_bytes());
1233
1234                // ADD: lo12 bits of target address in imm12 (bits 21:10)
1235                let lo12 = (target_with_addend & 0xFFF) as u32;
1236                let add_offset = reloc.offset + 4;
1237                let mut add_word = read_le32(bytes, add_offset, &reloc.label, span)?;
1238                add_word = (add_word & 0xFFC003FF) | (lo12 << 10);
1239                bytes[add_offset..add_offset + 4].copy_from_slice(&add_word.to_le_bytes());
1240            }
1241            #[cfg(feature = "riscv")]
1242            RelocKind::RvJal20 => {
1243                // RISC-V JAL: 21-bit signed PC-relative offset (bit 0 always 0)
1244                // J-type immediate: imm[20|10:1|11|19:12] packed into bits 31:12
1245                let rel = (target_addr as i64)
1246                    .wrapping_sub(reloc_abs as i64)
1247                    .wrapping_add(reloc.addend);
1248                if !(-(1i64 << 20)..(1i64 << 20)).contains(&rel) {
1249                    return Err(AsmError::BranchOutOfRange {
1250                        label: reloc.label.to_string(),
1251                        disp: rel,
1252                        max: (1 << 20) - 2,
1253                        span,
1254                    });
1255                }
1256                let imm = rel as u32;
1257                let packed = (imm & 0x0010_0000)          // imm[20]   → bit 31
1258                    | ((imm & 0x7FE) << 20)               // imm[10:1] → bits 30:21
1259                    | ((imm & 0x800) << 9)                // imm[11]   → bit 20
1260                    | (imm & 0x000F_F000); // imm[19:12]→ bits 19:12
1261                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
1262                word = (word & 0xFFF) | packed;
1263                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
1264            }
1265            #[cfg(feature = "riscv")]
1266            RelocKind::RvBranch12 => {
1267                // RISC-V B-type: 13-bit signed PC-relative offset (bit 0 always 0)
1268                // B-type immediate: imm[12|10:5] in bits 31:25, imm[4:1|11] in bits 11:7
1269                let rel = (target_addr as i64)
1270                    .wrapping_sub(reloc_abs as i64)
1271                    .wrapping_add(reloc.addend);
1272                if !(-(1i64 << 12)..(1i64 << 12)).contains(&rel) {
1273                    return Err(AsmError::BranchOutOfRange {
1274                        label: reloc.label.to_string(),
1275                        disp: rel,
1276                        max: (1 << 12) - 2,
1277                        span,
1278                    });
1279                }
1280                let imm = rel as u32;
1281                let packed_hi = ((imm & 0x1000) << 19)    // imm[12]   → bit 31
1282                    | ((imm & 0x7E0) << 20); // imm[10:5] → bits 30:25
1283                let packed_lo = ((imm & 0x1E) << 7)       // imm[4:1]  → bits 11:8
1284                    | ((imm & 0x800) >> 4); // imm[11]   → bit 7
1285                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
1286                word = (word & 0x01FF_F07F) | packed_hi | packed_lo;
1287                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
1288            }
1289            #[cfg(feature = "riscv")]
1290            RelocKind::RvAuipc20 => {
1291                // RISC-V AUIPC+JALR pair: patches both instructions.
1292                // AUIPC at reloc.offset, JALR at reloc.offset+4.
1293                // hi20 = (offset + 0x800) >> 12  (rounds for sign-extension of lo12)
1294                // lo12 = offset - (hi20 << 12)
1295                let rel = (target_addr as i64)
1296                    .wrapping_sub(reloc_abs as i64)
1297                    .wrapping_add(reloc.addend);
1298                let hi20 = ((rel + 0x800) >> 12) as u32;
1299                let lo12 = (rel as u32).wrapping_sub(hi20 << 12);
1300                // Patch AUIPC: upper 20 bits in bits 31:12
1301                let mut word = read_le32(bytes, reloc.offset, &reloc.label, span)?;
1302                word = (word & 0xFFF) | (hi20 << 12);
1303                bytes[reloc.offset..reloc.offset + 4].copy_from_slice(&word.to_le_bytes());
1304                // Patch JALR: lower 12 bits in bits 31:20 (I-type immediate)
1305                let jalr_off = reloc.offset + 4;
1306                let mut jalr = read_le32(bytes, jalr_off, &reloc.label, span)?;
1307                jalr = (jalr & 0x000F_FFFF) | ((lo12 & 0xFFF) << 20);
1308                bytes[jalr_off..jalr_off + 4].copy_from_slice(&jalr.to_le_bytes());
1309            }
1310            #[cfg(feature = "riscv")]
1311            RelocKind::RvCBranch8 => {
1312                // RISC-V C-extension CB-type branch: 9-bit signed PC-relative offset
1313                // CB-type immediate: imm[8|4:3] in bits 12:10, imm[7:6|2:1|5] in bits 6:2
1314                let rel = (target_addr as i64)
1315                    .wrapping_sub(reloc_abs as i64)
1316                    .wrapping_add(reloc.addend);
1317                if !(-(1i64 << 8)..(1i64 << 8)).contains(&rel) {
1318                    return Err(AsmError::BranchOutOfRange {
1319                        label: reloc.label.to_string(),
1320                        disp: rel,
1321                        max: (1 << 8) - 2,
1322                        span,
1323                    });
1324                }
1325                let imm = rel as u16;
1326                // Reconstruct the CB-type halfword with the new offset
1327                let mut hw = read_le16(bytes, reloc.offset, &reloc.label, span)?;
1328                // Clear the immediate fields: bits 12:10 and 6:2
1329                hw &= 0xE383; // keep funct3(15:13), rs1'(9:7), op(1:0)
1330                              // Pack: bit8→12, bit4:3→11:10, bit7:6→6:5, bit2:1→4:3, bit5→2
1331                hw |= ((imm >> 8) & 1) << 12;
1332                hw |= ((imm >> 3) & 3) << 10;
1333                hw |= ((imm >> 6) & 3) << 5;
1334                hw |= ((imm >> 1) & 3) << 3;
1335                hw |= ((imm >> 5) & 1) << 2;
1336                bytes[reloc.offset..reloc.offset + 2].copy_from_slice(&hw.to_le_bytes());
1337            }
1338            #[cfg(feature = "riscv")]
1339            RelocKind::RvCJump11 => {
1340                // RISC-V C-extension CJ-type jump: 12-bit signed PC-relative offset
1341                // CJ-type immediate: imm[11|4|9:8|10|6|7|3:1|5] in bits 12:2
1342                let rel = (target_addr as i64)
1343                    .wrapping_sub(reloc_abs as i64)
1344                    .wrapping_add(reloc.addend);
1345                if !(-(1i64 << 11)..(1i64 << 11)).contains(&rel) {
1346                    return Err(AsmError::BranchOutOfRange {
1347                        label: reloc.label.to_string(),
1348                        disp: rel,
1349                        max: (1 << 11) - 2,
1350                        span,
1351                    });
1352                }
1353                let imm = rel as u16;
1354                let mut hw = read_le16(bytes, reloc.offset, &reloc.label, span)?;
1355                // Clear immediate fields: bits 12:2
1356                hw &= 0xE003; // keep funct3(15:13) and op(1:0)
1357                              // Pack: bit11→12, bit4→11, bit9:8→10:9, bit10→8, bit6→7, bit7→6, bit3:1→5:3, bit5→2
1358                hw |= ((imm >> 11) & 1) << 12;
1359                hw |= ((imm >> 4) & 1) << 11;
1360                hw |= ((imm >> 8) & 3) << 9;
1361                hw |= ((imm >> 10) & 1) << 8;
1362                hw |= ((imm >> 6) & 1) << 7;
1363                hw |= ((imm >> 7) & 1) << 6;
1364                hw |= ((imm >> 1) & 7) << 3;
1365                hw |= ((imm >> 5) & 1) << 2;
1366                bytes[reloc.offset..reloc.offset + 2].copy_from_slice(&hw.to_le_bytes());
1367            }
1368        }
1369        Ok(())
1370    }
1371
1372    // ── label resolution ───────────────────────────────────
1373
1374    fn resolve_label_with_offsets(
1375        &self,
1376        name: &str,
1377        from_fragment: usize,
1378        offsets: &[u64],
1379    ) -> Result<u64, AsmError> {
1380        // Constants
1381        if let Some(&value) = self.constants.get(name) {
1382            // Treat constant as a signed value that fits the target address space.
1383            // For negative constants, we rely on wrapping semantics (e.g., -1 → 0xFFFF_FFFF_FFFF_FFFF).
1384            return Ok(value as i64 as u64);
1385        }
1386        // Externals
1387        if let Some(&addr) = self.externals.get(name) {
1388            return Ok(addr);
1389        }
1390        // Numeric labels (e.g. "1f", "1b")
1391        if name.len() >= 2 {
1392            let last = name.as_bytes()[name.len() - 1];
1393            let num_part = &name[..name.len() - 1];
1394            if last == b'f' || last == b'b' {
1395                if let Ok(n) = num_part.parse::<u32>() {
1396                    return self.resolve_numeric_with_offsets(
1397                        n,
1398                        from_fragment,
1399                        last == b'f',
1400                        offsets,
1401                    );
1402                }
1403            }
1404        }
1405        // Named labels
1406        if let Some(def) = self.labels.get(name) {
1407            return Ok(offsets[def.fragment_index]);
1408        }
1409
1410        Err(AsmError::UndefinedLabel {
1411            label: String::from(name),
1412            span: Span::new(0, 0, 0, 0),
1413        })
1414    }
1415
1416    fn resolve_numeric_with_offsets(
1417        &self,
1418        num: u32,
1419        from_fragment: usize,
1420        forward: bool,
1421        offsets: &[u64],
1422    ) -> Result<u64, AsmError> {
1423        if let Some(defs) = self.numeric.defs.get(&num) {
1424            if forward {
1425                for &def_idx in defs {
1426                    if def_idx > from_fragment {
1427                        return Ok(offsets[def_idx]);
1428                    }
1429                }
1430            } else {
1431                for &def_idx in defs.iter().rev() {
1432                    if def_idx <= from_fragment {
1433                        return Ok(offsets[def_idx]);
1434                    }
1435                }
1436            }
1437        }
1438        Err(AsmError::UndefinedLabel {
1439            label: alloc::format!("{}{}", num, if forward { 'f' } else { 'b' }),
1440            span: Span::new(0, 0, 0, 0),
1441        })
1442    }
1443}
1444
1445// ─── Multi-byte NOP padding (x86/x86-64) ──────────────────
1446
/// Intel-recommended multi-byte NOP instruction sequences.
///
/// Indexed by length: `NOP_SEQUENCES[k]` is exactly `k` bytes long.
/// These are architecturally guaranteed to behave as NOPs on all modern
/// x86/x86-64 processors and execute in a single cycle on most
/// microarchitectures.
const NOP_SEQUENCES: [&[u8]; 10] = [
    &[],                                                     // 0 bytes (unused)
    &[0x90],                                                 // 1 byte : NOP
    &[0x66, 0x90],                                           // 2 bytes: 66 NOP
    &[0x0F, 0x1F, 0x00],                                     // 3 bytes: NOP DWORD ptr [EAX]
    &[0x0F, 0x1F, 0x40, 0x00],                               // 4 bytes: NOP DWORD ptr [EAX + 00H]
    &[0x0F, 0x1F, 0x44, 0x00, 0x00], // 5 bytes: NOP DWORD ptr [EAX + EAX*1 + 00H]
    &[0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00], // 6 bytes: 66 NOP DWORD ptr [EAX + EAX*1 + 00H]
    &[0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00], // 7 bytes: NOP DWORD ptr [EAX + 00000000H]
    &[0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00], // 8 bytes: NOP DWORD ptr [EAX + EAX*1 + 00000000H]
    &[0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00], // 9 bytes: 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H]
];

/// Emit optimal multi-byte NOP padding of exactly `n` bytes.
///
/// Greedy strategy: append the largest available sequence (9 bytes) as
/// long as possible, then one shorter sequence for the remainder.
fn emit_nop_padding(output: &mut Vec<u8>, n: usize) {
    let mut remaining = n;
    while remaining != 0 {
        let take = remaining.min(9);
        output.extend_from_slice(NOP_SEQUENCES[take]);
        remaining -= take;
    }
}
1476
1477// ─── Tests ─────────────────────────────────────────────────
1478
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture helpers — small constructors so each test reads as a
    // miniature assembly listing.

    /// Dummy span (line 1, col 1) used by every fixture.
    fn span() -> Span {
        Span::new(1, 1, 0, 0)
    }

    /// Fixed (non-relaxable) fragment from raw bytes plus an optional
    /// relocation.
    fn fixed(bytes: Vec<u8>, reloc: Option<Relocation>) -> Fragment {
        Fragment::Fixed {
            bytes: FragmentBytes::Heap(bytes),
            relocation: reloc,
            span: span(),
        }
    }

    /// Single-byte x86 NOP fragment.
    fn nop() -> Fragment {
        fixed(vec![0x90], None)
    }

    /// Relaxable unconditional jump: short `EB rel8` / long `E9 rel32`.
    fn relaxable_jmp(label: &str) -> Fragment {
        Fragment::Relaxable {
            short_bytes: InstrBytes::from_slice(&[0xEB, 0x00]),
            short_reloc_offset: 1,
            short_relocation: None,
            long_bytes: InstrBytes::from_slice(&[0xE9, 0, 0, 0, 0]),
            long_relocation: Relocation {
                offset: 1,
                size: 4,
                label: alloc::rc::Rc::from(label),
                kind: RelocKind::X86Relative,
                addend: 0,
                trailing_bytes: 0,
            },
            is_long: false,
            span: span(),
        }
    }

    /// Relaxable conditional jump for condition code `cc`:
    /// short `7x rel8` / long `0F 8x rel32`.
    fn relaxable_jcc(cc: u8, label: &str) -> Fragment {
        Fragment::Relaxable {
            short_bytes: InstrBytes::from_slice(&[0x70 + cc, 0x00]),
            short_reloc_offset: 1,
            short_relocation: None,
            long_bytes: InstrBytes::from_slice(&[0x0F, 0x80 + cc, 0, 0, 0, 0]),
            long_relocation: Relocation {
                offset: 2,
                size: 4,
                label: alloc::rc::Rc::from(label),
                kind: RelocKind::X86Relative,
                addend: 0,
                trailing_bytes: 0,
            },
            is_long: false,
            span: span(),
        }
    }

    // ── Basic label resolution (fixed fragments) ────────

    #[test]
    fn resolve_forward_label() {
        let mut linker = Linker::new();
        linker.add_fragment(fixed(
            vec![0xE9, 0, 0, 0, 0],
            Some(Relocation {
                offset: 1,
                size: 4,
                label: alloc::rc::Rc::from("target"),
                kind: RelocKind::X86Relative,
                addend: 0,
                trailing_bytes: 0,
            }),
        ));
        linker.add_label("target", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output, vec![0xE9, 0x00, 0x00, 0x00, 0x00, 0x90]);
    }

    #[test]
    fn resolve_backward_label() {
        let mut linker = Linker::new();
        linker.add_label("top", span()).unwrap();
        linker.add_fragment(nop());
        linker.add_fragment(fixed(
            vec![0xE9, 0, 0, 0, 0],
            Some(Relocation {
                offset: 1,
                size: 4,
                label: alloc::rc::Rc::from("top"),
                kind: RelocKind::X86Relative,
                addend: 0,
                trailing_bytes: 0,
            }),
        ));

        let (output, _, _, _) = linker.resolve().unwrap();
        let rel = i32::from_le_bytes([output[2], output[3], output[4], output[5]]);
        assert_eq!(rel, -6);
    }

    #[test]
    fn resolve_with_base_address() {
        let mut linker = Linker::new();
        linker.set_base_address(0x1000);
        linker.add_fragment(fixed(
            vec![0xE9, 0, 0, 0, 0],
            Some(Relocation {
                offset: 1,
                size: 4,
                label: alloc::rc::Rc::from("target"),
                kind: RelocKind::X86Relative,
                addend: 0,
                trailing_bytes: 0,
            }),
        ));
        linker.add_label("target", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output, vec![0xE9, 0x00, 0x00, 0x00, 0x00, 0x90]);
    }

    #[test]
    fn resolve_external_label() {
        let mut linker = Linker::new();
        linker.define_external("printf", 0xDEAD_BEEF);
        linker.add_fragment(fixed(
            vec![0x48, 0xB8, 0, 0, 0, 0, 0, 0, 0, 0],
            Some(Relocation {
                offset: 2,
                size: 8,
                label: alloc::rc::Rc::from("printf"),
                kind: RelocKind::Absolute,
                addend: 0,
                trailing_bytes: 0,
            }),
        ));

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output[2..10], 0xDEAD_BEEFu64.to_le_bytes());
    }

    #[test]
    fn resolve_constant() {
        let mut linker = Linker::new();
        linker.define_constant("SYS_WRITE", 1);
        linker.add_fragment(fixed(
            vec![0xB8, 0, 0, 0, 0],
            Some(Relocation {
                offset: 1,
                size: 4,
                label: alloc::rc::Rc::from("SYS_WRITE"),
                kind: RelocKind::Absolute,
                addend: 0,
                trailing_bytes: 0,
            }),
        ));

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output, vec![0xB8, 0x01, 0x00, 0x00, 0x00]);
    }

    #[test]
    fn duplicate_label_error() {
        let mut linker = Linker::new();
        linker.add_label("foo", span()).unwrap();
        linker.add_fragment(nop());
        let err = linker.add_label("foo", span()).unwrap_err();
        assert!(matches!(err, AsmError::DuplicateLabel { .. }));
    }

    #[test]
    fn undefined_label_error() {
        let mut linker = Linker::new();
        linker.add_fragment(fixed(
            vec![0xE9, 0, 0, 0, 0],
            Some(Relocation {
                offset: 1,
                size: 4,
                label: alloc::rc::Rc::from("nowhere"),
                kind: RelocKind::X86Relative,
                addend: 0,
                trailing_bytes: 0,
            }),
        ));
        let err = linker.resolve().unwrap_err();
        assert!(matches!(err, AsmError::UndefinedLabel { .. }));
    }

    // ── Numeric labels ────────

    #[test]
    fn numeric_label_forward() {
        let mut linker = Linker::new();
        linker.add_fragment(fixed(
            vec![0xE9, 0, 0, 0, 0],
            Some(Relocation {
                offset: 1,
                size: 4,
                label: alloc::rc::Rc::from("1f"),
                kind: RelocKind::X86Relative,
                addend: 0,
                trailing_bytes: 0,
            }),
        ));
        linker.add_label("1", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(&output[1..5], &[0, 0, 0, 0]);
    }

    #[test]
    fn numeric_label_backward() {
        let mut linker = Linker::new();
        linker.add_label("1", span()).unwrap();
        linker.add_fragment(nop());
        linker.add_fragment(fixed(
            vec![0xE9, 0, 0, 0, 0],
            Some(Relocation {
                offset: 1,
                size: 4,
                label: alloc::rc::Rc::from("1b"),
                kind: RelocKind::X86Relative,
                addend: 0,
                trailing_bytes: 0,
            }),
        ));

        let (output, _, _, _) = linker.resolve().unwrap();
        let rel = i32::from_le_bytes([output[2], output[3], output[4], output[5]]);
        assert_eq!(rel, -6);
    }

    // ── Branch relaxation ────────

    #[test]
    fn relaxation_short_jmp_forward() {
        let mut linker = Linker::new();
        linker.add_fragment(relaxable_jmp("target"));
        linker.add_label("target", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        // Short form: EB 00 90
        assert_eq!(output, vec![0xEB, 0x00, 0x90]);
    }

    #[test]
    fn relaxation_short_jmp_backward() {
        let mut linker = Linker::new();
        linker.add_label("top", span()).unwrap();
        linker.add_fragment(nop());
        linker.add_fragment(relaxable_jmp("top"));

        let (output, _, _, _) = linker.resolve().unwrap();
        // top=0, nop@0 (1B), jmp_short@1 (2B), frag_end=3, disp=0-3=-3
        assert_eq!(output, vec![0x90, 0xEB, 0xFD]);
    }

    #[test]
    fn relaxation_promotes_jmp_to_long() {
        let mut linker = Linker::new();
        linker.add_fragment(relaxable_jmp("target"));
        linker.add_fragment(fixed(vec![0x90; 200], None));
        linker.add_label("target", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output[0], 0xE9); // long form
        assert_eq!(output.len(), 5 + 200 + 1);
        let rel = i32::from_le_bytes([output[1], output[2], output[3], output[4]]);
        assert_eq!(rel, 200);
    }

    #[test]
    fn relaxation_short_jcc() {
        let mut linker = Linker::new();
        linker.add_fragment(relaxable_jcc(0x4, "done")); // je
        linker.add_label("done", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output, vec![0x74, 0x00, 0x90]);
    }

    #[test]
    fn relaxation_promotes_jcc_to_long() {
        let mut linker = Linker::new();
        linker.add_fragment(relaxable_jcc(0x4, "done"));
        linker.add_fragment(fixed(vec![0x90; 200], None));
        linker.add_label("done", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output[0], 0x0F);
        assert_eq!(output[1], 0x84);
        let rel = i32::from_le_bytes([output[2], output[3], output[4], output[5]]);
        assert_eq!(rel, 200);
    }

    #[test]
    fn relaxation_boundary_127() {
        // Exactly 127 bytes displacement: should stay short
        let mut linker = Linker::new();
        linker.add_fragment(relaxable_jmp("target"));
        linker.add_fragment(fixed(vec![0x90; 125], None)); // 2 + 125 = 127
        linker.add_label("target", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output[0], 0xEB); // still short
        assert_eq!(output[1], 125u8); // disp = 127 - 2 = 125
    }

    #[test]
    fn relaxation_boundary_128() {
        // 128 bytes displacement: must go long
        // short jmp = 2B, 128 NOPs: target at 130, frag_end at 2, disp = 128 > 127 → promote
        let mut linker = Linker::new();
        linker.add_fragment(relaxable_jmp("target"));
        linker.add_fragment(fixed(vec![0x90; 128], None));
        linker.add_label("target", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output[0], 0xE9); // promoted to long
    }

    #[test]
    fn cascading_relaxation() {
        // Two branches where expanding the second forces the first to expand too.
        let mut linker = Linker::new();

        // jmp L1 (2 or 5 bytes)
        linker.add_fragment(relaxable_jmp("L1"));
        // 125 NOPs
        linker.add_fragment(fixed(vec![0x90; 125], None));
        // jne L2 (2 or 6 bytes)
        linker.add_fragment(relaxable_jcc(0x5, "L2"));

        linker.add_label("L1", span()).unwrap();
        linker.add_fragment(fixed(vec![0x90; 130], None));
        linker.add_label("L2", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        // Both should be long form due to cascading
        assert_eq!(output[0], 0xE9); // jmp rel32
        assert_eq!(output[5 + 125], 0x0F); // jne rel32
        assert_eq!(output[5 + 125 + 1], 0x85);
    }

    // ── Alignment ────────

    #[test]
    fn alignment_fragment() {
        let mut linker = Linker::new();
        linker.add_fragment(nop()); // 1 byte
        linker.add_alignment(4, 0x00, None, false, span());
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output, vec![0x90, 0x00, 0x00, 0x00, 0x90]);
    }

    #[test]
    fn alignment_already_aligned() {
        let mut linker = Linker::new();
        linker.add_fragment(fixed(vec![0x90; 4], None));
        linker.add_alignment(4, 0xCC, None, false, span());
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output, vec![0x90, 0x90, 0x90, 0x90, 0x90]);
    }

    #[test]
    fn alignment_with_base_address() {
        let mut linker = Linker::new();
        linker.set_base_address(0x1001); // base is 1 past alignment
        linker.add_alignment(4, 0xCC, None, false, span());
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        // Needs 3 bytes padding to reach 0x1004
        assert_eq!(output, vec![0xCC, 0xCC, 0xCC, 0x90]);
    }

    // ── Label table ────────

    #[test]
    fn label_table_exported() {
        let mut linker = Linker::new();
        linker.add_label("start", span()).unwrap();
        linker.add_fragment(nop());
        linker.add_fragment(nop());
        linker.add_label("end", span()).unwrap();
        linker.add_fragment(nop());

        let (_, labels, _, _) = linker.resolve().unwrap();
        let m: BTreeMap<String, u64> = labels.into_iter().collect();
        assert_eq!(m["start"], 0);
        assert_eq!(m["end"], 2);
    }

    #[test]
    fn label_table_with_base_address() {
        let mut linker = Linker::new();
        linker.set_base_address(0x1000);
        linker.add_label("func", span()).unwrap();
        linker.add_fragment(fixed(vec![0x90; 10], None));

        let (_, labels, _, _) = linker.resolve().unwrap();
        assert_eq!(labels[0].1, 0x1000);
    }

    // ── Misc ────────

    #[test]
    fn multiple_fragments_no_reloc() {
        let mut linker = Linker::new();
        linker.add_fragment(nop());
        linker.add_fragment(fixed(vec![0xCC], None));
        linker.add_fragment(fixed(vec![0xC3], None));
        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output, vec![0x90, 0xCC, 0xC3]);
    }

    #[test]
    fn empty_linker() {
        let mut linker = Linker::new();
        let (output, labels, _, _) = linker.resolve().unwrap();
        assert!(output.is_empty());
        assert!(labels.is_empty());
    }

    #[test]
    fn relocation_with_addend() {
        let mut linker = Linker::new();
        linker.add_label("data", span()).unwrap();
        linker.add_fragment(fixed(vec![0; 16], None));
        linker.add_fragment(fixed(
            vec![0x48, 0x8D, 0x05, 0, 0, 0, 0],
            Some(Relocation {
                offset: 3,
                size: 4,
                label: alloc::rc::Rc::from("data"),
                kind: RelocKind::X86Relative,
                addend: 4,
                trailing_bytes: 0,
            }),
        ));

        let (output, _, _, _) = linker.resolve().unwrap();
        let rel = i32::from_le_bytes([output[19], output[20], output[21], output[22]]);
        assert_eq!(rel, -19);
    }

    // ── Relaxation + alignment interaction ────────

    #[test]
    fn relaxation_with_alignment() {
        let mut linker = Linker::new();
        linker.add_label("top", span()).unwrap();
        linker.add_fragment(nop()); // 1 byte
        linker.add_alignment(16, 0xCC, None, false, span()); // pad to 16
                                                             // jne top — after alignment, offset = 16, so disp = 0-18 = -18 for short (or -22 for long)
        linker.add_fragment(relaxable_jcc(0x5, "top"));

        let (output, _, _, _) = linker.resolve().unwrap();
        // 1 NOP + 15 padding = 16 bytes. With short jne (2B): disp = 0 - 18 = -18, fits in rel8.
        assert_eq!(output[0], 0x90);
        assert_eq!(output[16], 0x75); // short jne
        let disp = output[17] as i8;
        assert_eq!(disp, -18);
    }

    // ── add_encoded helper ────────

    #[test]
    fn add_encoded_creates_relaxable() {
        let mut linker = Linker::new();
        linker
            .add_encoded(
                InstrBytes::from_slice(&[0xE9, 0, 0, 0, 0]),
                Some(Relocation {
                    offset: 1,
                    size: 4,
                    label: alloc::rc::Rc::from("target"),
                    kind: RelocKind::X86Relative,
                    addend: 0,
                    trailing_bytes: 0,
                }),
                Some(RelaxInfo {
                    short_bytes: InstrBytes::from_slice(&[0xEB, 0x00]),
                    short_reloc_offset: 1,
                    short_relocation: None,
                }),
                span(),
            )
            .unwrap();
        linker.add_label("target", span()).unwrap();
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output, vec![0xEB, 0x00, 0x90]);
    }

    #[test]
    fn add_encoded_creates_fixed() {
        let mut linker = Linker::new();
        linker
            .add_encoded(InstrBytes::from_slice(&[0x90]), None, None, span())
            .unwrap();

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output, vec![0x90]);
    }

    // ── Multi-byte NOP alignment ────────

    #[test]
    fn alignment_with_nop_padding() {
        let mut linker = Linker::new();
        linker.add_fragment(nop()); // 1 byte at offset 0
                                    // Align to 4 bytes with NOP padding (use_nop=true)
        linker.add_alignment(4, 0x00, None, true, span());
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        // 1 NOP + 3-byte NOP + 1 NOP = 5 bytes
        assert_eq!(output.len(), 5);
        assert_eq!(output[0], 0x90); // 1-byte NOP
                                     // Next 3 bytes should be the Intel 3-byte NOP: 0F 1F 00
        assert_eq!(&output[1..4], &[0x0F, 0x1F, 0x00]);
        assert_eq!(output[4], 0x90); // final NOP
    }

    #[test]
    fn alignment_nop_padding_large() {
        let mut linker = Linker::new();
        linker.add_fragment(nop()); // 1 byte at offset 0
                                    // Align to 16 bytes with NOP padding
        linker.add_alignment(16, 0x00, None, true, span());
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        // 1 + 15 padding + 1 = 17 bytes
        assert_eq!(output.len(), 17);
        assert_eq!(output[0], 0x90);
        // 15 bytes of NOP padding: 9-byte NOP + 6-byte NOP
        assert_eq!(
            &output[1..10],
            &[0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00]
        );
        assert_eq!(&output[10..16], &[0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00]);
        assert_eq!(output[16], 0x90);
    }

    #[test]
    fn alignment_max_skip_respected() {
        let mut linker = Linker::new();
        linker.add_fragment(nop()); // 1 byte at offset 0
                                    // Align to 16, but max_skip = 2 — padding needed is 15, which exceeds 2
        linker.add_alignment(16, 0x00, Some(2), false, span());
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        // Alignment should be skipped — just 2 NOPs
        assert_eq!(output, vec![0x90, 0x90]);
    }

    #[test]
    fn alignment_max_skip_allows_small_padding() {
        let mut linker = Linker::new();
        linker.add_fragment(fixed(vec![0x90; 3], None)); // 3 bytes
                                                         // Align to 4 with max_skip = 2 — padding needed is 1, which is ≤ 2
        linker.add_alignment(4, 0xCC, Some(2), false, span());
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output, vec![0x90, 0x90, 0x90, 0xCC, 0x90]);
    }

    // ── .org directive ────────

    #[test]
    fn org_forward_padding() {
        let mut linker = Linker::new();
        linker.set_base_address(0x100);
        linker.add_fragment(nop()); // 1 byte at 0x100
        linker.add_org(0x110, 0x00, span()); // advance to 0x110
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        // 1 NOP + 15 zero-fill + 1 NOP = 17 bytes
        assert_eq!(output.len(), 17);
        assert_eq!(output[0], 0x90);
        // 15 zero bytes
        assert!(output[1..16].iter().all(|&b| b == 0x00));
        assert_eq!(output[16], 0x90);
    }

    #[test]
    fn org_already_at_target() {
        let mut linker = Linker::new();
        linker.set_base_address(0x100);
        linker.add_fragment(fixed(vec![0x90; 16], None)); // exactly at 0x110
        linker.add_org(0x110, 0x00, span()); // already there
        linker.add_fragment(nop());

        let (output, _, _, _) = linker.resolve().unwrap();
        assert_eq!(output.len(), 17); // 16 + 0 padding + 1
    }

    #[test]
    fn org_backward_error() {
        let mut linker = Linker::new();
        linker.set_base_address(0x200);
        linker.add_fragment(fixed(vec![0x90; 16], None)); // at 0x210
        linker.add_org(0x100, 0x00, span()); // behind!

        let err = linker.resolve().unwrap_err();
        assert!(matches!(err, AsmError::Syntax { .. }));
    }

    #[test]
    fn org_with_labels() {
        let mut linker = Linker::new();
        linker.set_base_address(0x1000);
        linker.add_fragment(nop());
        linker.add_org(0x1010, 0x00, span());
        linker.add_label("after_org", span()).unwrap();
        linker.add_fragment(nop());

        let (_, labels, _, _) = linker.resolve().unwrap();
        let m: BTreeMap<String, u64> = labels.into_iter().collect();
        assert_eq!(m["after_org"], 0x1010);
    }

    // === 8th Audit: Branch relaxation with addend ===

    #[test]
    fn relaxable_jmp_with_positive_addend() {
        // target: (offset 0)
        //   nop   (1 byte)
        //   jmp target+1  (short: EB xx, 2 bytes)
        // frag_end = 1 + 2 = 3.  target=0.  addend=1.
        // disp = 0 + 1 - 3 = -2  → short fits, should encode [EB FE]
        let mut linker = Linker::new();
        linker.add_label("target", span()).unwrap();
        linker.add_fragment(nop());
        linker.add_fragment(Fragment::Relaxable {
            short_bytes: InstrBytes::from_slice(&[0xEB, 0x00]),
            short_reloc_offset: 1,
            short_relocation: None,
            long_bytes: InstrBytes::from_slice(&[0xE9, 0, 0, 0, 0]),
            long_relocation: Relocation {
                offset: 1,
                size: 4,
                label: alloc::rc::Rc::from("target"),
                kind: RelocKind::X86Relative,
                addend: 1,
                trailing_bytes: 0,
            },
            is_long: false,
            span: span(),
        });

        let (output, _, _, _) = linker.resolve().unwrap();
        // nop + jmp target+1 → [0x90, 0xEB, 0xFE]
        assert_eq!(output, vec![0x90, 0xEB, 0xFE_u8]); // -2 as i8 = 0xFE
    }

    #[test]
    fn relaxable_jmp_addend_forces_long_form() {
        // target: (offset 0)
        //   .space 126  (126 bytes of NOP)
        //   jmp target - 200  (addend = -200)
        // frag_end = 126 + 2 = 128.  target=0.  addend=-200.
        // disp = 0 + (-200) - 128 = -328  → doesn't fit rel8, must use long form
        let mut linker = Linker::new();
        linker.add_label("target", span()).unwrap();
        // Add 126 bytes of padding
        linker.add_fragment(fixed(vec![0x90; 126], None));
        linker.add_fragment(Fragment::Relaxable {
            short_bytes: InstrBytes::from_slice(&[0xEB, 0x00]),
            short_reloc_offset: 1,
            short_relocation: None,
            long_bytes: InstrBytes::from_slice(&[0xE9, 0, 0, 0, 0]),
            long_relocation: Relocation {
                offset: 1,
                size: 4,
                label: alloc::rc::Rc::from("target"),
                kind: RelocKind::X86Relative,
                addend: -200,
                trailing_bytes: 0,
            },
            is_long: false,
            span: span(),
        });

        let (output, _, _, _) = linker.resolve().unwrap();
        // Should have been promoted to long form: 0xE9 + 4 bytes
        assert_eq!(output.len(), 126 + 5); // 126 nops + 5-byte jmp
                                           // Long form: disp32 = target + addend - frag_end = 0 + (-200) - (126+5) = -331
        let disp = i32::from_le_bytes([output[127], output[128], output[129], output[130]]);
        assert_eq!(disp, -331);
    }
}