// asm_rs/assembler.rs
//! Public assembler API — builder pattern and one-shot assembly.
//!
//! This module ties together the lexer, parser, encoder, and linker
//! into a fluent API for assembling code.
#[allow(unused_imports)]
use alloc::format;
use alloc::string::String;
use alloc::string::ToString;
#[allow(unused_imports)]
use alloc::vec;
use alloc::vec::Vec;

use crate::encoder;
use crate::error::{AsmError, Span};
use crate::ir::*;
use crate::lexer;
use crate::linker::{AppliedRelocation, Linker};
use crate::parser;
use crate::preprocessor::Preprocessor;
/// The result of a successful assembly operation.
///
/// Produced by `Assembler::finish()`. Read-only accessors expose the machine
/// code, resolved label addresses, applied relocations, and (when listing is
/// enabled) per-offset source annotations.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[must_use]
pub struct AssemblyResult {
    /// The assembled machine code.
    bytes: Vec<u8>,
    /// Label addresses (name → absolute address).
    labels: Vec<(String, u64)>,
    /// Applied relocations in the output.
    relocations: Vec<AppliedRelocation>,
    /// Base address used during assembly.
    base_address: u64,
    /// Source text annotations: `(output_offset, source_text)` for listing.
    source_annotations: Vec<(u64, String)>,
}
38
impl AssemblyResult {
    /// Get the assembled bytes.
    ///
    /// # Examples
    ///
    /// ```
    /// use asm_rs::{Assembler, Arch};
    ///
    /// let mut asm = Assembler::new(Arch::X86_64);
    /// asm.emit("nop")?;
    /// let result = asm.finish()?;
    /// assert_eq!(result.bytes(), &[0x90]);
    /// # Ok::<(), asm_rs::AsmError>(())
    /// ```
    #[must_use]
    pub fn bytes(&self) -> &[u8] {
        &self.bytes
    }

    /// Consume and return the bytes.
    ///
    /// # Examples
    ///
    /// ```
    /// use asm_rs::{Assembler, Arch};
    ///
    /// let mut asm = Assembler::new(Arch::X86_64);
    /// asm.emit("ret")?;
    /// let bytes = asm.finish()?.into_bytes();
    /// assert_eq!(bytes, vec![0xC3]);
    /// # Ok::<(), asm_rs::AsmError>(())
    /// ```
    #[must_use]
    pub fn into_bytes(self) -> Vec<u8> {
        self.bytes
    }

    /// Get the byte count.
    ///
    /// # Examples
    ///
    /// ```
    /// use asm_rs::{Assembler, Arch};
    ///
    /// let mut asm = Assembler::new(Arch::X86_64);
    /// asm.emit("nop\nret")?;
    /// let result = asm.finish()?;
    /// assert_eq!(result.len(), 2); // nop(1) + ret(1)
    /// # Ok::<(), asm_rs::AsmError>(())
    /// ```
    #[must_use]
    pub fn len(&self) -> usize {
        self.bytes.len()
    }

    /// Whether the result is empty.
    ///
    /// # Examples
    ///
    /// ```
    /// use asm_rs::{Assembler, Arch};
    ///
    /// let result = Assembler::new(Arch::X86_64).finish()?;
    /// assert!(result.is_empty());
    /// # Ok::<(), asm_rs::AsmError>(())
    /// ```
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.bytes.is_empty()
    }

    /// Get label addresses (name, absolute address).
    ///
    /// # Examples
    ///
    /// ```
    /// use asm_rs::{Assembler, Arch};
    ///
    /// let mut asm = Assembler::new(Arch::X86_64);
    /// asm.emit("start: nop\nend: ret")?;
    /// let result = asm.finish()?;
    /// let labels = result.labels();
    /// assert!(labels.iter().any(|(name, _)| name == "start"));
    /// assert!(labels.iter().any(|(name, _)| name == "end"));
    /// # Ok::<(), asm_rs::AsmError>(())
    /// ```
    #[must_use]
    pub fn labels(&self) -> &[(String, u64)] {
        &self.labels
    }

    /// Look up a label address by name.
    ///
    /// Linear scan over the label list; returns `None` if the label is absent.
    ///
    /// # Examples
    ///
    /// ```
    /// use asm_rs::{Assembler, Arch};
    ///
    /// let mut asm = Assembler::new(Arch::X86_64);
    /// asm.emit("start: nop\nnop\nend: ret")?;
    /// let result = asm.finish()?;
    /// assert_eq!(result.label_address("start"), Some(0));
    /// assert_eq!(result.label_address("end"), Some(2));
    /// assert_eq!(result.label_address("missing"), None);
    /// # Ok::<(), asm_rs::AsmError>(())
    /// ```
    #[must_use]
    pub fn label_address(&self, name: &str) -> Option<u64> {
        self.labels.iter().find(|(n, _)| n == name).map(|(_, a)| *a)
    }

    /// Get the applied relocations — where label references were patched.
    ///
    /// # Examples
    ///
    /// ```
    /// use asm_rs::{Assembler, Arch};
    ///
    /// let mut asm = Assembler::new(Arch::X86_64);
    /// asm.emit("target: jmp target")?;
    /// let result = asm.finish()?;
    /// let relocs = result.relocations();
    /// assert!(!relocs.is_empty());
    /// assert_eq!(relocs[0].label, "target");
    /// # Ok::<(), asm_rs::AsmError>(())
    /// ```
    #[must_use]
    pub fn relocations(&self) -> &[AppliedRelocation] {
        &self.relocations
    }

    /// Get the base address used during assembly.
    ///
    /// # Examples
    ///
    /// ```
    /// use asm_rs::{Assembler, Arch};
    ///
    /// let mut asm = Assembler::new(Arch::X86_64);
    /// asm.base_address(0x1000);
    /// asm.emit("nop")?;
    /// let result = asm.finish()?;
    /// assert_eq!(result.base_address(), 0x1000);
    /// # Ok::<(), asm_rs::AsmError>(())
    /// ```
    #[must_use]
    pub fn base_address(&self) -> u64 {
        self.base_address
    }

    /// Produce a human-readable listing of address, hex bytes.
    ///
    /// Labels are shown on their own line with their resolved address.
    /// Machine code is shown in rows of up to 8 bytes each. Rows are also
    /// broken at every label or source-annotation offset so that each
    /// annotated instruction gets its own line.
    ///
    /// All addresses in the listing are absolute (base address + offset).
    ///
    /// # Example output
    ///
    /// ```text
    /// 00000000                  entry:
    /// 00000000  55              push rbp
    /// 00000001  4889E5          mov rbp, rsp
    /// ```
    #[must_use]
    pub fn listing(&self) -> String {
        use core::fmt::Write;

        let mut out = String::new();
        let base = self.base_address;

        // First, collect labels sorted by address
        let mut sorted_labels = self.labels.clone();
        sorted_labels.sort_by_key(|(_, addr)| *addr);

        // Build a map: offset → list of label names.
        // (Keys are absolute addresses, matching `offset` in the walk below.)
        let mut label_at: alloc::collections::BTreeMap<u64, Vec<&str>> =
            alloc::collections::BTreeMap::new();
        for (name, addr) in &sorted_labels {
            label_at.entry(*addr).or_default().push(name);
        }

        // Build a map: offset → source text annotation
        let mut source_at: alloc::collections::BTreeMap<u64, &str> =
            alloc::collections::BTreeMap::new();
        for (offset, text) in &self.source_annotations {
            if !text.is_empty() {
                source_at.insert(*offset, text);
            }
        }

        // Collect all label offsets as split points (where we must break a chunk)
        let mut split_offsets: alloc::collections::BTreeSet<u64> =
            label_at.keys().copied().collect();

        // Also split at source annotation offsets so each instruction gets its own line
        for &ann_off in source_at.keys() {
            split_offsets.insert(ann_off);
        }

        // Walk through bytes, breaking at label and annotation boundaries.
        // `i` is the byte index into the output; `offset` the absolute address.
        let bytes = &self.bytes;
        let mut offset: u64 = base;
        let mut i = 0;

        while i < bytes.len() {
            // Print any labels at this offset
            if let Some(names) = label_at.get(&offset) {
                for name in names {
                    let _ = writeln!(out, "{:08X}                  {}:", offset, name);
                }
            }

            // Determine chunk size: up to 8 bytes, but break at the next split point
            let max_end = core::cmp::min(i + 8, bytes.len());
            let mut chunk_end = max_end;

            // Check if any split point falls within (offset+1..offset+chunk_len)
            let range_end = offset + (max_end - i) as u64;
            if range_end > offset + 1 {
                for &split_off in split_offsets.range((offset + 1)..range_end) {
                    // Convert the absolute split address back to a byte index.
                    let split_at = (split_off - base) as usize;
                    if split_at < chunk_end && split_at > i {
                        chunk_end = split_at;
                        break;
                    }
                }
            }

            let chunk = &bytes[i..chunk_end];
            let hex: String = chunk.iter().fold(String::new(), |mut acc, b| {
                let _ = write!(acc, "{:02X}", b);
                acc
            });

            // Look up source annotation for this offset
            if let Some(source_text) = source_at.get(&offset) {
                let _ = writeln!(out, "{:08X}  {:<16}  {}", offset, hex, source_text);
            } else {
                let _ = writeln!(out, "{:08X}  {:<16}", offset, hex);
            }

            let chunk_len = chunk.len();
            i += chunk_len;
            offset += chunk_len as u64;
        }

        // Print labels at the very end (e.g. a label after the last instruction)
        if let Some(names) = label_at.get(&offset) {
            for name in names {
                let _ = writeln!(out, "{:08X}                  {}:", offset, name);
            }
        }

        out
    }
}
294
/// Configurable resource limits for defense against denial-of-service.
///
/// When processing untrusted assembly input, these limits prevent pathological
/// inputs from consuming unbounded memory or CPU time. All limits default to
/// generous values that are sufficient for any reasonable assembly program.
///
/// # Examples
///
/// ```rust
/// use asm_rs::{Assembler, Arch};
/// use asm_rs::assembler::ResourceLimits;
///
/// let mut asm = Assembler::new(Arch::X86_64);
/// asm.limits(ResourceLimits {
///     max_statements: 1_000,
///     max_labels: 100,
///     max_output_bytes: 4096,
///     max_errors: 16,
///     max_recursion_depth: 64,
///     max_source_bytes: 64 * 1024 * 1024,
///     max_iterations: 100_000,
/// });
/// // Assembly of very large or pathological inputs will now error early.
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ResourceLimits {
    /// Maximum number of parsed statements (instructions + directives + labels).
    /// Default: 1,000,000.
    pub max_statements: usize,
    /// Maximum number of labels that can be defined. Default: 100,000.
    pub max_labels: usize,
    /// Maximum output size in bytes. Default: 16 MiB.
    pub max_output_bytes: usize,
    /// Maximum accumulated errors before bailing. Default: 64.
    pub max_errors: usize,
    /// Maximum macro expansion recursion depth. Default: 256.
    pub max_recursion_depth: usize,
    /// Maximum input source bytes per `emit()` call. Default: 64 MiB.
    /// Guards against multi-gigabyte inputs consuming unbounded memory
    /// during lexing/parsing before any other limit can fire.
    pub max_source_bytes: usize,
    /// Maximum total preprocessor iterations (`.rept`/`.irp`/`.irpc`).
    /// Default: 100,000.
    pub max_iterations: usize,
}
341
342impl Default for ResourceLimits {
343    fn default() -> Self {
344        Self {
345            max_statements: 1_000_000,
346            max_labels: 100_000,
347            max_output_bytes: 16 * 1024 * 1024,
348            max_errors: 64,
349            max_recursion_depth: 256,
350            max_source_bytes: 64 * 1024 * 1024,
351            max_iterations: 100_000,
352        }
353    }
354}
355
/// Builder-pattern assembler.
///
/// Configure the assembler (syntax, base address, limits), feed it source via
/// [`Assembler::emit`] or the builder methods, then call `finish()` to
/// resolve labels and obtain an `AssemblyResult`.
///
/// # Examples
///
/// ```rust
/// use asm_rs::{Assembler, Arch};
///
/// let mut asm = Assembler::new(Arch::X86_64);
/// asm.emit("push rbp").unwrap();
/// asm.emit("mov rbp, rsp").unwrap();
/// asm.emit("pop rbp").unwrap();
/// asm.emit("ret").unwrap();
/// let result = asm.finish().unwrap();
/// assert!(!result.is_empty());
/// ```
#[derive(Debug)]
pub struct Assembler {
    arch: Arch,
    /// Current x86 encoding mode — tracks `.code16`/`.code32`/`.code64` switches.
    /// Only meaningful when `arch` is `X86` or `X86_64`.
    x86_mode: crate::ir::X86Mode,
    syntax: Syntax,
    opt_level: OptLevel,
    linker: Linker,
    /// Preprocessor for macros, conditionals, and loops.
    preprocessor: Preprocessor,
    /// Accumulated errors for multi-error mode.
    errors: Vec<AsmError>,
    /// Maps linker fragment index → source text for listing.
    fragment_annotations: Vec<(usize, String)>,
    /// Whether to collect source annotations for listing output.
    /// Off by default to avoid per-statement String allocations.
    listing_enabled: bool,
    /// Resource limits for DoS protection.
    resource_limits: ResourceLimits,
    /// Running count of parsed statements so far.
    statement_count: usize,
    /// Running count of defined labels so far.
    label_count: usize,
    /// Pending literal pool entries: (value, size_bytes, synthetic_label).
    /// Flushed at `.ltorg`, unconditional branches, or `finish()`.
    literal_pool: Vec<LiteralPoolEntry>,
    /// Counter for generating unique literal pool labels.
    literal_pool_counter: usize,
    /// Whether RISC-V C extension auto-narrowing is enabled (`.option rvc`).
    /// When true, 32-bit instructions are automatically compressed to 16-bit
    /// equivalents when possible.
    rvc_enabled: bool,
    /// Whether the next label should be marked as a Thumb function (`.thumb_func`).
    /// When true, the label's address will have the LSB set to indicate Thumb mode.
    thumb_func_pending: bool,
    /// Labels marked as Thumb functions via `.thumb_func`.
    /// Their resolved addresses will have the LSB set.
    thumb_labels: Vec<String>,
    /// Running estimate of cumulative output bytes — incremented by builder
    /// methods (`db`, `fill`, `space`, etc.) and `emit()` to catch
    /// `max_output_bytes` overflows *before* the allocation happens.
    estimated_output_bytes: usize,
}
415
/// A pending literal pool entry.
///
/// Created when an instruction references a constant that must be materialized
/// in a nearby pool; resolved when the pool is flushed.
#[derive(Debug, Clone)]
struct LiteralPoolEntry {
    /// The constant value to place in the pool.
    value: i128,
    /// Size in bytes (4 for W-regs, 8 for X-regs).
    size: u8,
    /// Synthetic label that the LDR references.
    label: String,
}
426
impl Assembler {
    /// Create a new assembler for the given architecture.
    ///
    /// Picks a default syntax dialect per architecture family (UAL for
    /// ARM/Thumb/AArch64, RISC-V syntax for RV32/RV64, Intel otherwise)
    /// and the matching default x86 encoding mode.
    pub fn new(arch: Arch) -> Self {
        use crate::ir::X86Mode;

        let syntax = if matches!(arch, Arch::Arm | Arch::Thumb | Arch::Aarch64) {
            Syntax::Ual
        } else if matches!(arch, Arch::Rv32 | Arch::Rv64) {
            Syntax::RiscV
        } else {
            Syntax::Intel
        };
        // 32-bit mode only for plain x86; 64-bit otherwise (the value is
        // simply unused for non-x86 architectures).
        let x86_mode = if matches!(arch, Arch::X86) {
            X86Mode::Mode32
        } else {
            X86Mode::Mode64
        };
        Self {
            arch,
            x86_mode,
            syntax,
            opt_level: OptLevel::default(),
            linker: Linker::new(),
            preprocessor: Preprocessor::new(),
            errors: Vec::new(),
            fragment_annotations: Vec::new(),
            listing_enabled: false,
            resource_limits: ResourceLimits::default(),
            statement_count: 0,
            label_count: 0,
            literal_pool: Vec::new(),
            literal_pool_counter: 0,
            rvc_enabled: false,
            thumb_func_pending: false,
            thumb_labels: Vec::new(),
            estimated_output_bytes: 0,
        }
    }
461
462    /// Set resource limits for defense against pathological inputs.
463    ///
464    /// See [`ResourceLimits`] for the available limits and their defaults.
465    pub fn limits(&mut self, limits: ResourceLimits) -> &mut Self {
466        self.resource_limits = limits;
467        self.preprocessor
468            .set_max_recursion_depth(limits.max_recursion_depth);
469        self.preprocessor.set_max_iterations(limits.max_iterations);
470        self
471    }
472
473    /// Set the syntax dialect.
474    ///
475    /// Currently only [`Syntax::Intel`] is supported. Attempting to emit code
476    /// after selecting an unsupported dialect will return an error.
477    pub fn syntax(&mut self, syntax: Syntax) -> &mut Self {
478        self.syntax = syntax;
479        self
480    }
481
482    /// Set the optimization level.
483    ///
484    /// `OptLevel::Size` (default) prefers shortest encodings. `OptLevel::None`
485    /// disables encoding optimizations for predictable output. Extended
486    /// peephole optimizations (zero-idiom, REX elimination) are planned.
487    pub fn optimize(&mut self, level: OptLevel) -> &mut Self {
488        self.opt_level = level;
489        self
490    }
491
492    /// Enable source annotations for listing output.
493    ///
494    /// When enabled, the assembler records source text for each emitted
495    /// fragment, making it available in [`AssemblyResult::listing()`].
496    /// This adds a per-statement `String` allocation; leave disabled (the
497    /// default) when listing output is not needed.
498    ///
499    /// # Examples
500    ///
501    /// ```
502    /// use asm_rs::{Assembler, Arch};
503    ///
504    /// let mut asm = Assembler::new(Arch::X86_64);
505    /// asm.enable_listing();
506    /// asm.emit("nop")?;
507    /// let result = asm.finish()?;
508    /// let listing = result.listing();
509    /// assert!(listing.contains("90")); // NOP opcode in hex listing
510    /// # Ok::<(), asm_rs::AsmError>(())
511    /// ```
512    pub fn enable_listing(&mut self) -> &mut Self {
513        self.listing_enabled = true;
514        self
515    }
516
517    /// Set the base virtual address for the assembly.
518    pub fn base_address(&mut self, addr: u64) -> &mut Self {
519        self.linker.set_base_address(addr);
520        self
521    }
522
523    /// Define an external label at a known absolute address.
524    ///
525    /// # Examples
526    ///
527    /// ```
528    /// use asm_rs::{Assembler, Arch};
529    ///
530    /// let mut asm = Assembler::new(Arch::X86_64);
531    /// asm.define_external("puts", 0x4000);
532    /// asm.emit("call puts")?;
533    /// let result = asm.finish()?;
534    /// assert!(!result.bytes().is_empty());
535    /// # Ok::<(), asm_rs::AsmError>(())
536    /// ```
537    pub fn define_external(&mut self, name: &str, addr: u64) -> &mut Self {
538        self.linker.define_external(name, addr);
539        self
540    }
541
542    /// Define a named constant value.
543    pub fn define_constant(&mut self, name: &str, value: i128) -> &mut Self {
544        self.linker.define_constant(name, value);
545        self
546    }
547
548    /// Emit assembly source text. Can be called multiple times.
549    ///
550    /// # Errors
551    ///
552    /// Returns [`AsmError`] on parse or encoding errors, unsupported syntax,
553    /// or if resource limits are exceeded.
554    pub fn emit(&mut self, source: &str) -> Result<&mut Self, AsmError> {
555        // Check source size limit before any work
556        if source.len() > self.resource_limits.max_source_bytes {
557            return Err(AsmError::ResourceLimitExceeded {
558                resource: String::from("source bytes"),
559                limit: self.resource_limits.max_source_bytes,
560            });
561        }
562        // Run preprocessor to expand macros, loops, and conditionals
563        let expanded = self.preprocessor.process(source)?;
564        let mut statements = parse_source(&expanded, self.arch, self.syntax)?;
565        self.process_statements(&mut statements, &expanded)?;
566        Ok(self)
567    }
568
569    /// Define a preprocessor symbol for conditional assembly.
570    ///
571    /// Symbols defined here are available in `.ifdef`/`.ifndef` and `.if defined()`
572    /// conditionals within assembly source.
573    pub fn define_preprocessor_symbol(&mut self, name: &str, value: i128) -> &mut Self {
574        self.preprocessor.define_symbol(name, value);
575        self
576    }
577
578    /// Add a label at the current position (builder API).
579    ///
580    /// # Examples
581    ///
582    /// ```
583    /// use asm_rs::{Assembler, Arch};
584    ///
585    /// let mut asm = Assembler::new(Arch::X86_64);
586    /// asm.label("entry")?;
587    /// asm.emit("nop")?;
588    /// let result = asm.finish()?;
589    /// assert_eq!(result.label_address("entry"), Some(0));
590    /// # Ok::<(), asm_rs::AsmError>(())
591    /// ```
592    ///
593    /// # Errors
594    ///
595    /// Returns [`AsmError::DuplicateLabel`] if the label was already defined,
596    /// or [`AsmError::ResourceLimitExceeded`] if the label limit is reached.
597    pub fn label(&mut self, name: &str) -> Result<&mut Self, AsmError> {
598        self.label_count += 1;
599        if self.label_count > self.resource_limits.max_labels {
600            return Err(AsmError::ResourceLimitExceeded {
601                resource: String::from("labels"),
602                limit: self.resource_limits.max_labels,
603            });
604        }
605        self.linker.add_label(name, Span::new(0, 0, 0, 0))?;
606        Ok(self)
607    }
608
609    /// Emit raw bytes (builder API for `.byte`/`.db`).
610    ///
611    /// # Errors
612    ///
613    /// Returns [`AsmError::ResourceLimitExceeded`] if the output size limit
614    /// would be exceeded.
615    pub fn db(&mut self, bytes: &[u8]) -> Result<&mut Self, AsmError> {
616        self.check_output_limit(bytes.len())?;
617        self.linker.add_bytes(bytes.to_vec(), Span::new(0, 0, 0, 0));
618        Ok(self)
619    }
620
621    /// Emit a 16-bit value (builder API for `.word`/`.dw`).
622    ///
623    /// # Errors
624    ///
625    /// Returns [`AsmError::ResourceLimitExceeded`] if the output size limit
626    /// would be exceeded.
627    pub fn dw(&mut self, value: u16) -> Result<&mut Self, AsmError> {
628        self.check_output_limit(2)?;
629        self.linker
630            .add_bytes(value.to_le_bytes().to_vec(), Span::new(0, 0, 0, 0));
631        Ok(self)
632    }
633
634    /// Emit a 32-bit value (builder API for `.long`/`.dd`).
635    ///
636    /// # Errors
637    ///
638    /// Returns [`AsmError::ResourceLimitExceeded`] if the output size limit
639    /// would be exceeded.
640    pub fn dd(&mut self, value: u32) -> Result<&mut Self, AsmError> {
641        self.check_output_limit(4)?;
642        self.linker
643            .add_bytes(value.to_le_bytes().to_vec(), Span::new(0, 0, 0, 0));
644        Ok(self)
645    }
646
647    /// Emit a 64-bit value (builder API for `.quad`/`.dq`).
648    ///
649    /// # Errors
650    ///
651    /// Returns [`AsmError::ResourceLimitExceeded`] if the output size limit
652    /// would be exceeded.
653    pub fn dq(&mut self, value: u64) -> Result<&mut Self, AsmError> {
654        self.check_output_limit(8)?;
655        self.linker
656            .add_bytes(value.to_le_bytes().to_vec(), Span::new(0, 0, 0, 0));
657        Ok(self)
658    }
659
660    /// Emit a string without NUL terminator (builder API for `.ascii`).
661    ///
662    /// # Errors
663    ///
664    /// Returns [`AsmError::ResourceLimitExceeded`] if the output size limit
665    /// would be exceeded.
666    pub fn ascii(&mut self, s: &str) -> Result<&mut Self, AsmError> {
667        self.check_output_limit(s.len())?;
668        self.linker
669            .add_bytes(s.as_bytes().to_vec(), Span::new(0, 0, 0, 0));
670        Ok(self)
671    }
672
673    /// Emit a NUL-terminated string (builder API for `.asciz`/`.string`).
674    ///
675    /// # Errors
676    ///
677    /// Returns [`AsmError::ResourceLimitExceeded`] if the output size limit
678    /// would be exceeded.
679    pub fn asciz(&mut self, s: &str) -> Result<&mut Self, AsmError> {
680        self.check_output_limit(s.len() + 1)?;
681        let mut bytes = s.as_bytes().to_vec();
682        bytes.push(0);
683        self.linker.add_bytes(bytes, Span::new(0, 0, 0, 0));
684        Ok(self)
685    }
686
687    /// Align to a byte boundary (builder API for `.align`).
688    ///
689    /// Uses multi-byte NOP padding for x86/x86-64 architectures.
690    pub fn align(&mut self, alignment: u32) -> &mut Self {
691        let use_nop = matches!(self.arch, Arch::X86 | Arch::X86_64);
692        self.linker
693            .add_alignment(alignment, 0x00, None, use_nop, Span::new(0, 0, 0, 0));
694        self
695    }
696
697    /// Align to a byte boundary with explicit fill byte (builder API).
698    pub fn align_with_fill(&mut self, alignment: u32, fill: u8) -> &mut Self {
699        self.linker
700            .add_alignment(alignment, fill, None, false, Span::new(0, 0, 0, 0));
701        self
702    }
703
704    /// Set the location counter to an absolute address (builder API for `.org`).
705    pub fn org(&mut self, target: u64) -> &mut Self {
706        self.linker.add_org(target, 0x00, Span::new(0, 0, 0, 0));
707        self
708    }
709
710    /// Set the location counter with explicit fill byte (builder API for `.org`).
711    pub fn org_with_fill(&mut self, target: u64, fill: u8) -> &mut Self {
712        self.linker.add_org(target, fill, Span::new(0, 0, 0, 0));
713        self
714    }
715
716    /// Emit fill bytes (builder API for `.fill`).
717    ///
718    /// Produces `count * size` bytes, each `size`-byte unit filled with `value`.
719    ///
720    /// # Errors
721    ///
722    /// Returns [`AsmError::ResourceLimitExceeded`] if the output size limit
723    /// would be exceeded.
724    pub fn fill(&mut self, count: u32, size: u8, value: i64) -> Result<&mut Self, AsmError> {
725        let total = count as usize * size as usize;
726        self.check_output_limit(total)?;
727        let mut bytes = Vec::with_capacity(total);
728        // GAS semantics: value is a LE integer padded to `size` bytes
729        let val_bytes = value.to_le_bytes();
730        for _ in 0..count {
731            for &b in val_bytes.iter().take(size as usize) {
732                bytes.push(b);
733            }
734            // Pad with zeros if size > 8
735            if (size as usize) > 8 {
736                bytes.resize(bytes.len() + size as usize - 8, 0);
737            }
738        }
739        self.linker.add_bytes(bytes, Span::new(0, 0, 0, 0));
740        Ok(self)
741    }
742
743    /// Emit zero-filled space (builder API for `.space`/`.skip`).
744    ///
745    /// # Errors
746    ///
747    /// Returns [`AsmError::ResourceLimitExceeded`] if the output size limit
748    /// would be exceeded.
749    pub fn space(&mut self, n: u32) -> Result<&mut Self, AsmError> {
750        self.check_output_limit(n as usize)?;
751        let bytes = alloc::vec![0u8; n as usize];
752        self.linker.add_bytes(bytes, Span::new(0, 0, 0, 0));
753        Ok(self)
754    }
755
756    /// Returns the current number of fragments (instructions + data) emitted so far.
757    ///
758    /// Useful for estimating output size before calling [`finish()`](Assembler::finish).
759    pub fn current_fragment_count(&self) -> usize {
760        self.linker.fragment_count()
761    }
762
763    /// Assemble a single instruction and return its raw bytes immediately,
764    /// without label resolution.
765    ///
766    /// This is useful for one-shot encoding when labels are not needed.
767    /// The instruction is NOT added to the assembler's internal state.
768    ///
769    /// # Examples
770    ///
771    /// ```
772    /// use asm_rs::{Assembler, Arch};
773    ///
774    /// let asm = Assembler::new(Arch::X86_64);
775    /// let bytes = asm.encode_one("xor eax, eax")?;
776    /// assert_eq!(bytes, [0x31, 0xC0]);
777    /// # Ok::<(), asm_rs::AsmError>(())
778    /// ```
779    ///
780    /// # Errors
781    ///
782    /// Returns [`AsmError`] if the instruction cannot be parsed or encoded.
783    pub fn encode_one(&self, source: &str) -> Result<Vec<u8>, AsmError> {
784        use crate::encoder::encode_instruction;
785
786        let tokens = crate::lexer::tokenize(source)?;
787        let stmts = crate::parser::parse_with_syntax(&tokens, self.arch, self.syntax)?;
788        if stmts.is_empty() {
789            return Ok(Vec::new());
790        }
791        match &stmts[0] {
792            crate::ir::Statement::Instruction(instr) => {
793                // Resolve any constants defined via define_constant() / .equ / .set
794                let mut instr = instr.clone();
795                self.resolve_constants_in_instruction(&mut instr);
796                let encoded = encode_instruction(&instr, self.arch)?;
797                Ok(encoded.bytes.to_vec())
798            }
799            _ => Err(AsmError::Syntax {
800                msg: String::from("expected an instruction"),
801                span: crate::error::Span::new(0, 0, 0, 0),
802            }),
803        }
804    }
805
806    /// Reset the assembler to its initial state, keeping configuration
807    /// (architecture, syntax, optimization level, limits) intact.
808    ///
809    /// This allows reusing the same `Assembler` for multiple assembly operations
810    /// without reallocating configuration state.
811    ///
812    /// # Examples
813    ///
814    /// ```
815    /// use asm_rs::{Assembler, Arch};
816    ///
817    /// let mut asm = Assembler::new(Arch::X86_64);
818    /// asm.emit("nop")?;
819    /// asm.reset();
820    /// asm.emit("ret")?;
821    /// let result = asm.finish()?;
822    /// assert_eq!(result.bytes(), &[0xC3]); // only ret, nop was reset
823    /// # Ok::<(), asm_rs::AsmError>(())
824    /// ```
825    pub fn reset(&mut self) -> &mut Self {
826        self.linker = Linker::new();
827        self.preprocessor = Preprocessor::new();
828        self.errors.clear();
829        self.fragment_annotations.clear();
830        // listing_enabled is configuration, preserved across resets
831        self.statement_count = 0;
832        self.label_count = 0;
833        self.literal_pool.clear();
834        self.literal_pool_counter = 0;
835        self.thumb_func_pending = false;
836        self.thumb_labels.clear();
837        self.estimated_output_bytes = 0;
838        // Note: rvc_enabled, x86_mode, and arch are configuration state
839        // deliberately preserved across resets (like syntax and opt_level).
840        self
841    }
842
843    /// Check that adding `n` bytes would not exceed the output size limit.
844    ///
845    /// Called by builder methods to enforce `max_output_bytes` eagerly —
846    /// *before* allocating the data — rather than only at `finish()` time.
847    fn check_output_limit(&mut self, additional: usize) -> Result<(), AsmError> {
848        self.estimated_output_bytes += additional;
849        if self.estimated_output_bytes > self.resource_limits.max_output_bytes {
850            return Err(AsmError::ResourceLimitExceeded {
851                resource: String::from("output bytes"),
852                limit: self.resource_limits.max_output_bytes,
853            });
854        }
855        Ok(())
856    }
857
    /// Finalize assembly: resolve labels, apply relocations, return result.
    ///
    /// Order matters here: accumulated errors are reported first, the literal
    /// pool is flushed *before* label resolution (the pool defines labels),
    /// and Thumb LSB tagging and the output-size check run on the resolved
    /// output.
    ///
    /// # Errors
    ///
    /// Returns [`AsmError`] if label resolution fails, relocations cannot be
    /// applied, accumulated errors exist, or resource limits are exceeded.
    pub fn finish(mut self) -> Result<AssemblyResult, AsmError> {
        // Report errors accumulated during emit(): a single error is returned
        // as-is, multiple are wrapped in AsmError::Multiple.
        if !self.errors.is_empty() {
            if self.errors.len() == 1 {
                return Err(self.errors.remove(0));
            }
            return Err(AsmError::Multiple {
                errors: self.errors,
            });
        }

        let base = self.linker.base_address();

        // Flush any remaining literal pool entries before resolving.
        let flush_span = crate::error::Span::new(0, 0, 0, 0);
        self.flush_literal_pool(flush_span)?;

        let (bytes, mut labels, relocations, offsets) = self.linker.resolve()?;

        // Set LSB on Thumb function label addresses for interworking
        for (name, addr) in labels.iter_mut() {
            if self.thumb_labels.iter().any(|t| t == name) {
                *addr |= 1;
            }
        }

        // Enforce output size limit on the actual resolved output (the
        // running estimate in check_output_limit() is only an early guard).
        if bytes.len() > self.resource_limits.max_output_bytes {
            return Err(AsmError::ResourceLimitExceeded {
                resource: String::from("output bytes"),
                limit: self.resource_limits.max_output_bytes,
            });
        }

        // Build source annotations: map fragment index → output offset,
        // then look up the source text for each annotated fragment.
        let source_annotations = self.build_source_annotations(&offsets);

        Ok(AssemblyResult {
            bytes,
            labels,
            relocations,
            base_address: base,
            source_annotations,
        })
    }
909
910    /// Build source text annotations by mapping fragment indices to output
911    /// offsets and extracting the source text from the stored source strings.
912    fn build_source_annotations(&self, offsets: &[u64]) -> Vec<(u64, String)> {
913        let mut annotations = Vec::new();
914        for &(frag_idx, ref text) in &self.fragment_annotations {
915            if frag_idx < offsets.len() {
916                annotations.push((offsets[frag_idx], text.clone()));
917            }
918        }
919        annotations
920    }
921
    /// Feed a batch of parsed statements into the linker.
    ///
    /// `source` is the original text, used only for listing annotations.
    /// Instruction-encoding errors are *collected* in `self.errors` (and
    /// surfaced at `finish()`); directive misuse and resource-limit
    /// violations return immediately.
    fn process_statements(
        &mut self,
        statements: &mut [Statement],
        source: &str,
    ) -> Result<(), AsmError> {
        // Check statement count limit (total across all emit() calls)
        self.statement_count += statements.len();
        if self.statement_count > self.resource_limits.max_statements {
            return Err(AsmError::ResourceLimitExceeded {
                resource: String::from("statements"),
                limit: self.resource_limits.max_statements,
            });
        }

        for stmt in statements.iter_mut() {
            match stmt {
                Statement::Label(name, span) => {
                    self.label_count += 1;
                    if self.label_count > self.resource_limits.max_labels {
                        return Err(AsmError::ResourceLimitExceeded {
                            resource: String::from("labels"),
                            limit: self.resource_limits.max_labels,
                        });
                    }
                    self.linker.add_label(name, *span)?;
                    // Mark as Thumb function if .thumb_func was pending
                    if self.thumb_func_pending {
                        self.thumb_labels.push(name.clone());
                        self.thumb_func_pending = false;
                    }
                }

                Statement::Instruction(instr) => {
                    // Index of the fragment this instruction will occupy,
                    // captured *before* adding so annotate() can map it.
                    let frag_idx = self.linker.fragment_count();
                    // Resolve any constant references in operands before encoding
                    self.resolve_constants_in_instruction(instr);
                    // Transform literal pool operands: =value → label reference
                    self.transform_literal_pool_operands(instr);
                    // Apply peephole optimizations when OptLevel::Size is active
                    if self.opt_level == OptLevel::Size {
                        crate::optimize::optimize_instruction(instr, self.arch);
                    }
                    // 16-bit mode needs its own encoder; without the "x86"
                    // feature we fall through to the generic path.
                    let encode_result = if self.x86_mode == crate::ir::X86Mode::Mode16 {
                        #[cfg(feature = "x86")]
                        {
                            encoder::encode_instruction_16(instr)
                        }
                        #[cfg(not(feature = "x86"))]
                        {
                            encoder::encode_instruction(instr, self.arch)
                        }
                    } else {
                        encoder::encode_instruction(instr, self.arch)
                    };
                    // RISC-V auto-narrowing: when .option rvc is active and the
                    // instruction is a 4-byte standard form, try to compress it
                    // to a 16-bit C-extension equivalent.
                    // (Shadowing rebind: only attempted for non-"c." mnemonics
                    // whose 4-byte encoding carries no relocation.)
                    #[cfg(feature = "riscv")]
                    let encode_result = if self.rvc_enabled
                        && matches!(self.arch, Arch::Rv32 | Arch::Rv64)
                        && !instr.mnemonic.starts_with("c.")
                    {
                        match encode_result {
                            Ok(ref enc) if enc.bytes.len() == 4 && enc.relocation.is_none() => {
                                let is_rv64 = self.arch == Arch::Rv64;
                                if let Some(hw) = crate::riscv::try_compress(
                                    &instr.mnemonic,
                                    &instr.operands,
                                    is_rv64,
                                    instr.span,
                                ) {
                                    Ok(crate::riscv::rvc_instr(hw))
                                } else {
                                    encode_result
                                }
                            }
                            _ => encode_result,
                        }
                    } else {
                        encode_result
                    };
                    match encode_result {
                        Ok(encoded) => {
                            self.check_output_limit(encoded.bytes.len())?;
                            self.linker.add_encoded(
                                encoded.bytes,
                                encoded.relocation,
                                encoded.relax,
                                instr.span,
                            )?;
                            self.annotate(frag_idx, source, instr.span);
                        }
                        Err(e) => {
                            // Collect rather than fail fast; finish() reports.
                            self.errors.push(e);
                            if self.errors.len() >= self.resource_limits.max_errors {
                                return Err(AsmError::ResourceLimitExceeded {
                                    resource: String::from("errors"),
                                    limit: self.resource_limits.max_errors,
                                });
                            }
                        }
                    }
                }

                Statement::Data(data) => {
                    let frag_idx = self.linker.fragment_count();
                    let span = data.span;
                    self.emit_data(data)?;
                    self.annotate(frag_idx, source, span);
                }

                Statement::Align(align) => {
                    let frag_idx = self.linker.fragment_count();
                    let span = align.span;
                    // When no explicit fill byte is given and the target is
                    // x86/x86-64, pad with multi-byte NOP sequences instead
                    // of zero bytes — optimal for code-section alignment.
                    let use_nop =
                        align.fill.is_none() && matches!(self.arch, Arch::X86 | Arch::X86_64);
                    self.linker.add_alignment(
                        align.alignment,
                        align.fill.unwrap_or(0x00),
                        align.max_skip,
                        use_nop,
                        align.span,
                    );
                    self.annotate(frag_idx, source, span);
                }

                Statement::Const(c) => {
                    // .equ/.set — recorded in the linker's constant table.
                    self.linker.define_constant(&c.name, c.value);
                }

                Statement::Fill(fill) => {
                    let frag_idx = self.linker.fragment_count();
                    let span = fill.span;
                    // NOTE(review): count * size could overflow usize on
                    // pathological input — TODO confirm the parser bounds these.
                    let total = fill.count as usize * fill.size as usize;
                    self.check_output_limit(total)?;
                    let mut bytes = Vec::with_capacity(total);
                    // GAS semantics: value is a LE integer padded to `size` bytes.
                    // .fill 2, 4, 0x90 → [90 00 00 00  90 00 00 00]
                    let val_bytes = fill.value.to_le_bytes();
                    for _ in 0..fill.count {
                        for &b in val_bytes.iter().take(fill.size as usize) {
                            bytes.push(b);
                        }
                        // Pad with zeros if size > 8
                        if (fill.size as usize) > 8 {
                            bytes.resize(bytes.len() + fill.size as usize - 8, 0);
                        }
                    }
                    self.linker.add_bytes(bytes, fill.span);
                    self.annotate(frag_idx, source, span);
                }

                Statement::Space(space) => {
                    // .space/.skip — `size` copies of the fill byte.
                    let frag_idx = self.linker.fragment_count();
                    let span = space.span;
                    self.check_output_limit(space.size as usize)?;
                    let bytes = alloc::vec![space.fill; space.size as usize];
                    self.linker.add_bytes(bytes, space.span);
                    self.annotate(frag_idx, source, span);
                }

                Statement::Org(org) => {
                    let frag_idx = self.linker.fragment_count();
                    let span = org.span;
                    // .org sets the location counter to an absolute address.
                    // The linker emits fill bytes to pad from current position
                    // to the target.
                    self.linker.add_org(org.offset, org.fill, org.span);
                    self.annotate(frag_idx, source, span);
                }

                Statement::CodeMode(mode, span) => {
                    // .code16 / .code32 / .code64 — switch x86 encoding mode
                    if !matches!(self.arch, Arch::X86 | Arch::X86_64) {
                        return Err(AsmError::Syntax {
                            msg: String::from(".code16/.code32/.code64 only valid for x86/x86-64"),
                            span: *span,
                        });
                    }
                    self.x86_mode = *mode;
                    // Update the arch to match the new mode for encoding dispatch
                    match mode {
                        crate::ir::X86Mode::Mode16 | crate::ir::X86Mode::Mode32 => {
                            self.arch = Arch::X86;
                        }
                        crate::ir::X86Mode::Mode64 => {
                            self.arch = Arch::X86_64;
                        }
                    }
                }

                Statement::Ltorg(span) => {
                    // Flush pending literal pool entries
                    let span = *span;
                    self.flush_literal_pool(span)?;
                }

                Statement::OptionRvc(enable, span) => {
                    // .option rvc / .option norvc — toggle RISC-V C extension auto-narrowing
                    if !matches!(self.arch, Arch::Rv32 | Arch::Rv64) {
                        return Err(AsmError::Syntax {
                            msg: String::from(".option rvc/norvc is only valid for RISC-V"),
                            span: *span,
                        });
                    }
                    self.rvc_enabled = *enable;
                }

                Statement::ThumbMode(is_thumb, span) => {
                    // .thumb / .arm — switch between Thumb and ARM modes
                    if !matches!(self.arch, Arch::Arm | Arch::Thumb) {
                        return Err(AsmError::Syntax {
                            msg: String::from(".thumb/.arm only valid for ARM"),
                            span: *span,
                        });
                    }
                    self.arch = if *is_thumb { Arch::Thumb } else { Arch::Arm };
                }

                Statement::ThumbFunc(span) => {
                    // .thumb_func — mark next label as Thumb function (LSB set)
                    if !matches!(self.arch, Arch::Arm | Arch::Thumb) {
                        return Err(AsmError::Syntax {
                            msg: String::from(".thumb_func only valid for ARM/Thumb"),
                            span: *span,
                        });
                    }
                    // Also switch to Thumb mode (GNU as behavior)
                    self.arch = Arch::Thumb;
                    self.thumb_func_pending = true;
                }
            }
        }
        Ok(())
    }
1160
1161    /// Record a source-text annotation for a fragment, if listing is enabled.
1162    #[inline]
1163    fn annotate(&mut self, frag_idx: usize, source: &str, span: Span) {
1164        if self.listing_enabled {
1165            let src_text = extract_source_line(source, span);
1166            if !src_text.is_empty() {
1167                self.fragment_annotations
1168                    .push((frag_idx, src_text.to_string()));
1169            }
1170        }
1171    }
1172
1173    /// Transform `Operand::LiteralPoolValue(val)` into `Operand::Label(label)`
1174    /// and queue the constant for emission in the next literal pool flush.
1175    ///
1176    /// The destination register width determines pool entry size:
1177    /// - X-registers → 8-byte entry (`.quad`)
1178    /// - W-registers → 4-byte entry (`.long`)
1179    ///
1180    /// Duplicate values with the same size are deduplicated to share a single
1181    /// pool entry.
1182    fn transform_literal_pool_operands(&mut self, instr: &mut Instruction) {
1183        // Determine pool entry size from the first register operand.
1184        // - ARM registers → always 4 bytes (32-bit)
1185        // - AArch64 X-registers → 8 bytes, W-registers → 4 bytes
1186        // - Default: 8 bytes (AArch64 64-bit) if no register is found
1187        let size: u8 = instr
1188            .operands
1189            .iter()
1190            .find_map(|op| {
1191                if let Operand::Register(r) = op {
1192                    if r.is_arm() {
1193                        return Some(4u8); // ARM32 is always 4 bytes
1194                    }
1195                    if r.is_aarch64() {
1196                        return Some(if r.is_a64_64bit() { 8u8 } else { 4u8 });
1197                    }
1198                }
1199                None
1200            })
1201            .unwrap_or(8);
1202
1203        for op in &mut instr.operands {
1204            if let Operand::LiteralPoolValue(val) = op {
1205                let val = *val;
1206
1207                // Check for existing pool entry with same value + size (dedup).
1208                let label = if let Some(existing) = self
1209                    .literal_pool
1210                    .iter()
1211                    .find(|e| e.value == val && e.size == size)
1212                {
1213                    existing.label.clone()
1214                } else {
1215                    let label = alloc::format!(".Lpool_{}", self.literal_pool_counter);
1216                    self.literal_pool_counter += 1;
1217                    self.literal_pool.push(LiteralPoolEntry {
1218                        value: val,
1219                        size,
1220                        label: label.clone(),
1221                    });
1222                    label
1223                };
1224
1225                *op = Operand::Label(label);
1226            }
1227        }
1228    }
1229
1230    /// Flush all pending literal pool entries as labeled data fragments.
1231    ///
1232    /// Emits alignment padding followed by each pool entry's label + data.
1233    /// Called at `.ltorg` directives, unconditional branches (future), or `finish()`.
1234    fn flush_literal_pool(&mut self, span: Span) -> Result<(), AsmError> {
1235        if self.literal_pool.is_empty() {
1236            return Ok(());
1237        }
1238
1239        // Align pool to the largest entry size for natural alignment.
1240        let max_align = self
1241            .literal_pool
1242            .iter()
1243            .map(|e| e.size as u32)
1244            .max()
1245            .unwrap_or(4);
1246        self.linker
1247            .add_alignment(max_align, 0x00, None, false, span);
1248
1249        // Drain and emit each entry.
1250        let entries: Vec<LiteralPoolEntry> = core::mem::take(&mut self.literal_pool);
1251        for entry in &entries {
1252            self.linker.add_label(&entry.label, span)?;
1253            let bytes = match entry.size {
1254                4 => (entry.value as u32).to_le_bytes().to_vec(),
1255                8 => (entry.value as u64).to_le_bytes().to_vec(),
1256                _ => (entry.value as u64).to_le_bytes().to_vec(),
1257            };
1258            self.linker.add_bytes(bytes, span);
1259        }
1260
1261        Ok(())
1262    }
1263
1264    /// Replace label operands with immediate values when they refer to known constants.
1265    ///
1266    /// Also resolves constants inside `Operand::Expression` trees and collapses
1267    /// fully-numeric expressions to `Operand::Immediate`.
1268    fn resolve_constants_in_instruction(&self, instr: &mut Instruction) {
1269        for op in &mut instr.operands {
1270            match op {
1271                Operand::Label(name) => {
1272                    if let Some(&value) = self.linker.get_constant(name) {
1273                        *op = Operand::Immediate(value);
1274                    }
1275                }
1276                Operand::Expression(expr) => {
1277                    // Substitute any constants referenced inside the expression tree.
1278                    expr.resolve_constants(|name| self.linker.get_constant(name).copied());
1279                    // If the expression is now purely numeric, collapse to Immediate.
1280                    if let Some(val) = expr.eval() {
1281                        *op = Operand::Immediate(val);
1282                    }
1283                }
1284                Operand::Memory(mem) => {
1285                    // Resolve constants used as displacement labels (e.g., [rbp + MY_CONST])
1286                    if let Some(ref label) = mem.disp_label {
1287                        if let Some(&value) = self.linker.get_constant(label) {
1288                            mem.disp = mem.disp.wrapping_add(value as i64);
1289                            mem.disp_label = None;
1290                        }
1291                    }
1292                }
1293                _ => {}
1294            }
1295        }
1296    }
1297
1298    /// Emit a data declaration, handling label references via relocations.
1299    fn emit_data(&mut self, data: &DataDecl) -> Result<(), AsmError> {
1300        use crate::encoder::Relocation;
1301
1302        let data_item_size: usize = match data.size {
1303            DataSize::Byte => 1,
1304            DataSize::Word => 2,
1305            DataSize::Long => 4,
1306            DataSize::Quad => 8,
1307        };
1308
1309        // Accumulate contiguous non-label bytes, flush when we hit a label.
1310        let mut pending: Vec<u8> = Vec::new();
1311
1312        for value in &data.values {
1313            match value {
1314                DataValue::Integer(n) => match data.size {
1315                    DataSize::Byte => pending.push(*n as u8),
1316                    DataSize::Word => pending.extend_from_slice(&(*n as u16).to_le_bytes()),
1317                    DataSize::Long => pending.extend_from_slice(&(*n as u32).to_le_bytes()),
1318                    DataSize::Quad => pending.extend_from_slice(&(*n as u64).to_le_bytes()),
1319                },
1320                DataValue::Bytes(b) => {
1321                    pending.extend_from_slice(b);
1322                }
1323                DataValue::Label(name, addend) => {
1324                    // Check if this is a constant (defined via .equ) rather than a label
1325                    if let Some(&const_val) = self.linker.get_constant(name) {
1326                        let val = const_val.wrapping_add(*addend as i128);
1327                        match data.size {
1328                            DataSize::Byte => pending.push(val as u8),
1329                            DataSize::Word => {
1330                                pending.extend_from_slice(&(val as u16).to_le_bytes())
1331                            }
1332                            DataSize::Long => {
1333                                pending.extend_from_slice(&(val as u32).to_le_bytes())
1334                            }
1335                            DataSize::Quad => {
1336                                pending.extend_from_slice(&(val as u64).to_le_bytes())
1337                            }
1338                        }
1339                        continue;
1340                    }
1341
1342                    // Flush any pending plain bytes first
1343                    if !pending.is_empty() {
1344                        self.linker
1345                            .add_bytes(core::mem::take(&mut pending), data.span);
1346                    }
1347                    // Emit a zero-filled data slot with an absolute relocation for the label
1348                    let mut slot = encoder::InstrBytes::new();
1349                    for _ in 0..data_item_size {
1350                        slot.push(0);
1351                    }
1352                    let reloc = Relocation {
1353                        offset: 0,
1354                        size: data_item_size as u8,
1355                        label: alloc::rc::Rc::from(name.as_str()),
1356                        kind: encoder::RelocKind::Absolute,
1357                        addend: *addend,
1358                        trailing_bytes: 0,
1359                    };
1360                    // Use add_encoded which will make a Fixed fragment
1361                    self.linker
1362                        .add_encoded(slot, Some(reloc), None, data.span)?;
1363                }
1364            }
1365        }
1366
1367        // Flush remaining bytes
1368        if !pending.is_empty() {
1369            self.linker.add_bytes(pending, data.span);
1370        }
1371
1372        Ok(())
1373    }
1374}
1375
1376/// Parse source text into statements.
1377fn parse_source(source: &str, arch: Arch, syntax: Syntax) -> Result<Vec<Statement>, AsmError> {
1378    let tokens = lexer::tokenize(source)?;
1379    parser::parse_with_syntax(&tokens, arch, syntax)
1380}
1381
1382/// Extract the source text for a span from the original source string.
1383///
1384/// Returns the trimmed text of the line containing the span, or a brief
1385/// fallback if the span is out-of-range.
1386fn extract_source_line(source: &str, span: Span) -> &str {
1387    let offset = span.offset;
1388    if offset >= source.len() {
1389        return "";
1390    }
1391    // Find the start of the line containing this span
1392    let line_start = source[..offset].rfind('\n').map_or(0, |p| p + 1);
1393    // Find the end of the line
1394    let line_end = source[offset..]
1395        .find('\n')
1396        .map_or(source.len(), |p| offset + p);
1397    source[line_start..line_end].trim()
1398}
1399
1400#[cfg(test)]
1401mod tests {
1402    use super::*;
1403
1404    // === One-Shot API ===
1405
1406    #[test]
1407    fn assemble_nop() {
1408        let mut asm = Assembler::new(Arch::X86_64);
1409        asm.emit("nop").unwrap();
1410        let result = asm.finish().unwrap();
1411        assert_eq!(result.bytes(), &[0x90]);
1412    }
1413
1414    #[test]
1415    fn assemble_ret() {
1416        let mut asm = Assembler::new(Arch::X86_64);
1417        asm.emit("ret").unwrap();
1418        let result = asm.finish().unwrap();
1419        assert_eq!(result.bytes(), &[0xC3]);
1420    }
1421
1422    #[test]
1423    fn assemble_multiple_instructions() {
1424        let mut asm = Assembler::new(Arch::X86_64);
1425        asm.emit("nop\nret").unwrap();
1426        let result = asm.finish().unwrap();
1427        assert_eq!(result.bytes(), &[0x90, 0xC3]);
1428    }
1429
1430    #[test]
1431    fn assemble_push_pop() {
1432        let mut asm = Assembler::new(Arch::X86_64);
1433        asm.emit("push rbp").unwrap();
1434        asm.emit("mov rbp, rsp").unwrap();
1435        asm.emit("pop rbp").unwrap();
1436        asm.emit("ret").unwrap();
1437        let result = asm.finish().unwrap();
1438        let bytes = result.bytes();
1439        assert_eq!(bytes[0], 0x55); // push rbp
1440        assert_eq!(*bytes.last().unwrap(), 0xC3); // ret
1441    }
1442
1443    #[test]
1444    fn assemble_with_label() {
1445        let mut asm = Assembler::new(Arch::X86_64);
1446        asm.emit("jmp target\ntarget:\nnop").unwrap();
1447        let result = asm.finish().unwrap();
1448        let bytes = result.bytes();
1449        // Branch relaxation: short form EB rel8 since target is right after
1450        assert_eq!(bytes[0], 0xEB); // jmp rel8
1451        assert_eq!(bytes[1], 0x00); // rel8 = 0
1452        assert_eq!(bytes[2], 0x90); // nop
1453    }
1454
1455    #[test]
1456    fn assemble_backward_jump() {
1457        let mut asm = Assembler::new(Arch::X86_64);
1458        asm.emit("loop_start:\nnop\njmp loop_start").unwrap();
1459        let result = asm.finish().unwrap();
1460        let bytes = result.bytes();
1461        assert_eq!(bytes[0], 0x90); // nop
1462                                    // Branch relaxation: short form EB rel8
1463        assert_eq!(bytes[1], 0xEB); // jmp rel8
1464                                    // target=0, frag_end=1+2=3, disp=0-3=-3=0xFD
1465        assert_eq!(bytes[2], 0xFD);
1466    }
1467
1468    #[test]
1469    fn assemble_conditional_jump() {
1470        let mut asm = Assembler::new(Arch::X86_64);
1471        asm.emit("cmp rax, 0\nje done\nnop\ndone:\nret").unwrap();
1472        let result = asm.finish().unwrap();
1473        let bytes = result.bytes();
1474        // Should contain: cmp, je, nop, ret
1475        assert!(!bytes.is_empty());
1476        // Last byte should be ret
1477        assert_eq!(*bytes.last().unwrap(), 0xC3);
1478    }
1479
1480    #[test]
1481    fn assemble_xor_self() {
1482        let mut asm = Assembler::new(Arch::X86_64);
1483        asm.emit("xor eax, eax").unwrap();
1484        let result = asm.finish().unwrap();
1485        assert_eq!(result.bytes(), &[0x31, 0xC0]);
1486    }
1487
1488    #[test]
1489    fn assemble_syscall_stub() {
1490        let mut asm = Assembler::new(Arch::X86_64);
1491        asm.emit("mov eax, 60\nxor edi, edi\nsyscall").unwrap();
1492        let result = asm.finish().unwrap();
1493        let bytes = result.bytes();
1494        // mov eax, 60 → B8 3C 00 00 00
1495        assert_eq!(&bytes[0..5], &[0xB8, 0x3C, 0x00, 0x00, 0x00]);
1496        // Last 2 bytes: syscall → 0F 05
1497        assert_eq!(&bytes[bytes.len() - 2..], &[0x0F, 0x05]);
1498    }
1499
1500    // === Builder API ===
1501
1502    #[test]
1503    fn builder_api() {
1504        let mut asm = Assembler::new(Arch::X86_64);
1505        asm.emit("push rbp").unwrap();
1506        asm.db(&[0xCC]).unwrap(); // int3
1507        asm.emit("pop rbp").unwrap();
1508        asm.emit("ret").unwrap();
1509        let result = asm.finish().unwrap();
1510        let bytes = result.bytes();
1511        assert_eq!(bytes[0], 0x55); // push rbp
1512        assert_eq!(bytes[1], 0xCC); // int3
1513    }
1514
1515    #[test]
1516    fn builder_label() {
1517        let mut asm = Assembler::new(Arch::X86_64);
1518        asm.emit("jmp target").unwrap();
1519        asm.label("target").unwrap();
1520        asm.emit("ret").unwrap();
1521        let result = asm.finish().unwrap();
1522        let bytes = result.bytes();
1523        // Short form: EB 00 C3
1524        assert_eq!(bytes[0], 0xEB);
1525        assert_eq!(*bytes.last().unwrap(), 0xC3);
1526    }
1527
1528    #[test]
1529    fn builder_data_words() {
1530        let mut asm = Assembler::new(Arch::X86_64);
1531        asm.dw(0x1234).unwrap();
1532        asm.dd(0xDEADBEEF).unwrap();
1533        let result = asm.finish().unwrap();
1534        let bytes = result.bytes();
1535        assert_eq!(&bytes[0..2], &[0x34, 0x12]);
1536        assert_eq!(&bytes[2..6], &[0xEF, 0xBE, 0xAD, 0xDE]);
1537    }
1538
    // === Data Directives ===

    #[test]
    fn assemble_byte_directive() {
        // .byte emits each comma-separated operand as one byte, in order.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit(".byte 0x90, 0xCC, 0xC3").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x90, 0xCC, 0xC3]);
    }

    #[test]
    fn assemble_word_directive() {
        // .word emits a 16-bit value in little-endian byte order.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit(".word 0x1234").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x34, 0x12]);
    }

    #[test]
    fn assemble_asciz_directive() {
        // .asciz appends a NUL terminator after the string bytes.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit(".asciz \"hello\"").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), b"hello\0");
    }

    #[test]
    fn assemble_equ_constant() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit(".equ EXIT, 60\nmov eax, EXIT").unwrap();
        // Note: constants are resolved at link time if referenced by label
        let _result = asm.finish();
        // This test mainly verifies parsing succeeds
    }

    #[test]
    fn assemble_fill_directive() {
        // .fill repeat, size, value — here: 3 one-byte units of 0x90.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit(".fill 3, 1, 0x90").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x90, 0x90, 0x90]);
    }

    #[test]
    fn assemble_space_directive() {
        // .space N reserves N zero bytes.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit(".space 4").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0, 0, 0, 0]);
    }
1589
    // === Error Cases ===

    #[test]
    fn unknown_mnemonic_error() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("foobar").unwrap(); // error is collected, not fail-fast
        let err = asm.finish().unwrap_err();
        assert!(matches!(err, AsmError::UnknownMnemonic { .. }));
    }

    #[test]
    fn duplicate_label_error() {
        // Duplicate label definitions are rejected eagerly, at emit() time.
        let mut asm = Assembler::new(Arch::X86_64);
        let err = asm.emit("foo:\nfoo:").unwrap_err();
        assert!(matches!(err, AsmError::DuplicateLabel { .. }));
    }

    #[test]
    fn undefined_label_error() {
        // Unresolved label references surface at finish() (link time).
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("jmp nowhere").unwrap();
        let err = asm.finish().unwrap_err();
        assert!(matches!(err, AsmError::UndefinedLabel { .. }));
    }
1614
    // === External Labels ===

    #[test]
    fn assemble_with_external() {
        // Externally-defined symbols resolve to their registered address.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.define_external("printf", 0x400000);
        asm.emit("mov rax, printf").unwrap();
        let result = asm.finish().unwrap();
        let bytes = result.bytes();
        // movabs rax, imm64 with printf address
        assert_eq!(&bytes[bytes.len() - 8..], &0x400000u64.to_le_bytes());
    }

    // === Base Address ===

    #[test]
    fn assemble_with_base_address() {
        // The base address shifts label values but not the emitted bytes.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.base_address(0x1000);
        asm.emit("nop").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x90]);
    }
1638
    // === Complex Programs ===

    #[test]
    fn assemble_loop() {
        // Backward branch (jnz) to a label defined earlier in the source.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit(
            r#"
            mov ecx, 10
        loop_start:
            dec ecx
            jnz loop_start
            ret
        "#,
        )
        .unwrap();
        let result = asm.finish().unwrap();
        assert!(!result.is_empty());
        assert_eq!(*result.bytes().last().unwrap(), 0xC3);
    }

    #[test]
    fn assemble_function_prologue_epilogue() {
        // A standard x86-64 frame setup/teardown assembles end to end.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit(
            r#"
            push rbp
            mov rbp, rsp
            sub rsp, 0x20
            add rsp, 0x20
            pop rbp
            ret
        "#,
        )
        .unwrap();
        let result = asm.finish().unwrap();
        let bytes = result.bytes();
        assert_eq!(bytes[0], 0x55); // push rbp
        assert_eq!(*bytes.last().unwrap(), 0xC3); // ret
    }

    #[test]
    fn result_length() {
        // len() reports the number of assembled bytes (3 × 1-byte nop).
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("nop\nnop\nnop").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn result_into_bytes() {
        // into_bytes() consumes the result and yields the owned Vec.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("ret").unwrap();
        let result = asm.finish().unwrap();
        let bytes = result.into_bytes();
        assert_eq!(bytes, vec![0xC3]);
    }
1695
    // === Semicolon Separated ===

    #[test]
    fn semicolon_separated_instructions() {
        // `;` works as a statement separator, same as a newline.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("nop; nop; ret").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x90, 0x90, 0xC3]);
    }

    // === Labels export ===

    #[test]
    fn labels_returned() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("start:\nnop\nnop\nend:\nret").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.label_address("start"), Some(0));
        // nop=1B, nop=1B → end is at offset 2
        assert_eq!(result.label_address("end"), Some(2));
    }

    #[test]
    fn labels_with_base_address() {
        // Label addresses are absolute: base address + offset.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.base_address(0x400000);
        asm.emit("entry:\nnop").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.label_address("entry"), Some(0x400000));
    }

    #[test]
    fn builder_label_address() {
        // label() defines a label at the current output position.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.label("before").unwrap();
        asm.emit("nop; nop; nop").unwrap();
        asm.label("after").unwrap();
        asm.emit("ret").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.label_address("before"), Some(0));
        assert_eq!(result.label_address("after"), Some(3));
    }

    // === Syntax / OptLevel builder ===

    #[test]
    fn builder_syntax_and_optimize() {
        // Smoke test: syntax/optimize setters don't break trivial input.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.syntax(Syntax::Intel);
        asm.optimize(OptLevel::Size);
        asm.emit("nop").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x90]);
    }
1750
    // === define_constant builder ===

    #[test]
    fn builder_define_constant() {
        // define_constant() makes EXIT usable as an immediate operand.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.define_constant("EXIT", 60);
        asm.emit("mov eax, EXIT").unwrap();
        let result = asm.finish().unwrap();
        // mov eax, 60 → B8 3C 00 00 00
        assert_eq!(result.bytes(), &[0xB8, 0x3C, 0x00, 0x00, 0x00]);
    }

    // === Branch relaxation observable from public API ===

    #[test]
    fn short_branch_uses_rel8() {
        // An in-range forward branch relaxes down to the 2-byte rel8 form.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("je done\ndone:\nret").unwrap();
        let result = asm.finish().unwrap();
        // je rel8 = 74 00, ret = C3
        assert_eq!(result.bytes(), &[0x74, 0x00, 0xC3]);
    }

    // === Data label references ===

    #[test]
    fn quad_label_reference() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.base_address(0x1000);
        asm.emit("func:\nnop\nret\njump_table:\n.quad func")
            .unwrap();
        let result = asm.finish().unwrap();
        let bytes = result.bytes();
        // func is at 0x1000, nop=1, ret=1, so jump_table at 0x1002
        // .quad func → should contain 0x1000 as a u64 LE
        let qw = u64::from_le_bytes(bytes[2..10].try_into().unwrap());
        assert_eq!(qw, 0x1000);
    }

    #[test]
    fn long_label_reference() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.base_address(0x2000);
        asm.emit("entry:\nnop\n.long entry").unwrap();
        let result = asm.finish().unwrap();
        let bytes = result.bytes();
        // entry at 0x2000, nop=1B, .long at offset 1 → value is 0x2000
        let dw = u32::from_le_bytes(bytes[1..5].try_into().unwrap());
        assert_eq!(dw, 0x2000);
    }

    #[test]
    fn name_equals_constant_in_instruction() {
        // `NAME = value` is the GAS-style alternative to .equ.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("ANSWER = 42\nmov eax, ANSWER").unwrap();
        let result = asm.finish().unwrap();
        // mov eax, 42 → B8 2A 00 00 00
        assert_eq!(result.bytes(), &[0xB8, 0x2A, 0x00, 0x00, 0x00]);
    }
1810
    // === Listing output ===

    #[test]
    fn listing_simple() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.enable_listing();
        asm.emit("nop\nret").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        // Address column is zero-padded hex starting at offset 0.
        assert!(listing.contains("00000000"));
        assert!(listing.contains("90")); // nop
        assert!(listing.contains("C3")); // ret
        // Source text annotations
        assert!(listing.contains("nop"));
        assert!(listing.contains("ret"));
    }

    #[test]
    fn listing_with_labels() {
        // Labels appear in the listing with their trailing colon.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.enable_listing();
        asm.emit("start:\nnop\nend:\nret").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        assert!(listing.contains("start:"));
        assert!(listing.contains("end:"));
        // Source text with instructions
        assert!(listing.contains("nop"));
        assert!(listing.contains("ret"));
    }

    #[test]
    fn listing_with_base_address() {
        // Listing addresses reflect the configured base address.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.enable_listing();
        asm.base_address(0x401000);
        asm.emit("nop").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        assert!(listing.contains("00401000"));
        assert!(listing.contains("nop"));
    }

    #[test]
    fn listing_base_address_accessor() {
        // base_address() on the result echoes the configured base.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.base_address(0x1000);
        asm.emit("nop").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.base_address(), 0x1000);
    }

    #[test]
    fn listing_hex_format() {
        // Machine-code bytes are rendered as contiguous uppercase hex.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.enable_listing();
        asm.emit("push rbp\nmov rbp, rsp").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        // push rbp = 55
        assert!(listing.contains("55"));
        // mov rbp, rsp = 48 89 E5
        assert!(listing.contains("4889E5"));
        // Source text appears
        assert!(listing.contains("push rbp"));
        assert!(listing.contains("mov rbp, rsp"));
    }

    #[test]
    fn listing_source_annotations() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.enable_listing();
        asm.emit("mov eax, 1\nadd eax, 2\nret").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        // Each line should have source text
        assert!(listing.contains("mov eax, 1"));
        assert!(listing.contains("add eax, 2"));
        assert!(listing.contains("ret"));
    }

    #[test]
    fn listing_data_annotation() {
        // Data directives carry their source text in the listing too.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.enable_listing();
        asm.emit(".byte 0x90, 0xCC").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        assert!(listing.contains(".byte 0x90, 0xCC"));
    }
1901
    // === Relocations ===

    #[test]
    fn relocations_returned() {
        // Label references produce relocation records in the result.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("jmp target\nnop\ntarget:\nret").unwrap();
        let result = asm.finish().unwrap();
        assert!(!result.relocations().is_empty());
        assert_eq!(result.relocations()[0].label, "target");
    }

    #[test]
    fn relocations_for_call() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("call func\nfunc:\nret").unwrap();
        let result = asm.finish().unwrap();
        let relocs = result.relocations();
        assert!(!relocs.is_empty());
        assert_eq!(relocs[0].label, "func");
        // call uses a PC-relative relocation kind on x86-64.
        assert_eq!(relocs[0].kind, crate::encoder::RelocKind::X86Relative);
    }
1923
    // === Builder convenience methods ===

    #[test]
    fn builder_ascii() {
        // ascii() emits the raw string bytes with no terminator.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.ascii("AB").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x41, 0x42]);
    }

    #[test]
    fn builder_asciz() {
        // asciz() appends a NUL terminator.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.asciz("Hi").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x48, 0x69, 0x00]);
    }

    #[test]
    fn builder_align() {
        // Default alignment padding is zero bytes.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.db(&[0x90]).unwrap(); // 1 byte
        asm.align(4); // pad to 4-byte boundary
        asm.db(&[0xCC]).unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes().len(), 5); // 1 + 3 padding + 1
        assert_eq!(result.bytes()[4], 0xCC);
    }

    #[test]
    fn builder_align_with_fill() {
        // Padding bytes take the caller-supplied fill value (0xAA).
        let mut asm = Assembler::new(Arch::X86_64);
        asm.db(&[0x90]).unwrap();
        asm.align_with_fill(4, 0xAA);
        asm.db(&[0xCC]).unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes()[1], 0xAA);
        assert_eq!(result.bytes()[2], 0xAA);
        assert_eq!(result.bytes()[3], 0xAA);
    }

    #[test]
    fn builder_org() {
        // org() advances the output cursor, zero-filling the gap by default.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.db(&[0x90]).unwrap();
        asm.org(4);
        asm.db(&[0xCC]).unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x90, 0x00, 0x00, 0x00, 0xCC]);
    }

    #[test]
    fn builder_org_with_fill() {
        // org_with_fill() pads the gap with the given byte instead of zero.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.db(&[0x90]).unwrap();
        asm.org_with_fill(4, 0xFF);
        asm.db(&[0xCC]).unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x90, 0xFF, 0xFF, 0xFF, 0xCC]);
    }

    #[test]
    fn builder_fill() {
        // .fill 3, 2, 0xAB → 3 units of 2 bytes each, value=0xAB as LE integer
        // Each unit: [0xAB, 0x00] (LE encoding of 0xAB in 2 bytes)
        let mut asm = Assembler::new(Arch::X86_64);
        asm.fill(3, 2, 0xAB).unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0xAB, 0x00, 0xAB, 0x00, 0xAB, 0x00]);
    }

    #[test]
    fn builder_fill_size_1() {
        // .fill 4, 1, 0xCC → 4 units of 1 byte, value=0xCC → simple fill
        let mut asm = Assembler::new(Arch::X86_64);
        asm.fill(4, 1, 0xCC).unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0xCC, 0xCC, 0xCC, 0xCC]);
    }

    #[test]
    fn builder_fill_multi_byte_value() {
        // .fill 1, 4, 0xDEADBEEF → 1 unit of 4 bytes, value=0xDEADBEEF in LE
        let mut asm = Assembler::new(Arch::X86_64);
        asm.fill(1, 4, 0xDEADBEEFu32 as i64).unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0xEF, 0xBE, 0xAD, 0xDE]);
    }

    #[test]
    fn builder_fill_16bit_value() {
        // .fill 2, 2, 0x1234 → 2 units of 2 bytes
        let mut asm = Assembler::new(Arch::X86_64);
        asm.fill(2, 2, 0x1234).unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x34, 0x12, 0x34, 0x12]);
    }

    #[test]
    fn builder_space() {
        // space(N) reserves N zero bytes.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.space(4).unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x00, 0x00, 0x00, 0x00]);
    }
2029
    // === Listing annotations for directives ===

    #[test]
    fn listing_fill_annotation() {
        // Directive source text is echoed verbatim into the listing.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.enable_listing();
        asm.emit(".fill 2, 1, 0x90").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        assert!(listing.contains(".fill 2, 1, 0x90"));
    }

    #[test]
    fn listing_space_annotation() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.enable_listing();
        asm.emit(".space 4").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        assert!(listing.contains(".space 4"));
    }

    #[test]
    fn listing_align_annotation() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.enable_listing();
        asm.emit("nop\n.align 4\nnop").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        assert!(listing.contains(".align 4"));
    }

    #[test]
    fn listing_org_annotation() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.enable_listing();
        asm.emit("nop\n.org 0x10\nnop").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        assert!(listing.contains(".org 0x10"));
    }

    // === .org fill byte ===

    #[test]
    fn org_with_fill_byte() {
        // .org with a second operand uses it as the pad byte.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("nop\n.org 0x04, 0xFF\nnop").unwrap();
        let result = asm.finish().unwrap();
        // nop (0x90) + 3 fill bytes (0xFF) + nop (0x90)
        assert_eq!(result.bytes(), &[0x90, 0xFF, 0xFF, 0xFF, 0x90]);
    }
2082
    // === AT&T Syntax ===

    #[test]
    fn att_syntax_basic() {
        // AT&T operand order: source first ($imm), destination second (%reg).
        let mut asm = Assembler::new(Arch::X86_64);
        asm.syntax(Syntax::Att);
        asm.emit("movq $1, %rax").unwrap();
        let result = asm.finish().unwrap();
        // mov rax, 1 → optimizer narrows to mov eax, 1 = B8 01 00 00 00
        assert_eq!(result.bytes(), &[0xB8, 0x01, 0x00, 0x00, 0x00]);
    }
2094
    // === Resource Limits ===

    #[test]
    fn resource_limit_max_statements() {
        // The statement count is cumulative across emit() calls.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.limits(ResourceLimits {
            max_statements: 3,
            ..ResourceLimits::default()
        });
        // 3 statements: ok
        asm.emit("nop; nop; nop").unwrap();
        // 2 more statements: total = 5 > 3, should fail
        let err = asm.emit("nop; nop").unwrap_err();
        match err {
            AsmError::ResourceLimitExceeded { resource, limit } => {
                assert_eq!(resource, "statements");
                assert_eq!(limit, 3);
            }
            other => panic!("expected ResourceLimitExceeded, got: {other:?}"),
        }
    }

    #[test]
    fn resource_limit_max_labels() {
        // Labels added via the label() builder count toward max_labels.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.limits(ResourceLimits {
            max_labels: 2,
            ..ResourceLimits::default()
        });
        asm.label("a").unwrap();
        asm.label("b").unwrap();
        let err = asm.label("c").unwrap_err();
        match err {
            AsmError::ResourceLimitExceeded { resource, limit } => {
                assert_eq!(resource, "labels");
                assert_eq!(limit, 2);
            }
            other => panic!("expected ResourceLimitExceeded, got: {other:?}"),
        }
    }

    #[test]
    fn resource_limit_max_labels_via_emit() {
        // Labels defined in emitted source count toward max_labels too.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.limits(ResourceLimits {
            max_labels: 1,
            ..ResourceLimits::default()
        });
        asm.emit("a: nop").unwrap();
        let err = asm.emit("b: nop").unwrap_err();
        match err {
            AsmError::ResourceLimitExceeded { resource, limit } => {
                assert_eq!(resource, "labels");
                assert_eq!(limit, 1);
            }
            other => panic!("expected ResourceLimitExceeded, got: {other:?}"),
        }
    }

    #[test]
    fn resource_limit_max_output_bytes() {
        // Exactly at the limit is allowed; one byte over is rejected.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.limits(ResourceLimits {
            max_output_bytes: 4,
            ..ResourceLimits::default()
        });
        asm.emit("nop; nop; nop; nop").unwrap(); // 4 bytes = exactly at limit: ok
        let result = asm.finish();
        assert!(result.is_ok());

        // With eager checking, the limit is now caught at emit() time
        let mut asm2 = Assembler::new(Arch::X86_64);
        asm2.limits(ResourceLimits {
            max_output_bytes: 3,
            ..ResourceLimits::default()
        });
        let err = asm2.emit("nop; nop; nop; nop").unwrap_err(); // 4 bytes > 3: fail at emit
        match err {
            AsmError::ResourceLimitExceeded { resource, limit } => {
                assert_eq!(resource, "output bytes");
                assert_eq!(limit, 3);
            }
            other => panic!("expected ResourceLimitExceeded, got: {other:?}"),
        }
    }
2180
2181    #[test]
2182    fn resource_limits_default_does_not_interfere() {
2183        // Default limits should be generous enough for normal use
2184        let mut asm = Assembler::new(Arch::X86_64);
2185        // Emit a lot of instructions at once
2186        let source: String = (0..1000).map(|_| "nop; ").collect();
2187        asm.emit(&source).unwrap();
2188        let result = asm.finish().unwrap();
2189        assert_eq!(result.len(), 1000);
2190    }
2191
2192    #[test]
2193    fn resource_limit_max_recursion_depth() {
2194        let mut asm = Assembler::new(Arch::X86_64);
2195        asm.limits(ResourceLimits {
2196            max_recursion_depth: 3,
2197            ..ResourceLimits::default()
2198        });
2199        // A macro that calls itself — should hit the recursion limit quickly
2200        let result = asm.emit(".macro boom\nboom\n.endm\nboom");
2201        assert!(result.is_err());
2202        let err = result.unwrap_err();
2203        match err {
2204            AsmError::ResourceLimitExceeded { resource, limit } => {
2205                assert_eq!(resource, "macro recursion depth");
2206                assert_eq!(limit, 3);
2207            }
2208            _ => panic!("expected ResourceLimitExceeded, got {:?}", err),
2209        }
2210    }
2211
    // ─── encode_one ────────────────────────────────────────────────

    #[test]
    fn encode_one_nop() {
        // encode_one() assembles a single statement on an immutable receiver.
        let asm = Assembler::new(Arch::X86_64);
        let bytes = asm.encode_one("nop").unwrap();
        assert_eq!(bytes, alloc::vec![0x90]);
    }

    #[test]
    fn encode_one_ret() {
        let asm = Assembler::new(Arch::X86_64);
        let bytes = asm.encode_one("ret").unwrap();
        assert_eq!(bytes, alloc::vec![0xC3]);
    }

    #[test]
    fn encode_one_empty_input() {
        // Empty input is valid and encodes to zero bytes.
        let asm = Assembler::new(Arch::X86_64);
        let bytes = asm.encode_one("").unwrap();
        assert!(bytes.is_empty());
    }

    #[test]
    fn encode_one_rejects_label() {
        // A label definition is an error for encode_one — instructions only.
        let asm = Assembler::new(Arch::X86_64);
        assert!(asm.encode_one("foo:").is_err());
    }

    #[test]
    fn encode_one_does_not_affect_state() {
        let asm = Assembler::new(Arch::X86_64);
        let _ = asm.encode_one("nop").unwrap();
        // Finish should produce empty output since encode_one doesn't
        // add to internal state
        let result = asm.finish().unwrap();
        assert!(result.is_empty());
    }
2250
    // ─── define_preprocessor_symbol ────────────────────────────────

    #[test]
    fn define_preprocessor_symbol_ifdef() {
        // The .ifdef body is kept because DEBUG was pre-defined.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.define_preprocessor_symbol("DEBUG", 1);
        asm.emit(".ifdef DEBUG\nnop\n.endif").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0x90]);
    }

    #[test]
    fn define_preprocessor_symbol_skipped_when_missing() {
        let mut asm = Assembler::new(Arch::X86_64);
        // DEBUG is NOT defined — block should be skipped
        asm.emit(".ifdef DEBUG\nnop\n.endif\nret").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0xC3]); // only ret
    }

    // ─── dq (64-bit data) ─────────────────────────────────────────

    #[test]
    fn builder_dq() {
        // dq() emits a 64-bit little-endian quadword.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.dq(0xDEAD_BEEF_CAFE_BABE).unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &0xDEAD_BEEF_CAFE_BABEu64.to_le_bytes());
    }
2280
    // ─── reset ────────────────────────────────────────────────────

    #[test]
    fn reset_clears_state_keeps_config() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("nop").unwrap();
        asm.reset();
        asm.emit("ret").unwrap();
        let result = asm.finish().unwrap();
        // Only "ret" should be present — "nop" was cleared
        assert_eq!(result.bytes(), &[0xC3]);
    }

    #[test]
    fn reset_allows_reuse() {
        // NOTE(review): effectively duplicates reset_clears_state_keeps_config;
        // consider extending it to assert config retention (e.g. base_address
        // surviving reset) so the two tests cover distinct behavior.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("nop").unwrap();
        // Reset discards the nop, then assemble fresh
        asm.reset();
        asm.emit("ret").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.bytes(), &[0xC3]);
    }

    // ─── current_fragment_count ───────────────────────────────────

    #[test]
    fn current_fragment_count_tracks_emissions() {
        // Fragment count starts at zero and grows after an emit().
        let mut asm = Assembler::new(Arch::X86_64);
        assert_eq!(asm.current_fragment_count(), 0);
        asm.emit("nop").unwrap();
        assert!(asm.current_fragment_count() > 0);
    }

    // ─── is_empty ─────────────────────────────────────────────────

    #[test]
    fn empty_assembly_result() {
        // finish() on a fresh assembler yields an empty, zero-length result.
        let asm = Assembler::new(Arch::X86_64);
        let result = asm.finish().unwrap();
        assert!(result.is_empty());
        assert_eq!(result.len(), 0);
        assert!(result.bytes().is_empty());
    }
2325
    // ─── labels() direct access ───────────────────────────────────

    #[test]
    fn labels_slice_access() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("start:\nnop\nend:\nret").unwrap();
        let result = asm.finish().unwrap();
        let labels = result.labels();
        // Two labels defined
        assert_eq!(labels.len(), 2);
        // Check both are present (order may vary)
        assert!(labels.iter().any(|(name, _)| name == "start"));
        assert!(labels.iter().any(|(name, _)| name == "end"));
    }

    // ─── assemble_with externals ──────────────────────────────────

    #[test]
    fn assemble_with_external_labels() {
        // One-shot API: source + arch + base address + external symbols.
        use crate::assemble_with;
        let bytes =
            assemble_with("call target", Arch::X86_64, 0x1000, &[("target", 0x2000)]).unwrap();
        // call rel32: E8 xx xx xx xx — target is at 0x2000, PC after call = 0x1000+5 = 0x1005
        // rel32 = 0x2000 - 0x1005 = 0x0FFB
        assert_eq!(bytes[0], 0xE8);
        let rel = i32::from_le_bytes(bytes[1..5].try_into().unwrap());
        assert_eq!(rel, 0x0FFB);
    }

    // ─── multiple errors (AsmError::Multiple) ────────────────────

    #[test]
    fn multiple_errors_collected() {
        let mut asm = Assembler::new(Arch::X86_64);
        // Emit multiple bad mnemonics — errors are collected, not fail-fast
        asm.emit("badmnem1\nbadmnem2").unwrap();
        let err = asm.finish().unwrap_err();
        match err {
            AsmError::Multiple { errors } => assert_eq!(errors.len(), 2),
            _ => panic!("expected Multiple error, got: {err}"),
        }
    }
2368
    // ─── optimizer no-op for non-x86 ─────────────────────────────

    #[cfg(feature = "arm")]
    #[test]
    fn optimizer_noop_for_arm() {
        let mut asm = Assembler::new(Arch::Arm);
        // ARM mov r0, 0 should NOT be optimized to xor (that's x86-only)
        asm.emit("mov r0, 0").unwrap();
        let result = asm.finish().unwrap();
        // ARM "mov r0, #0" encodes as: E3A00000 (condition AL, MOV, Rd=0, imm=0)
        assert_eq!(result.len(), 4);
        assert_eq!(result.bytes(), &[0x00, 0x00, 0xA0, 0xE3]);
    }

    // ─── .org directive through assembler pipeline ───────────────

    #[test]
    fn org_directive_via_emit() {
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("nop\n.org 0x10\nnop").unwrap();
        let result = asm.finish().unwrap();
        // nop (1 byte) + padding to 0x10 (15 zero bytes) + nop (1 byte) = 17 bytes
        assert_eq!(result.len(), 17);
        assert_eq!(result.bytes()[0], 0x90); // first nop
        assert_eq!(result.bytes()[0x10], 0x90); // nop at offset 0x10
        // bytes 1..0x10 should be zero-fill
        for &b in &result.bytes()[1..0x10] {
            assert_eq!(b, 0x00);
        }
    }
2399
    // ─── listing output ──────────────────────────────────────────

    #[test]
    fn listing_includes_label_and_hex() {
        // Broad smoke check on listing content without pinning exact layout.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("start:\nnop\nret").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        // Listing should contain the label name
        assert!(
            listing.contains("start"),
            "listing should contain label 'start'"
        );
        // Listing should contain hex bytes (90 = nop, C3 = ret)
        assert!(
            listing.contains("90"),
            "listing should contain '90' for nop"
        );
        assert!(
            listing.contains("C3") || listing.contains("c3"),
            "listing should contain 'C3' for ret"
        );
    }

    #[test]
    fn listing_with_base_address_format() {
        // Accepts either padded or unpadded rendering of the base address.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.base_address(0x401000);
        asm.emit("nop\nret").unwrap();
        let result = asm.finish().unwrap();
        let listing = result.listing();
        // Should include the base address in the listing
        assert!(
            listing.contains("00401000") || listing.contains("401000"),
            "listing should contain base address"
        );
    }

    // ─── .org via builder method ─────────────────────────────────

    #[test]
    fn org_builder_method() {
        // Same layout as org_directive_via_emit, via the org() builder call.
        let mut asm = Assembler::new(Arch::X86_64);
        asm.emit("nop").unwrap();
        asm.org(0x10);
        asm.emit("nop").unwrap();
        let result = asm.finish().unwrap();
        assert_eq!(result.len(), 17); // 1 + 15 padding + 1
    }
2449
2450    // ─── JECXZ relaxation (was BranchOutOfRange before relaxation support) ──
2451
2452    #[test]
2453    fn jecxz_relaxes_to_long_form() {
2454        // JECXZ targets beyond ±127 bytes now auto-relax to the compound
2455        // sequence: JECXZ +2 / JMP short +5 / JMP near rel32
2456        let mut asm = Assembler::new(Arch::X86_64);
2457        asm.emit("jecxz target").unwrap();
2458        asm.space(200).unwrap(); // 200 bytes > 127 (rel8 max)
2459        asm.emit("target:\nnop").unwrap();
2460        let result = asm.finish().unwrap();
2461        // Long form starts with 67 E3 02 EB 05 E9 [rel32]
2462        assert_eq!(result.bytes[0], 0x67);
2463        assert_eq!(result.bytes[1], 0xE3);
2464        assert_eq!(result.bytes[2], 0x02);
2465        assert_eq!(result.bytes[3], 0xEB);
2466        assert_eq!(result.bytes[4], 0x05);
2467        assert_eq!(result.bytes[5], 0xE9);
2468        // Target is at offset 10+200 = 210, RIP after JMP = 10
2469        // disp = 210 - 10 = 200 = 0xC8
2470        assert_eq!(result.bytes[6], 0xC8);
2471        assert_eq!(result.bytes[7], 0x00);
2472        assert_eq!(result.bytes[8], 0x00);
2473        assert_eq!(result.bytes[9], 0x00);
2474    }
2475
2476    #[test]
2477    fn jecxz_relaxes_to_short_form_when_near() {
2478        // JECXZ targets within ±127 bytes relax to the compact 67 E3 rel8
2479        let mut asm = Assembler::new(Arch::X86_64);
2480        asm.emit("jecxz target").unwrap();
2481        asm.emit("target:\nnop").unwrap();
2482        let result = asm.finish().unwrap();
2483        // Short form: 67 E3 rel8 (3 bytes)
2484        assert_eq!(result.bytes[0], 0x67);
2485        assert_eq!(result.bytes[1], 0xE3);
2486        // rel8 = 0 (target is immediately after the instruction)
2487        assert_eq!(result.bytes[2], 0x00);
2488        assert_eq!(result.bytes[3], 0x90); // NOP
2489    }
2490
2491    // ─── error collection: single error yields single, not Multiple ─
2492
2493    #[test]
2494    fn single_error_not_wrapped_in_multiple() {
2495        let mut asm = Assembler::new(Arch::X86_64);
2496        asm.emit("badmnem").unwrap();
2497        let err = asm.finish().unwrap_err();
2498        // A single encoding error should be returned directly, not wrapped
2499        assert!(matches!(err, AsmError::UnknownMnemonic { .. }));
2500    }
2501
2502    // ─── error collection: good + bad instructions ───────────────
2503
2504    #[test]
2505    fn errors_collected_with_valid_instructions() {
2506        let mut asm = Assembler::new(Arch::X86_64);
2507        // Mix valid and invalid instructions — valid ones still get encoded
2508        asm.emit("nop\nbadmnem\nret").unwrap();
2509        let err = asm.finish().unwrap_err();
2510        // Should be a single UnknownMnemonic (only 1 bad instruction)
2511        assert!(matches!(err, AsmError::UnknownMnemonic { .. }));
2512    }
2513
2514    // ─── error collection across multiple emit() calls ───────────
2515
2516    #[test]
2517    fn errors_collected_across_emit_calls() {
2518        let mut asm = Assembler::new(Arch::X86_64);
2519        asm.emit("bad1").unwrap();
2520        asm.emit("bad2").unwrap();
2521        asm.emit("bad3").unwrap();
2522        let err = asm.finish().unwrap_err();
2523        match err {
2524            AsmError::Multiple { errors } => assert_eq!(errors.len(), 3),
2525            _ => panic!("expected Multiple error with 3 errors, got: {err}"),
2526        }
2527    }
2528
2529    // ─── reset clears collected errors ───────────────────────────
2530
2531    #[test]
2532    fn reset_clears_errors() {
2533        let mut asm = Assembler::new(Arch::X86_64);
2534        asm.emit("badmnem").unwrap();
2535        asm.reset();
2536        asm.emit("nop").unwrap();
2537        let result = asm.finish().unwrap();
2538        assert_eq!(result.bytes(), &[0x90]);
2539    }
2540
2541    // ─── max_errors resource limit ───────────────────────────────
2542
2543    #[test]
2544    fn max_errors_limit_enforced() {
2545        let mut asm = Assembler::new(Arch::X86_64);
2546        asm.limits(ResourceLimits {
2547            max_errors: 2,
2548            ..ResourceLimits::default()
2549        });
2550        // Third bad mnemonic should trigger ResourceLimitExceeded
2551        let result = asm.emit("bad1\nbad2\nbad3");
2552        assert!(result.is_err());
2553        let err = result.unwrap_err();
2554        assert!(matches!(err, AsmError::ResourceLimitExceeded { .. }));
2555    }
2556
2557    // ─── literal pool ────────────────────────────────────────────
2558
2559    #[test]
2560    fn literal_pool_basic_x_reg() {
2561        // LDR X0, =0x12345678 → emits LDR (literal) + pool data at finish
2562        let mut asm = Assembler::new(Arch::Aarch64);
2563        asm.emit("ldr x0, =0x12345678").unwrap();
2564        let result = asm.finish().unwrap();
2565        let bytes = result.bytes();
2566        // First 4 bytes: LDR (literal) instruction
2567        assert!(
2568            bytes.len() >= 8,
2569            "expected at least 8 bytes, got {}",
2570            bytes.len()
2571        );
2572        // Pool data should contain 0x12345678 as 8 bytes LE
2573        let pool_start = bytes.len() - 8;
2574        let pool_val = u64::from_le_bytes(bytes[pool_start..pool_start + 8].try_into().unwrap());
2575        assert_eq!(pool_val, 0x12345678, "pool should contain the constant");
2576    }
2577
2578    #[test]
2579    fn literal_pool_basic_w_reg() {
2580        // LDR W0, =0x42 → emits LDR (literal) + 4-byte pool data
2581        let mut asm = Assembler::new(Arch::Aarch64);
2582        asm.emit("ldr w0, =0x42").unwrap();
2583        let result = asm.finish().unwrap();
2584        let bytes = result.bytes();
2585        // Pool data should contain 0x42 as 4 bytes LE
2586        let pool_start = bytes.len() - 4;
2587        let pool_val = u32::from_le_bytes(bytes[pool_start..pool_start + 4].try_into().unwrap());
2588        assert_eq!(pool_val, 0x42, "pool should contain the constant");
2589    }
2590
2591    #[test]
2592    fn literal_pool_with_ltorg() {
2593        // Explicit .ltorg flushes the pool
2594        let mut asm = Assembler::new(Arch::Aarch64);
2595        asm.emit("ldr x0, =0xCAFE\n.ltorg").unwrap();
2596        let result = asm.finish().unwrap();
2597        let bytes = result.bytes();
2598        // Should have: 4 bytes LDR + alignment + 8 bytes pool data
2599        assert!(bytes.len() >= 12);
2600        // Pool value at end
2601        let pool_start = bytes.len() - 8;
2602        let pool_val = u64::from_le_bytes(bytes[pool_start..pool_start + 8].try_into().unwrap());
2603        assert_eq!(pool_val, 0xCAFE);
2604    }
2605
2606    #[test]
2607    fn literal_pool_deduplication() {
2608        // Two LDR with same value should share one pool entry
2609        let mut asm = Assembler::new(Arch::Aarch64);
2610        asm.emit("ldr x0, =0x1234\nldr x1, =0x1234").unwrap();
2611        let result = asm.finish().unwrap();
2612        let bytes = result.bytes();
2613        // 2 LDR instructions (8 bytes) + alignment + 1 pool entry (8 bytes)
2614        // Without dedup: 8 + alignment + 16 = 24+
2615        // With dedup: 8 + alignment + 8 = 16+
2616        // The pool should contain exactly one 8-byte entry
2617        assert!(
2618            bytes.len() <= 24,
2619            "expected <= 24 bytes with dedup, got {}",
2620            bytes.len()
2621        );
2622    }
2623
2624    #[test]
2625    fn literal_pool_multiple_values() {
2626        // Two LDR with different values → two pool entries
2627        let mut asm = Assembler::new(Arch::Aarch64);
2628        asm.emit("ldr x0, =0xAAAA\nldr x1, =0xBBBB").unwrap();
2629        let result = asm.finish().unwrap();
2630        let bytes = result.bytes();
2631        // Should have both values in the pool
2632        let pool_end = bytes.len();
2633        let val2 = u64::from_le_bytes(bytes[pool_end - 8..pool_end].try_into().unwrap());
2634        let val1 = u64::from_le_bytes(bytes[pool_end - 16..pool_end - 8].try_into().unwrap());
2635        assert!(
2636            (val1 == 0xAAAA && val2 == 0xBBBB) || (val1 == 0xBBBB && val2 == 0xAAAA),
2637            "pool should contain both values, got {:#x} and {:#x}",
2638            val1,
2639            val2
2640        );
2641    }
2642
2643    #[test]
2644    fn literal_pool_ldr_encodes_pc_relative() {
2645        // Verify the LDR instruction word encodes imm19 pointing to the pool
2646        let mut asm = Assembler::new(Arch::Aarch64);
2647        asm.emit("ldr x0, =0xFF").unwrap();
2648        let result = asm.finish().unwrap();
2649        let bytes = result.bytes();
2650        // First 4 bytes are LDR (literal): opc=01 | 011000 | imm19 | Rt
2651        let word = u32::from_le_bytes(bytes[0..4].try_into().unwrap());
2652        // opc should be 01 (64-bit) at bits 31:30
2653        assert_eq!((word >> 30) & 0b11, 0b01, "opc should be 01 for 64-bit LDR");
2654        // bits 29:24 should be 011000
2655        assert_eq!(
2656            (word >> 24) & 0b111111,
2657            0b011000,
2658            "should be LDR literal encoding"
2659        );
2660        // Rt should be X0 = 0
2661        assert_eq!(word & 0x1F, 0, "Rt should be X0");
2662        // imm19 should be positive (pool is after the instruction)
2663        let imm19 = ((word >> 5) & 0x7FFFF) as i32;
2664        assert!(imm19 > 0, "imm19 should be positive (pool is after instr)");
2665    }
2666
2667    #[test]
2668    fn literal_pool_large_64bit_value() {
2669        let mut asm = Assembler::new(Arch::Aarch64);
2670        asm.emit("ldr x0, =0xDEADBEEFCAFEBABE").unwrap();
2671        let result = asm.finish().unwrap();
2672        let bytes = result.bytes();
2673        let pool_start = bytes.len() - 8;
2674        let pool_val = u64::from_le_bytes(bytes[pool_start..pool_start + 8].try_into().unwrap());
2675        assert_eq!(pool_val, 0xDEADBEEFCAFEBABE);
2676    }
2677
2678    #[test]
2679    fn literal_pool_negative_value() {
2680        let mut asm = Assembler::new(Arch::Aarch64);
2681        asm.emit("ldr x0, =-1").unwrap();
2682        let result = asm.finish().unwrap();
2683        let bytes = result.bytes();
2684        let pool_start = bytes.len() - 8;
2685        let pool_val = u64::from_le_bytes(bytes[pool_start..pool_start + 8].try_into().unwrap());
2686        // -1 as u64 = 0xFFFFFFFFFFFFFFFF
2687        assert_eq!(pool_val, 0xFFFFFFFFFFFFFFFF);
2688    }
2689
2690    #[test]
2691    fn literal_pool_pool_directive() {
2692        // .pool is an alias for .ltorg
2693        let mut asm = Assembler::new(Arch::Aarch64);
2694        asm.emit("ldr x0, =0xBEEF\n.pool").unwrap();
2695        let result = asm.finish().unwrap();
2696        let bytes = result.bytes();
2697        let pool_start = bytes.len() - 8;
2698        let pool_val = u64::from_le_bytes(bytes[pool_start..pool_start + 8].try_into().unwrap());
2699        assert_eq!(pool_val, 0xBEEF);
2700    }
2701
2702    #[test]
2703    fn literal_pool_reset_clears_pool() {
2704        let mut asm = Assembler::new(Arch::Aarch64);
2705        asm.emit("ldr x0, =0x1234").unwrap();
2706        asm.reset();
2707        // After reset, pool should be empty; emitting just a NOP should work
2708        asm.emit("nop").unwrap();
2709        let result = asm.finish().unwrap();
2710        assert_eq!(result.bytes(), &[0x1F, 0x20, 0x03, 0xD5]); // NOP only, no pool data
2711    }
2712
2713    // ─── ARM literal pool ────────────────────────────────────────
2714
2715    #[test]
2716    fn arm_literal_pool_basic() {
2717        // LDR R0, =0x12345678 on ARM → LDR (literal) + 4-byte pool entry
2718        let mut asm = Assembler::new(Arch::Arm);
2719        asm.emit("ldr r0, =0x12345678").unwrap();
2720        let result = asm.finish().unwrap();
2721        let bytes = result.bytes();
2722        // First 4 bytes: LDR instruction, then 4 bytes pool data
2723        assert!(
2724            bytes.len() >= 8,
2725            "expected at least 8 bytes, got {}",
2726            bytes.len()
2727        );
2728        // Pool data: 4 bytes LE for ARM
2729        let pool_start = bytes.len() - 4;
2730        let pool_val = u32::from_le_bytes(bytes[pool_start..pool_start + 4].try_into().unwrap());
2731        assert_eq!(pool_val, 0x12345678, "pool should contain the constant");
2732    }
2733
2734    #[test]
2735    fn arm_literal_pool_small_value() {
2736        // Even small values go through the literal pool path
2737        let mut asm = Assembler::new(Arch::Arm);
2738        asm.emit("ldr r3, =42").unwrap();
2739        let result = asm.finish().unwrap();
2740        let bytes = result.bytes();
2741        let pool_start = bytes.len() - 4;
2742        let pool_val = u32::from_le_bytes(bytes[pool_start..pool_start + 4].try_into().unwrap());
2743        assert_eq!(pool_val, 42);
2744    }
2745
2746    #[test]
2747    fn arm_literal_pool_negative_value() {
2748        let mut asm = Assembler::new(Arch::Arm);
2749        asm.emit("ldr r0, =-1").unwrap();
2750        let result = asm.finish().unwrap();
2751        let bytes = result.bytes();
2752        let pool_start = bytes.len() - 4;
2753        let pool_val = u32::from_le_bytes(bytes[pool_start..pool_start + 4].try_into().unwrap());
2754        // -1 as u32 = 0xFFFFFFFF
2755        assert_eq!(pool_val, 0xFFFFFFFF);
2756    }
2757
2758    #[test]
2759    fn arm_literal_pool_deduplication() {
2760        // Two LDR with same value should share one pool entry
2761        let mut asm = Assembler::new(Arch::Arm);
2762        asm.emit("ldr r0, =0xAABB\nldr r1, =0xAABB").unwrap();
2763        let result = asm.finish().unwrap();
2764        let bytes = result.bytes();
2765        // 2 LDR instructions (8 bytes) + alignment + 1 pool entry (4 bytes)
2766        // Without dedup: 8 + 8 = 16; with dedup: 8 + 4 = 12
2767        assert!(
2768            bytes.len() <= 16,
2769            "expected <=16 bytes with dedup, got {}",
2770            bytes.len()
2771        );
2772    }
2773
2774    #[test]
2775    fn arm_literal_pool_multiple_values() {
2776        // Different values → separate pool entries
2777        let mut asm = Assembler::new(Arch::Arm);
2778        asm.emit("ldr r0, =0x1111\nldr r1, =0x2222").unwrap();
2779        let result = asm.finish().unwrap();
2780        let bytes = result.bytes();
2781        // Two 4-byte pool entries at the end
2782        let pool_end = bytes.len();
2783        let val2 = u32::from_le_bytes(bytes[pool_end - 4..pool_end].try_into().unwrap());
2784        let val1 = u32::from_le_bytes(bytes[pool_end - 8..pool_end - 4].try_into().unwrap());
2785        assert!(
2786            (val1 == 0x1111 && val2 == 0x2222) || (val1 == 0x2222 && val2 == 0x1111),
2787            "pool should contain both values, got {:#x} and {:#x}",
2788            val1,
2789            val2
2790        );
2791    }
2792
2793    #[test]
2794    fn arm_literal_pool_with_ltorg() {
2795        // Explicit .ltorg flushes the pool
2796        let mut asm = Assembler::new(Arch::Arm);
2797        asm.emit("ldr r0, =0xCAFE\n.ltorg").unwrap();
2798        let result = asm.finish().unwrap();
2799        let bytes = result.bytes();
2800        assert!(bytes.len() >= 8);
2801        let pool_start = bytes.len() - 4;
2802        let pool_val = u32::from_le_bytes(bytes[pool_start..pool_start + 4].try_into().unwrap());
2803        assert_eq!(pool_val, 0xCAFE);
2804    }
2805
2806    #[test]
2807    fn arm_literal_pool_pool_directive() {
2808        // .pool is an alias for .ltorg
2809        let mut asm = Assembler::new(Arch::Arm);
2810        asm.emit("ldr r0, =0xBEEF\n.pool").unwrap();
2811        let result = asm.finish().unwrap();
2812        let bytes = result.bytes();
2813        let pool_start = bytes.len() - 4;
2814        let pool_val = u32::from_le_bytes(bytes[pool_start..pool_start + 4].try_into().unwrap());
2815        assert_eq!(pool_val, 0xBEEF);
2816    }
2817
2818    #[test]
2819    fn arm_literal_pool_ldr_encodes_pc_relative() {
2820        // Verify the LDR instruction uses PC-relative addressing to pool
2821        let mut asm = Assembler::new(Arch::Arm);
2822        asm.emit("ldr r0, =0xFF").unwrap();
2823        let result = asm.finish().unwrap();
2824        let bytes = result.bytes();
2825        // First 4 bytes: LDR Rd, [PC, #offset]
2826        let word = u32::from_le_bytes(bytes[0..4].try_into().unwrap());
2827        // bits [27:26] = 01 (load/store immediate offset)
2828        assert_eq!(
2829            (word >> 26) & 0b11,
2830            0b01,
2831            "should be load/store word encoding"
2832        );
2833        // bits [19:16] = Rn = 15 (PC)
2834        assert_eq!((word >> 16) & 0xF, 15, "Rn should be PC (R15)");
2835        // bits [15:12] = Rd = 0 (R0)
2836        assert_eq!((word >> 12) & 0xF, 0, "Rd should be R0");
2837        // L bit (bit 20) = 1 (load)
2838        assert_eq!((word >> 20) & 1, 1, "should be a load");
2839    }
2840
2841    #[test]
2842    fn arm_literal_pool_entry_always_4_bytes() {
2843        // ARM pool entries should always be 4 bytes regardless of register
2844        let mut asm = Assembler::new(Arch::Arm);
2845        asm.emit("ldr r0, =0x1\nldr r15, =0x2").unwrap();
2846        let result = asm.finish().unwrap();
2847        let bytes = result.bytes();
2848        // 2 LDR (8 bytes) + 2 pool entries (8 bytes) = 16 bytes
2849        // No alignment needed for 4-byte entries on 4-byte boundary
2850        assert!(
2851            bytes.len() <= 16,
2852            "ARM pool entries should be 4 bytes each, got {} total",
2853            bytes.len()
2854        );
2855    }
2856
2857    #[test]
2858    fn arm_literal_pool_hex_large() {
2859        let mut asm = Assembler::new(Arch::Arm);
2860        asm.emit("ldr r5, =0xDEADBEEF").unwrap();
2861        let result = asm.finish().unwrap();
2862        let bytes = result.bytes();
2863        let pool_start = bytes.len() - 4;
2864        let pool_val = u32::from_le_bytes(bytes[pool_start..pool_start + 4].try_into().unwrap());
2865        assert_eq!(pool_val, 0xDEADBEEF);
2866    }
2867}